*
*/
-#define A2091_DEBUG 0
+#define A2091_DEBUG 1
#define A3000_DEBUG 0
#define WD33C93_DEBUG 0
static int rombankswitcher, rombank;
static int rom_size, rom_mask;
+static int old_dmac = 0;
static uae_u32 dmac_istr, dmac_cntr;
static uae_u32 dmac_dawr;
static uae_u32 dmac_acr;
set_status (wd_phase, 1);
}
+/*
+ * Per-transfer DMA bookkeeping: advance dmac_acr by one, then, only on the
+ * old DMAC with CNTR_TCEN set in dmac_cntr, service the dmac_wtc counter.
+ * A non-zero counter is decremented; a counter already at zero raises
+ * ISTR_E_INT in dmac_istr instead.
+ */
+static void dmacheck (void)
+{
+	dmac_acr++;
+
+	/* the counter logic exists only on the old DMAC and only when enabled */
+	if (!old_dmac || !(dmac_cntr & CNTR_TCEN))
+		return;
+
+	if (dmac_wtc != 0) {
+		dmac_wtc--;
+		return;
+	}
+	dmac_istr |= ISTR_E_INT;
+}
+
static void do_dma (void)
{
if (currprefs.cs_cdtvscsi)
put_byte (dmac_acr, v);
if (wd_dataoffset < sizeof wd_data)
wd_data[wd_dataoffset++] = v;
- dmac_acr++;
+ dmacheck ();
if (status)
break;
}
if (wd_dataoffset < sizeof wd_data)
wd_data[wd_dataoffset++] = v;
status = scsi_send_data (SCSIID, v);
- dmac_acr++;
+ dmacheck ();
if (status)
break;
}
case 0x43:
v = dmac_cntr;
break;
+ case 0x80:
+ if (old_dmac)
+ v = (dmac_wtc >> 24) & 0xff;
+ break;
+ case 0x81:
+ if (old_dmac)
+ v = (dmac_wtc >> 16) & 0xff;
+ break;
+ case 0x82:
+ if (old_dmac)
+ v = (dmac_wtc >> 8) & 0xff;
+ break;
+ case 0x83:
+ if (old_dmac)
+ v = (dmac_wtc >> 0) & 0xff;
+ break;
case 0x91:
v = wdscsi_getauxstatus ();
break;
break;
case 0xe8:
case 0xe9:
- /* FLUSH */
- dmac_istr |= ISTR_FE_FLG;
+ /* FLUSH (new only) */
+ if (!old_dmac && dmac_dma)
+ dmac_istr |= ISTR_FE_FLG;
break;
}
#if A2091_DEBUG > 0
dmac_acr |= b << 8;
break;
case 0x87:
- dmac_acr &= 0xffffff01;
- dmac_acr |= (b & ~ 1) << 0;
+ dmac_acr &= 0xffffff00;
+ dmac_acr |= b << 0;
+ dmac_acr &= ~1;
+ if (old_dmac)
+ dmac_acr &= ~3;
break;
case 0x8e:
dmac_dawr &= 0x00ff;
memset (dmacmemory, 0xff, 100);
ew (0x00, 0xc0 | 0x01 | 0x10);
/* A590/A2091 hardware id */
- ew (0x04, 0x03);
+ ew (0x04, old_dmac ? 0x02 : 0x03);
/* commodore's manufacturer id */
ew (0x10, 0x02);
ew (0x14, 0x02);
roms[2] = 53;
roms[3] = 56;
roms[4] = -1;
- roms[0] = 53;
rombankswitcher = 0;
rombank = 0;
static uae_u8 cdrom_status3;
static uae_u32 cdrom_address1, cdrom_address2;
static uae_u32 cdrom_longmask;
-static uae_u32 cdrom_readmask_r, cdrom_readmask_w;
+static uae_u32 cdrom_readmask;
static uae_u8 cdrom_command_offset_complete; /* 0x19 */
static uae_u8 cdrom_command_offset_todo; /* 0x1d */
static uae_u8 cdrom_result_complete; /* 0x1a */
static uae_u32 cdrom_toc_crc;
static uae_u8 cdrom_toc_buffer[MAX_TOC_ENTRIES * 13];
static uae_u8 cdrom_toc_cd_buffer[4 + MAX_TOC_ENTRIES * 11];
+static uae_u8 qcode_buf[12];
+static int qcode_valid;
static int cdrom_disk, cdrom_paused, cdrom_playing;
static int cdrom_command_active;
static int cdrom_command_length;
static int cdrom_checksum_error;
static int cdrom_data_offset, cdrom_speed, cdrom_sector_counter;
-static int cdrom_current_sector;
+static int cdrom_current_sector, cdrom_seek_delay;
static int cdrom_data_end, cdrom_leadout;
static int cdrom_audiotimeout;
static int cdrom_led;
static int unitnum = -1;
static int cdromok = 0;
static int cd_hunt;
+static volatile int mediachanged, mediacheckcounter;
+static volatile int frame2counter;
+
+static smp_comm_pipe requests;
+static volatile int akiko_thread_running;
static void checkint (void)
{
return msf;
}
-static void cdaudiostop (void)
+static void cdaudiostop_do (void) /* performs the drive-level stop: pause off, stop, pause on */
{
- cdrom_playing = 0;
- cdrom_paused = 0;
+ qcode_valid = 0; /* invalidate the cached qcode_buf snapshot; the Q-code reader returns 0 while this is clear */
if (unitnum < 0)
return;
sys_command_cd_pause (DF_IOCTL, unitnum, 0);
sys_command_cd_stop (DF_IOCTL, unitnum);
sys_command_cd_pause (DF_IOCTL, unitnum, 1);
+}
+
+static void cdaudiostop (void) /* clears the play/pause state and queues the stop instead of issuing it directly */
+{
+ cdrom_playing = 0;
+ cdrom_paused = 0;
cdrom_audiotimeout = 0;
+ write_comm_pipe_u32 (&requests, 0x104, 1); /* request 0x104: the akiko thread answers it by calling cdaudiostop_do () */
+}
+
+/*
+ * Handler for play request 0x110. Pulls the three arguments that follow
+ * the request code on the pipe (start MSF, end MSF, scan flag, in that
+ * order), invalidates the cached Q-code snapshot and, when a drive unit
+ * is open, starts playback.
+ */
+static void cdaudioplay_do (void)
+{
+	/* the three reads must stay in the order the values were queued */
+	const uae_u32 play_start = read_comm_pipe_u32_blocking (&requests);
+	const uae_u32 play_end = read_comm_pipe_u32_blocking (&requests);
+	const uae_u32 play_scan = read_comm_pipe_u32_blocking (&requests);
+
+	qcode_valid = 0;
+	if (unitnum >= 0)
+		sys_command_cd_play (DF_IOCTL, unitnum, play_start, play_end, play_scan);
}
static uae_u32 last_play_end;
#endif
last_play_end = endmsf;
cdrom_audiotimeout = 0;
- return sys_command_cd_play (DF_IOCTL, unitnum, startmsf, endmsf, scan);
+ write_comm_pipe_u32 (&requests, 0x110, 0);
+ write_comm_pipe_u32 (&requests, startmsf, 0);
+ write_comm_pipe_u32 (&requests, endmsf, 0);
+ write_comm_pipe_u32 (&requests, scan, 1);
+ return 1;
}
if (d)
memset (d, 0, 11);
last_play_pos = 0;
- buf = sys_command_cd_qcode (DF_IOCTL, unitnum);
- if (!buf)
+ if (!qcode_valid)
return 0;
+ buf = qcode_buf;
as = buf[1];
if (as != 0x11 && as != 0x12 && as != 0x13 && as != 0x15) /* audio status ok? */
return 0;
sys_command_close (DF_IOCTL, unitnum);
return 1;
}
- if (!sys_command_ismedia(DF_IOCTL, unitnum, 0))
+ if (!sys_command_ismedia (DF_IOCTL, unitnum, 0))
cd_hunt = 1;
write_log (L"using drive %s (unit %d, media %d)\n", di2->label, unitnum, di2->media_inserted);
/* make sure CD audio is not playing */
- cdaudiostop ();
+ cdaudiostop_do ();
return 0;
}
/* close device */
static void sys_cddev_close (void)
{
- cdaudiostop ();
+ cdaudiostop_do (); /* stop the drive directly here rather than queueing request 0x104 via cdaudiostop () */
sys_command_close (DF_IOCTL, unitnum);
}
-static int command_lengths[] = { 1,2,1,1,12,2,1,1,4,1,-1,-1,-1,-1,-1 };
+static int command_lengths[] = { 1,2,1,1,12,2,1,1,4,1,-1,-1,-1,-1,-1,-1 };
static void cdrom_return_data (int len)
{
write_log (L"%02X\n", checksum);
#endif
cdrom_result_complete += len + 1;
+ cdrom_result_complete &= 0xff;
set_status (CDSTATUS_DATA_AVAILABLE);
}
if (cdrom_paused)
return 2;
cdrom_audiotimeout = 0;
- sys_command_cd_pause (DF_IOCTL, unitnum,1);
+ write_comm_pipe_u32 (&requests, 0x102, 1);
cdrom_paused = 1;
return 2;
}
if (!cdrom_playing)
return 2;
cdrom_paused = 0;
- sys_command_cd_pause (DF_IOCTL, unitnum,0);
+ write_comm_pipe_u32 (&requests, 0x103, 1);
return 2;
}
if (cdrom_command_buffer[7] == 0x80) { /* data read */
int cdrom_data_offset_end = msf2lsn (endpos);
cdrom_data_offset = msf2lsn (seekpos);
+ cdrom_seek_delay = abs (cdrom_current_sector - cdrom_data_offset);
+ if (cdrom_seek_delay < 100) {
+ cdrom_seek_delay = 1;
+ } else {
+ cdrom_seek_delay /= 1000;
+ cdrom_seek_delay += 10;
+ if (cdrom_seek_delay > 100)
+ cdrom_seek_delay = 100;
+ }
#if AKIKO_DEBUG_IO_CMD
write_log (L"READ DATA %06X (%d) - %06X (%d) SPD=%dx PC=%08X\n",
seekpos, cdrom_data_offset, endpos, cdrom_data_offset_end, cdrom_speed, M68K_GETPC);
/* DMA transfer one CD sector */
static void cdrom_run_read (void)
{
- int i, j, sector;
+ int i, sector, inc;
int read = 0;
- uae_u8 buf[2352];
int sec;
+ static int seccnt;
if (!(cdrom_longmask & 0x04000000))
return;
- if (!cdrom_readmask_w)
+ if (!cdrom_readmask) {
+ cdrom_longmask &= ~0x08000000;
+ return;
+ }
+ if (!(cdrom_longmask & 0x08000000))
return;
if (cdrom_data_offset < 0)
return;
- j = cdrom_sector_counter & 15;
- if (unitnum >= 0 && (cdrom_readmask_w & (1 << j))) {
- sector = cdrom_current_sector = cdrom_data_offset + cdrom_sector_counter;
+ if (unitnum < 0)
+ return;
+
+ inc = 1;
+ // always use highest available slot or Lotus 3 (Lotus Trilogy) fails to load
+ for (seccnt = 15; seccnt >= 0; seccnt--) {
+ if (cdrom_readmask & (1 << seccnt))
+ break;
+ }
+ if (cdrom_readmask & (1 << seccnt)) {
+ sector = cdrom_current_sector = cdrom_data_offset + cdrom_sector_counter;
sec = sector - sector_buffer_sector_1;
if (sector_buffer_sector_1 >= 0 && sec >= 0 && sec < SECTOR_BUFFER_SIZE) {
if (sector_buffer_info_1[sec] != 0xff && sector_buffer_info_1[sec] != 0) {
+ uae_u8 buf[2352];
+
memcpy (buf + 16, sector_buffer_1 + sec * 2048, 2048);
encode_l2 (buf, sector + 150);
buf[0] = 0;
buf[1] = 0;
buf[2] = 0;
- buf[3] = cdrom_sector_counter;
+ buf[3] = cdrom_sector_counter & 31;
for (i = 0; i < 2352; i++)
- put_byte (cdrom_address1 + j * 4096 + i, buf[i]);
- cdrom_readmask_r |= 1 << j;
+ put_byte (cdrom_address1 + seccnt * 4096 + i, buf[i]);
+ for (i = 0; i < 73 * 2; i++)
+ put_byte (cdrom_address1 + seccnt * 4096 + 0xc00 + i, 0);
+ cdrom_readmask &= ~(1 << seccnt);
+ set_status (CDSTATUS_DATASECTOR);
+ } else {
+ inc = 0;
}
if (sector_buffer_info_1[sec] != 0xff)
sector_buffer_info_1[sec]--;
- } else {
- return;
- }
#if AKIKO_DEBUG_IO_CMD
- write_log (L"read sector=%d, scnt=%d -> %d. %08X\n",
- cdrom_data_offset, cdrom_sector_counter, sector, cdrom_address1 + j * 4096);
+ write_log (L"read sector=%d, scnt=%d -> %d. %08X\n",
+ cdrom_data_offset, cdrom_sector_counter, sector, cdrom_address1 + seccnt * 4096);
#endif
- cdrom_readmask_w &= ~(1 << j);
+ } else {
+ inc = 0;
+ }
}
- cdrom_sector_counter++;
- if (cdrom_readmask_w == 0)
- set_status (CDSTATUS_DATASECTOR);
-
+ if (inc)
+ cdrom_sector_counter++;
}
static uae_sem_t akiko_sem;
static void akiko_handler (void)
{
- static int mediacheckcnt;
-
if (unitnum < 0)
return;
if (cdrom_result_complete > cdrom_result_last_pos && cdrom_result_complete - cdrom_result_last_pos < 100) {
}
if (cdrom_result_last_pos < cdrom_result_complete)
return;
- if (mediacheckcnt > 0)
- mediacheckcnt--;
- if (mediacheckcnt == 0) {
- int media = sys_command_ismedia (DF_IOCTL, unitnum, 0);
- mediacheckcnt = 312 * 50 * 2;
- if (media != lastmediastate) {
- write_log (L"media changed = %d\n", media);
- lastmediastate = cdrom_disk = media;
- cdrom_return_data (cdrom_command_media_status ());
- if (!media)
- cd_hunt = 1;
- cdrom_toc ();
- /* do not remove! first try may fail */
- cdrom_toc ();
- return;
- }
+ if (mediachanged) {
+ mediachanged = 0;
+ cdrom_return_data (cdrom_command_media_status ());
+ if (!lastmediastate)
+ cd_hunt = 1;
+ cdrom_toc ();
+ /* do not remove! first try may fail */
+ cdrom_toc ();
+ return;
}
if (cdrom_toc_counter >= 0 && !cdrom_command_active && cdrom_dosomething) {
cdrom_return_data (cdrom_return_toc_entry ());
}
}
-static void do_hunt(void)
+static void do_hunt (void)
{
int i;
for (i = 0; i < MAX_TOTAL_DEVICES; i++) {
- if (sys_command_ismedia(DF_IOCTL, i, 1) > 0)
+ if (sys_command_ismedia (DF_IOCTL, i, 1) > 0)
break;
}
if (i == MAX_TOTAL_DEVICES)
return;
if (unitnum >= 0) {
int ou = unitnum;
- cdaudiostop();
unitnum = -1;
- sys_command_close(DF_IOCTL, ou);
+ sys_command_close (DF_IOCTL, ou);
}
- if (sys_command_open(DF_IOCTL, i) > 0) {
+ if (sys_command_open (DF_IOCTL, i) > 0) {
unitnum = i;
cd_hunt = 0;
write_log (L"CD32: autodetected unit %d\n", unitnum);
if (cd_hunt) {
static int huntcnt;
if (huntcnt <= 0) {
- do_hunt();
+ do_hunt ();
huntcnt = 312 * 50 * 2;
}
huntcnt--;
if (framecounter <= 0) {
if (cdrom_led)
gui_cd_led (0, 1);
- cdrom_run_read ();
- framecounter = 1000000 / (74 * 75 * cdrom_speed);
+ if (cdrom_seek_delay <= 0) {
+ cdrom_run_read ();
+ } else {
+ cdrom_seek_delay--;
+ }
+ framecounter = 1000000 / (59 * 75 * cdrom_speed);
set_status (CDSTATUS_FRAME);
cdrom_status3++;
}
+
+ if (frame2counter > 0)
+ frame2counter--;
+ if (mediacheckcounter > 0)
+ mediacheckcounter--;
+
if (cdrom_playing) {
- static int frame2counter;
if (cdrom_audiotimeout > 0) {
cdrom_audiotimeout--;
if (cdrom_audiotimeout == 0) {
cdrom_return_data (2);
}
}
- frame2counter--;
+ }
+ akiko_internal ();
+ akiko_handler ();
+}
+
+/* cdrom data buffering thread */
+static void *akiko_thread (void *null)
+{
+ int i;
+ uae_u8 *tmp1;
+ uae_u8 *tmp2;
+ int tmp3;
+ uae_u8 *p;
+ int offset;
+ int sector;
+
+ while (akiko_thread_running || comm_pipe_has_data (&requests)) {
+
+ if (comm_pipe_has_data (&requests)) {
+ uae_u32 b = read_comm_pipe_u32_blocking (&requests);
+ switch (b)
+ {
+ case 0x0102: // pause
+ sys_command_cd_pause (DF_IOCTL, unitnum, 1);
+ break;
+ case 0x0103: // unpause
+ sys_command_cd_pause (DF_IOCTL, unitnum, 0);
+ break;
+ case 0x0104: // stop
+ cdaudiostop_do ();
+ break;
+ case 0x0110: // do_play!
+ cdaudioplay_do ();
+ break;
+ }
+ }
+
if (frame2counter <= 0) {
uae_u8 *s;
- frame2counter = 312 * 50 * 2;
+ frame2counter = 312 * 50 / 2;
s = sys_command_cd_qcode (DF_IOCTL, unitnum);
if (s) {
uae_u8 as = s[1];
+ memcpy (qcode_buf, s, sizeof qcode_buf);
+ qcode_valid = 1;
if (as == AUDIO_STATUS_IN_PROGRESS) {
int lsn = msf2lsn ((s[5 + 4] << 16) | (s[6 + 4] << 8) | (s[7 + 4] << 0));
//write_log("%d %d (%d %d)\n", lsn, msf2lsn (last_play_end) - lsn, cdrom_leadout, msf2lsn (last_play_end));
}
}
}
- }
- akiko_internal ();
- akiko_handler ();
-}
-
-static volatile int akiko_thread_running;
-/* cdrom data buffering thread */
-static void *akiko_thread (void *null)
-{
- int i;
- uae_u8 *tmp1;
- uae_u8 *tmp2;
- int tmp3;
- uae_u8 *p;
- int offset;
- int sector;
+ if (mediacheckcounter <= 0) {
+ int media = sys_command_ismedia (DF_IOCTL, unitnum, 1);
+ mediacheckcounter = 312 * 50 * 2;
+ if (media != lastmediastate) {
+ write_log (L"media changed = %d\n", media);
+ lastmediastate = cdrom_disk = media;
+ mediachanged = 1;
+ cdaudiostop_do ();
+ }
+ }
- while(akiko_thread_running) {
uae_sem_wait (&akiko_sem);
sector = cdrom_current_sector;
for (i = 0; i < SECTOR_BUFFER_SIZE; i++) {
break;
case 0x20:
case 0x21:
- v = akiko_get_long (cdrom_readmask_r, addr - 0x20 + 2);
+ v = akiko_get_long (cdrom_readmask, addr - 0x20 + 2);
break;
case 0x24:
case 0x25:
v |= akiko_bget2 (addr + 1, 0) << 16;
v |= akiko_bget2 (addr + 0, 0) << 24;
if (addr < 0x30 && (addr != 4 && addr != 8) && AKIKO_DEBUG_IO)
- write_log (L"akiko_lget %08: %08X %08X\n", M68K_GETPC, addr, v);
+ write_log (L"akiko_lget %08X: %08X %08X\n", M68K_GETPC, addr, v);
return v;
}
-static void write_readmask(uae_u16 v)
-{
- int i, cnt;
-
- cnt = 0;
- for (i = 0; i < 16; i++) {
- if (v & (1 << i))
- cnt++;
- }
- cdrom_readmask_w |= v;
- cdrom_readmask_r = 0;
-}
-
-
static void akiko_bput2 (uaecptr addr, uae_u32 v, int msg)
{
uae_u32 tmp;
case 0x12:
case 0x13:
akiko_put_long (&cdrom_address1, addr - 0x10, v);
+ cdrom_address1 &= ~0xffff;
break;
case 0x14:
case 0x15:
cdrom_result_last_pos = v;
break;
case 0x20:
- write_readmask(v <<8);
- break;
case 0x21:
- write_readmask(v);
+ tmp = cdrom_readmask;
+ akiko_put_long (&cdrom_readmask, addr - 0x20 + 2, v);
+ cdrom_readmask |= tmp;
+ cdrom_readmask &= 0xffff;
+ cdrom_status1 &= ~CDSTATUS_DATASECTOR;
break;
case 0x24:
case 0x25:
akiko_put_long (&cdrom_longmask, addr - 0x24, v);
if ((cdrom_longmask & 0x04000000) && !(tmp & 0x04000000))
cdrom_sector_counter = 0;
+ if (!(cdrom_longmask & 0x08000000) && (tmp & 0x08000000))
+ cdrom_readmask = 0;
break;
} else if (addr < 0x30) {
break;
void akiko_reset (void)
{
- cdaudiostop ();
+ cdaudiostop_do ();
nvram_read ();
state = I2C_WAIT;
bitcounter = -1;
lastmediastate = 0;
if (akiko_thread_running > 0) {
+ cdaudiostop ();
akiko_thread_running = 0;
while(akiko_thread_running == 0)
Sleep (10);
akiko_cdrom_free ();
}
-static uae_u8 patchdata1[]={0x0c,0x82,0x00,0x00,0x03,0xe8,0x64,0x00,0x00,0x46};
-static uae_u8 patchdata2[]={0x0c,0x82,0x00,0x00,0x03,0xe8,0x4e,0x71,0x4e,0x71};
-
-static void patchrom (void)
-{
- int i;
- uae_u8 *p = (uae_u8*)extendedkickmemory;
-
- if (!p)
- return;
- for (i = 0; i < 524288 - sizeof (patchdata1); i++) {
- if (!memcmp (p + i, patchdata1, sizeof(patchdata1))) {
- p[i + 6] = 0x4e;
- p[i + 7] = 0x71;
- p[i + 8] = 0x4e;
- p[i + 9] = 0x71;
- write_log (L"extended rom delay loop patched at 0x%08x\n", i + 6 + 0xe00000);
- return;
- }
- if (!memcmp (p + i, patchdata2, sizeof(patchdata2)))
- return;
- }
- write_log (L"couldn't patch extended rom\n");
-}
void akiko_free (void)
{
sector_buffer_info_2 = xmalloc (SECTOR_BUFFER_SIZE);
sector_buffer_sector_1 = -1;
sector_buffer_sector_2 = -1;
- patchrom ();
}
}
uae_sem_init (&akiko_sem, 0, 1);
}
if (cdromok && !akiko_thread_running) {
akiko_thread_running = 1;
+ init_comm_pipe (&requests, 100, 1);
uae_start_thread (L"akiko", akiko_thread, 0, NULL);
}
return 1;
save_u8 (cdrom_command_offset_todo);
save_u8 (0);
save_u8 (cdrom_result_last_pos);
- save_u16 ((uae_u16)cdrom_readmask_w);
+ save_u16 ((uae_u16)cdrom_readmask);
save_u16 (0);
save_u32 (cdrom_longmask);
save_u32 (0);
return dstbak;
}
-uae_u8 *restore_akiko(uae_u8 *src)
+uae_u8 *restore_akiko (uae_u8 *src)
{
uae_u32 v;
int i;
cdrom_command_offset_todo = restore_u8 ();
restore_u8 ();
cdrom_result_last_pos = restore_u8 ();
- cdrom_readmask_w = restore_u16 ();
+ cdrom_readmask = restore_u16 ();
restore_u16 ();
cdrom_longmask = restore_u32 ();
restore_u32 ();
return src;
}
-void restore_akiko_finish(void)
+void restore_akiko_finish (void)
{
if (!currprefs.cs_cd32cd)
return;
akiko_c2p_do ();
- sys_command_cd_pause (DF_IOCTL, unitnum, 0);
- sys_command_cd_stop (DF_IOCTL, unitnum);
- sys_command_cd_pause (DF_IOCTL, unitnum, 1);
+ write_comm_pipe_u32 (&requests, 0x102, 1); // pause (thread: sys_command_cd_pause (..., 1)); NOTE(review): the removed code started with pause (..., 0) - confirm the reversed order is intended
+ write_comm_pipe_u32 (&requests, 0x104, 1); // stop (thread: cdaudiostop_do (), which itself does pause 0 / stop / pause 1)
+ write_comm_pipe_u32 (&requests, 0x103, 1); // unpause (thread: sys_command_cd_pause (..., 0)); NOTE(review): the removed code ended with pause (..., 1)
if (cdrom_playing) {
- sys_command_cd_pause (DF_IOCTL, unitnum, 0);
- sys_command_cd_play (DF_IOCTL, unitnum, last_play_pos, last_play_end, 0);
+ write_comm_pipe_u32 (&requests, 0x103, 1); // unpause
+ write_comm_pipe_u32 (&requests, 0x110, 0); // play request; its three arguments follow on the pipe
+ write_comm_pipe_u32 (&requests, last_play_pos, 0); // argument 1: start position
+ write_comm_pipe_u32 (&requests, last_play_end, 0); // argument 2: end position
+ write_comm_pipe_u32 (&requests, 0, 1); // argument 3: scan = 0, same value the removed direct call passed
}
}
void akiko_entergui (void)
{
if (cdrom_playing)
- sys_command_cd_pause (DF_IOCTL, unitnum, 1);
+ write_comm_pipe_u32 (&requests, 0x102, 1); /* queue request 0x102: the akiko thread answers it with sys_command_cd_pause (DF_IOCTL, unitnum, 1) */
}
void akiko_exitgui (void)
{
if (cdrom_playing)
- sys_command_cd_pause (DF_IOCTL, unitnum, 0);
+ write_comm_pipe_u32 (&requests, 0x103, 1); /* queue request 0x103: the akiko thread answers it with sys_command_cd_pause (DF_IOCTL, unitnum, 0) */
}
typedef uae_s8 sample8_t;
#define DO_CHANNEL_1(v, c) do { (v) *= audio_channel[c].vol; } while (0)
#define SBASEVAL16(logn) ((logn) == 1 ? SOUND16_BASE_VAL >> 1 : SOUND16_BASE_VAL)
-#define FINISH_DATA(data, b, logn) do { if (14 - (b) + (logn) > 0) (data) >>= 14 - (b) + (logn); else (data) <<= (b) - 14 - (logn); } while (0);
+
+/*
+ * Rescale a mixed sample value.
+ *
+ * When 14 - bits + logn is positive the value is shifted right by that
+ * amount. Otherwise it is shifted left by bits - 14 - logn, and the bits
+ * that occupied the low `shift` positions before the shift are OR-ed back
+ * into the vacated positions.
+ *
+ * data  sample value, may be negative
+ * bits  first scaling parameter (callers in this file pass 16)
+ * logn  second scaling parameter (callers in this file pass 0, 1 or 2)
+ *
+ * Returns the rescaled value.
+ */
+STATIC_INLINE int FINISH_DATA (int data, int bits, int logn)
+{
+	int down = 14 - bits + logn;
+
+	if (down > 0) {
+		data >>= down;
+	} else {
+		int shift = -down;
+		unsigned int wide = (unsigned int)data;
+		unsigned int low = wide & ((1u << shift) - 1);
+
+		/*
+		 * Shift in unsigned arithmetic: left-shifting a negative int
+		 * is undefined behaviour, while the unsigned shift yields the
+		 * same bit pattern without it.
+		 */
+		data = (int)((wide << shift) | low);
+	}
+	return data;
+}
static uae_u32 right_word_saved[SOUND_MAX_DELAY_BUFFER];
static uae_u32 left_word_saved[SOUND_MAX_DELAY_BUFFER];
samplexx_sinc_handler (datas);
data1 = datas[0] + datas[3] + datas[1] + datas[2];
- FINISH_DATA (data1, 16, 2);
+ data1 = FINISH_DATA (data1, 16, 2);
PUT_SOUND_WORD_MONO (data1);
- check_sound_buffers ();
+ check_sound_buffers (outputsample, doublesample);
}
void sample16_handler (void)
uae_u32 data1 = audio_channel[1].current_sample;
uae_u32 data2 = audio_channel[2].current_sample;
uae_u32 data3 = audio_channel[3].current_sample;
+ uae_u32 data;
+
DO_CHANNEL_1 (data0, 0);
DO_CHANNEL_1 (data1, 1);
DO_CHANNEL_1 (data2, 2);
data0 += data1;
data0 += data2;
data0 += data3;
- if (outputsample) {
- uae_u32 data = SBASEVAL16(2) + data0;
- FINISH_DATA (data, 16, 2);
- PUT_SOUND_WORD_MONO (data);
- check_sound_buffers ();
- if (doublesample) {
- PUT_SOUND_WORD_MONO (data);
- check_sound_buffers ();
- }
- }
+ data = SBASEVAL16(2) + data0;
+ data = FINISH_DATA (data, 16, 2);
+ PUT_SOUND_WORD_MONO (data);
+ check_sound_buffers (outputsample, doublesample);
}
/* This interpolator examines sample points when Paula switches the output
samplexx_anti_handler (datas);
data1 = datas[0] + datas[3] + datas[1] + datas[2];
- if (outputsample) {
- FINISH_DATA (data1, 16, 2);
- PUT_SOUND_WORD_MONO (data1);
- check_sound_buffers ();
- if (doublesample) {
- PUT_SOUND_WORD_MONO (data1);
- check_sound_buffers ();
- }
- }
+ data1 = FINISH_DATA (data1, 16, 2);
+ PUT_SOUND_WORD_MONO (data1);
+ check_sound_buffers (outputsample, doublesample);
}
static void sample16i_rh_handler (void)
uae_u32 data1p = audio_channel[1].last_sample;
uae_u32 data2p = audio_channel[2].last_sample;
uae_u32 data3p = audio_channel[3].last_sample;
+ uae_u32 data;
+
DO_CHANNEL_1 (data0, 0);
DO_CHANNEL_1 (data1, 1);
DO_CHANNEL_1 (data2, 2);
delta = audio_channel[3].per;
ratio = ((audio_channel[3].evtime % delta) << 8) / delta;
data0 += (data3 * (256 - ratio) + data3p * ratio) >> 8;
- if (outputsample) {
- uae_u32 data = SBASEVAL16(2) + data0;
- FINISH_DATA (data, 16, 2);
- PUT_SOUND_WORD_MONO (data);
- check_sound_buffers ();
- if (doublesample) {
- PUT_SOUND_WORD_MONO (data);
- check_sound_buffers ();
- }
- }
+ data = SBASEVAL16(2) + data0;
+ data = FINISH_DATA (data, 16, 2);
+ PUT_SOUND_WORD_MONO (data);
+ check_sound_buffers (outputsample, doublesample);
}
static void sample16i_crux_handler (void)
uae_u32 data1p = audio_channel[1].last_sample;
uae_u32 data2p = audio_channel[2].last_sample;
uae_u32 data3p = audio_channel[3].last_sample;
+ uae_u32 data;
+
DO_CHANNEL_1 (data0, 0);
DO_CHANNEL_1 (data1, 1);
DO_CHANNEL_1 (data2, 2);
data1 += data2;
data0 += data3;
data0 += data1;
- if (outputsample) {
- uae_u32 data = SBASEVAL16(2) + data0;
- FINISH_DATA (data, 16, 2);
- PUT_SOUND_WORD_MONO (data);
- check_sound_buffers ();
- if (doublesample) {
- PUT_SOUND_WORD_MONO (data);
- check_sound_buffers ();
- }
- }
+ data = SBASEVAL16(2) + data0;
+ data = FINISH_DATA (data, 16, 2);
+ PUT_SOUND_WORD_MONO (data);
+ check_sound_buffers (outputsample, doublesample);
}
#ifdef HAVE_STEREO_SUPPORT
data2 &= audio_channel[2].adk_mask;
data3 &= audio_channel[3].adk_mask;
- if (outputsample) {
- put_sound_word_left (data0 << 2);
- put_sound_word_right (data1 << 2);
- if (currprefs.sound_stereo == SND_6CH)
- make6ch (data0, data1, data2, data3);
- put_sound_word_left2 (data3 << 2);
- put_sound_word_right2 (data2 << 2);
- check_sound_buffers ();
- if (doublesample) {
- put_sound_word_left (data0 << 2);
- put_sound_word_right (data1 << 2);
- if (currprefs.sound_stereo == SND_6CH)
- make6ch (data0, data1, data2, data3);
- put_sound_word_left2 (data3 << 2);
- put_sound_word_right2 (data2 << 2);
- check_sound_buffers ();
- }
- }
+ data0 = FINISH_DATA (data0, 16, 0);
+ data1 = FINISH_DATA (data1, 16, 0);
+ data2 = FINISH_DATA (data2, 16, 0);
+ data3 = FINISH_DATA (data3, 16, 0);
+ put_sound_word_left (data0);
+ put_sound_word_right (data1);
+ if (currprefs.sound_stereo == SND_6CH)
+ make6ch (data0, data1, data2, data3);
+ put_sound_word_left2 (data3);
+ put_sound_word_right2 (data2);
+ check_sound_buffers (outputsample, doublesample);
}
/* This interpolator examines sample points when Paula switches the output
void sample16ss_anti_handler (void)
{
+ int data0, data1, data2, data3;
int datas[4];
samplexx_anti_handler (datas);
- if (outputsample) {
- put_sound_word_left (datas[0] << 2);
- put_sound_word_right (datas[1] << 2);
- if (currprefs.sound_stereo == SND_6CH)
- make6ch (datas[0], datas[1], datas[2], datas[3]);
- put_sound_word_left2 (datas[3] << 2);
- put_sound_word_right2 (datas[2] << 2);
- check_sound_buffers ();
- if (doublesample) {
- put_sound_word_left (datas[0] << 2);
- put_sound_word_right (datas[1] << 2);
- if (currprefs.sound_stereo == SND_6CH)
- make6ch (datas[0], datas[1], datas[2], datas[3]);
- put_sound_word_left2 (datas[3] << 2);
- put_sound_word_right2 (datas[2] << 2);
- check_sound_buffers ();
- }
- }
+ data0 = FINISH_DATA (datas[0], 16, 0);
+ data1 = FINISH_DATA (datas[1], 16, 0);
+ data2 = FINISH_DATA (datas[2], 16, 0);
+ data3 = FINISH_DATA (datas[3], 16, 0);
+ put_sound_word_left (data0);
+ put_sound_word_right (data1);
+ if (currprefs.sound_stereo == SND_6CH)
+ make6ch (data0, data1, data2, data3);
+ put_sound_word_left2 (data3);
+ put_sound_word_right2 (data2);
+ check_sound_buffers (outputsample, doublesample);
}
static void sample16si_anti_handler (void)
int datas[4], data1, data2;
samplexx_anti_handler (datas);
- if (outputsample) {
- data1 = datas[0] + datas[3];
- data2 = datas[1] + datas[2];
- FINISH_DATA (data1, 16, 1);
- FINISH_DATA (data2, 16, 1);
- put_sound_word_left (data1);
- put_sound_word_right (data2);
- check_sound_buffers ();
- if (doublesample) {
- put_sound_word_left (data1);
- put_sound_word_right (data2);
- check_sound_buffers ();
- }
- }
+ data1 = datas[0] + datas[3];
+ data2 = datas[1] + datas[2];
+ data1 = FINISH_DATA (data1, 16, 1);
+ data2 = FINISH_DATA (data2, 16, 1);
+ put_sound_word_left (data1);
+ put_sound_word_right (data2);
+ check_sound_buffers (outputsample, doublesample);
}
void sample16ss_sinc_handler (void)
{
+ int data0, data1, data2, data3;
int datas[4];
samplexx_sinc_handler (datas);
- if (outputsample) {
- put_sound_word_left (datas[0] << 2);
- put_sound_word_right (datas[1] << 2);
- if (currprefs.sound_stereo == SND_6CH)
- make6ch (datas[0], datas[1], datas[2], datas[3]);
- put_sound_word_left2 (datas[3] << 2);
- put_sound_word_right2 (datas[2] << 2);
- check_sound_buffers ();
- if (doublesample) {
- put_sound_word_left (datas[0] << 2);
- put_sound_word_right (datas[1] << 2);
- if (currprefs.sound_stereo == SND_6CH)
- make6ch (datas[0], datas[1], datas[2], datas[3]);
- put_sound_word_left2 (datas[3] << 2);
- put_sound_word_right2 (datas[2] << 2);
- check_sound_buffers ();
- }
- }
+ data0 = FINISH_DATA (datas[0], 16, 0);
+ data1 = FINISH_DATA (datas[1], 16, 0);
+ data2 = FINISH_DATA (datas[2], 16, 0);
+ data3 = FINISH_DATA (datas[3], 16, 0);
+ put_sound_word_left (data0);
+ put_sound_word_right (data1);
+ if (currprefs.sound_stereo == SND_6CH)
+ make6ch (data0, data1, data2, data3);
+ put_sound_word_left2 (data3);
+ put_sound_word_right2 (data2);
+ check_sound_buffers (outputsample, doublesample);
}
static void sample16si_sinc_handler (void)
int datas[4], data1, data2;
samplexx_sinc_handler (datas);
- if (outputsample) {
- data1 = datas[0] + datas[3];
- data2 = datas[1] + datas[2];
- FINISH_DATA (data1, 16, 1);
- FINISH_DATA (data2, 16, 1);
- put_sound_word_left (data1);
- put_sound_word_right (data2);
- check_sound_buffers ();
- if (doublesample) {
- put_sound_word_left (data1);
- put_sound_word_right (data2);
- check_sound_buffers ();
- }
- }
+ data1 = datas[0] + datas[3];
+ data2 = datas[1] + datas[2];
+ data1 = FINISH_DATA (data1, 16, 1);
+ data2 = FINISH_DATA (data2, 16, 1);
+ put_sound_word_left (data1);
+ put_sound_word_right (data2);
+ check_sound_buffers (outputsample, doublesample);
}
void sample16s_handler (void)
data0 += data3;
data1 += data2;
- if (outputsample) {
- data2 = SBASEVAL16(1) + data0;
- FINISH_DATA (data2, 16, 1);
- data3 = SBASEVAL16(1) + data1;
- FINISH_DATA (data3, 16, 1);
- put_sound_word_left (data2);
- put_sound_word_right (data3);
- check_sound_buffers ();
- if (doublesample) {
- put_sound_word_left (data2);
- put_sound_word_right (data3);
- check_sound_buffers ();
- }
- }
+ data2 = SBASEVAL16(1) + data0;
+ data2 = FINISH_DATA (data2, 16, 1);
+ data3 = SBASEVAL16(1) + data1;
+ data3 = FINISH_DATA (data3, 16, 1);
+ put_sound_word_left (data2);
+ put_sound_word_right (data3);
+ check_sound_buffers (outputsample, doublesample);
}
static void sample16si_crux_handler (void)
}
data1 += data2;
data0 += data3;
- if (outputsample) {
- data2 = SBASEVAL16(1) + data0;
- FINISH_DATA (data2, 16, 1);
- data3 = SBASEVAL16(1) + data1;
- FINISH_DATA (data3, 16, 1);
- put_sound_word_left (data2);
- put_sound_word_right (data3);
- check_sound_buffers ();
- if (doublesample) {
- put_sound_word_left (data2);
- put_sound_word_right (data3);
- check_sound_buffers ();
- }
- }
+ data2 = SBASEVAL16(1) + data0;
+ data2 = FINISH_DATA (data2, 16, 1);
+ data3 = SBASEVAL16(1) + data1;
+ data3 = FINISH_DATA (data3, 16, 1);
+ put_sound_word_left (data2);
+ put_sound_word_right (data3);
+ check_sound_buffers (outputsample, doublesample);
}
static void sample16si_rh_handler (void)
delta = audio_channel[3].per;
ratio = ((audio_channel[3].evtime % delta) << 8) / delta;
data0 += (data3 * (256 - ratio) + data3p * ratio) >> 8;
- if (outputsample) {
- data2 = SBASEVAL16(1) + data0;
- FINISH_DATA (data2, 16, 1);
- data3 = SBASEVAL16(1) + data1;
- FINISH_DATA (data3, 16, 1);
- put_sound_word_left (data2);
- put_sound_word_right (data3);
- check_sound_buffers ();
- if (doublesample) {
- put_sound_word_left (data2);
- put_sound_word_right (data3);
- check_sound_buffers ();
- }
- }
+ data2 = SBASEVAL16(1) + data0;
+ data2 = FINISH_DATA (data2, 16, 1);
+ data3 = SBASEVAL16(1) + data1;
+ data3 = FINISH_DATA (data3, 16, 1);
+ put_sound_word_left (data2);
+ put_sound_word_right (data3);
+ check_sound_buffers (outputsample, doublesample);
}
#else
void update_audio (void)
{
unsigned long int n_cycles = 0;
+ static int samplecounter;
if (!isaudio ())
goto end;
if (rounded == best_evtime) {
/* Before the following addition, next_sample_evtime is in range [-0.5, 0.5) */
next_sample_evtime += scaled_sample_evtime;
- if (extrasamples > 0) {
- outputsample = 1;
- doublesample = 1;
- extrasamples--;
- } else if (extrasamples < 0) {
- outputsample = 0;
- doublesample = 0;
- extrasamples++;
- } else {
- outputsample = 1;
- doublesample = 0;
+ doublesample = 0;
+ if (--samplecounter <= 0) {
+ samplecounter = currprefs.sound_freq / 100;
+ if (extrasamples > 0) {
+ outputsample = 1;
+ doublesample = 1;
+ extrasamples--;
+ } else if (extrasamples < 0) {
+ outputsample = 0;
+ doublesample = 0;
+ extrasamples++;
+ }
}
(*sample_handler) ();
+ if (outputsample == 0)
+ outputsample = -1;
+ else if (outputsample < 0)
+ outputsample = 1;
+
}
}
deftrap (NULL); /* Generic emulator trap */
- EXPANSION_nullfunc = here ();
- calltrap (deftrap (nullfunc));
- dw (RTS);
+ dw (0);
+ dw (0);
a = here();
/* Dummy trap - removing this breaks the filesys emulation. */
{
if (bltstate == BLT_init) {
write_log (L"blitter was started but DMA was inactive during save\n");
- do_blitter (0);
+ //do_blitter (0);
}
}
}
//catweasel_init_controller(&cwc);
- _stprintf (tmp, L"CW: Catweasel MK%d @%p (%s) enabled. %s.",
- cwc.type, (uae_u8*)cwc.iobase, name, cwc.direct_access ? L"DIRECTIO": L"API");
+ _stprintf (tmp, L"CW: Catweasel MK%d @%08x (%s) enabled. %s.",
+ cwc.type, (int)cwc.iobase, name, cwc.direct_access ? L"DIRECTIO": L"API");
if (cwc.direct_access) {
if (cwc.can_sid) {
TCHAR *p = tmp + _tcslen (tmp);
//#define FMV_DEBUG
+#define FMV_BASE 0x40000
+#define AUDIO_BASE 0x50000
+#define VIDEO_BASE 0x70000
+#define VIDEO_RAM 0x80000
+
+// L64111 registers (from datasheet)
+#define A_DATA 0
+#define A_CONTROL1 2
+#define A_CONTROL2 4
+#define A_CONTROL3 6
+#define A_INT1 8
+#define A_INT2 10
+#define A_TCR 12
+#define A_TORH 14
+#define A_TORL 16
+#define A_PARAM1 18
+#define A_PARAM2 20
+#define A_PARAM3 22
+#define A_PRESENT1 24
+#define A_PRESENT2 26
+#define A_PRESENT3 28
+#define A_PRESENT4 30
+#define A_PRESENT5 32
+#define A_FIFO 34
+#define A_CB_STATUS 36
+#define A_CB_WRITE 38
+#define A_CB_READ 40
+
static int fmv_mask;
static uae_u8 *rom;
static int rom_size = 262144;
#ifdef FMV_DEBUG
write_log (L"fmv_bget2 %08X PC=%8X\n", addr, M68K_GETPC);
#endif
- if (addr >= rom_size) {
+ if (addr >= rom_size && addr < 0x80000) {
write_log (L"fmv_bget2 %08X PC=%8X\n", addr, M68K_GETPC);
return 0;
}
}
static void fmv_bput2 (uaecptr addr, uae_u8 v)
{
- if (addr >= rom_size && addr < 0xf0000) {
- ;//write_log (L"fmv_bput2 %08X=%02X PC=%8X\n", addr, v & 0xff, M68K_GETPC);
+ if (addr >= rom_size && addr < 0x80000) {
+ write_log (L"fmv_bput2 %08X=%02X PC=%8X\n", addr, v & 0xff, M68K_GETPC);
}
}
sys_command_cd_pause (DF_IOCTL, unitnum, 0);
break;
case 0x0104: // stop
- cdaudiostop();
+ cdaudiostop ();
break;
case 0x0110: // do_play!
do_play ();
}
if (_tcscmp (option, L"joyportfriendlyname0") == 0 || _tcscmp (option, L"joyportfriendlyname1") == 0) {
- inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportfriendlyname0") == 0 ? 0 : 1, 2);
+ inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportfriendlyname0") == 0 ? 0 : 1, 0, 2);
return 1;
}
if (_tcscmp (option, L"joyportfriendlyname2") == 0 || _tcscmp (option, L"joyportfriendlyname3") == 0) {
- inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportfriendlyname2") == 0 ? 2 : 3, 2);
+ inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportfriendlyname2") == 0 ? 2 : 3, 0, 2);
return 1;
}
if (_tcscmp (option, L"joyportname0") == 0 || _tcscmp (option, L"joyportname1") == 0) {
- inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportname0") == 0 ? 0 : 1, 1);
+ inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportname0") == 0 ? 0 : 1, 0, 1);
return 1;
}
if (_tcscmp (option, L"joyportname2") == 0 || _tcscmp (option, L"joyportname3") == 0) {
- inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportname2") == 0 ? 2 : 3, 1);
+ inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportname2") == 0 ? 2 : 3, 0, 1);
return 1;
}
if (_tcscmp (option, L"joyport0") == 0 || _tcscmp (option, L"joyport1") == 0) {
- inputdevice_joyport_config (p, value, _tcscmp (option, L"joyport0") == 0 ? 0 : 1, 0);
+ inputdevice_joyport_config (p, value, _tcscmp (option, L"joyport0") == 0 ? 0 : 1, 0, 0);
return 1;
}
if (_tcscmp (option, L"joyport2") == 0 || _tcscmp (option, L"joyport3") == 0) {
- inputdevice_joyport_config (p, value, _tcscmp (option, L"joyport2") == 0 ? 2 : 3, 0);
+ inputdevice_joyport_config (p, value, _tcscmp (option, L"joyport2") == 0 ? 2 : 3, 0, 0);
return 1;
}
if (cfgfile_strval (option, value, L"joyport0mode", &p->jports[0].mode, joyportmodes, 0))
}
}
-STATIC_INLINE uae_u32 ledcolor (uae_u32 c, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc)
+/*
+ * Convert a packed colour to its output value by OR-ing three table
+ * lookups: rc indexed by bits 16-23 of c, gc by bits 8-15, bc by bits 0-7.
+ * When the optional table a is supplied, a[255 - (bits 24-31 of c)] is
+ * OR-ed in as well; pass NULL to skip that step.
+ */
+STATIC_INLINE uae_u32 ledcolor (uae_u32 c, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc, uae_u32 *a)
{
- return rc[(c >> 16) & 0xff] | gc[(c >> 8) & 0xff] | bc[(c >> 0) & 0xff];
+	const uae_u32 r_idx = (c >> 16) & 0xff;
+	const uae_u32 g_idx = (c >> 8) & 0xff;
+	const uae_u32 b_idx = c & 0xff;
+	uae_u32 packed = rc[r_idx] | gc[g_idx] | bc[b_idx];
+
+	if (a != NULL)
+		packed |= a[255 - ((c >> 24) & 0xff)];
+	return packed;
}
static void write_tdnumber (uae_u8 *buf, int bpp, int x, int y, int num, uae_u32 c1, uae_u32 c2)
}
}
-void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc)
+void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc, uae_u32 *alpha)
{
int x_start, j, led, border;
uae_u32 c1, c2, cb;
- c1 = ledcolor (0xffffff, rc, gc, bc);
- c2 = ledcolor (0x000000, rc, gc, bc);
- cb = ledcolor (TD_BORDER, rc, gc, bc);
+ c1 = ledcolor (0x00ffffff, rc, gc, bc, alpha);
+ c2 = ledcolor (0x00000000, rc, gc, bc, alpha);
+ cb = ledcolor (TD_BORDER, rc, gc, bc, alpha);
if (td_pos & TD_RIGHT)
x_start = totalwidth - TD_PADX - NUM_LEDS * TD_WIDTH;
off_rgb = 0x000000;
am = 3;
}
- c = ledcolor (on ? on_rgb : off_rgb, rc, gc, bc);
+ on_rgb |= 0x33000000;
+ off_rgb |= 0x33000000;
+ c = ledcolor (on ? on_rgb : off_rgb, rc, gc, bc, alpha);
border = 0;
if (y == 0 || y == TD_TOTAL_HEIGHT - 1) {
- c = ledcolor (TD_BORDER, rc, gc, bc);
+ c = ledcolor (TD_BORDER, rc, gc, bc, alpha);
border = 1;
}
if (xlinebuffer == 0)
xlinebuffer = row_map[line];
buf = xlinebuffer;
- draw_status_line_single (buf, bpp, y, gfxvidinfo.width, xredcolors, xgreencolors, xbluecolors);
+ draw_status_line_single (buf, bpp, y, gfxvidinfo.width, xredcolors, xgreencolors, xbluecolors, NULL);
}
#define LIGHTPEN_HEIGHT 12
static void mix (void)
{
- int total = ((uae_u8*)paula_sndbufpt - (uae_u8*)paula_sndbuffer) / (get_audio_nativechannels () * 2);
+ int total = ((uae_u8*)paula_sndbufpt - (uae_u8*)paula_sndbuffer) / (get_audio_nativechannels (currprefs.sound_stereo) * 2);
if (currprefs.dfxclickvolume > 0) {
while (clickcnt < total) {
return;
mix ();
clickcnt = 0;
- switch (get_audio_nativechannels ())
+ switch (get_audio_nativechannels (currprefs.sound_stereo))
{
case 6:
for (i = 0; i < size / 6; i++) {
{
UnitInfo *ui = (UnitInfo *)unit_v;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
for (;;) {
uae_u8 *pck;
uae_u8 *msg;
while (get_long (fsnode)) {
dostype2 = get_long (fsnode + 14);
if (dostype2 == dostype) {
- if (get_long (fsnode + 22) & (1 << 7)) {
- put_long (devicenode + 32, get_long (fsnode + 54)); /* dn_SegList */
- put_long (devicenode + 36, -1); /* dn_GlobalVec */
+ int i;
+ uae_u32 pf = get_long (fsnode + 22); // fse_PatchFlags
+ for (i = 0; i < 32; i++) {
+ if (pf & (1 << i))
+ put_long (devicenode + 4 + i * 4, get_long (fsnode + 22 + 4 + i * 4));
}
return 1;
}
*/
#define GAYLE_LOG 0
-#define IDE_LOG 1
+#define IDE_LOG 0
#define MBRES_LOG 0
#define PCMCIA_LOG 1
ide->secbuf[offset ^ 1] = c;
offset++;
}
+ xfree (s);
}
static int isideirq (void)
put_lbachs (ide, lba, cyl, head, sec, nsec, lba48);
ide->data_multi = multi ? ide->multiple_mode : 1;
}
+
static void ide_write_sectors (int flags)
{
unsigned int cyl, head, sec, nsec;
switch (ide_reg)
{
case IDE_DRVADDR:
- v = 0;
+ v = ((ide_drv ? 2 : 1) | ((ide_select & 15) << 2)) ^ 0xff;
break;
case IDE_DATA:
break;
if (currprefs.cs_ide <= 0)
return NULL;
- dstbak = dst = (uae_u8*)malloc (1000);
+ dstbak = dst = malloc (1000);
save_u8 (currprefs.cs_ide);
save_u8 (gayle_int);
save_u8 (gayle_irq);
ide = idedrive[num];
if (ide->hdhfd.size == 0)
return NULL;
- dstbak = dst = (uae_u8*)malloc (1000);
+ dstbak = dst = malloc (1000);
save_u32 (num);
save_u64 (ide->hdhfd.size);
save_string (ide->hdhfd.path);
hfd->vhd_bitmapsize = ((hfd->vhd_blocksize / (8 * 512)) + 511) & ~511;
}
write_log (L"HDF is VHD %s image, virtual size=%dK\n",
- hfd->vhd_type == 2 ? "fixed" : "dynamic",
+ hfd->vhd_type == 2 ? L"fixed" : L"dynamic",
hfd->virtsize / 1024);
return 1;
nonvhd:
char *ss;
*reply_len = *sense_len = 0;
- memset(r, 0, 256);
- memset(s, 0, 256);
+ memset (r, 0, 256);
+ memset (s, 0, 256);
switch (cmdbuf[0])
{
case 0x00: /* TEST UNIT READY */
offset = ((cmdbuf[1] & 31) << 16) | (cmdbuf[2] << 8) | cmdbuf[3];
offset *= hfd->blocksize;
len = cmdbuf[4];
- if (!len) len = 256;
+ if (!len)
+ len = 256;
len *= hfd->blocksize;
if (checkbounds(hfd, offset, len))
scsi_len = (uae_u32)cmd_readx (hfd, scsi_data, offset, len);
case 0x0a: /* WRITE (6) */
if (nodisk (hfd))
goto nodisk;
+ if (hfd->readonly || hfd->dangerous)
+ goto readprot;
offset = ((cmdbuf[1] & 31) << 16) | (cmdbuf[2] << 8) | cmdbuf[3];
offset *= hfd->blocksize;
len = cmdbuf[4];
- if (!len) len = 256;
+ if (!len)
+ len = 256;
len *= hfd->blocksize;
if (checkbounds(hfd, offset, len))
scsi_len = (uae_u32)cmd_writex (hfd, scsi_data, offset, len);
case 0x2a: /* WRITE (10) */
if (nodisk (hfd))
goto nodisk;
+ if (hfd->readonly || hfd->dangerous)
+ goto readprot;
offset = rl (cmdbuf + 2);
offset *= hfd->blocksize;
len = rl (cmdbuf + 7 - 2) & 0xffff;
case 0xaa: /* WRITE (12) */
if (nodisk (hfd))
goto nodisk;
+ if (hfd->readonly || hfd->dangerous)
+ goto readprot;
offset = rl (cmdbuf + 2);
offset *= hfd->blocksize;
len = rl (cmdbuf + 6);
case 0x37: /* READ DEFECT DATA */
if (nodisk (hfd))
goto nodisk;
- write_log (L"UAEHF: READ DEFECT DATA\n");
status = 2; /* CHECK CONDITION */
s[0] = 0x70;
s[2] = 0; /* NO SENSE */
s[12] = 0x1c; /* DEFECT LIST NOT FOUND */
ls = 12;
break;
+readprot:
+ status = 2; /* CHECK CONDITION */
+ s[0] = 0x70;
+ s[2] = 7; /* DATA PROTECT */
+ s[12] = 0x27; /* WRITE PROTECTED */
+ ls = 12;
+ break;
nodisk:
status = 2; /* CHECK CONDITION */
s[0] = 0x70;
}
scsi_log ("\n");
- status = scsi_emulate(hfd, NULL, cmdbuf, scsi_cmd_len, scsi_data_ptr, &scsi_len, reply, &reply_len, sense, &sense_len);
+ status = scsi_emulate (hfd, NULL, cmdbuf, scsi_cmd_len, scsi_data_ptr, &scsi_len, reply, &reply_len, sense, &sense_len);
put_word (acmd + 18, status != 0 ? 0 : scsi_cmd_len); /* fake scsi_CmdActual */
put_byte (acmd + 21, status); /* scsi_Status */
case CMD_FORMAT: /* Format */
if (nodisk (hfd))
goto no_disk;
- if (hfd->readonly) {
+ if (hfd->readonly || hfd->dangerous) {
error = 28; /* write protect */
} else {
offset = get_long (request + 44);
case NSCMD_TD_FORMAT64:
if (nodisk (hfd))
goto no_disk;
- if (hfd->readonly) {
+ if (hfd->readonly || hfd->dangerous) {
error = 28; /* write protect */
} else {
offset64 = get_long (request + 44) | ((uae_u64)get_long (request + 32) << 32);
break;
case CMD_PROTSTATUS:
- if (hfd->readonly)
+ if (hfd->readonly || hfd->dangerous)
actual = -1;
else
actual = 0;
{
struct hardfileprivdata *hfpd = (struct hardfileprivdata*)devs;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
hfpd->thread_running = 1;
uae_sem_post (&hfpd->sync_sem);
for (;;) {
enum {
SND_MONO, SND_STEREO, SND_4CH_CLONEDSTEREO, SND_4CH, SND_6CH_CLONEDSTEREO, SND_6CH, SND_NONE };
-STATIC_INLINE int get_audio_nativechannels (void)
+STATIC_INLINE int get_audio_stereomode (int channels) /* Map a channel count of 1, 2, 4 or 6 to SND_MONO, SND_STEREO, SND_4CH or SND_6CH. */
+{
+ switch (channels)
+ {
+ case 1:
+ return SND_MONO;
+ case 2:
+ return SND_STEREO;
+ case 4:
+ return SND_4CH;
+ case 6:
+ return SND_6CH;
+ }
+ return SND_STEREO; /* every other channel count falls back to SND_STEREO */
+}
+STATIC_INLINE int get_audio_nativechannels (int stereomode) /* Return the table entry for the given sound mode; the mode is now a parameter instead of being read from currprefs. */
{
int ch[] = { 1, 2, 4, 4, 6, 6, 0 };
- return ch[currprefs.sound_stereo];
+ return ch[stereomode]; /* the table has 7 entries and stereomode is not range-checked here - NOTE(review): confirm callers only pass SND_* values */
}
-STATIC_INLINE int get_audio_amigachannels (void)
+STATIC_INLINE int get_audio_amigachannels (int stereomode) /* Return the table entry for the given sound mode; the mode is now a parameter instead of being read from currprefs. */
{
int ch[] = { 1, 2, 2, 4, 2, 4, 0 };
- return ch[currprefs.sound_stereo];
+ return ch[stereomode]; /* 7-entry table indexed without a range check; entries 2 and 4 are 2 here, where get_audio_nativechannels has 4 and 6 */
}
-STATIC_INLINE int get_audio_ismono (void)
+STATIC_INLINE int get_audio_ismono (int stereomode) /* Return 1 when stereomode is 0, otherwise 0. */
{
- if (currprefs.sound_stereo == 0)
+ if (stereomode == 0) /* NOTE(review): 0 presumably corresponds to SND_MONO (first enumerator of the mode enum) - confirm */
return 1;
return 0;
}
void *handle;
int handle_valid;
int readonly;
+ int dangerous;
int flags;
uae_u8 *cache;
int cache_valid;
unsigned int cylinders;
unsigned int sectors;
unsigned int heads;
- int warned;
uae_u8 *virtual_rdb;
uae_u64 virtual_size;
int unitnum;
extern int hdf_read (struct hardfiledata *hfd, void *buffer, uae_u64 offset, int len);
extern int hdf_write (struct hardfiledata *hfd, void *buffer, uae_u64 offset, int len);
extern int hdf_getnumharddrives (void);
-extern TCHAR *hdf_getnameharddrive (int index, int flags, int *sectorsize);
+extern TCHAR *hdf_getnameharddrive (int index, int flags, int *sectorsize, int *dangerousdrive);
extern int isspecialdrive(const TCHAR *name);
extern int get_native_path(uae_u32 lock, TCHAR *out);
extern void hardfile_do_disk_change (struct uaedev_config_info *uci, int insert);
#define UAE_FILTER_NULL 1
#define UAE_FILTER_DIRECT3D 2
-#define UAE_FILTER_OPENGL 3
-#define UAE_FILTER_SCALE2X 4
-#define UAE_FILTER_SUPEREAGLE 5
-#define UAE_FILTER_SUPER2XSAI 6
-#define UAE_FILTER_2XSAI 7
-#define UAE_FILTER_PAL 8
-#define UAE_FILTER_HQ 9
+//#define UAE_FILTER_OPENGL 3
+#define UAE_FILTER_SCALE2X 3
+#define UAE_FILTER_SUPEREAGLE 4
+#define UAE_FILTER_SUPER2XSAI 5
+#define UAE_FILTER_2XSAI 6
+#define UAE_FILTER_PAL 7
+#define UAE_FILTER_HQ 8
#define UAE_FILTER_MODE_16 16
#define UAE_FILTER_MODE_16_16 16
extern void write_inputdevice_config (struct uae_prefs *p, struct zfile *f);
extern void read_inputdevice_config (struct uae_prefs *p, TCHAR *option, TCHAR *value);
extern void reset_inputdevice_config (struct uae_prefs *pr);
-extern int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, int type);
+extern int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, int mode, int type);
extern int inputdevice_getjoyportdevice (int jport);
extern void inputdevice_init (void);
if ((b)->baseaddr) \
baseaddr[bankindex(addr)] = (b)->baseaddr - (realstart); \
else \
- baseaddr[bankindex(addr)] = (uae_u8*)(((long)b)+1); \
+ baseaddr[bankindex(addr)] = (uae_u8*)(((uae_u8*)b)+1); \
} while (0)
#else
#define put_mem_bank(addr, b, realstart) \
#define UAEMAJOR 1
#define UAEMINOR 6
-#define UAESUBREV 1
+#define UAESUBREV 2
typedef enum { KBD_LANG_US, KBD_LANG_DK, KBD_LANG_DE, KBD_LANG_SE, KBD_LANG_FR, KBD_LANG_IT, KBD_LANG_ES } KbdLang;
#define STATUSLINE_RTG 2
#define STATUSLINE_TARGET 0x80
-extern void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc);
+extern void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc, uae_u32 *alpha);
extern int graphics_init (void);
extern void graphics_leave (void);
extern void handle_events (void);
+extern int handle_msgpump (void);
extern void setup_brkhandler (void);
extern int isfullscreen (void);
extern void toggle_fullscreen (void);
static void freejport (struct uae_prefs *dst, int num)
{
memset (&dst->jports[num], 0, sizeof (struct jport));
+ dst->jports[num].id = -1; /* after zeroing the slot, id becomes -1 instead of 0 - presumably -1 marks "no device"; confirm against jport users */
}
static void copyjport (const struct uae_prefs *src, struct uae_prefs *dst, int num)
{
#ifdef CATWEASEL
catweasel_hsync ();
#endif
+ if ((vpos & 31) == 31 && handle_msgpump ()) {
+ idev[IDTYPE_MOUSE].read ();
+ idev[IDTYPE_JOYSTICK].read ();
+ idev[IDTYPE_KEYBOARD].read ();
+ }
if (inputdelay > 0) {
inputdelay--;
if (inputdelay == 0) {
gui_display (-1);
break;
case AKS_SCREENSHOT:
- screenshot(1, 1);
+ screenshot (1, 1);
break;
#ifdef ACTION_REPLAY
case AKS_FREEZEBUTTON:
return 0;
if (name) {
write_log (L"inputdevice change '%s':%d->%d\n", name, num, newport);
- inputdevice_joyport_config (&changed_prefs, name, newport, 2);
+ inputdevice_joyport_config (&changed_prefs, name, newport, -1, 2);
inputdevice_copyconfig (&changed_prefs, &currprefs);
return 1;
}
#endif
}
-/* called when devices get inserted or removed */
+/* called when devices get inserted or removed
+ * store old devices temporarily, enumerate all devices
+ * restore old devices back (order may have changed)
+ */
void inputdevice_devicechange (struct uae_prefs *prefs)
{
int acc = input_acquired;
int i, idx;
TCHAR *jports[MAX_JPORTS];
+ int jportskb[MAX_JPORTS], jportsmode[MAX_JPORTS];
for (i = 0; i < MAX_JPORTS; i++) {
- jports[i] = 0;
- idx = inputdevice_getjoyportdevice (prefs->jports[i].id) - JSEM_LASTKBD;
- if (idx >= 0) {
- struct inputdevice_functions *idf = getidf (idx);
- int devidx = inputdevice_get_device_index (idx);
+ jports[i] = NULL;
+ jportskb[i] = -1;
+ idx = inputdevice_getjoyportdevice (prefs->jports[i].id);
+ if (idx >= JSEM_LASTKBD) {
+ struct inputdevice_functions *idf;
+ int devidx;
+ idx -= JSEM_LASTKBD;
+ idf = getidf (idx);
+ devidx = inputdevice_get_device_index (idx);
jports[i] = my_strdup (idf->get_uniquename (devidx));
+ } else {
+ jportskb[i] = idx;
}
+ jportsmode[i] = prefs->jports[i].mode;
}
inputdevice_unacquire ();
for (i = 0; i < MAX_JPORTS; i++) {
freejport (prefs, i);
- if (jports[i])
- inputdevice_joyport_config (prefs, jports[i], i, 2);
- xfree (jports[i]);
+ if (jports[i]) {
+ inputdevice_joyport_config (prefs, jports[i], i, jportsmode[i], 2);
+ xfree (jports[i]);
+ } else if (jportskb[i] >= 0) {
+ TCHAR tmp[10];
+ _stprintf (tmp, L"kbd%d", jportskb[i]);
+ inputdevice_joyport_config (prefs, tmp, i, jportsmode[i], 0);
+ }
}
if (prefs == &changed_prefs)
return v;
}
-int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, int type)
+int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, int mode, int type)
{
switch (type)
{
TCHAR *name2 = idf->get_uniquename (i);
if ((name1 && !_tcscmp (name1, value)) || (name2 && !_tcscmp (name2, value))) {
p->jports[portnum].id = idnum + i;
- p->jports[portnum].mode = 0;
+ if (mode >= 0)
+ p->jports[portnum].mode = mode;
return 1;
}
}
}
if (got == 2) {
p->jports[portnum].id = start;
- p->jports[portnum].mode = 0;
+ if (mode >= 0)
+ p->jports[portnum].mode = mode;
return 1;
}
}
--- /dev/null
+/*
+ * compiler/codegen_x86.cpp - IA-32 code generator
+ *
+ * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
+ *
+ * Adaptation for Basilisk II and improvements, copyright 2000-2005
+ * Gwenole Beauchesne
+ *
+ * Basilisk II (C) 1997-2008 Christian Bauer
+ *
+ * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* This should eventually end up in machdep/, but for now, x86 is the
+ only target, and it's easier this way... */
+
+#include "flags_x86.h"
+
+/*************************************************************************
+ * Some basic information about the target CPU *
+ *************************************************************************/
+
+#define EAX_INDEX 0
+#define ECX_INDEX 1
+#define EDX_INDEX 2
+#define EBX_INDEX 3
+#define ESP_INDEX 4
+#define EBP_INDEX 5
+#define ESI_INDEX 6
+#define EDI_INDEX 7
+#if defined(__x86_64__)
+#define R8_INDEX 8
+#define R9_INDEX 9
+#define R10_INDEX 10
+#define R11_INDEX 11
+#define R12_INDEX 12
+#define R13_INDEX 13
+#define R14_INDEX 14
+#define R15_INDEX 15
+#endif
+/* XXX this has to match X86_Reg8H_Base + 4 */
+#define AH_INDEX (0x10+4+EAX_INDEX)
+#define CH_INDEX (0x10+4+ECX_INDEX)
+#define DH_INDEX (0x10+4+EDX_INDEX)
+#define BH_INDEX (0x10+4+EBX_INDEX)
+
+/* The register in which subroutines return an integer return value */
+#define REG_RESULT EAX_INDEX
+
+/* The registers subroutines take their first and second argument in */
+#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
+/* Handle the _fastcall parameters of ECX and EDX */
+#define REG_PAR1 ECX_INDEX
+#define REG_PAR2 EDX_INDEX
+#elif defined(__x86_64__)
+#define REG_PAR1 EDI_INDEX
+#define REG_PAR2 ESI_INDEX
+#else
+#define REG_PAR1 EAX_INDEX
+#define REG_PAR2 EDX_INDEX
+#endif
+
+#define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
+#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
+#define REG_PC_TMP EAX_INDEX
+#else
+#define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
+#endif
+
+#define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
+ -1 if any reg will do */
+#define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
+#define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
+
+#define STACK_ALIGN 16
+#define STACK_OFFSET sizeof(void *)
+
+uae_s8 always_used[]={4,-1}; /* 4 is ESP_INDEX; like the lists below, the array ends with -1 */
+#if defined(__x86_64__)
+uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; /* every index 0-15 except 4 (ESP_INDEX) */
+uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; /* same set as can_byte on this target */
+#else
+uae_s8 can_byte[]={0,1,2,3,-1}; /* EAX_INDEX, ECX_INDEX, EDX_INDEX, EBX_INDEX only */
+uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; /* indices 0-7 except 4 (ESP_INDEX) */
+#endif
+
+#if USE_OPTIMIZED_CALLS
+/* Make sure the interpretive core does not use cpuopti */
+uae_u8 call_saved[]={0,0,0,1,1,1,1,1}; /* 8 entries; this branch cannot build because of the #error below */
+#error FIXME: code not ready
+#else
+/* cpuopti mutates instruction handlers to assume registers are saved
+ by the caller */
+uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}; /* 16 entries; only index 4 (ESP_INDEX) is set */
+#endif
+
+/* This *should* be the same as call_saved. But:
+ - We might not really know which registers are saved, and which aren't,
+ so we need to preserve some, but don't want to rely on everyone else
+ also saving those registers
+ - Special registers (such as the stack pointer) should not be "preserved"
+ by pushing, even though they are "saved" across function calls
+*/
+#if defined(__x86_64__)
+/* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
+/* preserve r11 because it's generally used to hold pointers to functions */
+static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1}; /* set at indices 3 (EBX_INDEX), 5 (EBP_INDEX) and 11-15 (R11_INDEX..R15_INDEX); index 4 (ESP_INDEX) stays 0 */
+#else
+/* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
+static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; /* set at indices 3, 5, 6, 7 (EBX_INDEX, EBP_INDEX, ESI_INDEX, EDI_INDEX) */
+#endif
+
+/* Whether classes of instructions do or don't clobber the native flags */
+#define CLOBBER_MOV
+#define CLOBBER_LEA
+#define CLOBBER_CMOV
+#define CLOBBER_POP
+#define CLOBBER_PUSH
+#define CLOBBER_SUB clobber_flags()
+#define CLOBBER_SBB clobber_flags()
+#define CLOBBER_CMP clobber_flags()
+#define CLOBBER_ADD clobber_flags()
+#define CLOBBER_ADC clobber_flags()
+#define CLOBBER_AND clobber_flags()
+#define CLOBBER_OR clobber_flags()
+#define CLOBBER_XOR clobber_flags()
+
+#define CLOBBER_ROL clobber_flags()
+#define CLOBBER_ROR clobber_flags()
+#define CLOBBER_SHLL clobber_flags()
+#define CLOBBER_SHRL clobber_flags()
+#define CLOBBER_SHRA clobber_flags()
+#define CLOBBER_TEST clobber_flags()
+#define CLOBBER_CL16
+#define CLOBBER_CL8
+#define CLOBBER_SE32
+#define CLOBBER_SE16
+#define CLOBBER_SE8
+#define CLOBBER_ZE32
+#define CLOBBER_ZE16
+#define CLOBBER_ZE8
+#define CLOBBER_SW16 clobber_flags()
+#define CLOBBER_SW32
+#define CLOBBER_SETCC
+#define CLOBBER_MUL clobber_flags()
+#define CLOBBER_BT clobber_flags()
+#define CLOBBER_BSF clobber_flags()
+
+/* The older code generator is now deprecated. */
+#define USE_NEW_RTASM 1
+
+#if USE_NEW_RTASM
+
+#if defined(__x86_64__)
+#define X86_TARGET_64BIT 1
+/* The address override prefix causes a 5-cycle penalty on Intel Core
+ processors. Another solution would be to decompose the load into an LEA,
+ MOV (to zero-extend), MOV (from memory): is it better? */
+#define ADDR32 x86_emit_byte(0x67),
+#else
+#define ADDR32 /**/
+#endif
+#define X86_FLAT_REGISTERS 0
+#define X86_OPTIMIZE_ALU 1
+#define X86_OPTIMIZE_ROTSHI 1
+#include "codegen_x86.h"
+
+#define x86_emit_byte(B) emit_byte(B)
+#define x86_emit_word(W) emit_word(W)
+#define x86_emit_long(L) emit_long(L)
+#define x86_emit_quad(Q) emit_quad(Q)
+#define x86_get_target() get_target()
+#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
+
+static void jit_fail(const char *msg, const char *file, int line, const char *function) /* Report a code-emission failure on stderr and abort; reached through the x86_emit_failure macro, which supplies __FILE__, __LINE__ and __FUNCTION__. */
+{
+ fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
+ function, file, line, msg);
+ abort(); /* no recovery path: the process terminates here */
+}
+
+LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) /* Emit a push of register r: PUSHQr when __x86_64__ is defined, PUSHLr otherwise. */
+{
+#if defined(__x86_64__)
+ PUSHQr(r);
+#else
+ PUSHLr(r);
+#endif
+}
+LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
+
+LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) /* Emit a pop into register r: POPQr when __x86_64__ is defined, POPLr otherwise. */
+{
+#if defined(__x86_64__)
+ POPQr(r);
+#else
+ POPLr(r);
+#endif
+}
+LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
+
+LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) /* Emit a pop into the memory operand d: POPQm when __x86_64__ is defined, POPLm otherwise. */
+{
+#if defined(__x86_64__)
+ POPQm(d, X86_NOREG, X86_NOREG, 1); /* both register slots are X86_NOREG, so d alone addresses the operand */
+#else
+ POPLm(d, X86_NOREG, X86_NOREG, 1);
+#endif
+}
+LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
+
+LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) /* Thin emitter for BTLir; the macro takes (i, r), the reverse of this wrapper's (r, i). */
+{
+ BTLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) /* Thin emitter for BTLrr; b is passed ahead of r. */
+{
+ BTLrr(b, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) /* Thin emitter for BTCLir; the macro takes (i, r), the reverse of this wrapper's (r, i). */
+{
+ BTCLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) /* Thin emitter for BTCLrr; b is passed ahead of r. */
+{
+ BTCLrr(b, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) /* Thin emitter for BTRLir; the macro takes (i, r), the reverse of this wrapper's (r, i). */
+{
+ BTRLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) /* Thin emitter for BTRLrr; b is passed ahead of r. */
+{
+ BTRLrr(b, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) /* Thin emitter for BTSLir; the macro takes (i, r), the reverse of this wrapper's (r, i). */
+{
+ BTSLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) /* Thin emitter for BTSLrr; b is passed ahead of r. */
+{
+ BTSLrr(b, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) /* Thin emitter for SUBWir; immediate i is passed ahead of register d. */
+{
+ SUBWir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
+
+LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) /* Emit MOVLmr from the memory operand s into register d. */
+{
+ MOVLmr(s, X86_NOREG, X86_NOREG, 1, d); /* both register slots are X86_NOREG, so s alone addresses the operand */
+}
+LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) /* Emit MOVLim with immediate s and memory operand d (both register slots X86_NOREG). */
+{
+ MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) /* Emit MOVWim with immediate s and memory operand d (both register slots X86_NOREG). */
+{
+ MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) /* Emit MOVBim with immediate s and memory operand d (both register slots X86_NOREG). */
+{
+ MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) /* Emit ROLBim with immediate i on the memory operand d (both register slots X86_NOREG). */
+{
+ ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) /* Thin emitter for ROLBir; immediate i is passed ahead of register r. */
+{
+ ROLBir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) /* Thin emitter for ROLWir; immediate i is passed ahead of register r. */
+{
+ ROLWir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) /* Thin emitter for ROLLir; immediate i is passed ahead of register r. */
+{
+ ROLLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) /* Thin emitter for ROLLrr; r is passed ahead of d (r is presumably the count register, cf. SHIFTCOUNT_NREG - confirm). */
+{
+ ROLLrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) /* Thin emitter for ROLWrr; r is passed ahead of d. */
+{
+ ROLWrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) /* Thin emitter for ROLBrr; r is passed ahead of d. */
+{
+ ROLBrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) /* Thin emitter for SHLLrr; r is passed ahead of d. */
+{
+ SHLLrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) /* Thin emitter for SHLWrr; r is passed ahead of d. */
+{
+ SHLWrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) /* Thin emitter for SHLBrr; r is passed ahead of d. */
+{
+ SHLBrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) /* Thin emitter for RORBir; immediate i is passed ahead of register r. */
+{
+ RORBir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) /* Thin emitter for RORWir; immediate i is passed ahead of register r. */
+{
+ RORWir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) /* Emit ORLmr with memory operand s and register d (both register slots of the address X86_NOREG). */
+{
+ ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
+}
+LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) /* Thin emitter for RORLir; immediate i is passed ahead of register r. */
+{
+ RORLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) /* Thin emitter for RORLrr; r is passed ahead of d. */
+{
+ RORLrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) /* Thin emitter for RORWrr; r is passed ahead of d. */
+{
+ RORWrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) /* Thin emitter for RORBrr; r is passed ahead of d. */
+{
+ RORBrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) /* Thin emitter for SHRLrr; r is passed ahead of d. */
+{
+ SHRLrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) /* Thin emitter for SHRWrr; r is passed ahead of d. */
+{
+ SHRWrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) /* Thin emitter for SHRBrr; r is passed ahead of d. */
+{
+ SHRBrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) /* Thin emitter for SARLrr (the wrapper says "shra", the macro says "SAR"); r is passed ahead of d. */
+{
+ SARLrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) /* Thin emitter for SARWrr; r is passed ahead of d. */
+{
+ SARWrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) /* Thin emitter for SARBrr; r is passed ahead of d. */
+{
+ SARBrr(r, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) /* Thin emitter for SHLLir; immediate i is passed ahead of register r. */
+{
+ SHLLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) /* Thin emitter for SHLWir; immediate i is passed ahead of register r. */
+{
+ SHLWir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) /* Thin emitter for SHLBir; immediate i is passed ahead of register r. */
+{
+ SHLBir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) /* Thin emitter for SHRLir; immediate i is passed ahead of register r. */
+{
+ SHRLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) /* Thin emitter for SHRWir; immediate i is passed ahead of register r. */
+{
+ SHRWir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) /* Thin emitter for SHRBir; immediate i is passed ahead of register r. */
+{
+ SHRBir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) /* Thin emitter for SARLir; immediate i is passed ahead of register r. */
+{
+ SARLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) /* Thin emitter for SARWir; immediate i is passed ahead of register r. */
+{
+ SARWir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) /* Thin emitter for SARBir; immediate i is passed ahead of register r. */
+{
+ SARBir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) /* Emit SAHF; the dummy_ah parameter is not referenced in the body. */
+{
+ SAHF();
+}
+LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
+
+LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) /* Emit CPUID; the dummy_eax parameter is not referenced in the body. */
+{
+ CPUID();
+}
+LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
+
+LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) /* Emit LAHF; the dummy_ah parameter is not referenced in the body. */
+{
+ LAHF();
+}
+LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
+
+LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) /* Thin emitter for SETCCir; condition cc is passed ahead of register d. */
+{
+ SETCCir(cc, d);
+}
+LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
+
+LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) /* Emit SETCCim with condition cc on the memory operand d (both register slots X86_NOREG). */
+{
+ SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
+
+LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) /* Conditional byte move emitted as a conditional jump over a MOVBrr; unlike the w/l variants there is no have_cmov path here. */
+{
+ /* replacement using branch and mov */
+ int8 *target_p = (int8 *)x86_get_target() + 1; /* one byte past the current emit position; written after the jump and move are emitted */
+ JCCSii(cc^1, 0); /* jump on cc^1 with a placeholder displacement of 0 */
+ MOVBrr(s, d);
+ *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); /* back-patch: distance from the byte after target_p to the current emit position */
+}
+LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
+
+LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) /* Conditional word move: CMOVWrr when have_cmov is set, otherwise a conditional jump over a MOVWrr. */
+{
+ if (have_cmov)
+ CMOVWrr(cc, s, d);
+ else { /* replacement using branch and mov */
+ int8 *target_p = (int8 *)x86_get_target() + 1; /* one byte past the current emit position; written after the jump and move are emitted */
+ JCCSii(cc^1, 0); /* jump on cc^1 with a placeholder displacement of 0 */
+ MOVWrr(s, d);
+ *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); /* back-patch: distance from the byte after target_p to the current emit position */
+ }
+}
+LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
+
+LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) /* Conditional long move: CMOVLrr when have_cmov is set, otherwise a conditional jump over a MOVLrr. */
+{
+ if (have_cmov)
+ CMOVLrr(cc, s, d);
+ else { /* replacement using branch and mov */
+ int8 *target_p = (int8 *)x86_get_target() + 1; /* one byte past the current emit position; written after the jump and move are emitted */
+ JCCSii(cc^1, 0); /* jump on cc^1 with a placeholder displacement of 0 */
+ MOVLrr(s, d);
+ *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); /* back-patch: distance from the byte after target_p to the current emit position */
+ }
+}
+LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
+
+LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) /* Thin emitter for BSFLrr; s is passed ahead of d. */
+{
+ BSFLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) /* Thin emitter for MOVSLQrr; s is passed ahead of d. */
+{
+ MOVSLQrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) /* Thin emitter for MOVSWLrr; s is passed ahead of d. */
+{
+ MOVSWLrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) /* Thin emitter for MOVSBLrr; s is passed ahead of d. */
+{
+ MOVSBLrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) /* Thin emitter for MOVZWLrr; s is passed ahead of d. */
+{
+ MOVZWLrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
+
+LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) /* Thin emitter for MOVZBLrr; s is passed ahead of d. */
+{
+ MOVZBLrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) /* Thin emitter for IMULLrr; s is passed ahead of d. */
+{
+ IMULLrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) /* Emit the one-operand IMULLr(s); logs and aborts unless d is MUL_NREG1 and s is MUL_NREG2. */
+{
+ if (d!=MUL_NREG1 || s!=MUL_NREG2) { /* MUL_NREG1/MUL_NREG2 are EAX_INDEX/EDX_INDEX, documented above as holding the low/high 32 bits of the product */
+ write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
+ abort();
+ }
+ IMULLr(s);
+}
+LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) /* Emit the one-operand MULLr(s); logs and aborts unless d is MUL_NREG1 and s is MUL_NREG2. */
+{
+ if (d!=MUL_NREG1 || s!=MUL_NREG2) { /* MUL_NREG1/MUL_NREG2 are EAX_INDEX/EDX_INDEX, documented above as holding the low/high 32 bits of the product */
+ write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
+ abort();
+ }
+ MULLr(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) /* Not implemented: the body ignores d and s and unconditionally calls abort(). */
+{
+ abort(); /* %^$&%^$%#^ x86! */
+}
+LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) /* Thin emitter for MOVBrr; s is passed ahead of d. */
+{
+ MOVBrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) /* Thin emitter for MOVWrr; s is passed ahead of d. */
+{
+ MOVWrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
+
+LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) /* Emit MOVLmr into d from an operand built from baser, index and factor with a displacement of 0. */
+{
+ ADDR32 MOVLmr(0, baser, index, factor, d); /* ADDR32 emits a 0x67 byte first on __x86_64__ and expands to nothing otherwise */
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) /* Emit MOVWmr into d from an operand built from baser, index and factor with a displacement of 0. */
+{
+ ADDR32 MOVWmr(0, baser, index, factor, d); /* ADDR32 emits a 0x67 byte first on __x86_64__ and expands to nothing otherwise */
+}
+LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) /* Emit MOVBmr into d from an operand built from baser, index and factor with a displacement of 0. */
+{
+ ADDR32 MOVBmr(0, baser, index, factor, d); /* ADDR32 emits a 0x67 byte first on __x86_64__ and expands to nothing otherwise */
+}
+LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) /* Emit MOVLrm from s to an operand built from baser, index and factor with a displacement of 0. */
+{
+ ADDR32 MOVLrm(s, 0, baser, index, factor); /* ADDR32 emits a 0x67 byte first on __x86_64__ and expands to nothing otherwise */
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) /* Emit MOVWrm from s to an operand built from baser, index and factor with a displacement of 0. */
+{
+ ADDR32 MOVWrm(s, 0, baser, index, factor); /* ADDR32 emits a 0x67 byte first on __x86_64__ and expands to nothing otherwise */
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+
+LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) /* Emit MOVBrm from s to an operand built from baser, index and factor with a displacement of 0. */
+{
+ ADDR32 MOVBrm(s, 0, baser, index, factor); /* ADDR32 emits a 0x67 byte first on __x86_64__ and expands to nothing otherwise */
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) /* Emit MOVLrm from s to an operand built from base, baser, index and factor. */
+{
+ ADDR32 MOVLrm(s, base, baser, index, factor); /* base takes the slot that the mrr variant fills with 0 */
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) /* Emit MOVWrm from s to an operand built from base, baser, index and factor. */
+{
+ ADDR32 MOVWrm(s, base, baser, index, factor); /* base takes the slot that the mrr variant fills with 0 */
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+
+LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) /* Emit MOVBrm from s to an operand built from base, baser, index and factor. */
+{
+ ADDR32 MOVBrm(s, base, baser, index, factor); /* base takes the slot that the mrr variant fills with 0 */
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+
+LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) /* Emit MOVLmr into d from an operand built from base, baser, index and factor. */
+{
+ ADDR32 MOVLmr(base, baser, index, factor, d); /* base takes the slot that the rrm variant fills with 0 */
+}
+LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ ADDR32 MOVWmr(base, baser, index, factor, d);
+}
+LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ ADDR32 MOVBmr(base, baser, index, factor, d);
+}
+LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+
+LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+{
+ ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+
+/*
+ * Conditional 32-bit load: d <- [base + index*factor] when condition code
+ * `cond` holds.
+ *
+ * When have_cmov is set a single CMOVLmr is emitted.  Otherwise a short
+ * conditional jump on the inverted condition (cond^1) is emitted around a
+ * plain MOVLmr, and the jump's 8-bit displacement is back-patched once the
+ * end of the MOV is known.
+ */
+LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
+{
+ if (have_cmov)
+ ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
+ else { /* replacement using branch and mov */
+ /* one byte past the current emit position; presumably the rel8 field of
+    the jump emitted next (assumes JCCSii emits a 1-byte opcode) */
+ int8 *target_p = (int8 *)x86_get_target() + 1;
+ JCCSii(cond^1, 0); /* 0 is a placeholder displacement, patched below */
+ ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
+ /* displacement = emit position after the MOV minus the byte after rel8 */
+ *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
+ }
+}
+LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
+
+/*
+ * Conditional 32-bit load: d <- [mem] when condition code `cond` holds.
+ *
+ * Uses CMOVLmr when have_cmov is set; otherwise emits a short jump on the
+ * inverted condition (cond^1) around a plain MOVLmr and back-patches the
+ * jump displacement.  No base or index register is used (X86_NOREG).
+ */
+LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
+{
+ if (have_cmov)
+ CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
+ else { /* replacement using branch and mov */
+ /* one byte past the current emit position; presumably the rel8 field of
+    the jump emitted next (assumes JCCSii emits a 1-byte opcode) */
+ int8 *target_p = (int8 *)x86_get_target() + 1;
+ JCCSii(cond^1, 0); /* 0 is a placeholder displacement, patched below */
+ MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
+ /* displacement = emit position after the MOV minus the byte after rel8 */
+ *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
+ }
+}
+LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
+
+LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
+{
+ ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+{
+ ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+{
+ ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
+{
+ ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+{
+ ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+{
+ ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+{
+ ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+{
+ ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+{
+ ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+{
+ ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+{
+ ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+{
+ ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+{
+ LEALmr(offset, s, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+{
+ LEALmr(offset, s, index, factor, d);
+}
+LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+
+LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+{
+ LEALmr(0, s, index, factor, d);
+}
+LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+
+/*
+ * d <- index * factor, computed with LEALmr using displacement 0 and no
+ * base register (X86_NOREG).
+ *
+ * NOTE(review): the third LOWFUNC/LENDFUNC field is 4 although the
+ * parameter list has three entries; in the neighbouring definitions that
+ * field equals the parameter count.  Confirm whether anything consumes it.
+ */
+LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
+{
+ LEALmr(0, X86_NOREG, index, factor, d);
+}
+LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+{
+ ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+{
+ ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+{
+ ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+{
+ BSWAPLr(r);
+}
+LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+
+/*
+ * Swap the two bytes of the 16-bit register r by rotating it left by 8
+ * (ROLWir(8, r)).  Unlike raw_bswap_32 this is declared with WRITE in the
+ * first LOWFUNC field -- presumably because the rotate modifies flags.
+ */
+LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+{
+ ROLWir(8, r);
+}
+LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+{
+ MOVLrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+{
+ MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+{
+ MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+{
+ MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+{
+ MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+{
+ MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+{
+ MOVLir(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+{
+ MOVWir(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+{
+ MOVBir(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+
+LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+{
+ ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
+{
+ ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
+{
+ ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
+{
+ ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
+
+LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
+{
+ TESTLir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
+{
+ TESTLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
+{
+ TESTWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
+{
+ TESTBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
+{
+ XORLir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
+{
+ ANDLir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
+{
+ ANDWir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
+{
+ ANDLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
+{
+ ANDWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
+{
+ ANDBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
+{
+ ORLir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
+{
+ ORLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
+{
+ ORWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
+{
+ ORBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
+{
+ ADCLrr(s, d);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
+{
+ ADCWrr(s, d);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
+
+LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+{
+ ADCBrr(s, d);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+{
+ ADDLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+{
+ ADDWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+{
+ ADDBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+{
+ SUBLir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+{
+ SUBBir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+{
+ ADDLir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+{
+ ADDWir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+{
+ ADDBir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+{
+ SBBLrr(s, d);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+{
+ SBBWrr(s, d);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+
+LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+{
+ SBBBrr(s, d);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+{
+ SUBLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+{
+ SUBWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+{
+ SUBBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+{
+ CMPLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+{
+ CMPLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+{
+ CMPWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+
+/*
+ * Compare the byte at absolute address d with immediate s (CMPBim, no base
+ * or index register).  Only the flags are affected.
+ */
+LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
+{
+ CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+{
+ CMPBir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+{
+ CMPBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+
+LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+{
+ ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
+}
+LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+{
+ XORLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+{
+ XORWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+{
+ XORBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+{
+ SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+
+LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+{
+ CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+{
+ XCHGLrr(r2, r1);
+}
+LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+
+/*
+ * Exchange the byte registers r1 and r2 (XCHGBrr).
+ *
+ * NOTE(review): both operands are declared RW4 although the emitted
+ * instruction is the byte form; other byte operations in this file use
+ * R1/RW1.  Confirm whether the 4-byte declaration is intentional.
+ */
+LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
+{
+ XCHGBrr(r2, r1);
+}
+LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
+
+LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
+{
+ PUSHF();
+}
+LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
+
+LOWFUNC(WRITE,READ,0,raw_popfl,(void))
+{
+ POPF();
+}
+LENDFUNC(WRITE,READ,0,raw_popfl,(void))
+
+/* Generate floating-point instructions */
+/* Emit FADDDm with the memory operand at absolute address s (no base or
+ * index register).  Presumably the double-precision FADD form -- confirm
+ * against the FADDDm macro definition. */
+static inline void x86_fadd_m(MEMR s)
+{
+ FADDDm(s,X86_NOREG,X86_NOREG,1);
+}
+
+#else
+
+/* Encoder tuning switches for the hand-written emitters below. */
+/* Permit the shorter accumulator-only opcodes; checked together with
+ * isaccum() (see raw_sub_w_ri). */
+const bool optimize_accum = true;
+/* Not referenced in this part of the file; the name suggests 8-bit
+ * immediate forms -- TODO confirm where it is used. */
+const bool optimize_imm8 = true;
+/* Use the count-less D0/D1 shift and rotate opcodes when the count is 1. */
+const bool optimize_shift_once = true;
+
+/*************************************************************************
+ * Actual encoding of the instructions on the target CPU *
+ *************************************************************************/
+
+/* Returns 1 when register number r is the accumulator (EAX_INDEX), else 0. */
+static __inline__ int isaccum(int r)
+{
+    return (EAX_INDEX == r) ? 1 : 0;
+}
+
+/* Returns 1 when x lies in the signed 8-bit range [-128, 127], else 0. */
+static __inline__ int isbyte(uae_s32 x)
+{
+    return !(x < -128 || x > 127);
+}
+
+/* Returns 1 when x lies in the signed 16-bit range [-32768, 32767], else 0. */
+static __inline__ int isword(uae_s32 x)
+{
+    return !(x < -32768 || x > 32767);
+}
+
+LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
+{
+ emit_byte(0x50+r);
+}
+LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
+
+LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
+{
+ emit_byte(0x58+r);
+}
+LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
+
+LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
+{
+ emit_byte(0x8f);
+ emit_byte(0x05);
+ emit_long(d);
+}
+LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
+
+LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
+{
+ emit_byte(0x0f);
+ emit_byte(0xba);
+ emit_byte(0xe0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
+{
+ emit_byte(0x0f);
+ emit_byte(0xa3);
+ emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0x0f);
+ emit_byte(0xba);
+ emit_byte(0xf8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
+{
+ emit_byte(0x0f);
+ emit_byte(0xbb);
+ emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
+
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0x0f);
+ emit_byte(0xba);
+ emit_byte(0xf0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
+{
+ emit_byte(0x0f);
+ emit_byte(0xb3);
+ emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
+{
+ emit_byte(0x0f);
+ emit_byte(0xba);
+ emit_byte(0xe8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
+{
+ emit_byte(0x0f);
+ emit_byte(0xab);
+ emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
+
+/*
+ * 16-bit SUB of immediate i from register d.
+ *
+ * Three encodings, all behind the 0x66 operand-size prefix:
+ *   - 0x83 /5 ib  when i fits a signed byte,
+ *   - 0x2d iw     when d is the accumulator and optimize_accum is on,
+ *   - 0x81 /5 iw  otherwise.
+ */
+LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
+{
+    const int short_imm = isbyte(i);
+    const int acc_form = !short_imm && optimize_accum && isaccum(d);
+
+    emit_byte(0x66);
+    if (acc_form) {
+        emit_byte(0x2d);
+    }
+    else {
+        emit_byte(short_imm ? 0x83 : 0x81);
+        emit_byte(0xe8+d);
+    }
+
+    if (short_imm)
+        emit_byte(i);
+    else
+        emit_word(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
+
+
+LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+{
+ emit_byte(0x8b);
+ emit_byte(0x05+8*d);
+ emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
+{
+ emit_byte(0xc7);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_long(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+{
+ emit_byte(0x66);
+ emit_byte(0xc7);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_word(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+{
+ emit_byte(0xc6);
+ emit_byte(0x05);
+ emit_long(d);
+ emit_byte(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+
+/*
+ * Rotate the byte at absolute address d left by i bits.
+ * Uses the count-less 0xD0 form when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC0 followed by the immediate count.
+ */
+LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd0 : 0xc0);
+    emit_byte(0x05);
+    emit_long(d);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
+
+/*
+ * Rotate byte register r left by i bits.
+ * 0xD0 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC0 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd0 : 0xc0);
+    emit_byte(0xc0+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xc0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
+
+/*
+ * Rotate 32-bit register r left by i bits.
+ * 0xD1 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC1 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd1 : 0xc1);
+    emit_byte(0xc0+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
+
+/* Rotate 32-bit register d left by a register count (0xD3 /0).  Only d is
+ * encoded; r is never emitted because this opcode takes its count from CL,
+ * so the caller presumably guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xc0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
+
+/* Rotate 16-bit register d left by a register count (0x66 0xD3 /0).  r is
+ * not encoded; the opcode takes its count from CL, so the caller presumably
+ * guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xc0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
+
+/* Rotate byte register d left by a register count (0xD2 /0).  r is not
+ * encoded; the opcode takes its count from CL, so the caller presumably
+ * guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xc0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
+
+/* Shift 32-bit register d left by a register count (0xD3 /4).  r is not
+ * encoded; the opcode takes its count from CL, so the caller presumably
+ * guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
+
+/* Shift 16-bit register d left by a register count (0x66 0xD3 /4).  r is
+ * not encoded; the opcode takes its count from CL, so the caller presumably
+ * guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
+
+/* Shift byte register d left by a register count (0xD2 /4).  r is not
+ * encoded; the opcode takes its count from CL, so the caller presumably
+ * guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xe0+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
+
+/*
+ * Rotate byte register r right by i bits.
+ * 0xD0 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC0 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd0 : 0xc0);
+    emit_byte(0xc8+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xc8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
+
+// gb-- used for making an fpcr value in compemu_fpp.cpp
+LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
+{
+ emit_byte(0x0b);
+ emit_byte(0x05+8*d);
+ emit_long(s);
+}
+LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
+
+/*
+ * Rotate 32-bit register r right by i bits.
+ * 0xD1 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC1 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd1 : 0xc1);
+    emit_byte(0xc8+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
+
+/* Rotate 32-bit register d right by a register count (0xD3 /1).  r is not
+ * encoded; the opcode takes its count from CL, so the caller presumably
+ * guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
+
+/* Rotate 16-bit register d right by a register count (0x66 0xD3 /1).  r is
+ * not encoded; the opcode takes its count from CL, so the caller presumably
+ * guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
+
+/* Rotate byte register d right by a register count (0xD2 /1).  r is not
+ * encoded; the opcode takes its count from CL, so the caller presumably
+ * guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xc8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
+
+/* Logical right shift of 32-bit register d by a register count (0xD3 /5).
+ * r is not encoded; the opcode takes its count from CL, so the caller
+ * presumably guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
+
+/* Logical right shift of 16-bit register d by a register count
+ * (0x66 0xD3 /5).  r is not encoded; the opcode takes its count from CL, so
+ * the caller presumably guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
+
+/* Logical right shift of byte register d by a register count (0xD2 /5).
+ * r is not encoded; the opcode takes its count from CL, so the caller
+ * presumably guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xe8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
+
+/* Arithmetic right shift of 32-bit register d by a register count
+ * (0xD3 /7).  r is not encoded; the opcode takes its count from CL, so the
+ * caller presumably guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
+{
+ emit_byte(0xd3);
+ emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
+
+/* Arithmetic right shift of 16-bit register d by a register count
+ * (0x66 0xD3 /7).  r is not encoded; the opcode takes its count from CL, so
+ * the caller presumably guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
+{
+ emit_byte(0x66);
+ emit_byte(0xd3);
+ emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
+
+/* Arithmetic right shift of byte register d by a register count (0xD2 /7).
+ * r is not encoded; the opcode takes its count from CL, so the caller
+ * presumably guarantees r is CL -- TODO confirm. */
+LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+{
+ emit_byte(0xd2);
+ emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+
+/*
+ * Shift 32-bit register r left by i bits.
+ * 0xD1 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC1 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd1 : 0xc1);
+    emit_byte(0xe0+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xe0+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+
+/*
+ * Shift byte register r left by i bits.
+ * 0xD0 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC0 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd0 : 0xc0);
+    emit_byte(0xe0+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+
+/*
+ * Logical right shift of 32-bit register r by i bits.
+ * 0xD1 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC1 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd1 : 0xc1);
+    emit_byte(0xe8+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xe8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+
+/*
+ * Logical right shift of byte register r by i bits.
+ * 0xD0 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC0 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd0 : 0xc0);
+    emit_byte(0xe8+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+
+/*
+ * Arithmetic right shift of 32-bit register r by i bits.
+ * 0xD1 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC1 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd1 : 0xc1);
+    emit_byte(0xf8+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
+{
+ emit_byte(0x66);
+ emit_byte(0xc1);
+ emit_byte(0xf8+r);
+ emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
+
+/*
+ * Arithmetic right shift of byte register r by i bits.
+ * 0xD0 (no count byte) when i == 1 and optimize_shift_once is on,
+ * otherwise 0xC0 with the count as a trailing immediate.
+ */
+LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
+{
+    const int by_one = optimize_shift_once && (i == 1);
+
+    emit_byte(by_one ? 0xd0 : 0xc0);
+    emit_byte(0xf8+r);
+    if (!by_one)
+        emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
+
+/* Emit SAHF (0x9E).  dummy_ah is never encoded; the instruction implicitly
+ * reads AH, so the parameter presumably exists only to tell the register
+ * allocator that AH is consumed -- TODO confirm. */
+LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
+{
+ emit_byte(0x9e);
+}
+LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
+
+/* Emit CPUID (0x0F 0xA2).  dummy_eax is never encoded; the instruction
+ * implicitly uses EAX, so the parameter presumably only informs the
+ * register allocator -- TODO confirm. */
+LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
+{
+ emit_byte(0x0f);
+ emit_byte(0xa2);
+}
+LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
+
+/* Emit LAHF (0x9F).  dummy_ah is never encoded; the instruction implicitly
+ * writes AH, so the parameter presumably exists only to tell the register
+ * allocator that AH is produced -- TODO confirm. */
+LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
+{
+ emit_byte(0x9f);
+}
+LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
+
+LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
+{
+ emit_byte(0x0f);
+ emit_byte(0x90+cc);
+ emit_byte(0xc0+d);
+}
+LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
+
+LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
+{
+ emit_byte(0x0f);
+ emit_byte(0x90+cc);
+ emit_byte(0x05);
+ emit_long(d);
+}
+LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
+
+/*
+ * Conditional byte move: d <- s when condition code cc holds.
+ *
+ * No CMOV form is used here; the move is always emitted as a short Jcc on
+ * the inverted condition (cc^1) that skips a register-to-register byte MOV
+ * (0x88 /r).  The MOV is exactly two bytes long, so the jump displacement
+ * must be 2; a larger value would land inside the following instruction.
+ */
+LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
+{
+    /* replacement using branch and mov */
+    int uncc=(cc^1);
+    emit_byte(0x70+uncc);
+    emit_byte(2);  /* skip the 2-byte mov below if not cc=true */
+    emit_byte(0x88);
+    emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
+
+/*
+ * Conditional 16-bit move: d <- s when condition code cc holds.
+ *
+ * Without CMOV support a short Jcc on the inverted condition (cc^1) skips a
+ * 3-byte "66 89 /r" register move; with CMOV support a single
+ * "66 0F 4x /r" instruction is emitted.
+ */
+LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
+{
+    if (!have_cmov) {
+        /* branch over a plain mov when the condition is false */
+        const int inverted = cc ^ 1;
+        emit_byte(0x70+inverted);
+        emit_byte(3);              /* length of the mov that follows */
+        emit_byte(0x66);
+        emit_byte(0x89);
+        emit_byte(0xc0+8*s+d);
+    }
+    else {
+        emit_byte(0x66);
+        emit_byte(0x0f);
+        emit_byte(0x40+cc);
+        emit_byte(0xc0+8*d+s);
+    }
+}
+LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
+
+/*
+ * Conditional 32-bit move: d <- s when condition code cc holds.
+ *
+ * Without CMOV support a short Jcc on the inverted condition (cc^1) skips a
+ * 2-byte "89 /r" register move; with CMOV support a single "0F 4x /r"
+ * instruction is emitted.
+ */
+LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
+{
+    if (!have_cmov) {
+        /* branch over a plain mov when the condition is false */
+        const int inverted = cc ^ 1;
+        emit_byte(0x70+inverted);
+        emit_byte(2);              /* length of the mov that follows */
+        emit_byte(0x89);
+        emit_byte(0xc0+8*s+d);
+    }
+    else {
+        emit_byte(0x0f);
+        emit_byte(0x40+cc);
+        emit_byte(0xc0+8*d+s);
+    }
+}
+LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
+
+LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xbc);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xbf);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
+
+LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xbe);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xb7);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
+
+LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xb6);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
+{
+ emit_byte(0x0f);
+ emit_byte(0xaf);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
+
+/*
+ * Signed widening multiply (0xF7 /5 with r/m = register 2).
+ *
+ * The instruction has fixed operands, so d and s must already be
+ * MUL_NREG1 and MUL_NREG2; any other pair is a caller error.  A diagnostic
+ * is printed before aborting, matching raw_mul_64_32.
+ */
+LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
+{
+    if (d!=MUL_NREG1 || s!=MUL_NREG2) {
+        printf("Bad register in IMUL: d=%d, s=%d\n",d,s);
+        abort();
+    }
+    emit_byte(0xf7);
+    emit_byte(0xea);
+}
+LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
+{
+ if (d!=MUL_NREG1 || s!=MUL_NREG2) {
+ printf("Bad register in MUL: d=%d, s=%d\n",d,s);
+ abort();
+ }
+ emit_byte(0xf7);
+ emit_byte(0xe2);
+}
+LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
+
+/*
+ * Deliberately unsupported: the function aborts immediately.  The three
+ * emit_byte calls after abort() are unreachable; they repeat the
+ * "0F AF /r" sequence that raw_imul_32_32 emits.
+ */
+LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
+{
+ abort(); /* %^$&%^$%#^ x86! */
+ emit_byte(0x0f);
+ emit_byte(0xaf);
+ emit_byte(0xc0+8*d+s);
+}
+LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
+{
+ emit_byte(0x88);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
+{
+ emit_byte(0x66);
+ emit_byte(0x89);
+ emit_byte(0xc0+8*s+d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
+
+/*
+ * 32-bit load: d <- [baser + index*factor]  (opcode 0x8B with SIB byte).
+ *
+ * factor must be 1, 2, 4 or 8; anything else aborts.  When baser is
+ * register 5 (EBP) the ModRM byte gets 0x40 added and a zero displacement
+ * byte is appended.
+ */
+LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    const int ebp_mod = (baser == 5) ? 0x40 : 0x00;
+
+    emit_byte(0x8b);
+    emit_byte(0x04 + 8*d + ebp_mod);
+    emit_byte((scale << 6) + 8*index + baser);
+    if (ebp_mod != 0)
+        emit_byte(0x00);
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+
+/*
+ * 16-bit load: d <- [baser + index*factor]  (0x66 prefix, opcode 0x8B,
+ * SIB byte).
+ *
+ * factor must be 1, 2, 4 or 8; anything else aborts.  When baser is
+ * register 5 (EBP) the ModRM byte gets 0x40 added and a zero displacement
+ * byte is appended.
+ */
+LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    const int ebp_mod = (baser == 5) ? 0x40 : 0x00;
+
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(0x04 + 8*d + ebp_mod);
+    emit_byte((scale << 6) + 8*index + baser);
+    if (ebp_mod != 0)
+        emit_byte(0x00);
+}
+LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+
+/*
+ * 8-bit load: d <- [baser + index*factor]  (opcode 0x8A with SIB byte).
+ *
+ * factor must be 1, 2, 4 or 8; anything else aborts.  When baser is
+ * register 5 (EBP) the ModRM byte gets 0x40 added and a zero displacement
+ * byte is appended.
+ */
+LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    const int ebp_mod = (baser == 5) ? 0x40 : 0x00;
+
+    emit_byte(0x8a);
+    emit_byte(0x04 + 8*d + ebp_mod);
+    emit_byte((scale << 6) + 8*index + baser);
+    if (ebp_mod != 0)
+        emit_byte(0x00);
+}
+LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+
+/*
+ * 32-bit store: [baser + index*factor] <- s  (opcode 0x89 with SIB byte).
+ *
+ * factor must be 1, 2, 4 or 8; anything else aborts.  When baser is
+ * register 5 (EBP) the ModRM byte gets 0x40 added and a zero displacement
+ * byte is appended.
+ */
+LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    const int ebp_mod = (baser == 5) ? 0x40 : 0x00;
+
+    emit_byte(0x89);
+    emit_byte(0x04 + 8*s + ebp_mod);
+    emit_byte((scale << 6) + 8*index + baser);
+    if (ebp_mod != 0)
+        emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+
+/*
+ * 16-bit store: [baser + index*factor] <- s  (0x66 prefix, opcode 0x89,
+ * SIB byte).
+ *
+ * factor must be 1, 2, 4 or 8; anything else aborts.  When baser is
+ * register 5 (EBP) the ModRM byte gets 0x40 added and a zero displacement
+ * byte is appended.
+ */
+LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    const int ebp_mod = (baser == 5) ? 0x40 : 0x00;
+
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(0x04 + 8*s + ebp_mod);
+    emit_byte((scale << 6) + 8*index + baser);
+    if (ebp_mod != 0)
+        emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+
+/*
+ * 8-bit store: [baser + index*factor] <- s  (opcode 0x88 with SIB byte).
+ *
+ * factor must be 1, 2, 4 or 8; anything else aborts.  When baser is
+ * register 5 (EBP) the ModRM byte gets 0x40 added and a zero displacement
+ * byte is appended.
+ */
+LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    const int ebp_mod = (baser == 5) ? 0x40 : 0x00;
+
+    emit_byte(0x88);
+    emit_byte(0x04 + 8*s + ebp_mod);
+    emit_byte((scale << 6) + 8*index + baser);
+    if (ebp_mod != 0)
+        emit_byte(0x00);
+}
+LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+
+/*
+ * 32-bit store with displacement: [base + baser + index*factor] <- s
+ * (opcode 0x89, ModRM 0x84+8*s, SIB byte, then base as a 32-bit value).
+ * factor must be 1, 2, 4 or 8; anything else aborts.
+ */
+LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    emit_byte(0x89);
+    emit_byte(0x84 + 8*s);
+    emit_byte((scale << 6) + 8*index + baser);
+    emit_long(base);
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+
+/*
+ * 16-bit store with displacement: [base + baser + index*factor] <- s
+ * (0x66 prefix, opcode 0x89, ModRM 0x84+8*s, SIB byte, then base as a
+ * 32-bit value).  factor must be 1, 2, 4 or 8; anything else aborts.
+ */
+LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(0x84 + 8*s);
+    emit_byte((scale << 6) + 8*index + baser);
+    emit_long(base);
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+
+/*
+ * 8-bit store with displacement: [base + baser + index*factor] <- s
+ * (opcode 0x88, ModRM 0x84+8*s, SIB byte, then base as a 32-bit value).
+ * factor must be 1, 2, 4 or 8; anything else aborts.
+ */
+LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    emit_byte(0x88);
+    emit_byte(0x84 + 8*s);
+    emit_byte((scale << 6) + 8*index + baser);
+    emit_long(base);
+}
+LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+
+/*
+ * 32-bit load with displacement: d <- [base + baser + index*factor]
+ * (opcode 0x8B, ModRM 0x84+8*d, SIB byte, then base as a 32-bit value).
+ * factor must be 1, 2, 4 or 8; anything else aborts.
+ */
+LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    emit_byte(0x8b);
+    emit_byte(0x84 + 8*d);
+    emit_byte((scale << 6) + 8*index + baser);
+    emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+
+/*
+ * 16-bit load with displacement: d <- [base + baser + index*factor]
+ * (0x66 prefix, opcode 0x8B, ModRM 0x84+8*d, SIB byte, then base as a
+ * 32-bit value).  factor must be 1, 2, 4 or 8; anything else aborts.
+ */
+LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+    const int scale = (factor == 1) ? 0 :
+                      (factor == 2) ? 1 :
+                      (factor == 4) ? 2 :
+                      (factor == 8) ? 3 : -1;
+    if (scale < 0)
+        abort();
+
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(0x84 + 8*d);
+    emit_byte((scale << 6) + 8*index + baser);
+    emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+
+/* Emit an 8-bit load (opcode 0x8a) into register d from the address
+   base + baser + index*factor.  factor must be 1, 2, 4 or 8; any other
+   value aborts. */
+LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+    int scale = 0;
+
+    /* Find the 2-bit scale code such that (1 << scale) == factor. */
+    while (scale < 4 && (1 << scale) != factor)
+        scale++;
+    if (scale == 4)
+        abort();
+
+    emit_byte(0x8a);
+    emit_byte(0x84 + 8 * d);                      /* reg field = d, SIB + disp32 follow */
+    emit_byte(baser + 8 * index + 0x40 * scale);  /* SIB: scale, index, base */
+    emit_long(base);
+}
+LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+
+/* Emit a 32-bit load (opcode 0x8b) into register d from the address
+   base + index*factor (SIB form without a base register, 32-bit
+   displacement).  An unsupported factor is reported on stderr and aborts. */
+LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+{
+    int scale = 0;
+
+    /* Find the 2-bit scale code such that (1 << scale) == factor. */
+    while (scale < 4 && (1 << scale) != factor)
+        scale++;
+    if (scale == 4) {
+        fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
+        abort();
+    }
+
+    emit_byte(0x8b);
+    emit_byte(0x04 + 8 * d);                   /* reg field = d, SIB follows */
+    emit_byte(0x05 + 8 * index + 64 * scale);  /* SIB: scale, index, no base */
+    emit_long(base);
+}
+LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+
+/* Emit a conditional 32-bit load into register d from the address
+   base + index*factor, taken when condition code cond holds.
+   With have_cmov set a CMOVcc (0x0f, 0x40+cond) is emitted; otherwise a short
+   Jcc on the inverted condition skips a plain 7-byte MOV.
+   An unsupported factor is reported on stderr and aborts. */
+LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
+{
+ int fi;
+ switch(factor) {
+ case 1: fi=0; break;
+ case 2: fi=1; break;
+ case 4: fi=2; break;
+ case 8: fi=3; break;
+ default:
+ /* Name this function (not mov_l_rm_indexed) so the diagnostic points
+    at the real caller. */
+ fprintf(stderr,"Bad factor %d in cmov_l_rm_indexed!\n",factor);
+ abort();
+ }
+ if (have_cmov) {
+ emit_byte(0x0f);
+ emit_byte(0x40+cond);
+ emit_byte(0x04+8*d);
+ emit_byte(0x05+8*index+64*fi);
+ emit_long(base);
+ }
+ else { /* replacement using branch and mov */
+ int uncc=(cond^1); /* flipping bit 0 gives the opposite condition code */
+ emit_byte(0x70+uncc);
+ emit_byte(7); /* skip next 7 bytes if not cc=true */
+ emit_byte(0x8b);
+ emit_byte(0x04+8*d);
+ emit_byte(0x05+8*index+64*fi);
+ emit_long(base);
+ }
+}
+LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
+
+/* Emit a conditional 32-bit load into register d from the absolute address
+   mem, taken when condition code cond holds.  Without CMOV support a short
+   Jcc on the inverted condition skips a plain 6-byte MOV instead. */
+LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
+{
+    if (!have_cmov) {
+        /* replacement using branch and mov */
+        emit_byte(0x70 + (cond ^ 1));
+        emit_byte(6);               /* skip next 6 bytes if not cc=true */
+        emit_byte(0x8b);
+    }
+    else {
+        emit_byte(0x0f);
+        emit_byte(0x40 + cond);
+    }
+    /* Both forms end with the same ModRM byte and 32-bit address. */
+    emit_byte(0x05 + 8 * d);
+    emit_long(mem);
+}
+LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
+
+/* Emit a 32-bit load (opcode 0x8b) into d from the address s + offset, with
+   offset encoded as an 8-bit displacement.  The Dif() guard aborts on
+   offsets that isbyte() rejects. */
+LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
+{
+    const int modrm = 0x40 + 8 * d + s;  /* disp8 form: reg = d, base = s */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0x8b);
+    emit_byte(modrm);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
+
+/* Emit a 16-bit load (0x66 prefix, opcode 0x8b) into d from the address
+   s + offset, with offset encoded as an 8-bit displacement.  The Dif() guard
+   aborts on offsets that isbyte() rejects. */
+LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+{
+    const int modrm = 0x40 + 8 * d + s;  /* disp8 form: reg = d, base = s */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(modrm);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
+
+/* Emit an 8-bit load (opcode 0x8a) into d from the address s + offset, with
+   offset encoded as an 8-bit displacement.  The Dif() guard aborts on
+   offsets that isbyte() rejects. */
+LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+{
+    const int modrm = 0x40 + 8 * d + s;  /* disp8 form: reg = d, base = s */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0x8a);
+    emit_byte(modrm);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
+
+/* Emit a 32-bit load (opcode 0x8b) into d from the address s + offset, with
+   offset always encoded as a 32-bit displacement. */
+LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
+{
+    const int modrm = 0x80 + 8 * d + s;  /* disp32 form: reg = d, base = s */
+
+    emit_byte(0x8b);
+    emit_byte(modrm);
+    emit_long(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
+
+/* Emit a 16-bit load (0x66 prefix, opcode 0x8b) into d from the address
+   s + offset, with offset always encoded as a 32-bit displacement. */
+LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+{
+    const int modrm = 0x80 + 8 * d + s;  /* disp32 form: reg = d, base = s */
+
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(modrm);
+    emit_long(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
+
+/* Emit an 8-bit load (opcode 0x8a) into d from the address s + offset, with
+   offset always encoded as a 32-bit displacement. */
+LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+{
+    const int modrm = 0x80 + 8 * d + s;  /* disp32 form: reg = d, base = s */
+
+    emit_byte(0x8a);
+    emit_byte(modrm);
+    emit_long(offset);
+}
+LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
+
+/* Emit a store of the 32-bit immediate i (opcode 0xc7) to the address
+   d + offset, with offset encoded as an 8-bit displacement.  The Dif() guard
+   aborts on offsets that isbyte() rejects. */
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+{
+    const int modrm = 0x40 + d;  /* disp8 form, base = d */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0xc7);
+    emit_byte(modrm);
+    emit_byte(offset);
+    emit_long(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
+
+/* Emit a store of the 16-bit immediate i (0x66 prefix, opcode 0xc7) to the
+   address d + offset, with offset encoded as an 8-bit displacement.  The
+   Dif() guard aborts on offsets that isbyte() rejects. */
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+{
+    const int modrm = 0x40 + d;  /* disp8 form, base = d */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0x66);
+    emit_byte(0xc7);
+    emit_byte(modrm);
+    emit_byte(offset);
+    emit_word(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
+
+/* Emit a store of the 8-bit immediate i (opcode 0xc6) to the address
+   d + offset, with offset encoded as an 8-bit displacement.  The Dif() guard
+   aborts on offsets that isbyte() rejects. */
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+{
+    const int modrm = 0x40 + d;  /* disp8 form, base = d */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0xc6);
+    emit_byte(modrm);
+    emit_byte(offset);
+    emit_byte(i);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
+
+/* Emit a 32-bit store (opcode 0x89) of register s to the address d + offset,
+   with offset encoded as an 8-bit displacement.  The Dif() guard aborts on
+   offsets that isbyte() rejects. */
+LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+{
+    const int modrm = 0x40 + 8 * s + d;  /* disp8 form: reg = s, base = d */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0x89);
+    emit_byte(modrm);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
+
+/* Emit a 16-bit store (0x66 prefix, opcode 0x89) of register s to the
+   address d + offset, with offset encoded as an 8-bit displacement.  The
+   Dif() guard aborts on offsets that isbyte() rejects. */
+LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+{
+    const int modrm = 0x40 + 8 * s + d;  /* disp8 form: reg = s, base = d */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(modrm);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
+
+/* Emit an 8-bit store (opcode 0x88) of register s to the address d + offset,
+   with offset encoded as an 8-bit displacement.  The Dif() guard aborts on
+   offsets that isbyte() rejects. */
+LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+{
+    const int modrm = 0x40 + 8 * s + d;  /* disp8 form: reg = s, base = d */
+
+    Dif(!isbyte(offset)) abort();
+    emit_byte(0x88);
+    emit_byte(modrm);
+    emit_byte(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
+
+/* Emit LEA (opcode 0x8d) computing d = s + offset.  The 8-bit displacement
+   form is used when optimize_imm8 is set and isbyte(offset) holds; otherwise
+   the 32-bit displacement form is emitted. */
+LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+{
+    const int short_disp = optimize_imm8 && isbyte(offset);
+
+    emit_byte(0x8d);
+    if (short_disp) {
+        emit_byte(0x40 + 8 * d + s);
+        emit_byte(offset);
+    }
+    else {
+        emit_byte(0x80 + 8 * d + s);
+        emit_long(offset);
+    }
+}
+LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
+
+/* Emit LEA (opcode 0x8d) computing d = s + index*factor + offset.
+   factor must be 1, 2, 4 or 8 (anything else aborts).  The 8-bit displacement
+   form is used when optimize_imm8 is set and isbyte(offset) holds. */
+LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+{
+    int scale = 0;
+    int sib;
+
+    /* Find the 2-bit scale code such that (1 << scale) == factor. */
+    while (scale < 4 && (1 << scale) != factor)
+        scale++;
+    if (scale == 4)
+        abort();
+
+    sib = 0x40 * scale + 8 * index + s;
+
+    emit_byte(0x8d);
+    if (optimize_imm8 && isbyte(offset)) {
+        emit_byte(0x44 + 8 * d);
+        emit_byte(sib);
+        emit_byte(offset);
+    }
+    else {
+        emit_byte(0x84 + 8 * d);
+        emit_byte(sib);
+        emit_long(offset);
+    }
+}
+LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+
+/* Emit LEA (opcode 0x8d) computing d = s + index*factor.
+   factor must be 1, 2, 4 or 8 (anything else aborts).  When s is register 5
+   the 8-bit displacement form is selected and a zero displacement byte is
+   appended. */
+LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+{
+    const int base_is_ebp = (s == 5);
+    int scale = 0;
+
+    /* Find the 2-bit scale code such that (1 << scale) == factor. */
+    while (scale < 4 && (1 << scale) != factor)
+        scale++;
+    if (scale == 4)
+        abort();
+
+    emit_byte(0x8d);
+    emit_byte(0x04 + 8 * d + (base_is_ebp ? 0x40 : 0));
+    emit_byte(0x40 * scale + 8 * index + s);
+    if (base_is_ebp)
+        emit_byte(0);
+}
+LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+
+/* Emit a 32-bit store (opcode 0x89) of register s to the address d + offset.
+   The 8-bit displacement form is used when optimize_imm8 is set and
+   isbyte(offset) holds; otherwise a 32-bit displacement is emitted. */
+LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+{
+    const int short_disp = optimize_imm8 && isbyte(offset);
+
+    emit_byte(0x89);
+    if (short_disp) {
+        emit_byte(0x40 + 8 * s + d);
+        emit_byte(offset);
+    }
+    else {
+        emit_byte(0x80 + 8 * s + d);
+        emit_long(offset);
+    }
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
+
+/* Emit a 16-bit store (0x66 prefix, opcode 0x89) of register s to the
+   address d + offset, always using a 32-bit displacement. */
+LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+{
+    const int modrm = 0x80 + 8 * s + d;  /* disp32 form: reg = s, base = d */
+
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(modrm);
+    emit_long(offset);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+/* Emit an 8-bit store (opcode 0x88) of register s to the address d + offset.
+   The 8-bit displacement form is used when optimize_imm8 is set and
+   isbyte(offset) holds; otherwise a 32-bit displacement is emitted. */
+LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+{
+    const int short_disp = optimize_imm8 && isbyte(offset);
+
+    emit_byte(0x88);
+    if (short_disp) {
+        emit_byte(0x40 + 8 * s + d);
+        emit_byte(offset);
+    }
+    else {
+        emit_byte(0x80 + 8 * s + d);
+        emit_long(offset);
+    }
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+/* Emit BSWAP on 32-bit register r (two bytes: 0x0f, 0xc8+r). */
+LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+{
+    const int opcode2 = 0xc8 + r;  /* register number is folded into the opcode */
+
+    emit_byte(0x0f);
+    emit_byte(opcode2);
+}
+LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+
+/* Swap the two bytes of 16-bit register r by emitting a 16-bit rotate left
+   by 8 (0x66, 0xc1, 0xc0+r, 0x08). */
+LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+{
+    const int modrm = 0xc0 + r;  /* register-direct operand r */
+
+    emit_byte(0x66);
+    emit_byte(0xc1);
+    emit_byte(modrm);
+    emit_byte(0x08);             /* rotate count */
+}
+LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+
+/* Emit a 32-bit register-to-register move d = s (opcode 0x89). */
+LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x89);
+    emit_byte(modrm);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+
+/* Emit a 32-bit store (opcode 0x89) of register s to the 32-bit address d. */
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+{
+    const int modrm = 0x05 + 8 * s;  /* reg = s, 32-bit address follows */
+
+    emit_byte(0x89);
+    emit_byte(modrm);
+    emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+
+/* Emit a 16-bit store (0x66 prefix, opcode 0x89) of register s to the 32-bit
+   address d. */
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+{
+    const int modrm = 0x05 + 8 * s;  /* reg = s, 32-bit address follows */
+
+    emit_byte(0x66);
+    emit_byte(0x89);
+    emit_byte(modrm);
+    emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+
+/* Emit a 16-bit load (0x66 prefix, opcode 0x8b) into register d from the
+   32-bit address s. */
+LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+{
+    const int modrm = 0x05 + 8 * d;  /* reg = d, 32-bit address follows */
+
+    emit_byte(0x66);
+    emit_byte(0x8b);
+    emit_byte(modrm);
+    emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+
+/* Emit an 8-bit store (opcode 0x88) of register s to the 32-bit address d.
+   Only the low four bits of s are encoded. */
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+{
+    /* XXX this handles %ah case (defined as 0x10+4) and others */
+    const int modrm = 0x05 + 8 * (s & 0xf);
+
+    emit_byte(0x88);
+    emit_byte(modrm);
+    emit_long(d);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+
+/* Emit an 8-bit load (opcode 0x8a) into register d from the 32-bit address s. */
+LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+{
+    const int modrm = 0x05 + 8 * d;  /* reg = d, 32-bit address follows */
+
+    emit_byte(0x8a);
+    emit_byte(modrm);
+    emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+
+/* Emit a load of the 32-bit immediate s into register d (opcode 0xb8+d). */
+LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+{
+    const int opcode = 0xb8 + d;  /* register number is folded into the opcode */
+
+    emit_byte(opcode);
+    emit_long(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+
+/* Emit a load of the 16-bit immediate s into register d (0x66 prefix,
+   opcode 0xb8+d). */
+LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+{
+    const int opcode = 0xb8 + d;  /* register number is folded into the opcode */
+
+    emit_byte(0x66);
+    emit_byte(opcode);
+    emit_word(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+
+/* Emit a load of the 8-bit immediate s into register d (opcode 0xb0+d). */
+LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+{
+    const int opcode = 0xb0 + d;  /* register number is folded into the opcode */
+
+    emit_byte(opcode);
+    emit_byte(s);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+
+/* Emit a 32-bit add-with-carry of immediate s to the memory operand at
+   address d (opcode 0x81, ModRM 0x15), always with a 32-bit immediate. */
+LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+{
+    emit_byte(0x81);
+    emit_byte(0x15);  /* ADC group member, 32-bit address follows */
+    emit_long(d);     /* address */
+    emit_long(s);     /* immediate */
+}
+LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+
+/* Emit a 32-bit add of immediate s to the memory operand at address d.
+   The sign-extended 8-bit immediate form (opcode 0x83) is used when
+   optimize_imm8 is set and isbyte(s) holds; otherwise opcode 0x81 with a
+   32-bit immediate. */
+LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
+{
+    const int short_imm = optimize_imm8 && isbyte(s);
+
+    emit_byte(short_imm ? 0x83 : 0x81);
+    emit_byte(0x05);
+    emit_long(d);
+    if (short_imm)
+        emit_byte(s);
+    else
+        emit_long(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
+
+/* Emit a 16-bit add of immediate s to the memory operand at address d
+   (0x66 prefix, opcode 0x81, 16-bit immediate). */
+LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
+{
+    emit_byte(0x66);  /* 16-bit operand size */
+    emit_byte(0x81);
+    emit_byte(0x05);  /* ADD group member, 32-bit address follows */
+    emit_long(d);     /* address */
+    emit_word(s);     /* immediate */
+}
+LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
+
+/* Emit an 8-bit add of immediate s to the memory operand at address d
+   (opcode 0x80, 8-bit immediate). */
+LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
+{
+    emit_byte(0x80);
+    emit_byte(0x05);  /* ADD group member, 32-bit address follows */
+    emit_long(d);     /* address */
+    emit_byte(s);     /* immediate */
+}
+LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
+
+/* Emit a 32-bit TEST of register d against immediate i.  The one-byte
+   accumulator opcode 0xa9 is used when optimize_accum is set and isaccum(d)
+   holds; otherwise the general form 0xf7, 0xc0+d. */
+LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
+{
+    if (!(optimize_accum && isaccum(d))) {
+        emit_byte(0xf7);
+        emit_byte(0xc0 + d);
+    }
+    else
+        emit_byte(0xa9);
+    emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
+
+/* Emit a 32-bit TEST of register d against register s (opcode 0x85). */
+LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x85);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
+
+/* Emit a 16-bit TEST of register d against register s (0x66 prefix,
+   opcode 0x85). */
+LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x85);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
+
+/* Emit an 8-bit TEST of register d against register s (opcode 0x84). */
+LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x84);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
+
+/* Emit a 32-bit XOR of register d with immediate i (opcode 0x81, ModRM
+   0xf0+d), always with a 32-bit immediate. */
+LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
+{
+    const int modrm = 0xf0 + d;  /* XOR group member, register-direct d */
+
+    emit_byte(0x81);
+    emit_byte(modrm);
+    emit_long(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
+
+/* Emit a 32-bit AND of register d with immediate i.  Three encodings are
+   chosen from, in order: 8-bit immediate (0x83) when optimize_imm8 is set and
+   isbyte(i) holds; accumulator short form (0x25) when optimize_accum is set
+   and isaccum(d) holds; otherwise the general form (0x81, 0xe0+d). */
+LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
+{
+    if (optimize_imm8 && isbyte(i)) {
+        emit_byte(0x83);
+        emit_byte(0xe0 + d);
+        emit_byte(i);
+    }
+    else if (optimize_accum && isaccum(d)) {
+        emit_byte(0x25);
+        emit_long(i);
+    }
+    else {
+        emit_byte(0x81);
+        emit_byte(0xe0 + d);
+        emit_long(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
+
+/* Emit a 16-bit AND of register d with immediate i (0x66 prefix).  Encoding
+   choice, in order: 8-bit immediate (0x83) when optimize_imm8 is set and
+   isbyte(i) holds; accumulator short form (0x25) when optimize_accum is set
+   and isaccum(d) holds; otherwise the general form (0x81, 0xe0+d). */
+LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
+{
+    emit_byte(0x66);  /* 16-bit operand size */
+    if (optimize_imm8 && isbyte(i)) {
+        emit_byte(0x83);
+        emit_byte(0xe0 + d);
+        emit_byte(i);
+    }
+    else if (optimize_accum && isaccum(d)) {
+        emit_byte(0x25);
+        emit_word(i);
+    }
+    else {
+        emit_byte(0x81);
+        emit_byte(0xe0 + d);
+        emit_word(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
+
+/* Emit a 32-bit AND of register d with register s (opcode 0x21). */
+LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x21);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
+
+/* Emit a 16-bit AND of register d with register s (0x66 prefix, opcode 0x21). */
+LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x21);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
+
+/* Emit an 8-bit AND of register d with register s (opcode 0x20). */
+LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x20);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
+
+/* Emit a 32-bit OR of register d with immediate i.  Encoding choice, in
+   order: 8-bit immediate (0x83) when optimize_imm8 is set and isbyte(i)
+   holds; accumulator short form (0x0d) when optimize_accum is set and
+   isaccum(d) holds; otherwise the general form (0x81, 0xc8+d). */
+LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
+{
+    if (optimize_imm8 && isbyte(i)) {
+        emit_byte(0x83);
+        emit_byte(0xc8 + d);
+        emit_byte(i);
+    }
+    else if (optimize_accum && isaccum(d)) {
+        emit_byte(0x0d);
+        emit_long(i);
+    }
+    else {
+        emit_byte(0x81);
+        emit_byte(0xc8 + d);
+        emit_long(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
+
+/* Emit a 32-bit OR of register d with register s (opcode 0x09). */
+LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x09);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
+
+/* Emit a 16-bit OR of register d with register s (0x66 prefix, opcode 0x09). */
+LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x09);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
+
+/* Emit an 8-bit OR of register d with register s (opcode 0x08). */
+LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x08);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
+
+/* Emit a 32-bit add-with-carry of register s into register d (opcode 0x11). */
+LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x11);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
+
+/* Emit a 16-bit add-with-carry of register s into register d (0x66 prefix,
+   opcode 0x11). */
+LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x11);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
+
+/* Emit an 8-bit add-with-carry of register s into register d (opcode 0x10). */
+LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x10);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
+
+/* Emit a 32-bit add of register s into register d (opcode 0x01). */
+LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x01);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
+
+/* Emit a 16-bit add of register s into register d (0x66 prefix, opcode 0x01). */
+LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x01);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
+
+/* Emit an 8-bit add of register s into register d (opcode 0x00). */
+LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x00);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
+
+/* Emit a 32-bit subtract of immediate i from register d.  Encoding choice, in
+   order: 8-bit immediate (0x83) whenever isbyte(i) holds (optimize_imm8 is
+   not consulted here); accumulator short form (0x2d) when optimize_accum is
+   set and isaccum(d) holds; otherwise the general form (0x81, 0xe8+d). */
+LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+{
+    if (isbyte(i)) {
+        emit_byte(0x83);
+        emit_byte(0xe8 + d);
+        emit_byte(i);
+    }
+    else if (optimize_accum && isaccum(d)) {
+        emit_byte(0x2d);
+        emit_long(i);
+    }
+    else {
+        emit_byte(0x81);
+        emit_byte(0xe8 + d);
+        emit_long(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
+
+/* Emit an 8-bit subtract of immediate i from register d.  The accumulator
+   short form (0x2c) is used when optimize_accum is set and isaccum(d) holds;
+   otherwise the general form (0x80, 0xe8+d). */
+LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+{
+    if (!(optimize_accum && isaccum(d))) {
+        emit_byte(0x80);
+        emit_byte(0xe8 + d);
+    }
+    else
+        emit_byte(0x2c);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
+
+/* Emit a 32-bit add of immediate i to register d.  Encoding choice, in
+   order: 8-bit immediate (0x83) whenever isbyte(i) holds (optimize_imm8 is
+   not consulted here); accumulator short form (0x05) when optimize_accum is
+   set and isaccum(d) holds; otherwise the general form (0x81, 0xc0+d). */
+LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+{
+    if (isbyte(i)) {
+        emit_byte(0x83);
+        emit_byte(0xc0 + d);
+        emit_byte(i);
+    }
+    else if (optimize_accum && isaccum(d)) {
+        emit_byte(0x05);
+        emit_long(i);
+    }
+    else {
+        emit_byte(0x81);
+        emit_byte(0xc0 + d);
+        emit_long(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
+
+/* Emit a 16-bit add of immediate i to register d (0x66 prefix).  Encoding
+   choice, in order: 8-bit immediate (0x83) whenever isbyte(i) holds;
+   accumulator short form (0x05) when optimize_accum is set and isaccum(d)
+   holds; otherwise the general form (0x81, 0xc0+d). */
+LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+{
+    emit_byte(0x66);  /* 16-bit operand size */
+    if (isbyte(i)) {
+        emit_byte(0x83);
+        emit_byte(0xc0 + d);
+        emit_byte(i);
+    }
+    else if (optimize_accum && isaccum(d)) {
+        emit_byte(0x05);
+        emit_word(i);
+    }
+    else {
+        emit_byte(0x81);
+        emit_byte(0xc0 + d);
+        emit_word(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
+
+/* Emit an 8-bit add of immediate i to register d.  The accumulator short
+   form (0x04) is used when optimize_accum is set and isaccum(d) holds;
+   otherwise the general form (0x80, 0xc0+d). */
+LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+{
+    if (!(optimize_accum && isaccum(d))) {
+        emit_byte(0x80);
+        emit_byte(0xc0 + d);
+    }
+    else
+        emit_byte(0x04);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
+
+/* Emit a 32-bit subtract-with-borrow of register s from register d
+   (opcode 0x19). */
+LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x19);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
+
+/* Emit a 16-bit subtract-with-borrow of register s from register d
+   (0x66 prefix, opcode 0x19). */
+LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x19);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
+
+/* Emit an 8-bit subtract-with-borrow of register s from register d
+   (opcode 0x18). */
+LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x18);
+    emit_byte(modrm);
+}
+LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
+
+/* Emit a 32-bit subtract of register s from register d (opcode 0x29). */
+LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x29);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
+
+/* Emit a 16-bit subtract of register s from register d (0x66 prefix,
+   opcode 0x29). */
+LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x29);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+
+/* Emit an 8-bit subtract of register s from register d (opcode 0x28). */
+LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x28);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+
+/* Emit a 32-bit compare of register d with register s (opcode 0x39). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x39);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+
+/* Emit a 32-bit compare of register r with immediate i.  Encoding choice, in
+   order: 8-bit immediate (0x83) when optimize_imm8 is set and isbyte(i)
+   holds; accumulator short form (0x3d) when optimize_accum is set and
+   isaccum(r) holds; otherwise the general form (0x81, 0xf8+r). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+{
+    if (optimize_imm8 && isbyte(i)) {
+        emit_byte(0x83);
+        emit_byte(0xf8 + r);
+        emit_byte(i);
+    }
+    else if (optimize_accum && isaccum(r)) {
+        emit_byte(0x3d);
+        emit_long(i);
+    }
+    else {
+        emit_byte(0x81);
+        emit_byte(0xf8 + r);
+        emit_long(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+
+/* Emit a 16-bit compare of register d with register s (0x66 prefix,
+   opcode 0x39). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x39);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+
+/* Emit an 8-bit compare of the memory operand at address d with the 8-bit
+   immediate s (opcode 0x80, ModRM 0x3d, 32-bit address, 8-bit immediate). */
+LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
+{
+ emit_byte(0x80);
+ emit_byte(0x3d); /* CMP group member, 32-bit address follows */
+ emit_long(d);
+ emit_byte(s);
+}
+/* The closing marker must name the same function as its LOWFUNC. */
+LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
+
+/* Emit an 8-bit compare of register d with immediate i.  The accumulator
+   short form (0x3c) is used when optimize_accum is set and isaccum(d) holds;
+   otherwise the general form (0x80, 0xf8+d). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+{
+    if (!(optimize_accum && isaccum(d))) {
+        emit_byte(0x80);
+        emit_byte(0xf8 + d);
+    }
+    else
+        emit_byte(0x3c);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+
+/* Emit an 8-bit compare of register d with register s (opcode 0x38). */
+LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x38);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+
+/* Emit a 32-bit compare (opcode 0x39) between the memory operand at
+   offset + index*factor and register d.  factor must be 1, 2, 4 or 8; any
+   other value aborts. */
+LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+{
+    int scale = 0;
+
+    /* Find the 2-bit scale code such that (1 << scale) == factor. */
+    while (scale < 4 && (1 << scale) != factor)
+        scale++;
+    if (scale == 4)
+        abort();
+
+    emit_byte(0x39);
+    emit_byte(0x04 + 8 * d);                  /* reg field = d, SIB follows */
+    emit_byte(5 + 8 * index + 0x40 * scale);  /* SIB: scale, index, no base */
+    emit_long(offset);
+}
+LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+
+/* Emit a 32-bit XOR of register d with register s (opcode 0x31). */
+LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x31);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+
+/* Emit a 16-bit XOR of register d with register s (0x66 prefix, opcode 0x31). */
+LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x66);
+    emit_byte(0x31);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+
+/* Emit an 8-bit XOR of register d with register s (opcode 0x30). */
+LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+{
+    const int modrm = 0xc0 + 8 * s + d;  /* register-direct: reg = s, r/m = d */
+
+    emit_byte(0x30);
+    emit_byte(modrm);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+
+/* Emit a 32-bit subtract of immediate s from the memory operand at address d.
+   The 8-bit immediate form (opcode 0x83) is used when optimize_imm8 is set
+   and isbyte(s) holds; otherwise opcode 0x81 with a 32-bit immediate. */
+LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+{
+    const int short_imm = optimize_imm8 && isbyte(s);
+
+    emit_byte(short_imm ? 0x83 : 0x81);
+    emit_byte(0x2d);
+    emit_long(d);
+    if (short_imm)
+        emit_byte(s);
+    else
+        emit_long(s);
+}
+LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+
+/* Emit a 32-bit compare of the memory operand at address d with immediate s.
+   The 8-bit immediate form (opcode 0x83) is used when optimize_imm8 is set
+   and isbyte(s) holds; otherwise opcode 0x81 with a 32-bit immediate. */
+LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+{
+    const int short_imm = optimize_imm8 && isbyte(s);
+
+    emit_byte(short_imm ? 0x83 : 0x81);
+    emit_byte(0x3d);
+    emit_long(d);
+    if (short_imm)
+        emit_byte(s);
+    else
+        emit_long(s);
+}
+LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+
+/* Emit a 32-bit exchange of registers r1 and r2 (opcode 0x87). */
+LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+{
+    const int modrm = 0xc0 + 8 * r1 + r2;  /* register-direct: reg = r1, r/m = r2 */
+
+    emit_byte(0x87);
+    emit_byte(modrm);
+}
+LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+
+/* Emit an 8-bit exchange of registers r1 and r2 (opcode 0x86).  Only the low
+   four bits of each register number are encoded. */
+LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
+{
+ emit_byte(0x86);
+ emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
+}
+/* The closing marker must name the same function as its LOWFUNC. */
+LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
+
+/*************************************************************************
+ * FIXME: mem access modes probably wrong *
+ *************************************************************************/
+
+/* Emit PUSHF (single byte 0x9c): push the flags register on the stack. */
+LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
+{
+    const int opcode = 0x9c;
+
+    emit_byte(opcode);
+}
+LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
+
+/* Emit POPF (single byte 0x9d): pop the flags register from the stack. */
+LOWFUNC(WRITE,READ,0,raw_popfl,(void))
+{
+    const int opcode = 0x9d;
+
+    emit_byte(opcode);
+}
+LENDFUNC(WRITE,READ,0,raw_popfl,(void))
+
+/* Generate floating-point instructions */
+/* Emit a floating-point add with the memory operand at 32-bit address s
+   (bytes 0xdc, 0x05, then the address). */
+static inline void x86_fadd_m(MEMR s)
+{
+    static const int opcode = 0xdc;
+    static const int modrm = 0x05;  /* 32-bit address follows */
+
+    emit_byte(opcode);
+    emit_byte(modrm);
+    emit_long(s);
+}
+
+#endif
+
+/*************************************************************************
+ * Unoptimizable stuff --- jump *
+ *************************************************************************/
+
+/* Emit an indirect call through register r.  With USE_NEW_RTASM the CALLsr
+   macro is used; otherwise the bytes 0xff, 0xd0+r are emitted directly. */
+static __inline__ void raw_call_r(R4 r)
+{
+#if USE_NEW_RTASM
+    CALLsr(r);
+#else
+    const int modrm = 0xd0 + r;  /* register-direct call target */
+
+    emit_byte(0xff);
+    emit_byte(modrm);
+#endif
+}
+
+/* Emit an indirect call through the memory word at base + r*m.  With
+   USE_NEW_RTASM the CALLsm macro is used; otherwise the instruction is
+   encoded by hand and m must be 1, 2, 4 or 8 (anything else aborts). */
+static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
+{
+#if USE_NEW_RTASM
+    CALLsm(base, X86_NOREG, r, m);
+#else
+    int scale = 0;
+
+    /* Find the 2-bit scale code such that (1 << scale) == m. */
+    while (scale < 4 && (1u << scale) != m)
+        scale++;
+    if (scale == 4)
+        abort();
+
+    emit_byte(0xff);
+    emit_byte(0x14);                         /* call group member, SIB follows */
+    emit_byte(0x05 + 8 * r + 0x40 * scale);  /* SIB: scale, index, no base */
+    emit_long(base);
+#endif
+}
+
+/* Emit an indirect jump through register r.  With USE_NEW_RTASM the JMPsr
+   macro is used; otherwise the bytes 0xff, 0xe0+r are emitted directly. */
+static __inline__ void raw_jmp_r(R4 r)
+{
+#if USE_NEW_RTASM
+    JMPsr(r);
+#else
+    const int modrm = 0xe0 + r;  /* register-direct jump target */
+
+    emit_byte(0xff);
+    emit_byte(modrm);
+#endif
+}
+
+/* Emit an indirect jump through the memory word at base + r*m.  With
+   USE_NEW_RTASM the JMPsm macro is used; otherwise the instruction is
+   encoded by hand and m must be 1, 2, 4 or 8 (anything else aborts). */
+static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
+{
+#if USE_NEW_RTASM
+    JMPsm(base, X86_NOREG, r, m);
+#else
+    int scale = 0;
+
+    /* Find the 2-bit scale code such that (1 << scale) == m. */
+    while (scale < 4 && (1u << scale) != m)
+        scale++;
+    if (scale == 4)
+        abort();
+
+    emit_byte(0xff);
+    emit_byte(0x24);                         /* jump group member, SIB follows */
+    emit_byte(0x05 + 8 * r + 0x40 * scale);  /* SIB: scale, index, no base */
+    emit_long(base);
+#endif
+}
+
+/* Emit an indirect jump through the memory word at 32-bit address base
+   (bytes 0xff, 0x25, then the address). */
+static __inline__ void raw_jmp_m(uae_u32 base)
+{
+    static const int opcode = 0xff;
+    static const int modrm = 0x25;  /* jump group member, 32-bit address follows */
+
+    emit_byte(opcode);
+    emit_byte(modrm);
+    emit_long(base);
+}
+
+
+/* Emit a direct call to address t.  With USE_NEW_RTASM the CALLm macro is
+   used; otherwise opcode 0xe8 is followed by the 32-bit displacement from the
+   end of the instruction (current emit position + 4) to t. */
+static __inline__ void raw_call(uae_u32 t)
+{
+#if USE_NEW_RTASM
+ CALLm(t);
+#else
+ emit_byte(0xe8);
+ /* Cast the emit pointer through uintptr, as raw_jl/raw_jz/raw_jnz do,
+    instead of truncating it in the cast itself; the low 32 bits handed to
+    emit_long are unchanged. */
+ emit_long(t-(uintptr)target-4);
+#endif
+}
+
+/* Emit a direct jump to address t.  With USE_NEW_RTASM the JMPm macro is
+   used; otherwise opcode 0xe9 is followed by the 32-bit displacement from the
+   end of the instruction (current emit position + 4) to t. */
+static __inline__ void raw_jmp(uae_u32 t)
+{
+#if USE_NEW_RTASM
+ JMPm(t);
+#else
+ emit_byte(0xe9);
+ /* Cast the emit pointer through uintptr, as raw_jl/raw_jz/raw_jnz do,
+    instead of truncating it in the cast itself; the low 32 bits handed to
+    emit_long are unchanged. */
+ emit_long(t-(uintptr)target-4);
+#endif
+}
+
+/* Emit a near conditional jump to t with condition byte 0x8c (0x0f, 0x8c,
+   rel32).  The displacement is measured from the end of the instruction. */
+static __inline__ void raw_jl(uae_u32 t)
+{
+    static const int opcode2 = 0x8c;
+
+    emit_byte(0x0f);
+    emit_byte(opcode2);
+    /* target has advanced past the two opcode bytes; 4 more for the rel32 */
+    emit_long(t-(uintptr)target-4);
+}
+
+/* Emit a near conditional jump to t with condition byte 0x84 (0x0f, 0x84,
+   rel32).  The displacement is measured from the end of the instruction. */
+static __inline__ void raw_jz(uae_u32 t)
+{
+    static const int opcode2 = 0x84;
+
+    emit_byte(0x0f);
+    emit_byte(opcode2);
+    /* target has advanced past the two opcode bytes; 4 more for the rel32 */
+    emit_long(t-(uintptr)target-4);
+}
+
+/* Emit a near conditional jump to t with condition byte 0x85 (0x0f, 0x85,
+   rel32).  The displacement is measured from the end of the instruction. */
+static __inline__ void raw_jnz(uae_u32 t)
+{
+    static const int opcode2 = 0x85;
+
+    emit_byte(0x0f);
+    emit_byte(opcode2);
+    /* target has advanced past the two opcode bytes; 4 more for the rel32 */
+    emit_long(t-(uintptr)target-4);
+}
+
+/* Emit only the two opcode bytes (0x0f, 0x85) of the long-form JNZ; no
+   displacement is written here. */
+static __inline__ void raw_jnz_l_oponly(void)
+{
+    static const int opcode2 = 0x85;
+
+    emit_byte(0x0f);
+    emit_byte(opcode2);
+}
+
+/* Emit only the two opcode bytes (0x0f, 0x80+cc) of a long-form conditional
+   jump for condition code cc; no displacement is written here. */
+static __inline__ void raw_jcc_l_oponly(int cc)
+{
+    const int opcode2 = 0x80 + cc;
+
+    emit_byte(0x0f);
+    emit_byte(opcode2);
+}
+
+/* Emit only the opcode byte (0x75) of the short-form JNZ; no displacement is
+   written here. */
+static __inline__ void raw_jnz_b_oponly(void)
+{
+    static const int opcode = 0x75;
+
+    emit_byte(opcode);
+}
+
+/* Emit only the opcode byte (0x74) of the short-form JZ; no displacement is
+   written here. */
+static __inline__ void raw_jz_b_oponly(void)
+{
+    static const int opcode = 0x74;
+
+    emit_byte(opcode);
+}
+
+/* Emit only the opcode byte (0x70+cc) of a short-form conditional jump for
+   condition code cc; no displacement is written here. */
+static __inline__ void raw_jcc_b_oponly(int cc)
+{
+    const int opcode = 0x70 + cc;
+
+    emit_byte(opcode);
+}
+
+/* Emit only the opcode byte (0xe9) of the long-form unconditional jump; no
+   displacement is written here. */
+static __inline__ void raw_jmp_l_oponly(void)
+{
+    static const int opcode = 0xe9;
+
+    emit_byte(opcode);
+}
+
+/* Emit only the opcode byte (0xeb) of the short-form unconditional jump; no
+   displacement is written here. */
+static __inline__ void raw_jmp_b_oponly(void)
+{
+    static const int opcode = 0xeb;
+
+    emit_byte(opcode);
+}
+
+/* Emit a near return (single byte 0xc3). */
+static __inline__ void raw_ret(void)
+{
+    static const int opcode = 0xc3;
+
+    emit_byte(opcode);
+}
+
+/* Emit a one-byte no-op (0x90). */
+static __inline__ void raw_nop(void)
+{
+    static const int opcode = 0x90;
+
+    emit_byte(opcode);
+}
+
+/* Emit nbytes bytes of padding that execute as no-ops.
+   On x86_64 the padding is built from NOPs preceded by 0x66 prefixes; on
+   other targets it is taken from the fixed f32_* patterns below, 16 bytes at
+   a time, followed by one pattern for the remainder.
+   NOTE(review): negative nbytes is not handled -- presumably callers never
+   pass one; confirm. */
 static __inline__ void raw_emit_nop_filler(int nbytes)
+{
+ /* Source: GNU Binutils 2.12.90.0.15 */
+ /* Various efficient no-op patterns for aligning code labels.
+ Note: Don't try to assemble the instructions in the comments.
+ 0L and 0w are not legal. */
+ static const uae_u8 f32_1[] =
+ {0x90}; /* nop */
+ static const uae_u8 f32_2[] =
+ {0x89,0xf6}; /* movl %esi,%esi */
+ static const uae_u8 f32_3[] =
+ {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
+ static const uae_u8 f32_4[] =
+ {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
+ static const uae_u8 f32_5[] =
+ {0x90, /* nop */
+ 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
+ static const uae_u8 f32_6[] =
+ {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
+ static const uae_u8 f32_7[] =
+ {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
+ static const uae_u8 f32_8[] =
+ {0x90, /* nop */
+ 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
+ static const uae_u8 f32_9[] =
+ {0x89,0xf6, /* movl %esi,%esi */
+ 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
+ static const uae_u8 f32_10[] =
+ {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
+ 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
+ static const uae_u8 f32_11[] =
+ {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
+ 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
+ static const uae_u8 f32_12[] =
+ {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
+ 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
+ static const uae_u8 f32_13[] =
+ {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
+ 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
+ static const uae_u8 f32_14[] =
+ {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
+ 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
+ static const uae_u8 f32_15[] =
+ {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
+ 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
+ /* Same short jump as f32_15 with one extra trailing nop: the jump lands on
+    that last nop, giving a 16-byte filler. */
+ static const uae_u8 f32_16[] =
+ {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
+ 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
+ /* Indexed by (length - 1) for lengths 1..15. */
+ static const uae_u8 *const f32_patt[] = {
+ f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
+ f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
+ };
+ static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
+
+#if defined(__x86_64__)
+ /* The recommended way to pad 64bit code is to use NOPs preceded by
+ maximally four 0x66 prefixes. Balance the size of nops. */
+ if (nbytes == 0)
+ return;
+
+ int i;
+ int nnops = (nbytes + 3) / 4; /* number of padded NOP instructions */
+ int len = nbytes / nnops; /* base length of each instruction */
+ int remains = nbytes - nnops * len; /* instructions that get one extra byte */
+
+ /* The first `remains` instructions are len+1 bytes long ... */
+ for (i = 0; i < remains; i++) {
+ emit_block(prefixes, len);
+ raw_nop();
+ }
+ /* ... and the rest are len bytes long, so the total is exactly nbytes. */
+ for (; i < nnops; i++) {
+ emit_block(prefixes, len - 1);
+ raw_nop();
+ }
+#else
+ /* Whole 16-byte chunks first, then one pattern for the 1..15 byte rest. */
+ int nloops = nbytes / 16;
+ while (nloops-- > 0)
+ emit_block(f32_16, sizeof(f32_16));
+
+ nbytes %= 16;
+ if (nbytes)
+ emit_block(f32_patt[nbytes - 1], nbytes);
+#endif
+}
+
+
+/*************************************************************************
+ * Flag handling, to and fro UAE flag register *
+ *************************************************************************/
+
+/* Record that FLAGTMP no longer lives in native register r: mark it as being
+   in memory with no register assigned, and clear r's hold count.  Aborts if
+   r was not holding exactly one value. */
+static __inline__ void raw_flags_evicted(int r)
+{
+    live.state[FLAGTMP].status = INMEM;
+    live.state[FLAGTMP].realreg = -1;
+
+    /* We just "evicted" FLAGTMP, so it must have been r's only occupant. */
+    if (live.nat[r].nholds != 1)
+        abort();
+
+    live.nat[r].nholds = 0;
+}
+
+/* LAHF/SETcc variant: save the current flags into FLAGTMP's memory slot.
+   Condition 0 is stored via raw_setcc_m at byte 0 and the AH register at
+   byte 1, after which FLAGTMP is marked as evicted from r. */
+#define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */
+static __inline__ void raw_flags_to_reg_FLAGREG(int r)
+{
+ raw_lahf(0); /* Most flags in AH */
+ //raw_setcc(r,0); /* V flag in AL */
+ raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
+
+#if 1 /* Let's avoid those nasty partial register stalls */
+ //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
+ raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
+ raw_flags_evicted(r);
+#endif
+}
+
+/* LAHF/SETcc variant: restore the flags from register r.  An 8-bit compare of
+   r against -127 is emitted first, followed by raw_sahf(0). */
+#define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */
+static __inline__ void raw_reg_to_flags_FLAGREG(int r)
+{
+ raw_cmp_b_ri(r,-127); /* set V */
+ raw_sahf(0);
+}
+
+/* LAHF/SAHF variant: replace bit 14 of the LAHF image of the flags with the
+   inverse of bit 14 of the value s held on entry.  tmp is used as scratch and
+   s is overwritten. */
+#define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */
+static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
+{
+ raw_mov_l_rr(tmp,s); /* keep the incoming value of s */
+ raw_lahf(s); /* flags into ah */
+ raw_and_l_ri(s,0xffffbfff); /* clear bit 14 in the flag image */
+ raw_and_l_ri(tmp,0x00004000); /* isolate bit 14 of the saved value ... */
+ raw_xor_l_ri(tmp,0x00004000); /* ... and invert it */
+ raw_or_l(s,tmp);
+ raw_sahf(s);
+}
+
+/* The LAHF/SETcc flag scheme needs no run-time initialisation. */
+static __inline__ void raw_flags_init_FLAGREG(void)
+{
+}
+
+/* PUSHF/POPF variant: push the flags, pop them into register r, store r to
+   FLAGTMP's memory slot and mark FLAGTMP as evicted from r. */
+#define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */
+static __inline__ void raw_flags_to_reg_FLAGSTK(int r)
+{
+ raw_pushfl();
+ raw_pop_l_r(r);
+ raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
+ raw_flags_evicted(r);
+}
+
+/* PUSHF/POPF variant: restore the flags from register r by pushing r and
+   popping the flags. */
+#define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */
+static __inline__ void raw_reg_to_flags_FLAGSTK(int r)
+{
+ raw_push_l_r(r);
+ raw_popfl();
+}
+
+/* PUSHF/POPF variant: replace bit 6 of the flags register with the inverse of
+   bit 6 of the value s held on entry.  tmp is used as scratch and s is
+   overwritten. */
+#define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */
+static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp)
+{
+ raw_mov_l_rr(tmp,s); /* keep the incoming value of s */
+ raw_pushfl();
+ raw_pop_l_r(s); /* s = current flags */
+ raw_and_l_ri(s,0xffffffbf); /* clear bit 6 in the flag image */
+ raw_and_l_ri(tmp,0x00000040); /* isolate bit 6 of the saved value ... */
+ raw_xor_l_ri(tmp,0x00000040); /* ... and invert it */
+ raw_or_l(s,tmp);
+ raw_push_l_r(s);
+ raw_popfl(); /* write the merged image back to the flags */
+}
+
+/* The PUSHF/POPF flag scheme needs no run-time initialisation. */
+static __inline__ void raw_flags_init_FLAGSTK(void)
+{
+}
+
+#if defined(__x86_64__)
+/* Try to use the LAHF/SETO method on x86_64 since it is faster.
+ This can't be the default because some older CPUs don't support
+ LAHF/SAHF in long mode. */
+/* Register constraint for raw_flags_to_reg_FLAGGEN; set at run time by
+   raw_flags_init_FLAGGEN. */
+static int FLAG_NREG1_FLAGGEN = 0;
+/* Run-time selected variant: when have_lahf_lm is set, save the flags to
+   FLAGTMP's memory slot in the normal EFLAGS layout using SETcc and LAHF
+   (r must be 0); otherwise fall back to the PUSHF/POPF variant. */
+static __inline__ void raw_flags_to_reg_FLAGGEN(int r)
+{
+ if (have_lahf_lm) {
+ // NOTE: the interpreter uses the normal EFLAGS layout
+ // pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
+ // sahf/lahf CF(8) ZF(14) SF(15) OF( 0)
+ assert(r == 0);
+ raw_setcc(r,0); /* V flag in AL */
+ raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */
+ raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0);
+ raw_lahf(0); /* most flags in AH */
+ raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX);
+ raw_flags_evicted(r);
+ }
+ else
+ raw_flags_to_reg_FLAGSTK(r);
+}
+
+/* Register constraint for raw_reg_to_flags_FLAGGEN; set at run time by
+   raw_flags_init_FLAGGEN. */
+static int FLAG_NREG2_FLAGGEN = 0;
+/* Run-time selected variant: when have_lahf_lm is set, restore the flags from
+   register r by swapping byte registers 0 and AH_INDEX, emitting an 8-bit
+   compare against -120 and then SAHF; otherwise fall back to the PUSHF/POPF
+   variant. */
+static __inline__ void raw_reg_to_flags_FLAGGEN(int r)
+{
+ if (have_lahf_lm) {
+ raw_xchg_b_rr(0,AH_INDEX);
+ raw_cmp_b_ri(r,-120); /* set V */
+ raw_sahf(0);
+ }
+ else
+ raw_reg_to_flags_FLAGSTK(r);
+}
+
+/* Register constraint for raw_flags_set_zero_FLAGGEN. */
+static int FLAG_NREG3_FLAGGEN = 0;
+/* Run-time selected variant: dispatch to the LAHF/SAHF implementation when
+   have_lahf_lm is set, and to the PUSHF/POPF implementation otherwise. */
+static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp)
+{
+    if (!have_lahf_lm) {
+        raw_flags_set_zero_FLAGSTK(s, tmp);
+    }
+    else {
+        raw_flags_set_zero_FLAGREG(s, tmp);
+    }
+}
+
+/* Choose the register constraints for the run-time selected flag scheme:
+   copy the three FLAG_NREGn values of the LAHF/SAHF scheme when have_lahf_lm
+   is set, and those of the PUSHF/POPF scheme otherwise. */
+static __inline__ void raw_flags_init_FLAGGEN(void)
+{
+ if (have_lahf_lm) {
+ FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
+ FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
+ /* The third constraint goes into NREG3 (it used to overwrite NREG1,
+    leaving NREG3 at its initial value). */
+ FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGREG;
+ }
+ else {
+ FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
+ FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
+ FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGSTK;
+ }
+}
+#endif
+
+/* Pick which family of flag helpers the generic names below resolve to:
+   FLAGREG when SAHF_SETO_PROFITABLE is defined, FLAGGEN on x86_64, and
+   FLAGSTK otherwise. */
+#ifdef SAHF_SETO_PROFITABLE
+#define FLAG_SUFFIX FLAGREG
+#elif defined __x86_64__
+#define FLAG_SUFFIX FLAGGEN
+#else
+#define FLAG_SUFFIX FLAGSTK
+#endif
+
+/* Two-level paste so that FLAG_SUFFIX is expanded before it is glued onto
+   the name: FLAG_GLUE(x) yields x_<suffix>. */
+#define FLAG_GLUE_2(x, y) x ## _ ## y
+#define FLAG_GLUE_1(x, y) FLAG_GLUE_2(x, y)
+#define FLAG_GLUE(x) FLAG_GLUE_1(x, FLAG_SUFFIX)
+
+/* Generic names mapped onto the selected family. */
+#define raw_flags_init FLAG_GLUE(raw_flags_init)
+#define FLAG_NREG1 FLAG_GLUE(FLAG_NREG1)
+#define raw_flags_to_reg FLAG_GLUE(raw_flags_to_reg)
+#define FLAG_NREG2 FLAG_GLUE(FLAG_NREG2)
+#define raw_reg_to_flags FLAG_GLUE(raw_reg_to_flags)
+#define FLAG_NREG3 FLAG_GLUE(FLAG_NREG3)
+#define raw_flags_set_zero FLAG_GLUE(raw_flags_set_zero)
+
+/* Load the saved flags of virtual register r from its memory slot into the
+   native register target with a single 32-bit load.
+   Apparently, there are enough instructions between flag store and
+   flag reload to avoid the partial memory stall, so the two byte-sized loads
+   in the disabled branch are not needed. */
+static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
+{
+#if 1
+    const uintptr slot = (uintptr)live.state[r].mem;
+
+    raw_mov_l_rm(target, slot);
+#else
+    raw_mov_b_rm(target,(uintptr)live.state[r].mem);
+    raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
+#endif
+}
+
+/* FLAGX is byte sized, and we *do* write it at that size.
+   Load virtual register r from its memory slot into native register target,
+   using the narrowest load the target supports: byte if canbyte, else word
+   if canword, else a full 32-bit load. */
+static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
+{
+    const uintptr slot = (uintptr)live.state[r].mem;
+
+    if (live.nat[target].canbyte) {
+        raw_mov_b_rm(target, slot);
+    }
+    else if (live.nat[target].canword) {
+        raw_mov_w_rm(target, slot);
+    }
+    else {
+        raw_mov_l_rm(target, slot);
+    }
+}
+
+/* Emit a subtraction of off from the stack pointer register; emits nothing
+   when off is zero. */
+static __inline__ void raw_dec_sp(int off)
+{
+    if (off != 0) {
+        raw_sub_l_ri(ESP_INDEX, off);
+    }
+}
+
+/* Emit an addition of off to the stack pointer register; emits nothing when
+   off is zero. */
+static __inline__ void raw_inc_sp(int off)
+{
+    if (off != 0) {
+        raw_add_l_ri(ESP_INDEX, off);
+    }
+}
+
+/*************************************************************************
+ * Handling mistaken direct memory access *
+ *************************************************************************/
+
+// gb-- I don't need that part for JIT Basilisk II
+#if defined(NATMEM_OFFSET) && 0
+#include <asm/sigcontext.h>
+#include <signal.h>
+
+#define SIG_READ 1
+#define SIG_WRITE 2
+
+static int in_handler=0;
+static uae_u8 veccode[256];
+
+/* SIGSEGV handler for faults raised by JIT-compiled code that touched
+ memory directly.  It decodes the faulting MOV at sc.eip, carries out the
+ access through get_byte/get_word/get_long or put_byte/put_word/put_long,
+ and then invalidates the translated block that contains the instruction.
+ Anything it cannot decode is logged and the default SIGSEGV action is
+ restored.
+ NOTE: the enclosing conditional ends in "&& 0", so this handler is
+ currently never compiled. */
+static void vec(int x, struct sigcontext sc)
+{
+ uae_u8* i=(uae_u8*)sc.eip; /* faulting instruction */
+ uae_u32 addr=sc.cr2; /* faulting address */
+ int r=-1; /* reg field of the decoded MOV, -1 = not decoded */
+ int size=4; /* access size in bytes */
+ int dir=-1; /* SIG_READ or SIG_WRITE */
+ int len=0; /* length of the faulting instruction */
+ int j;
+
+ write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
+ if (!canbang)
+ write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
+ if (in_handler)
+ write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
+
+ /* Only instructions inside the translation cache are decoded */
+ if (canbang && i>=compiled_code && i<=current_compile_p) {
+ if (*i==0x66) {
+ /* operand-size prefix: 16-bit access */
+ i++;
+ size=2;
+ len++;
+ }
+
+ /* Recognised forms: MOV with a ModRM byte whose mod field is
+ 10 (disp32, 6 bytes total) or, for the 32-bit opcodes only,
+ 01 (disp8, 3 bytes total) */
+ switch(i[0]) {
+ case 0x8a: /* MOV r8, r/m8 */
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+ case 0x88: /* MOV r/m8, r8 */
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ size=1;
+ len+=6;
+ break;
+ }
+ break;
+ case 0x8b: /* MOV r16/32, r/m16/32 */
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=6;
+ break;
+ }
+ if ((i[1]&0xc0)==0x40) {
+ r=(i[1]>>3)&7;
+ dir=SIG_READ;
+ len+=3;
+ break;
+ }
+ break;
+ case 0x89: /* MOV r/m16/32, r16/32 */
+ if ((i[1]&0xc0)==0x80) {
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=6;
+ break;
+ }
+ if ((i[1]&0xc0)==0x40) {
+ r=(i[1]>>3)&7;
+ dir=SIG_WRITE;
+ len+=3;
+ break;
+ }
+ break;
+ }
+ }
+
+ if (r!=-1) {
+ void* pr=NULL;
+ write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
+
+ /* Locate the saved copy of the register in the signal context.
+ For byte accesses, numbers 4..7 select the second byte of
+ eax/ecx/edx/ebx (AH/CH/DH/BH); for wider accesses number 4
+ (ESP) is not supported and leaves pr NULL. */
+ switch(r) {
+ case 0: pr=&(sc.eax); break;
+ case 1: pr=&(sc.ecx); break;
+ case 2: pr=&(sc.edx); break;
+ case 3: pr=&(sc.ebx); break;
+ case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
+ case 5: pr=(size>1)?
+ (void*)(&(sc.ebp)):
+ (void*)(((uae_u8*)&(sc.ecx))+1); break;
+ case 6: pr=(size>1)?
+ (void*)(&(sc.esi)):
+ (void*)(((uae_u8*)&(sc.edx))+1); break;
+ case 7: pr=(size>1)?
+ (void*)(&(sc.edi)):
+ (void*)(((uae_u8*)&(sc.ebx))+1); break;
+ default: abort();
+ }
+ if (pr) {
+ blockinfo* bi;
+
+ if (currprefs.comp_oldsegv) {
+ /* Old scheme: emulate the access right here and resume
+ after the faulting instruction */
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+ write_log("Suspicious address in %x SEGV handler.\n",addr);
+ }
+ if (dir==SIG_READ) {
+ switch(size) {
+ case 1: *((uae_u8*)pr)=get_byte(addr); break;
+ case 2: *((uae_u16*)pr)=get_word(addr); break;
+ case 4: *((uae_u32*)pr)=get_long(addr); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,*((uae_u16*)pr)); break;
+ case 4: put_long(addr,*((uae_u32*)pr)); break;
+ default: abort();
+ }
+ }
+ write_log("Handled one access!\n");
+ fflush(stdout);
+ segvcount++;
+ sc.eip+=len;
+ }
+ else {
+ /* New scheme: overwrite the faulting instruction with a
+ 5-byte jump to veccode, and assemble into veccode a stub
+ that supplies the value, restores the 5 overwritten
+ bytes, clears in_handler and jumps back behind the
+ faulting instruction */
+ void* tmp=target;
+ int i;
+ uae_u8 vecbuf[5];
+
+ addr-=NATMEM_OFFSET;
+
+ if ((addr>=0x10000000 && addr<0x40000000) ||
+ (addr>=0x50000000)) {
+ write_log("Suspicious address in %x SEGV handler.\n",addr);
+ }
+
+ target=(uae_u8*)sc.eip;
+ for (i=0;i<5;i++)
+ vecbuf[i]=target[i];
+ emit_byte(0xe9);
+ emit_long((uintptr)veccode-(uintptr)target-4);
+ write_log("Create jump to %p\n",veccode);
+
+ write_log("Handled one access!\n");
+ fflush(stdout);
+ segvcount++;
+
+ target=veccode;
+
+ if (dir==SIG_READ) {
+ /* NOTE(review): the 2- and 4-byte cases below also call
+ get_byte(), unlike the comp_oldsegv path above which
+ uses get_word()/get_long().  This looks like a bug;
+ confirm (including byte order) before enabling this
+ code. */
+ switch(size) {
+ case 1: raw_mov_b_ri(r,get_byte(addr)); break;
+ case 2: raw_mov_w_ri(r,get_byte(addr)); break;
+ case 4: raw_mov_l_ri(r,get_byte(addr)); break;
+ default: abort();
+ }
+ }
+ else { /* write */
+ switch(size) {
+ case 1: put_byte(addr,*((uae_u8*)pr)); break;
+ case 2: put_word(addr,*((uae_u16*)pr)); break;
+ case 4: put_long(addr,*((uae_u32*)pr)); break;
+ default: abort();
+ }
+ }
+ for (i=0;i<5;i++)
+ raw_mov_b_mi(sc.eip+i,vecbuf[i]);
+ raw_mov_l_mi((uintptr)&in_handler,0);
+ emit_byte(0xe9);
+ emit_long(sc.eip+len-(uintptr)target-4);
+ in_handler=1;
+ target=tmp;
+ }
+ /* Invalidate the block whose generated code contains the
+ faulting instruction: search the active list first */
+ bi=active;
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return;
+ }
+ bi=bi->next;
+ }
+ /* Not found in the active list. Might be a rom routine that
+ is in the dormant list */
+ bi=dormant;
+ while (bi) {
+ if (bi->handler &&
+ (uae_u8*)bi->direct_handler<=i &&
+ (uae_u8*)bi->nexthandler>i) {
+ write_log("deleted trigger (%p<%p<%p) %p\n",
+ bi->handler,
+ i,
+ bi->nexthandler,
+ bi->pc_p);
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ set_special(0);
+ return;
+ }
+ bi=bi->next;
+ }
+ write_log("Huh? Could not find trigger!\n");
+ return;
+ }
+ }
+ /* Undecodable access: dump the instruction bytes and give up */
+ write_log("Can't handle access!\n");
+ for (j=0;j<10;j++) {
+ write_log("instruction byte %2d is %02x\n",j,i[j]);
+ }
+ write_log("Please send the above info (starting at \"fault address\") to\n"
+ "bmeyer@csse.monash.edu.au\n"
+ "This shouldn't happen ;-)\n");
+ fflush(stdout);
+ signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
+}
+#endif
+
+
+/*************************************************************************
+ * Checking for CPU features *
+ *************************************************************************/
+
+/* Description of the host CPU, filled in by raw_init_cpu() from CPUID. */
+struct cpuinfo_x86 {
+ uae_u8 x86; // CPU family
+ uae_u8 x86_vendor; // CPU vendor
+ uae_u8 x86_processor; // CPU canonical processor type
+ uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
+ uae_u32 x86_hwcap; // EDX feature flags returned by CPUID level 1
+ uae_u8 x86_model; // CPU model number
+ uae_u8 x86_mask; // Low 4 bits of the CPUID level 1 signature
+ int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
+ char x86_vendor_id[16]; // NUL-terminated vendor string from CPUID level 0
+};
+/* The single global instance describing the CPU we are running on. */
+struct cpuinfo_x86 cpuinfo;
+
+/* Values stored in cpuinfo_x86::x86_vendor; assigned by
+ x86_get_cpu_vendor() from the CPUID vendor string. */
+enum {
+ X86_VENDOR_INTEL = 0,
+ X86_VENDOR_CYRIX = 1,
+ X86_VENDOR_AMD = 2,
+ X86_VENDOR_UMC = 3,
+ X86_VENDOR_NEXGEN = 4,
+ X86_VENDOR_CENTAUR = 5,
+ X86_VENDOR_RISE = 6,
+ X86_VENDOR_TRANSMETA = 7,
+ X86_VENDOR_NSC = 8,
+ X86_VENDOR_UNKNOWN = 0xff
+};
+
+/* Canonical processor types stored in cpuinfo_x86::x86_processor.  The
+ values index x86_processor_string_table[] and x86_alignments[], so the
+ order here must match those tables.  X86_PROCESSOR_max is the table
+ size and doubles as the "not yet determined" marker. */
+enum {
+ X86_PROCESSOR_I386, /* 80386 */
+ X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
+ X86_PROCESSOR_PENTIUM,
+ X86_PROCESSOR_PENTIUMPRO,
+ X86_PROCESSOR_K6,
+ X86_PROCESSOR_ATHLON,
+ X86_PROCESSOR_PENTIUM4,
+ X86_PROCESSOR_X86_64,
+ X86_PROCESSOR_max
+};
+
+/* Printable names for the X86_PROCESSOR_* values, in enum order. */
+static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
+ "80386",
+ "80486",
+ "Pentium",
+ "PentiumPro",
+ "K6",
+ "Athlon",
+ "Pentium4",
+ "x86-64"
+};
+
+/* Per-processor code alignment parameters, one row per X86_PROCESSOR_*
+ value in enum order.  raw_init_cpu() copies align_loop and align_jump
+ into align_loops/align_jumps when tune_alignment is set; the other
+ fields are not read in the code visible here. */
+static struct ptt {
+ const int align_loop;
+ const int align_loop_max_skip;
+ const int align_jump;
+ const int align_jump_max_skip;
+ const int align_func;
+}
+x86_alignments[X86_PROCESSOR_max] = {
+ { 4, 3, 4, 3, 4 }, /* 80386 */
+ { 16, 15, 16, 15, 16 }, /* 80486 */
+ { 16, 7, 16, 7, 16 }, /* Pentium */
+ { 16, 15, 16, 7, 16 }, /* PentiumPro */
+ { 32, 7, 32, 7, 32 }, /* K6 */
+ { 16, 7, 16, 7, 16 }, /* Athlon */
+ { 0, 0, 0, 0, 0 }, /* Pentium4 */
+ { 16, 7, 16, 7, 16 } /* x86-64 */
+};
+
+/* Translate the vendor string in c->x86_vendor_id into an X86_VENDOR_*
+   code and store it in c->x86_vendor.  Strings that match no known
+   vendor yield X86_VENDOR_UNKNOWN. */
+static void
+x86_get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+    static const struct {
+        const char *id;
+        uae_u8 vendor;
+    } known_vendors[] = {
+        { "GenuineIntel", X86_VENDOR_INTEL },
+        { "AuthenticAMD", X86_VENDOR_AMD },
+        { "CyrixInstead", X86_VENDOR_CYRIX },
+        { "Geode by NSC", X86_VENDOR_NSC },
+        { "UMC UMC UMC ", X86_VENDOR_UMC },
+        { "CentaurHauls", X86_VENDOR_CENTAUR },
+        { "NexGenDriven", X86_VENDOR_NEXGEN },
+        { "RiseRiseRise", X86_VENDOR_RISE },
+        { "GenuineTMx86", X86_VENDOR_TRANSMETA },
+        { "TransmetaCPU", X86_VENDOR_TRANSMETA }
+    };
+    const char *name = c->x86_vendor_id;
+    uae_u8 found = X86_VENDOR_UNKNOWN;
+    unsigned int k;
+
+    for (k = 0; k < sizeof(known_vendors) / sizeof(known_vendors[0]); k++) {
+        if (strcmp(name, known_vendors[k].id) == 0) {
+            found = known_vendors[k].vendor;
+            break;
+        }
+    }
+    c->x86_vendor = found;
+}
+
+/* Execute CPUID with EAX = op and return the resulting EAX/EBX/ECX/EDX
+ through the given pointers; any pointer may be NULL to ignore that
+ register.  The instruction is not issued inline: a small stub is
+ assembled with the raw_* emitters into a freshly acquired executable
+ page, called once, and the page is released again.  Values travel
+ through function-local statics, so the function is not reentrant.
+ Aborts if the page cannot be acquired. */
+static void
+cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
+{
+ const int CPUID_SPACE = 4096;
+ uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
+ if (cpuid_space == VM_MAP_FAILED)
+ abort();
+ vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
+
+ static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
+ uae_u8* tmp=get_target(); /* remember the current emission pointer */
+
+ s_op = op;
+ set_target(cpuid_space);
+ /* The stub preserves the four registers CPUID overwrites */
+ raw_push_l_r(0); /* eax */
+ raw_push_l_r(1); /* ecx */
+ raw_push_l_r(2); /* edx */
+ raw_push_l_r(3); /* ebx */
+ raw_mov_l_rm(0,(uintptr)&s_op);
+ raw_cpuid(0);
+ raw_mov_l_mr((uintptr)&s_eax,0);
+ raw_mov_l_mr((uintptr)&s_ebx,3);
+ raw_mov_l_mr((uintptr)&s_ecx,1);
+ raw_mov_l_mr((uintptr)&s_edx,2);
+ raw_pop_l_r(3);
+ raw_pop_l_r(2);
+ raw_pop_l_r(1);
+ raw_pop_l_r(0);
+ raw_ret();
+ set_target(tmp); /* restore the emission pointer */
+
+ /* Run the stub, then hand out the captured register values */
+ ((cpuop_func*)cpuid_space)(0);
+ if (eax != NULL) *eax = s_eax;
+ if (ebx != NULL) *ebx = s_ebx;
+ if (ecx != NULL) *ecx = s_ecx;
+ if (edx != NULL) *edx = s_edx;
+
+ vm_release(cpuid_space, CPUID_SPACE);
+}
+
+/* Identify the host CPU via CPUID and derive the code generator settings
+   that depend on it: fills the global cpuinfo, sets have_cmov,
+   have_lahf_lm and have_rat_stall, selects align_loops/align_jumps when
+   tune_alignment is set, logs a summary and finally calls
+   raw_flags_init().  Aborts on x86-64 builds if CMOV is reported
+   missing. */
+static void
+raw_init_cpu(void)
+{
+    struct cpuinfo_x86 *c = &cpuinfo;
+
+    /* Defaults */
+    c->x86_processor = X86_PROCESSOR_max;
+    c->x86_vendor = X86_VENDOR_UNKNOWN;
+    c->cpuid_level = -1; /* CPUID not detected */
+    c->x86_model = c->x86_mask = 0; /* So far unknown... */
+    c->x86_vendor_id[0] = '\0'; /* Unset */
+    c->x86_hwcap = 0;
+
+    /* Get vendor name.  CPUID level 0 returns the 12 characters in
+       EBX, EDX, ECX order, hence the 0/8/4 offsets below. */
+    c->x86_vendor_id[12] = '\0';
+    cpuid(0x00000000,
+          (uae_u32 *)&c->cpuid_level,
+          (uae_u32 *)&c->x86_vendor_id[0],
+          (uae_u32 *)&c->x86_vendor_id[8],
+          (uae_u32 *)&c->x86_vendor_id[4]);
+    x86_get_cpu_vendor(c);
+
+    /* Intel-defined flags: level 0x00000001 */
+    c->x86_brand_id = 0;
+    if ( c->cpuid_level >= 0x00000001 ) {
+        uae_u32 tfms, brand_id;
+        cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
+        const uae_u32 base_family = (tfms >> 8) & 15;
+        c->x86 = base_family;
+        if (base_family == 0xf)
+            c->x86 += (tfms >> 20) & 0xff; /* extended family */
+        c->x86_model = (tfms >> 4) & 15;
+        /* The extended model field (signature bits 19:16) is defined for
+           base family 6 or 15, regardless of the base model value; it
+           forms the high nibble of the model number. */
+        if (base_family == 0x6 || base_family == 0xf)
+            c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
+        c->x86_brand_id = brand_id & 0xff;
+        c->x86_mask = tfms & 15;
+    } else {
+        /* Have CPUID level 0 only - unheard of */
+        c->x86 = 4;
+    }
+
+    /* AMD-defined flags: level 0x80000001 */
+    uae_u32 xlvl;
+    cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
+    if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+        if ( xlvl >= 0x80000001 ) {
+            uae_u32 features, extra_features;
+            cpuid(0x80000001, NULL, NULL, &extra_features, &features);
+            if (features & (1 << 29)) {
+                /* Assume x86-64 if long mode is supported */
+                c->x86_processor = X86_PROCESSOR_X86_64;
+            }
+            if (extra_features & (1 << 0))
+                have_lahf_lm = true;
+        }
+    }
+
+    /* Canonicalize processor ID */
+    switch (c->x86) {
+    case 3:
+        c->x86_processor = X86_PROCESSOR_I386;
+        break;
+    case 4:
+        c->x86_processor = X86_PROCESSOR_I486;
+        break;
+    case 5:
+        if (c->x86_vendor == X86_VENDOR_AMD)
+            c->x86_processor = X86_PROCESSOR_K6;
+        else
+            c->x86_processor = X86_PROCESSOR_PENTIUM;
+        break;
+    case 6:
+        if (c->x86_vendor == X86_VENDOR_AMD)
+            c->x86_processor = X86_PROCESSOR_ATHLON;
+        else
+            c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
+        break;
+    case 15:
+        /* Keep an x86-64 classification made above */
+        if (c->x86_processor == X86_PROCESSOR_max) {
+            switch (c->x86_vendor) {
+            case X86_VENDOR_INTEL:
+                c->x86_processor = X86_PROCESSOR_PENTIUM4;
+                break;
+            case X86_VENDOR_AMD:
+                /* Assume a 32-bit Athlon processor if not in long mode */
+                c->x86_processor = X86_PROCESSOR_ATHLON;
+                break;
+            }
+        }
+        break;
+    }
+    if (c->x86_processor == X86_PROCESSOR_max) {
+        c->x86_processor = X86_PROCESSOR_I386;
+        fprintf(stderr, "Error: unknown processor type, assuming i386\n");
+        fprintf(stderr, " Family : %d\n", c->x86);
+        fprintf(stderr, " Model : %d\n", c->x86_model);
+        fprintf(stderr, " Mask : %d\n", c->x86_mask);
+        fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
+        if (c->x86_brand_id)
+            fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
+    }
+
+    /* Have CMOV support? */
+    have_cmov = c->x86_hwcap & (1 << 15);
+#if defined(__x86_64__)
+    if (!have_cmov) {
+        write_log("x86-64 implementations are bound to have CMOV!\n");
+        abort();
+    }
+#endif
+
+    /* Can the host CPU suffer from partial register stalls? */
+    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
+#if 1
+    /* It appears that partial register writes are a bad idea even on
+       AMD K7 cores, even though they are not supposed to have the
+       dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
+    if (c->x86_processor == X86_PROCESSOR_ATHLON)
+        have_rat_stall = true;
+#endif
+
+    /* Alignments */
+    if (tune_alignment) {
+        align_loops = x86_alignments[c->x86_processor].align_loop;
+        align_jumps = x86_alignments[c->x86_processor].align_jump;
+    }
+
+    write_log("Max CPUID level=%d Processor is %s [%s]\n",
+              c->cpuid_level, c->x86_vendor_id,
+              x86_processor_string_table[c->x86_processor]);
+
+    raw_flags_init();
+}
+
+/* Probe how the host CPU's BSF instruction treats the flags.  For every
+ combination of initial ZF/CF/OF/SF and the operands -1, 0 and 1, the
+ flags are loaded, BSF is executed and the flags are read back.
+ Returns true when in all cases ZF reflects "operand was zero" and
+ SF/OF/CF kept their initial values; returns false (and logs a message)
+ as soon as any combination behaved differently. */
+static bool target_check_bsf(void)
+{
+ bool mismatch = false;
+ for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
+ for (int g_CF = 0; g_CF <= 1; g_CF++) {
+ for (int g_OF = 0; g_OF <= 1; g_OF++) {
+ for (int g_SF = 0; g_SF <= 1; g_SF++) {
+ for (int value = -1; value <= 1; value++) {
+ /* EFLAGS bit positions: CF=0, ZF=6, SF=7, OF=11 */
+ unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
+ unsigned long tmp = value;
+ __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
+ : "+r" (flags), "+r" (tmp) : : "cc");
+ int OF = (flags >> 11) & 1;
+ int SF = (flags >> 7) & 1;
+ int ZF = (flags >> 6) & 1;
+ int CF = flags & 1;
+ tmp = (value == 0); /* ZF value expected after BSF */
+ if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
+ mismatch = true;
+ }
+ }}}}
+ if (mismatch)
+ write_log("Target CPU defines all flags on BSF instruction\n");
+ return !mismatch;
+}
+
+
+/*************************************************************************
+ * FPU stuff *
+ *************************************************************************/
+
+
+/* Reset the bookkeeping of the emulated x87 stack: no FP register has a
+   stack slot (spos == -2) and the stack is empty (tos == -1).  No code
+   is emitted. */
+static __inline__ void raw_fp_init(void)
+{
+    int reg = 0;
+
+    live.tos = -1; /* Stack is empty */
+    while (reg < N_FREGS) {
+        live.spos[reg] = -2;
+        reg++;
+    }
+}
+
+/* Emit code that pops every value currently tracked on the x87 stack,
+ discarding the contents, and reset the bookkeeping via raw_fp_init(). */
+static __inline__ void raw_fp_cleanup_drop(void)
+{
+#if 0
+ /* using FINIT instead of popping all the entries.
+ Seems to have side effects --- there is display corruption in
+ Quake when this is used */
+ if (live.tos>1) {
+ emit_byte(0x9b);
+ emit_byte(0xdb);
+ emit_byte(0xe3);
+ live.tos=-1;
+ }
+#endif
+ /* Pop two entries at a time while at least two remain (fcompp) */
+ while (live.tos>=1) {
+ emit_byte(0xde);
+ emit_byte(0xd9);
+ live.tos-=2;
+ }
+ /* Pop a remaining single entry (fstp st(0)) */
+ while (live.tos>=0) {
+ emit_byte(0xdd);
+ emit_byte(0xd8);
+ live.tos--;
+ }
+ raw_fp_init();
+}
+
+/* Make FP register r the top of the emulated x87 stack.
+ live.spos[r] is the slot of r counted from the stack bottom (negative
+ when r is not on the stack), live.onstack[] is the inverse mapping and
+ live.tos the index of the top slot.  If r is not on the stack a dummy
+ value is pushed for it; otherwise r is exchanged with the current top
+ and both mappings are updated. */
+static __inline__ void make_tos(int r)
+{
+ int p,q;
+
+ if (live.spos[r]<0) { /* Register not yet on stack */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* Push '1' on the stack, just to grow it */
+ live.tos++;
+ live.spos[r]=live.tos;
+ live.onstack[live.tos]=r;
+ return;
+ }
+ /* Register is on stack */
+ if (live.tos==live.spos[r])
+ return;
+ p=live.spos[r]; /* slot r occupies now */
+ q=live.onstack[live.tos]; /* register currently on top */
+
+ emit_byte(0xd9);
+ emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */
+ live.onstack[live.tos]=r;
+ live.spos[r]=live.tos;
+ live.onstack[p]=q;
+ live.spos[q]=p;
+}
+
+/* Arrange the emulated x87 stack so that r is on top and r2 directly
+ below it.
+ NOTE(review): the swap path reads live.onstack[live.tos-1], so it
+ presumably requires at least two tracked entries and r != r2; the
+ callers visible here call usereg() on both registers first. */
+static __inline__ void make_tos2(int r, int r2)
+{
+ int q;
+
+ make_tos(r2); /* Put the reg that's supposed to end up in position2
+ on top */
+
+ if (live.spos[r]<0) { /* Register not yet on stack */
+ make_tos(r); /* This will extend the stack */
+ return;
+ }
+ /* Register is on stack */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* Move r2 into position 2 */
+
+ /* Mirror the exchange of the two topmost slots in the bookkeeping */
+ q=live.onstack[live.tos-1];
+ live.onstack[live.tos]=q;
+ live.spos[q]=live.tos;
+ live.onstack[live.tos-1]=r2;
+ live.spos[r2]=live.tos-1;
+
+ make_tos(r); /* And r into 1 */
+}
+
+/* Return the distance of FP register r from the top of the emulated x87
+   stack (0 == st(0)).  Aborts if r is not on the stack or its recorded
+   slot lies above the current top. */
+static __inline__ int stackpos(int r)
+{
+    const int slot = live.spos[r];
+
+    if (slot < 0)
+        abort();
+    if (slot > live.tos) {
+        printf("Looking for spos for fnreg %d\n",r);
+        abort();
+    }
+    return live.tos - slot;
+}
+
+/* Ensure FP register r has a slot on the emulated x87 stack; a register
+   that is already on the stack is left where it is. */
+static __inline__ void usereg(int r)
+{
+    if (live.spos[r] >= 0)
+        return;
+    make_tos(r);
+}
+
+/* This is called with one FP value in a reg *above* tos, which it will
+ pop off the stack if necessary.
+ If r has no slot yet, the freshly pushed value simply becomes r's slot
+ and only the bookkeeping changes.  Otherwise the value is stored into
+ r's existing slot and popped, leaving live.tos unchanged. */
+static __inline__ void tos_make(int r)
+{
+ if (live.spos[r]<0) {
+ live.tos++;
+ live.spos[r]=live.tos;
+ live.onstack[live.tos]=r;
+ return;
+ }
+ emit_byte(0xdd);
+ emit_byte(0xd8+(live.tos+1)-live.spos[r]); /* store top of stack in reg,
+ and pop it*/
+}
+
+/* FP helper functions */
+/* DEFINE_OP generates raw_<name>(m), which emits one x87 load/store on
+ the absolute address m.  With USE_NEW_RTASM the run-time assembler
+ macro GEN is used; otherwise the two opcode bytes are emitted by hand,
+ followed by the 32-bit address. */
+#if USE_NEW_RTASM
+#define DEFINE_OP(NAME, GEN) \
+static inline void raw_##NAME(uint32 m) \
+{ \
+ GEN(m, X86_NOREG, X86_NOREG, 1); \
+}
+DEFINE_OP(fstl, FSTDm);
+DEFINE_OP(fstpl, FSTPDm);
+DEFINE_OP(fldl, FLDDm);
+DEFINE_OP(fildl, FILDLm);
+DEFINE_OP(fistl, FISTLm);
+DEFINE_OP(flds, FLDSm);
+DEFINE_OP(fsts, FSTSm);
+DEFINE_OP(fstpt, FSTPTm);
+DEFINE_OP(fldt, FLDTm);
+#else
+#define DEFINE_OP(NAME, OP1, OP2) \
+static inline void raw_##NAME(uint32 m) \
+{ \
+ emit_byte(OP1); \
+ emit_byte(OP2); \
+ emit_long(m); \
+}
+DEFINE_OP(fstl, 0xdd, 0x15); /* fst m64 */
+DEFINE_OP(fstpl, 0xdd, 0x1d); /* fstp m64 */
+DEFINE_OP(fldl, 0xdd, 0x05); /* fld m64 */
+DEFINE_OP(fildl, 0xdb, 0x05); /* fild m32 */
+DEFINE_OP(fistl, 0xdb, 0x15); /* fist m32 */
+DEFINE_OP(flds, 0xd9, 0x05); /* fld m32 */
+DEFINE_OP(fsts, 0xd9, 0x15); /* fst m32 */
+DEFINE_OP(fstpt, 0xdb, 0x3d); /* fstp m80 */
+DEFINE_OP(fldt, 0xdb, 0x2d); /* fld m80 */
+#endif
+#undef DEFINE_OP
+
+/* Store FP register r to memory at m as a double; r stays on the x87
+ stack (it is moved to the top first). */
+LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
+{
+ make_tos(r);
+ raw_fstl(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
+
+/* Store FP register r to memory at m as a double and drop it from the
+   x87 stack: the store pops the value, and the bookkeeping marks r as no
+   longer on the stack. */
+LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
+{
+    make_tos(r);
+    raw_fstpl(m);
+    live.onstack[live.tos]=-1;
+    live.tos--;
+    live.spos[r]=-2;
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
+
+/* Load the double at memory address m into FP register r. */
+LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
+{
+ raw_fldl(m); /* pushes the value above the tracked top */
+ tos_make(r); /* assign it to r */
+}
+LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
+
+/* Load the 32-bit integer at memory address m, converted to floating
+ point, into FP register r. */
+LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
+{
+ raw_fildl(m);
+ tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
+
+/* Store FP register r to memory at m as a 32-bit integer; r stays on the
+ x87 stack. */
+LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
+{
+ make_tos(r);
+ raw_fistl(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
+
+/* Load the single-precision float at memory address m into FP
+ register r. */
+LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
+{
+ raw_flds(m);
+ tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
+
+/* Store FP register r to memory at m as a single-precision float; r
+ stays on the x87 stack. */
+LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
+{
+ make_tos(r);
+ raw_fsts(m);
+}
+LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
+
+/* Store FP register r to memory at m in 80-bit extended format while
+ keeping r on the x87 stack.  A temporary copy is pushed and then
+ stored-and-popped, so the tracked stack layout is unchanged. */
+LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
+{
+ int rs;
+
+ /* Stupid x87 can't write a long double to mem without popping the
+ stack! */
+ usereg(r);
+ rs=stackpos(r);
+ emit_byte(0xd9); /* Get a copy to the top of stack */
+ emit_byte(0xc0+rs);
+
+ raw_fstpt(m); /* store and pop it */
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
+
+/* Store FP register r to memory at m in 80-bit extended format and drop
+   it from the x87 stack: the store pops the value, and the bookkeeping
+   marks r as no longer on the stack. */
+LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
+{
+    make_tos(r);
+    raw_fstpt(m); /* store and pop it */
+    live.onstack[live.tos]=-1;
+    live.tos--;
+    live.spos[r]=-2;
+}
+LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
+
+/* Load the 80-bit extended value at memory address m into FP
+ register r. */
+LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
+{
+ raw_fldt(m);
+ tos_make(r);
+}
+LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
+
+/* Load the constant pi into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xeb); /* fldpi */
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
+
+/* Load the constant log10(2) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xec); /* fldlg2 */
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
+
+/* Load the constant log2(e) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xea); /* fldl2e */
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
+
+/* Load the constant ln(2) into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xed); /* fldln2 */
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
+
+/* Load the constant 1.0 into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* fld1 */
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
+
+/* Load the constant 0.0 into FP register r. */
+LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
+{
+ emit_byte(0xd9);
+ emit_byte(0xee); /* fldz */
+ tos_make(r);
+}
+LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
+
+/* Copy FP register s into FP register d.  When s is already on top and
+ d has a stack slot, a single non-popping store into d's slot suffices;
+ otherwise s is duplicated on top and handed to d via tos_make(). */
+LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ if (ds==0 && live.spos[d]>=0) {
+ /* source is on top of stack, and we already have the dest */
+ int dd=stackpos(d);
+ emit_byte(0xdd);
+ emit_byte(0xd0+dd); /* fst st(dd) */
+ }
+ else {
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source on tos */
+ tos_make(d); /* store to destination, pop if necessary */
+ }
+}
+LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
+
+/* Emit "fldcw [index + base]": load the x87 control word from the
+ address formed by native register index plus the 32-bit displacement
+ base.
+ NOTE(review): the ModRM byte is built as 0xa8+index with no SIB byte,
+ so index presumably must be in 0..7 and must not be ESP -- confirm
+ against callers. */
+LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
+{
+ emit_byte(0xd9);
+ emit_byte(0xa8+index);
+ emit_long(base);
+}
+LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
+
+
+/* Emit code computing d = sqrt(s). */
+LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+{
+    int src_depth;
+
+    if (d==s) {
+        /* In place: bring the register to the top and transform it */
+        make_tos(d);
+        emit_byte(0xd9);
+        emit_byte(0xfa); /* fsqrt */
+    }
+    else {
+        /* Operate on a pushed copy of the source, then assign it to d */
+        usereg(s);
+        src_depth=stackpos(s);
+        emit_byte(0xd9);
+        emit_byte(0xc0+src_depth); /* fld st(src_depth) */
+        emit_byte(0xd9);
+        emit_byte(0xfa); /* fsqrt */
+        tos_make(d);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
+
+/* Emit code computing d = |s|. */
+LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+{
+    int src_depth;
+
+    if (d==s) {
+        /* In place: bring the register to the top and transform it */
+        make_tos(d);
+        emit_byte(0xd9);
+        emit_byte(0xe1); /* fabs */
+    }
+    else {
+        /* Operate on a pushed copy of the source, then assign it to d */
+        usereg(s);
+        src_depth=stackpos(s);
+        emit_byte(0xd9);
+        emit_byte(0xc0+src_depth); /* fld st(src_depth) */
+        emit_byte(0xd9);
+        emit_byte(0xe1); /* fabs */
+        tos_make(d);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
+
+/* Emit code computing d = s rounded to an integer (frndint). */
+LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+{
+    int src_depth;
+
+    if (d==s) {
+        /* In place: bring the register to the top and transform it */
+        make_tos(d);
+        emit_byte(0xd9);
+        emit_byte(0xfc); /* frndint */
+    }
+    else {
+        /* Operate on a pushed copy of the source, then assign it to d */
+        usereg(s);
+        src_depth=stackpos(s);
+        emit_byte(0xd9);
+        emit_byte(0xc0+src_depth); /* fld st(src_depth) */
+        emit_byte(0xd9);
+        emit_byte(0xfc); /* frndint */
+        tos_make(d);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
+
+/* Emit code computing d = cos(s). */
+LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+{
+    int src_depth;
+
+    if (d==s) {
+        /* In place: bring the register to the top and transform it */
+        make_tos(d);
+        emit_byte(0xd9);
+        emit_byte(0xff); /* fcos */
+    }
+    else {
+        /* Operate on a pushed copy of the source, then assign it to d */
+        usereg(s);
+        src_depth=stackpos(s);
+        emit_byte(0xd9);
+        emit_byte(0xc0+src_depth); /* fld st(src_depth) */
+        emit_byte(0xd9);
+        emit_byte(0xff); /* fcos */
+        tos_make(d);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
+
+/* Emit code computing d = sin(s). */
+LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+{
+    int src_depth;
+
+    if (d==s) {
+        /* In place: bring the register to the top and transform it */
+        make_tos(d);
+        emit_byte(0xd9);
+        emit_byte(0xfe); /* fsin */
+    }
+    else {
+        /* Operate on a pushed copy of the source, then assign it to d */
+        usereg(s);
+        src_depth=stackpos(s);
+        emit_byte(0xd9);
+        emit_byte(0xc0+src_depth); /* fld st(src_depth) */
+        emit_byte(0xd9);
+        emit_byte(0xfe); /* fsin */
+        tos_make(d);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
+
+/* Constant 1.0 in memory, added by address in the 2^x / e^x sequences */
+static const double one=1;
+/* Emit code computing d = 2^s.  The argument is split into its rounded
+ integer part and the remaining fraction: f2xm1 handles the fraction,
+ 1 is added back, and fscale applies the integer part as a power of
+ two. */
+LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* rndint */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two elements */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* subtract rounded from original */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 */
+ x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* and scale it */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* take the rounded value off */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
+
+/* Emit code computing d = e^s.  The source is first multiplied by
+ log2(e); the product is then raised as a power of two with the same
+ round / f2xm1 / add 1 / fscale sequence used for 2^x. */
+LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xea); /* fldl2e */
+ emit_byte(0xde);
+ emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */
+
+ emit_byte(0xd9);
+ emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */
+ emit_byte(0xd9);
+ emit_byte(0xfc); /* rndint */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two elements */
+ emit_byte(0xd8);
+ emit_byte(0xe1); /* subtract rounded from original */
+ emit_byte(0xd9);
+ emit_byte(0xf0); /* f2xm1 */
+ x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */
+ emit_byte(0xd9);
+ emit_byte(0xfd); /* and scale it */
+ emit_byte(0xdd);
+ emit_byte(0xd9); /* take the rounded value off */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
+
+/* Emit code computing d = log2(s), using fyl2x with a multiplier of 1. */
+LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ ds=stackpos(s);
+ emit_byte(0xd9);
+ emit_byte(0xc0+ds); /* duplicate source */
+ emit_byte(0xd9);
+ emit_byte(0xe8); /* push '1' */
+ emit_byte(0xd9);
+ emit_byte(0xc9); /* swap top two */
+ emit_byte(0xd9);
+ emit_byte(0xf1); /* take 1*log2(x) */
+ tos_make(d); /* store to destination */
+}
+LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
+
+
+/* Emit code computing d = -s. */
+LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+{
+    int src_depth;
+
+    if (d==s) {
+        /* In place: bring the register to the top and transform it */
+        make_tos(d);
+        emit_byte(0xd9);
+        emit_byte(0xe0); /* fchs */
+    }
+    else {
+        /* Operate on a pushed copy of the source, then assign it to d */
+        usereg(s);
+        src_depth=stackpos(s);
+        emit_byte(0xd9);
+        emit_byte(0xc0+src_depth); /* fld st(src_depth) */
+        emit_byte(0xd9);
+        emit_byte(0xe0); /* fchs */
+        tos_make(d);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
+
+/* Emit code computing d = d + s. */
+LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+{
+    int depth;
+
+    usereg(s);
+    usereg(d);
+
+    if (live.spos[s]!=live.tos) {
+        /* General case: put d on top, then st(0) += st(depth of s) */
+        make_tos(d);
+        depth=stackpos(s);
+
+        emit_byte(0xd8);
+        emit_byte(0xc0+depth);
+    }
+    else {
+        /* s already sits on top: st(depth of d) += st(0) */
+        depth=stackpos(d);
+        emit_byte(0xdc);
+        emit_byte(0xc0+depth);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
+
+/* Emit code computing d = d - s. */
+LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+{
+    int depth;
+
+    usereg(s);
+    usereg(d);
+
+    if (live.spos[s]!=live.tos) {
+        /* General case: put d on top, then st(0) -= st(depth of s) */
+        make_tos(d);
+        depth=stackpos(s);
+
+        emit_byte(0xd8);
+        emit_byte(0xe0+depth);
+    }
+    else {
+        /* s already sits on top: st(depth of d) -= st(0) */
+        depth=stackpos(d);
+        emit_byte(0xdc);
+        emit_byte(0xe8+depth);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
+
+/* Emit an unordered compare (fucom) of d, moved to the top of the x87
+ stack, against s.  The result is left in the x87 status word; nothing
+ is popped. */
+LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos(d);
+ ds=stackpos(s);
+
+ emit_byte(0xdd);
+ emit_byte(0xe0+ds); /* cmp dest with source*/
+}
+LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
+
+/* Emit code computing d = d * s. */
+LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+{
+    int depth;
+
+    usereg(s);
+    usereg(d);
+
+    if (live.spos[s]!=live.tos) {
+        /* General case: put d on top, then st(0) *= st(depth of s) */
+        make_tos(d);
+        depth=stackpos(s);
+
+        emit_byte(0xd8);
+        emit_byte(0xc8+depth);
+    }
+    else {
+        /* s already sits on top: st(depth of d) *= st(0) */
+        depth=stackpos(d);
+        emit_byte(0xdc);
+        emit_byte(0xc8+depth);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
+
+/* Emit code computing d = d / s. */
+LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+{
+    int depth;
+
+    usereg(s);
+    usereg(d);
+
+    if (live.spos[s]!=live.tos) {
+        /* General case: put d on top, then st(0) /= st(depth of s) */
+        make_tos(d);
+        depth=stackpos(s);
+
+        emit_byte(0xd8);
+        emit_byte(0xf0+depth);
+    }
+    else {
+        /* s already sits on top: st(depth of d) /= st(0) */
+        depth=stackpos(d);
+        emit_byte(0xdc);
+        emit_byte(0xf8+depth);
+    }
+}
+LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
+
+/* Emit fprem to replace d with the remainder of d divided by s.  The
+ instruction works on st(0) and st(1) only, so d is moved to the top
+ and s directly below it first; aborts if that arrangement failed.
+ NOTE(review): fprem is emitted once, with no loop on the C2 status
+ flag -- confirm whether partial remainders matter for callers. */
+LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos2(d,s);
+ ds=stackpos(s);
+
+ if (ds!=1) {
+ printf("Failed horribly in raw_frem_rr! ds is %d\n",ds);
+ abort();
+ }
+ emit_byte(0xd9);
+ emit_byte(0xf8); /* take rem from dest by source */
+}
+LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
+
+/* Emit fprem1 to replace d with the remainder of d divided by s.  The
+ instruction works on st(0) and st(1) only, so d is moved to the top
+ and s directly below it first; aborts if that arrangement failed.
+ NOTE(review): fprem1 is emitted once, with no loop on the C2 status
+ flag -- confirm whether partial remainders matter for callers. */
+LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+{
+ int ds;
+
+ usereg(s);
+ usereg(d);
+
+ make_tos2(d,s);
+ ds=stackpos(s);
+
+ if (ds!=1) {
+ printf("Failed horribly in raw_frem1_rr! ds is %d\n",ds);
+ abort();
+ }
+ emit_byte(0xd9);
+ emit_byte(0xf5); /* take rem1 from dest by source */
+}
+LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
+
+
+/* Emit ftst on FP register r (moved to the top of the x87 stack first),
+ which compares it against 0.0 and leaves the result in the x87 status
+ word. */
+LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
+{
+ make_tos(r);
+ emit_byte(0xd9); /* ftst */
+ emit_byte(0xe4);
+}
+LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
+
+/* %eax register is clobbered if target processor doesn't support fucomi */
+#define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
+#define FFLAG_NREG EAX_INDEX
+
+/* Emit code that compares FP register r against 0.0 and leaves the
+ outcome in the CPU flags.  A zero is pushed and swapped with r, the
+ pair is compared, and the final store-and-pop puts the value back into
+ r's slot while removing the zero, so the tracked stack layout is
+ unchanged.  With have_cmov the flags are set directly by fucomi;
+ otherwise they go through fucom / fstsw ax / sahf, which overwrites
+ EAX. */
+static __inline__ void raw_fflags_into_flags(int r)
+{
+ int p;
+
+ usereg(r);
+ p=stackpos(r);
+
+ emit_byte(0xd9);
+ emit_byte(0xee); /* Push 0 */
+ emit_byte(0xd9);
+ emit_byte(0xc9+p); /* swap top two around */
+ if (have_cmov) {
+ // gb-- fucomi is for P6 cores only, not K6-2 then...
+ emit_byte(0xdb);
+ emit_byte(0xe9+p); /* fucomi them */
+ }
+ else {
+ emit_byte(0xdd);
+ emit_byte(0xe1+p); /* fucom them */
+ emit_byte(0x9b);
+ emit_byte(0xdf);
+ emit_byte(0xe0); /* fstsw ax */
+ raw_sahf(0); /* sahf */
+ }
+ emit_byte(0xdd);
+ emit_byte(0xd9+p); /* store value back, and get rid of 0 */
+}
--- /dev/null
+/******************** -*- mode: C; tab-width: 8 -*- ********************
+ *
+ * Run-time assembler for IA-32 and AMD64
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * This file is derived from CCG.
+ *
+ * Copyright 1999, 2000, 2001, 2002, 2003 Ian Piumarta
+ *
+ * Adaptations and enhancements for AMD64 support, Copyright 2003-2008
+ * Gwenole Beauchesne
+ *
+ * Basilisk II (C) 1997-2008 Christian Bauer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ ***********************************************************************/
+
+#ifndef X86_RTASM_H
+#define X86_RTASM_H
+
+/* NOTES
+ *
+ * o Best viewed on a 1024x768 screen with fixed-6x10 font ;-)
+ *
+ * TODO
+ *
+ * o Fix FIXMEs
+ * o SSE instructions
+ * o Optimize for cases where register numbers are not integral constants
+ */
+
+/* --- Configuration ------------------------------------------------------- */
+
+/* Define to settle a "flat" register set, i.e. different regno for
+ each size variant. */
+#ifndef X86_FLAT_REGISTERS
+#define X86_FLAT_REGISTERS 1
+#endif
+
+/* Define to generate x86-64 code. */
+#ifndef X86_TARGET_64BIT
+#define X86_TARGET_64BIT 0
+#endif
+
+/* Define to optimize ALU instructions. */
+#ifndef X86_OPTIMIZE_ALU
+#define X86_OPTIMIZE_ALU 1
+#endif
+
+/* Define to optimize rotate/shift instructions. */
+#ifndef X86_OPTIMIZE_ROTSHI
+#define X86_OPTIMIZE_ROTSHI 1
+#endif
+
+/* Define to optimize absolute addresses for RIP relative addressing. */
+#ifndef X86_RIP_RELATIVE_ADDR
+#define X86_RIP_RELATIVE_ADDR 1
+#endif
+
+
+/* --- Macros -------------------------------------------------------------- */
+
+/* Functions used to emit code.
+ *
+ * x86_emit_byte(B)
+ * x86_emit_word(W)
+ * x86_emit_long(L)
+ */
+
+/* Get pointer to current code
+ *
+ * x86_get_target()
+ */
+
+/* Abort assembler, fatal failure.
+ *
+ * x86_emit_failure(MSG)
+ */
+
+#define x86_emit_failure0(MSG) (x86_emit_failure(MSG),0)
+
+
+/* --- Register set -------------------------------------------------------- */
+
+/* Base values of the register id ranges.  With X86_FLAT_REGISTERS every
+ register class gets its own 16-entry range, so an id carries its class
+ in the high nibble and X86_NOREG is 0.  Otherwise the classes share
+ the same numbers (only the high-byte registers are offset by 16) and
+ X86_NOREG is -1.  X86_RIP is a special id, distinct from X86_NOREG in
+ both schemes. */
+enum {
+ X86_RIP = -2,
+#if X86_FLAT_REGISTERS
+ X86_NOREG = 0,
+ X86_Reg8L_Base = 0x10,
+ X86_Reg8H_Base = 0x20,
+ X86_Reg16_Base = 0x30,
+ X86_Reg32_Base = 0x40,
+ X86_Reg64_Base = 0x50,
+ X86_RegMMX_Base = 0x60,
+ X86_RegXMM_Base = 0x70,
+ X86_RegFPU_Base = 0x80
+#else
+ X86_NOREG = -1,
+ X86_Reg8L_Base = 0,
+ X86_Reg8H_Base = 16,
+ X86_Reg16_Base = 0,
+ X86_Reg32_Base = 0,
+ X86_Reg64_Base = 0,
+ X86_RegMMX_Base = 0,
+ X86_RegXMM_Base = 0,
+ X86_RegFPU_Base = 0
+#endif
+};
+
+/* 8-bit registers.  The high-byte registers start at X86_Reg8H_Base + 4
+ so that their low three bits are 4..7. */
+enum {
+ X86_AL = X86_Reg8L_Base,
+ X86_CL, X86_DL, X86_BL,
+ X86_SPL, X86_BPL, X86_SIL, X86_DIL,
+ X86_R8B, X86_R9B, X86_R10B, X86_R11B,
+ X86_R12B, X86_R13B, X86_R14B, X86_R15B,
+ X86_AH = X86_Reg8H_Base + 4,
+ X86_CH, X86_DH, X86_BH
+};
+
+/* 16-bit registers, numbered consecutively from X86_Reg16_Base. */
+enum {
+ X86_AX = X86_Reg16_Base,
+ X86_CX, X86_DX, X86_BX,
+ X86_SP, X86_BP, X86_SI, X86_DI,
+ X86_R8W, X86_R9W, X86_R10W, X86_R11W,
+ X86_R12W, X86_R13W, X86_R14W, X86_R15W
+};
+
+/* 32-bit registers, numbered consecutively from X86_Reg32_Base. */
+enum {
+ X86_EAX = X86_Reg32_Base,
+ X86_ECX, X86_EDX, X86_EBX,
+ X86_ESP, X86_EBP, X86_ESI, X86_EDI,
+ X86_R8D, X86_R9D, X86_R10D, X86_R11D,
+ X86_R12D, X86_R13D, X86_R14D, X86_R15D
+};
+
+/* 64-bit registers, numbered consecutively from X86_Reg64_Base. */
+enum {
+ X86_RAX = X86_Reg64_Base,
+ X86_RCX, X86_RDX, X86_RBX,
+ X86_RSP, X86_RBP, X86_RSI, X86_RDI,
+ X86_R8, X86_R9, X86_R10, X86_R11,
+ X86_R12, X86_R13, X86_R14, X86_R15
+};
+
+/* MMX registers, numbered consecutively from X86_RegMMX_Base. */
+enum {
+ X86_MM0 = X86_RegMMX_Base,
+ X86_MM1, X86_MM2, X86_MM3,
+ X86_MM4, X86_MM5, X86_MM6, X86_MM7,
+};
+
+/* XMM registers, numbered consecutively from X86_RegXMM_Base. */
+enum {
+ X86_XMM0 = X86_RegXMM_Base,
+ X86_XMM1, X86_XMM2, X86_XMM3,
+ X86_XMM4, X86_XMM5, X86_XMM6, X86_XMM7,
+ X86_XMM8, X86_XMM9, X86_XMM10, X86_XMM11,
+ X86_XMM12, X86_XMM13, X86_XMM14, X86_XMM15
+};
+
+/* x87 stack registers, numbered consecutively from X86_RegFPU_Base. */
+enum {
+ X86_ST0 = X86_RegFPU_Base,
+ X86_ST1, X86_ST2, X86_ST3,
+ X86_ST4, X86_ST5, X86_ST6, X86_ST7
+};
+
+/* Register control and access
+ *
+ * _r0P(R) Null register?
+ * _rIP(R) RIP register?
+ * _rXP(R) Extended register?
+ *
+ * _rC(R) Class of register (only valid if X86_FLAT_REGISTERS)
+ * _rR(R) Full register number
+ * _rN(R) Short register number for encoding
+ *
+ * _r1(R) 8-bit register ID
+ * _r2(R) 16-bit register ID
+ * _r4(R) 32-bit register ID
+ * _r8(R) 64-bit register ID
+ * _rM(R) MMX register ID
+ * _rX(R) XMM register ID
+ * _rF(R) FPU register ID
+ * _rA(R) Address register ID used for EA calculation
+ */
+
+#define _rST0P(R) ((int)(R) == (int)X86_ST0) /* is R the FPU register ST0? */
+#define _r0P(R) ((int)(R) == (int)X86_NOREG) /* is R the "no register" marker? */
+#define _rIP(R) (X86_TARGET_64BIT ? ((int)(R) == (int)X86_RIP) : 0) /* is R X86_RIP? always 0 unless X86_TARGET_64BIT */
+
+#if X86_FLAT_REGISTERS
+#define _rC(R) ((R) & 0xf0) /* class tag: the high nibble of the ID */
+#define _rR(R) ((R) & 0x0f) /* register number inside its class */
+#define _rN(R) ((R) & 0x07) /* low three bits of the number */
+#define _rXP(R) ((R) > 0 && _rR(R) > 7) /* true for numbers 8..15 of a real (positive) ID */
+#else
+#define _rN(R) ((R) & 0x07) /* low three bits of the number */
+#define _rR(R) (int(R)) /* IDs carry no class tag here, so the ID is the number */
+#define _rXP(R) (_rR(R) > 7 && _rR(R) < 16) /* true for 8..15; IDs from 16 up (X86_Reg8H_Base) are excluded */
+#endif
+
+#if !defined(_ASM_SAFETY) || ! X86_FLAT_REGISTERS /* unchecked variants: every accessor is just _rN(R) */
+#define _r1(R) _rN(R)
+#define _r2(R) _rN(R)
+#define _r4(R) _rN(R)
+#define _r8(R) _rN(R)
+#define _rA(R) _rN(R)
+#define _rM(R) _rN(R)
+#define _rX(R) _rN(R)
+#define _rF(R) _rN(R)
+#else /* checked variants: yield _rN(R) only if the class tag of R matches, otherwise x86_emit_failure0() */
+#define _r1(R) ( ((_rC(R) == X86_Reg8L_Base) || (_rC(R) == X86_Reg8H_Base)) ? _rN(R) : x86_emit_failure0( "8-bit register required")) /* compare tags for equality: masking with (8L|8H) would also accept tags 0x30, 0x50, 0x60 and 0x70 */
+#define _r2(R) ( (_rC(R) == X86_Reg16_Base) ? _rN(R) : x86_emit_failure0("16-bit register required"))
+#define _r4(R) ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("32-bit register required"))
+#define _r8(R) ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("64-bit register required"))
+#define _rA(R) ( X86_TARGET_64BIT ? \
+ ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("not a valid 64-bit base/index expression")) : \
+ ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("not a valid 32-bit base/index expression")) ) /* address register: class must match the target's pointer width */
+#define _rM(R) ( (_rC(R) == X86_RegMMX_Base) ? _rN(R) : x86_emit_failure0("MMX register required"))
+#define _rX(R) ( (_rC(R) == X86_RegXMM_Base) ? _rN(R) : x86_emit_failure0("SSE register required"))
+#define _rF(R) ( (_rC(R) == X86_RegFPU_Base) ? _rN(R) : x86_emit_failure0("FPU register required"))
+#endif
+
+#define _rSP() (X86_TARGET_64BIT ? (int)X86_RSP : (int)X86_ESP) /* stack pointer ID for the target width */
+#define _r1e8lP(R) (int(R) >= X86_SPL && int(R) <= X86_DIL) /* is R one of SPL, BPL, SIL, DIL? */
+#define _rbpP(R) (_rR(R) == _rR(X86_RBP)) /* same full register number as RBP */
+#define _rspP(R) (_rR(R) == _rR(X86_RSP)) /* same full register number as RSP */
+#define _rbp13P(R) (_rN(R) == _rN(X86_RBP)) /* same low three bits as RBP (also true for number 13) */
+#define _rsp12P(R) (_rN(R) == _rN(X86_RSP)) /* same low three bits as RSP (also true for number 12) */
+
+
+/* ========================================================================= */
+/* --- UTILITY ------------------------------------------------------------- */
+/* ========================================================================= */
+
+typedef signed char _sc; /* short aliases for the integer types used by the emitters */
+typedef unsigned char _uc;
+typedef signed short _ss;
+typedef unsigned short _us;
+typedef signed int _sl;
+typedef unsigned int _ul;
+
+#define _UC(X) ((_uc )(unsigned long)(X)) /* value casts; they go through unsigned long first */
+#define _US(X) ((_us )(unsigned long)(X))
+#define _SL(X) ((_sl )(unsigned long)(X))
+#define _UL(X) ((_ul )(unsigned long)(X))
+
+#define _PUC(X) ((_uc *)(X)) /* pointer casts */
+#define _PUS(X) ((_us *)(X))
+#define _PSL(X) ((_sl *)(X))
+#define _PUL(X) ((_ul *)(X))
+
+#define _B(B) x86_emit_byte((B)) /* shorthands for the x86_emit_* primitives */
+#define _W(W) x86_emit_word((W))
+#define _L(L) x86_emit_long((L))
+#define _Q(Q) x86_emit_quad((Q))
+
+#define _MASK(N) ((1U<<(N))-1U) /* N low bits set, N in 0..31; unsigned 1 keeps _MASK(31) free of signed-shift overflow */
+#define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N))) /* does I fit a signed N-bit field? (bits N-1 and above all equal) */
+#define _uiP(N,I) (!(((unsigned)(I))&~_MASK(N))) /* does I fit an unsigned N-bit field? */
+#define _suiP(N,I) (_siP(N,I) | _uiP(N,I)) /* fits either as signed or as unsigned */
+
+#ifndef _ASM_SAFETY /* unchecked: every _ck_* just truncates I to its W low bits */
+#define _ck_s(W,I) (_UL(I) & _MASK(W))
+#define _ck_u(W,I) (_UL(I) & _MASK(W))
+#define _ck_su(W,I) (_UL(I) & _MASK(W))
+#define _ck_d(W,I) (_UL(I) & _MASK(W))
+#else /* checked: same result when I fits the W-bit field, x86_emit_failure0() otherwise */
+#define _ck_s(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "signed integer `"#I"' too large for "#W"-bit field"))
+#define _ck_u(W,I) (_uiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0("unsigned integer `"#I"' too large for "#W"-bit field"))
+#define _ck_su(W,I) (_suiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "integer `"#I"' too large for "#W"-bit field"))
+#define _ck_d(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "displacement `"#I"' too large for "#W"-bit field"))
+#endif
+
+#define _s0P(I) ((I)==0) /* is I zero? */
+#define _s8P(I) _siP(8,I) /* does I fit a signed 8-bit field? */
+#define _s16P(I) _siP(16,I) /* does I fit a signed 16-bit field? */
+#define _u8P(I) _uiP(8,I) /* does I fit an unsigned 8-bit field? */
+#define _u16P(I) _uiP(16,I) /* does I fit an unsigned 16-bit field? */
+
+#define _su8(I) _ck_su(8,I) /* I as an 8-bit field, signed or unsigned accepted */
+#define _su16(I) _ck_su(16,I) /* I as a 16-bit field, signed or unsigned accepted */
+
+#define _s1(I) _ck_s( 1,I) /* _sN(I): I as a signed N-bit field, via _ck_s */
+#define _s2(I) _ck_s( 2,I)
+#define _s3(I) _ck_s( 3,I)
+#define _s4(I) _ck_s( 4,I)
+#define _s5(I) _ck_s( 5,I)
+#define _s6(I) _ck_s( 6,I)
+#define _s7(I) _ck_s( 7,I)
+#define _s8(I) _ck_s( 8,I)
+#define _s9(I) _ck_s( 9,I)
+#define _s10(I) _ck_s(10,I)
+#define _s11(I) _ck_s(11,I)
+#define _s12(I) _ck_s(12,I)
+#define _s13(I) _ck_s(13,I)
+#define _s14(I) _ck_s(14,I)
+#define _s15(I) _ck_s(15,I)
+#define _s16(I) _ck_s(16,I)
+#define _s17(I) _ck_s(17,I)
+#define _s18(I) _ck_s(18,I)
+#define _s19(I) _ck_s(19,I)
+#define _s20(I) _ck_s(20,I)
+#define _s21(I) _ck_s(21,I)
+#define _s22(I) _ck_s(22,I)
+#define _s23(I) _ck_s(23,I)
+#define _s24(I) _ck_s(24,I)
+#define _s25(I) _ck_s(25,I)
+#define _s26(I) _ck_s(26,I)
+#define _s27(I) _ck_s(27,I)
+#define _s28(I) _ck_s(28,I)
+#define _s29(I) _ck_s(29,I)
+#define _s30(I) _ck_s(30,I)
+#define _s31(I) _ck_s(31,I)
+#define _u1(I) _ck_u( 1,I) /* _uN(I): I as an unsigned N-bit field, via _ck_u */
+#define _u2(I) _ck_u( 2,I)
+#define _u3(I) _ck_u( 3,I)
+#define _u4(I) _ck_u( 4,I)
+#define _u5(I) _ck_u( 5,I)
+#define _u6(I) _ck_u( 6,I)
+#define _u7(I) _ck_u( 7,I)
+#define _u8(I) _ck_u( 8,I)
+#define _u9(I) _ck_u( 9,I)
+#define _u10(I) _ck_u(10,I)
+#define _u11(I) _ck_u(11,I)
+#define _u12(I) _ck_u(12,I)
+#define _u13(I) _ck_u(13,I)
+#define _u14(I) _ck_u(14,I)
+#define _u15(I) _ck_u(15,I)
+#define _u16(I) _ck_u(16,I)
+#define _u17(I) _ck_u(17,I)
+#define _u18(I) _ck_u(18,I)
+#define _u19(I) _ck_u(19,I)
+#define _u20(I) _ck_u(20,I)
+#define _u21(I) _ck_u(21,I)
+#define _u22(I) _ck_u(22,I)
+#define _u23(I) _ck_u(23,I)
+#define _u24(I) _ck_u(24,I)
+#define _u25(I) _ck_u(25,I)
+#define _u26(I) _ck_u(26,I)
+#define _u27(I) _ck_u(27,I)
+#define _u28(I) _ck_u(28,I)
+#define _u29(I) _ck_u(29,I)
+#define _u30(I) _ck_u(30,I)
+#define _u31(I) _ck_u(31,I)
+
+/* ========================================================================= */
+/* --- ASSEMBLER ----------------------------------------------------------- */
+/* ========================================================================= */
+
+#define _b00 0 /* two-bit binary literals */
+#define _b01 1
+#define _b10 2
+#define _b11 3
+
+#define _b000 0 /* three-bit binary literals */
+#define _b001 1
+#define _b010 2
+#define _b011 3
+#define _b100 4
+#define _b101 5
+#define _b110 6
+#define _b111 7
+
+#define _OFF4(D) (_UL(D) - _UL(x86_get_target())) /* D minus the current target pointer, as unsigned 32-bit */
+#define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) ) /* that distance cast to one byte, then passed through _ck_d */
+
+#define _D8(D) (_B(0), ((*(_PUC(x86_get_target())-1))= _CKD8(D))) /* emit a zero byte, then store _CKD8(D) into the byte just below the target pointer */
+#define _D32(D) (_L(0), ((*(_PUL(x86_get_target())-1))= _OFF4(D))) /* emit a zero long, then store _OFF4(D) into the long just below the target pointer */
+
+#ifndef _ASM_SAFETY /* unchecked: field values pass through untouched */
+# define _M(M) (M)
+# define _r(R) (R)
+# define _m(M) (M)
+# define _s(S) (S)
+# define _i(I) (I)
+# define _b(B) (B)
+#else /* checked: x86_emit_failure0() when a value exceeds its field (2 bits for mod/scale, 3 bits otherwise) */
+# define _M(M) (((M)>3) ? x86_emit_failure0("internal error: mod = " #M) : (M))
+# define _r(R) (((R)>7) ? x86_emit_failure0("internal error: reg = " #R) : (R))
+# define _m(M) (((M)>7) ? x86_emit_failure0("internal error: r/m = " #M) : (M))
+# define _s(S) (((S)>3) ? x86_emit_failure0("internal error: memory scale = " #S) : (S))
+# define _i(I) (((I)>7) ? x86_emit_failure0("internal error: memory index = " #I) : (I))
+# define _b(B) (((B)>7) ? x86_emit_failure0("internal error: memory base = " #B) : (B))
+#endif
+
+#define _Mrm(Md,R,M) _B((_M(Md)<<6)|(_r(R)<<3)|_m(M)) /* emit one byte: mod in bits 7..6, reg in 5..3, r/m in 2..0 */
+#define _SIB(Sc,I, B) _B((_s(Sc)<<6)|(_i(I)<<3)|_b(B)) /* emit one byte: scale in bits 7..6, index in 5..3, base in 2..0 */
+
+#define _SCL(S) ((((S)==1) ? _b00 : \
+ (((S)==2) ? _b01 : \
+ (((S)==4) ? _b10 : \
+ (((S)==8) ? _b11 : x86_emit_failure0("illegal scale: " #S)))))) /* scale factor 1/2/4/8 -> field value 0/1/2/3, anything else fails */
+
+
+/* --- Memory subformats - urgh! ------------------------------------------- */
+
+/* _r_D() is the RIP addressing mode if X86_TARGET_64BIT; use _r_DSIB() instead for an absolute displacement */
+#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_L((_sl)(D))) /* mod=00 r/m=101, 32-bit displacement */
+#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((_sl)(D))) /* SIB with index=100 base=101, 32-bit displacement */
+#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) ) /* base only, no displacement */
+#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) ) /* base + index*scale, no displacement */
+#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_B((_sc)(D))) /* base + 8-bit displacement */
+#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((_sc)(D))) /* base + index*scale + 8-bit displacement */
+#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_L((_sl)(D))) /* base + 32-bit displacement */
+#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((_sl)(D))) /* index*scale + 32-bit displacement, base field 101 */
+#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((_sl)(D))) /* base + index*scale + 32-bit displacement */
+
+#define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B )))) /* shortest form: none if D==0 and B's low bits differ from RBP's, else 8-bit if it fits, else 32-bit */
+#define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S)))) /* same displacement-size choice, with a SIB byte */
+
+/* Use RIP-addressing in 64-bit mode, if possible (O is added to the target pointer + 4 before D is subtracted) */
+#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \
+ ((uintptr)x86_get_target() + 4 + (O) - (D) <= 0xffffffff))
+
+#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \
+ (_x86_RIP_addressing_possible(D, O) ? \
+ _r_D(R, (D) - ((uintptr)x86_get_target() + 4 + (O))) : \
+ _r_DSIB(R,D))) : \
+ (_rIP(B) ? _r_D (R,D ) : \
+ (_rsp12P(B) ? _r_DBIS(R,D,_rSP(),_rSP(),1) : \
+ _r_DB (R,D, B )))) : \
+ (_r0P(B) ? _r_4IS (R,D, I,S) : \
+ (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \
+ x86_emit_failure("illegal index register: %esp")))) /* general memory operand: picks a subformat from which of B and I are X86_NOREG */
+
+
+/* --- Instruction formats ------------------------------------------------- */
+
+#define _m32only(X) (! X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 64-bit mode")) /* X only when not targeting 64-bit, else failure */
+#define _m64only(X) ( X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 32-bit mode")) /* X only when targeting 64-bit, else failure */
+#define _m64(X) ( X86_TARGET_64BIT ? X : ((void)0) ) /* X when targeting 64-bit, otherwise nothing */
+
+/* _format Opcd ModR/M dN(rB,rI,Sc) imm... */
+
+#define _d16() ( _B(0x66 ) ) /* emits the prefix byte 0x66 */
+#define _O( OP ) ( _B( OP ) ) /* one opcode byte */
+#define _Or( OP,R ) ( _B( (OP)|_r(R)) ) /* one opcode byte with R or-ed into it */
+#define _OO( OP ) ( _B((OP)>>8), _B(( (OP) )&0xff) ) /* two opcode bytes, high byte first */
+#define _OOr( OP,R ) ( _B((OP)>>8), _B(( (OP)|_r(R))&0xff) ) /* two opcode bytes, R or-ed into the second */
+#define _Os( OP,B ) ( _s8P(B) ? _B(((OP)|_b10)) : _B(OP) ) /* opcode with bit 1 set when B fits a signed byte */
+#define _sW( W ) ( _s8P(W) ? _B(W):_W(W) ) /* immediate as a byte if it fits a signed byte, else as a word */
+#define _sL( L ) ( _s8P(L) ? _B(L):_L(L) ) /* immediate as a byte if it fits a signed byte, else as a long */
+#define _sWO( W ) ( _s8P(W) ? 1 : 2 ) /* number of bytes _sW(W) emits */
+#define _sLO( L ) ( _s8P(L) ? 1 : 4 ) /* number of bytes _sL(L) emits */
+#define _O_B( OP ,B ) ( _O ( OP ) ,_B(B) )
+#define _O_W( OP ,W ) ( _O ( OP ) ,_W(W) )
+#define _O_L( OP ,L ) ( _O ( OP ) ,_L(L) )
+#define _OO_L( OP ,L ) ( _OO ( OP ) ,_L(L) )
+#define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) )
+#define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) )
+#define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) )
+#define _Os_sW( OP ,W ) ( _Os ( OP,W) ,_sW(W) )
+#define _Os_sL( OP ,L ) ( _Os ( OP,L) ,_sL(L) )
+#define _O_W_B( OP ,W,B) ( _O ( OP ) ,_W(W),_B(B))
+#define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_B(B) )
+#define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_W(W) )
+#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_L(L) )
+#define _Or_Q( OP,R ,Q ) ( _Or ( OP,R) ,_Q(Q) )
+#define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) )
+#define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) )
+#define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_B(B) )
+#define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_W(W) )
+#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_L(L) )
+#define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP ),_Mrm(MO,R,M ) ,_B(B) )
+#define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) )
+#define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) )
+#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) /* last _r_X argument: byte count of the immediate that follows the operand (0 here) */
+#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,0) )
+#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) )
+#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,2) ,_W(W) )
+#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,4) ,_L(L) )
+#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) )
+#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS,_sWO(W)),_sW(W))
+#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS,_sLO(L)),_sL(L))
+#define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) ) /* _O_X_*: same as _O_r_X_* with the register field fixed to 0 */
+#define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) )
+#define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) )
+
+
+/* --- REX prefixes -------------------------------------------------------- */
+
+#define _VOID() ((void)0) /* "emit nothing" expression */
+#define _BIT(X) (!!(X)) /* normalise X to 0 or 1 */
+#define _d64(W,R,X,B) (_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B))) /* emit the prefix byte 0x40 with W, R, X, B in bits 3..0 */
+
+#define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? _d64(W,R,X,B) : _VOID()) /* emit the prefix only if some bit is set or L forces it */
+#define __REXwrx_(L,W,R,X,MR) (__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR)))) /* B bit taken from MR; 0 when MR is X86_RIP */
+#define __REXw_x_(L,W,R,X,MR) (__REXwrx_(L,W,_BIT(_rXP(R)),X,MR)) /* R bit taken from register R */
+#define __REX_reg(RR) (__REXwrxb(0,0,0,00,_BIT(_rXP(RR)))) /* only the B bit, taken from RR */
+#define __REX_mem(MB,MI) (__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB)))) /* X bit from the index, B bit from the base */
+
+// FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH)
+#define _REXBrr(RR,MR) _m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR)) /* byte forms: prefix forced when SPL/BPL/SIL/DIL is involved */
+#define _REXBmr(MB,MI,RD) _m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB))
+#define _REXBrm(RS,MB,MI) _REXBmr(MB,MI,RS)
+
+#define _REXBLrr(RR,MR) _m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR))
+#define _REXLrr(RR,MR) _m64(__REXw_x_(0,0,RR,0,MR)) /* _REXL*: W=0, emitted only when targeting 64-bit (via _m64) */
+#define _REXLmr(MB,MI,RD) _m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB))
+#define _REXLrm(RS,MB,MI) _REXLmr(MB,MI,RS)
+#define _REXLr(RR) _m64(__REX_reg(RR))
+#define _REXLm(MB,MI) _m64(__REX_mem(MB,MI))
+
+#define _REXQrr(RR,MR) _m64only(__REXw_x_(0,1,RR,0,MR)) /* _REXQ*: W=1; failure unless targeting 64-bit (via _m64only) */
+#define _REXQmr(MB,MI,RD) _m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB))
+#define _REXQrm(RS,MB,MI) _REXQmr(MB,MI,RS)
+#define _REXQr(RR) _m64only(__REX_reg(RR)) /* note: goes through __REX_reg, which passes W=0 */
+#define _REXQm(MB,MI) _m64only(__REX_mem(MB,MI)) /* note: goes through __REX_mem, which passes W=0 */
+
+
+/* ========================================================================= */
+/* --- Fully-qualified intrinsic instructions ------------------------------ */
+/* ========================================================================= */
+
+/* OPCODE + i = immediate operand
+ * + r = register operand
+ * + m = memory operand (disp,base,index,scale)
+ * + sr/sm = a star preceding a register or memory
+ * + 0 = top of stack register (for FPU instructions)
+ *
+ * NOTE in x86-64 mode: a memory operand with only a valid
+ * displacement value will lead to the expected absolute mode. If
+ * RIP addressing is necessary, X86_RIP shall be used as the base
+ * register argument.
+ */
+
+/* --- ALU instructions ---------------------------------------------------- */
+
+enum { /* ALU operation selectors: the _ALU* encoders use the value as (OP << 3) or as the ModRM reg field */
+ X86_ADD, /* 0 */
+ X86_OR, /* 1 */
+ X86_ADC, /* 2 */
+ X86_SBB, /* 3 */
+ X86_AND, /* 4 */
+ X86_SUB, /* 5 */
+ X86_XOR, /* 6 */
+ X86_CMP /* 7 */
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) )) /* 8-bit reg,reg: opcode OP<<3 */
+#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2 ,_r1(RD) ,MD,MB,MI,MS )) /* 8-bit mem -> reg: opcode (OP<<3)+2 */
+#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) ,_r1(RS) ,MD,MB,MI,MS )) /* 8-bit reg -> mem: opcode OP<<3 */
+#define _ALUBir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
+ (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \
+ (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) ) /* imm8 -> reg: short opcode (OP<<3)+4 for AL when X86_OPTIMIZE_ALU, else 0x80 with OP in the reg field */
+#define _ALUBim(OP, IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0x80 ,OP ,MD,MB,MI,MS ,_su8(IM))) /* imm8 -> mem: opcode 0x80, OP in the reg field */
+
+#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) )) /* 16-bit forms: 0x66 prefix first; reg,reg opcode (OP<<3)+1 */
+#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS )) /* mem -> reg: opcode (OP<<3)+3 */
+#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS )) /* reg -> mem: opcode (OP<<3)+1 */
+#define _ALUWir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
+ (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \
+ (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) ) /* imm -> reg: short opcode (OP<<3)+5 for AX when X86_OPTIMIZE_ALU, else 0x81 (bit 1 set by _Os for byte-sized immediates) */
+#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM))) /* imm -> mem: opcode 0x81 via _Os, OP in the reg field */
+
+#define _ALULrr(OP, RS, RD) (_REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r4(RS),_r4(RD) )) /* 32-bit reg,reg: opcode (OP<<3)+1 */
+#define _ALULmr(OP, MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r4(RD) ,MD,MB,MI,MS )) /* mem -> reg: opcode (OP<<3)+3 */
+#define _ALULrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r4(RS) ,MD,MB,MI,MS )) /* reg -> mem: opcode (OP<<3)+1 */
+#define _ALULir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \
+ (_REXLrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \
+ (_REXLrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r4(RD) ,IM )) ) /* imm -> reg: short opcode (OP<<3)+5 for EAX when X86_OPTIMIZE_ALU, else 0x81 via _Os */
+#define _ALULim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) /* imm -> mem: opcode 0x81 via _Os, OP in the reg field */
+
+#define _ALUQrr(OP, RS, RD) (_REXQrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r8(RS),_r8(RD) )) /* 64-bit forms: same opcodes as the 32-bit ones, with the _REXQ* prefix */
+#define _ALUQmr(OP, MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r8(RD) ,MD,MB,MI,MS )) /* mem -> reg: opcode (OP<<3)+3 */
+#define _ALUQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r8(RS) ,MD,MB,MI,MS )) /* reg -> mem: opcode (OP<<3)+1 */
+#define _ALUQir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \
+ (_REXQrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \
+ (_REXQrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r8(RD) ,IM )) ) /* imm -> reg: the immediate is still emitted as at most 4 bytes (_O_L / _sL) */
+#define _ALUQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) /* imm -> mem: opcode 0x81 via _Os, OP in the reg field */
+
+#define ADCBrr(RS, RD) _ALUBrr(X86_ADC, RS, RD) /* ADC family: each macro forwards to the matching _ALU* encoder with OP = X86_ADC */
+#define ADCBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADC, MD, MB, MI, MS, RD)
+#define ADCBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADC, RS, MD, MB, MI, MS)
+#define ADCBir(IM, RD) _ALUBir(X86_ADC, IM, RD)
+#define ADCBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADC, IM, MD, MB, MI, MS)
+
+#define ADCWrr(RS, RD) _ALUWrr(X86_ADC, RS, RD)
+#define ADCWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADC, MD, MB, MI, MS, RD)
+#define ADCWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADC, RS, MD, MB, MI, MS)
+#define ADCWir(IM, RD) _ALUWir(X86_ADC, IM, RD)
+#define ADCWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADC, IM, MD, MB, MI, MS)
+
+#define ADCLrr(RS, RD) _ALULrr(X86_ADC, RS, RD)
+#define ADCLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADC, MD, MB, MI, MS, RD)
+#define ADCLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADC, RS, MD, MB, MI, MS)
+#define ADCLir(IM, RD) _ALULir(X86_ADC, IM, RD)
+#define ADCLim(IM, MD, MB, MI, MS) _ALULim(X86_ADC, IM, MD, MB, MI, MS)
+
+#define ADCQrr(RS, RD) _ALUQrr(X86_ADC, RS, RD)
+#define ADCQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADC, MD, MB, MI, MS, RD)
+#define ADCQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADC, RS, MD, MB, MI, MS)
+#define ADCQir(IM, RD) _ALUQir(X86_ADC, IM, RD)
+#define ADCQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADC, IM, MD, MB, MI, MS)
+
+#define ADDBrr(RS, RD) _ALUBrr(X86_ADD, RS, RD) /* ADD family: each macro forwards to the matching _ALU* encoder with OP = X86_ADD */
+#define ADDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADD, MD, MB, MI, MS, RD)
+#define ADDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADD, RS, MD, MB, MI, MS)
+#define ADDBir(IM, RD) _ALUBir(X86_ADD, IM, RD)
+#define ADDBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADD, IM, MD, MB, MI, MS)
+
+#define ADDWrr(RS, RD) _ALUWrr(X86_ADD, RS, RD)
+#define ADDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADD, MD, MB, MI, MS, RD)
+#define ADDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADD, RS, MD, MB, MI, MS)
+#define ADDWir(IM, RD) _ALUWir(X86_ADD, IM, RD)
+#define ADDWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADD, IM, MD, MB, MI, MS)
+
+#define ADDLrr(RS, RD) _ALULrr(X86_ADD, RS, RD)
+#define ADDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADD, MD, MB, MI, MS, RD)
+#define ADDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADD, RS, MD, MB, MI, MS)
+#define ADDLir(IM, RD) _ALULir(X86_ADD, IM, RD)
+#define ADDLim(IM, MD, MB, MI, MS) _ALULim(X86_ADD, IM, MD, MB, MI, MS)
+
+#define ADDQrr(RS, RD) _ALUQrr(X86_ADD, RS, RD)
+#define ADDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADD, MD, MB, MI, MS, RD)
+#define ADDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADD, RS, MD, MB, MI, MS)
+#define ADDQir(IM, RD) _ALUQir(X86_ADD, IM, RD)
+#define ADDQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADD, IM, MD, MB, MI, MS)
+
+#define ANDBrr(RS, RD) _ALUBrr(X86_AND, RS, RD) /* AND family: each macro forwards to the matching _ALU* encoder with OP = X86_AND */
+#define ANDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_AND, MD, MB, MI, MS, RD)
+#define ANDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_AND, RS, MD, MB, MI, MS)
+#define ANDBir(IM, RD) _ALUBir(X86_AND, IM, RD)
+#define ANDBim(IM, MD, MB, MI, MS) _ALUBim(X86_AND, IM, MD, MB, MI, MS)
+
+#define ANDWrr(RS, RD) _ALUWrr(X86_AND, RS, RD)
+#define ANDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_AND, MD, MB, MI, MS, RD)
+#define ANDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_AND, RS, MD, MB, MI, MS)
+#define ANDWir(IM, RD) _ALUWir(X86_AND, IM, RD)
+#define ANDWim(IM, MD, MB, MI, MS) _ALUWim(X86_AND, IM, MD, MB, MI, MS)
+
+#define ANDLrr(RS, RD) _ALULrr(X86_AND, RS, RD)
+#define ANDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_AND, MD, MB, MI, MS, RD)
+#define ANDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_AND, RS, MD, MB, MI, MS)
+#define ANDLir(IM, RD) _ALULir(X86_AND, IM, RD)
+#define ANDLim(IM, MD, MB, MI, MS) _ALULim(X86_AND, IM, MD, MB, MI, MS)
+
+#define ANDQrr(RS, RD) _ALUQrr(X86_AND, RS, RD)
+#define ANDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_AND, MD, MB, MI, MS, RD)
+#define ANDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_AND, RS, MD, MB, MI, MS)
+#define ANDQir(IM, RD) _ALUQir(X86_AND, IM, RD)
+#define ANDQim(IM, MD, MB, MI, MS) _ALUQim(X86_AND, IM, MD, MB, MI, MS)
+
+#define CMPBrr(RS, RD) _ALUBrr(X86_CMP, RS, RD) /* CMP family: each macro forwards to the matching _ALU* encoder with OP = X86_CMP */
+#define CMPBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_CMP, MD, MB, MI, MS, RD)
+#define CMPBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_CMP, RS, MD, MB, MI, MS)
+#define CMPBir(IM, RD) _ALUBir(X86_CMP, IM, RD)
+#define CMPBim(IM, MD, MB, MI, MS) _ALUBim(X86_CMP, IM, MD, MB, MI, MS)
+
+#define CMPWrr(RS, RD) _ALUWrr(X86_CMP, RS, RD)
+#define CMPWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_CMP, MD, MB, MI, MS, RD)
+#define CMPWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_CMP, RS, MD, MB, MI, MS)
+#define CMPWir(IM, RD) _ALUWir(X86_CMP, IM, RD)
+#define CMPWim(IM, MD, MB, MI, MS) _ALUWim(X86_CMP, IM, MD, MB, MI, MS)
+
+#define CMPLrr(RS, RD) _ALULrr(X86_CMP, RS, RD)
+#define CMPLmr(MD, MB, MI, MS, RD) _ALULmr(X86_CMP, MD, MB, MI, MS, RD)
+#define CMPLrm(RS, MD, MB, MI, MS) _ALULrm(X86_CMP, RS, MD, MB, MI, MS)
+#define CMPLir(IM, RD) _ALULir(X86_CMP, IM, RD)
+#define CMPLim(IM, MD, MB, MI, MS) _ALULim(X86_CMP, IM, MD, MB, MI, MS)
+
+#define CMPQrr(RS, RD) _ALUQrr(X86_CMP, RS, RD)
+#define CMPQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_CMP, MD, MB, MI, MS, RD)
+#define CMPQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_CMP, RS, MD, MB, MI, MS)
+#define CMPQir(IM, RD) _ALUQir(X86_CMP, IM, RD)
+#define CMPQim(IM, MD, MB, MI, MS) _ALUQim(X86_CMP, IM, MD, MB, MI, MS)
+
+#define ORBrr(RS, RD) _ALUBrr(X86_OR, RS, RD) /* OR family: each macro forwards to the matching _ALU* encoder with OP = X86_OR */
+#define ORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_OR, MD, MB, MI, MS, RD)
+#define ORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_OR, RS, MD, MB, MI, MS)
+#define ORBir(IM, RD) _ALUBir(X86_OR, IM, RD)
+#define ORBim(IM, MD, MB, MI, MS) _ALUBim(X86_OR, IM, MD, MB, MI, MS)
+
+#define ORWrr(RS, RD) _ALUWrr(X86_OR, RS, RD)
+#define ORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_OR, MD, MB, MI, MS, RD)
+#define ORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_OR, RS, MD, MB, MI, MS)
+#define ORWir(IM, RD) _ALUWir(X86_OR, IM, RD)
+#define ORWim(IM, MD, MB, MI, MS) _ALUWim(X86_OR, IM, MD, MB, MI, MS)
+
+#define ORLrr(RS, RD) _ALULrr(X86_OR, RS, RD)
+#define ORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_OR, MD, MB, MI, MS, RD)
+#define ORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_OR, RS, MD, MB, MI, MS)
+#define ORLir(IM, RD) _ALULir(X86_OR, IM, RD)
+#define ORLim(IM, MD, MB, MI, MS) _ALULim(X86_OR, IM, MD, MB, MI, MS)
+
+#define ORQrr(RS, RD) _ALUQrr(X86_OR, RS, RD)
+#define ORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_OR, MD, MB, MI, MS, RD)
+#define ORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_OR, RS, MD, MB, MI, MS)
+#define ORQir(IM, RD) _ALUQir(X86_OR, IM, RD)
+#define ORQim(IM, MD, MB, MI, MS) _ALUQim(X86_OR, IM, MD, MB, MI, MS)
+
+#define SBBBrr(RS, RD) _ALUBrr(X86_SBB, RS, RD) /* SBB family: each macro forwards to the matching _ALU* encoder with OP = X86_SBB */
+#define SBBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SBB, MD, MB, MI, MS, RD)
+#define SBBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SBB, RS, MD, MB, MI, MS)
+#define SBBBir(IM, RD) _ALUBir(X86_SBB, IM, RD)
+#define SBBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SBB, IM, MD, MB, MI, MS)
+
+#define SBBWrr(RS, RD) _ALUWrr(X86_SBB, RS, RD)
+#define SBBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SBB, MD, MB, MI, MS, RD)
+#define SBBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SBB, RS, MD, MB, MI, MS)
+#define SBBWir(IM, RD) _ALUWir(X86_SBB, IM, RD)
+#define SBBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SBB, IM, MD, MB, MI, MS)
+
+#define SBBLrr(RS, RD) _ALULrr(X86_SBB, RS, RD)
+#define SBBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SBB, MD, MB, MI, MS, RD)
+#define SBBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SBB, RS, MD, MB, MI, MS)
+#define SBBLir(IM, RD) _ALULir(X86_SBB, IM, RD)
+#define SBBLim(IM, MD, MB, MI, MS) _ALULim(X86_SBB, IM, MD, MB, MI, MS)
+
+#define SBBQrr(RS, RD) _ALUQrr(X86_SBB, RS, RD)
+#define SBBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SBB, MD, MB, MI, MS, RD)
+#define SBBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SBB, RS, MD, MB, MI, MS)
+#define SBBQir(IM, RD) _ALUQir(X86_SBB, IM, RD)
+#define SBBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SBB, IM, MD, MB, MI, MS)
+
+#define SUBBrr(RS, RD) _ALUBrr(X86_SUB, RS, RD) /* SUB family: each macro forwards to the matching _ALU* encoder with OP = X86_SUB */
+#define SUBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SUB, MD, MB, MI, MS, RD)
+#define SUBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SUB, RS, MD, MB, MI, MS)
+#define SUBBir(IM, RD) _ALUBir(X86_SUB, IM, RD)
+#define SUBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SUB, IM, MD, MB, MI, MS)
+
+#define SUBWrr(RS, RD) _ALUWrr(X86_SUB, RS, RD)
+#define SUBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SUB, MD, MB, MI, MS, RD)
+#define SUBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SUB, RS, MD, MB, MI, MS)
+#define SUBWir(IM, RD) _ALUWir(X86_SUB, IM, RD)
+#define SUBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SUB, IM, MD, MB, MI, MS)
+
+#define SUBLrr(RS, RD) _ALULrr(X86_SUB, RS, RD)
+#define SUBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SUB, MD, MB, MI, MS, RD)
+#define SUBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SUB, RS, MD, MB, MI, MS)
+#define SUBLir(IM, RD) _ALULir(X86_SUB, IM, RD)
+#define SUBLim(IM, MD, MB, MI, MS) _ALULim(X86_SUB, IM, MD, MB, MI, MS)
+
+#define SUBQrr(RS, RD) _ALUQrr(X86_SUB, RS, RD)
+#define SUBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SUB, MD, MB, MI, MS, RD)
+#define SUBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SUB, RS, MD, MB, MI, MS)
+#define SUBQir(IM, RD) _ALUQir(X86_SUB, IM, RD)
+#define SUBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SUB, IM, MD, MB, MI, MS)
+
+#define XORBrr(RS, RD) _ALUBrr(X86_XOR, RS, RD) /* XOR family: each macro forwards to the matching _ALU* encoder with OP = X86_XOR */
+#define XORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_XOR, MD, MB, MI, MS, RD)
+#define XORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_XOR, RS, MD, MB, MI, MS)
+#define XORBir(IM, RD) _ALUBir(X86_XOR, IM, RD)
+#define XORBim(IM, MD, MB, MI, MS) _ALUBim(X86_XOR, IM, MD, MB, MI, MS)
+
+#define XORWrr(RS, RD) _ALUWrr(X86_XOR, RS, RD)
+#define XORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_XOR, MD, MB, MI, MS, RD)
+#define XORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_XOR, RS, MD, MB, MI, MS)
+#define XORWir(IM, RD) _ALUWir(X86_XOR, IM, RD)
+#define XORWim(IM, MD, MB, MI, MS) _ALUWim(X86_XOR, IM, MD, MB, MI, MS)
+
+#define XORLrr(RS, RD) _ALULrr(X86_XOR, RS, RD)
+#define XORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_XOR, MD, MB, MI, MS, RD)
+#define XORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_XOR, RS, MD, MB, MI, MS)
+#define XORLir(IM, RD) _ALULir(X86_XOR, IM, RD)
+#define XORLim(IM, MD, MB, MI, MS) _ALULim(X86_XOR, IM, MD, MB, MI, MS)
+
+#define XORQrr(RS, RD) _ALUQrr(X86_XOR, RS, RD)
+#define XORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_XOR, MD, MB, MI, MS, RD)
+#define XORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_XOR, RS, MD, MB, MI, MS)
+#define XORQir(IM, RD) _ALUQir(X86_XOR, IM, RD)
+#define XORQim(IM, MD, MB, MI, MS) _ALUQim(X86_XOR, IM, MD, MB, MI, MS)
+
+
+/* --- Shift/Rotate instructions ------------------------------------------- */
+
+enum { /* rotate/shift selectors: the _ROTSHI* encoders place the value in the ModRM reg field */
+ X86_ROL, /* 0 */
+ X86_ROR, /* 1 */
+ X86_RCL, /* 2 */
+ X86_RCR, /* 3 */
+ X86_SHL, /* 4 */
+ X86_SHR, /* 5 */
+ X86_SAR = X86_SHR + 2 /* 7; the value 6 has no name in this enum */
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define _ROTSHIBir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
+ (_REXBrr(0, RD), _O_Mrm (0xd0 ,_b11,OP,_r1(RD) )) : \
+ (_REXBrr(0, RD), _O_Mrm_B (0xc0 ,_b11,OP,_r1(RD) ,_u8(IM))) ) /* 8-bit reg by immediate: opcode 0xd0 (no count byte) for a count of 1 when X86_OPTIMIZE_ROTSHI, else 0xc0 + imm8 */
+#define _ROTSHIBim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
+ (_REXBrm(0, MB, MI), _O_r_X (0xd0 ,OP ,MD,MB,MI,MS )) : \
+ (_REXBrm(0, MB, MI), _O_r_X_B (0xc0 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) /* 8-bit memory by immediate: same opcode choice */
+#define _ROTSHIBrr(OP,RS,RD) (((RS) == X86_CL) ? \
+ (_REXBrr(RS, RD), _O_Mrm (0xd2 ,_b11,OP,_r1(RD) )) : \
+ x86_emit_failure("source register must be CL" ) ) /* 8-bit reg by register count: opcode 0xd2; RS must equal X86_CL */
+#define _ROTSHIBrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
+ (_REXBrm(RS, MB, MI), _O_r_X (0xd2 ,OP ,MD,MB,MI,MS )) : \
+ x86_emit_failure("source register must be CL" ) ) /* 8-bit memory by register count: opcode 0xd2; RS must equal X86_CL */
+
+#define _ROTSHIWir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
+ (_d16(), _REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r2(RD) )) : \
+ (_d16(), _REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r2(RD) ,_u8(IM))) ) /* 16-bit reg by immediate: 0x66 prefix, then 0xd1 for a count of 1 when X86_OPTIMIZE_ROTSHI, else 0xc1 + imm8 */
+#define _ROTSHIWim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
+ (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
+ (_d16(), _REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) /* 16-bit memory by immediate: same opcode choice */
+#define _ROTSHIWrr(OP,RS,RD) (((RS) == X86_CL) ? \
+ (_d16(), _REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r2(RD) )) : \
+ x86_emit_failure("source register must be CL" ) ) /* 16-bit reg by register count: opcode 0xd3; RS must equal X86_CL */
+#define _ROTSHIWrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
+ (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
+ x86_emit_failure("source register must be CL" ) ) /* 16-bit memory by register count: opcode 0xd3; RS must equal X86_CL */
+
+#define _ROTSHILir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
+ (_REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r4(RD) )) : \
+ (_REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r4(RD) ,_u8(IM))) ) /* 32-bit reg by immediate: 0xd1 for a count of 1 when X86_OPTIMIZE_ROTSHI, else 0xc1 + imm8 */
+#define _ROTSHILim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
+ (_REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
+ (_REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) /* 32-bit memory by immediate: same opcode choice */
+#define _ROTSHILrr(OP,RS,RD) (((RS) == X86_CL) ? \
+ (_REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r4(RD) )) : \
+ x86_emit_failure("source register must be CL" ) ) /* 32-bit reg by register count: opcode 0xd3; RS must equal X86_CL */
+#define _ROTSHILrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
+ (_REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
+ x86_emit_failure("source register must be CL" ) ) /* 32-bit memory by register count: opcode 0xd3; RS must equal X86_CL */
+
+#define _ROTSHIQir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
+ (_REXQrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r8(RD) )) : \
+ (_REXQrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r8(RD) ,_u8(IM))) ) /* 64-bit reg by immediate: _REXQ* prefix, then the same opcodes as the 32-bit form */
+#define _ROTSHIQim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
+ (_REXQrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
+ (_REXQrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) /* 64-bit memory by immediate: same opcode choice */
+#define _ROTSHIQrr(OP,RS,RD) (((RS) == X86_CL) ? \
+ (_REXQrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r8(RD) )) : \
+ x86_emit_failure("source register must be CL" ) ) /* 64-bit reg by register count: opcode 0xd3; RS must equal X86_CL */
+#define _ROTSHIQrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
+ (_REXQrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
+ x86_emit_failure("source register must be CL" ) ) /* 64-bit memory by register count: opcode 0xd3; RS must equal X86_CL */
+
+#define ROLBir(IM, RD) _ROTSHIBir(X86_ROL, IM, RD) /* ROL family: each macro forwards to the matching _ROTSHI* encoder with OP = X86_ROL */
+#define ROLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS)
+#define ROLBrr(RS, RD) _ROTSHIBrr(X86_ROL, RS, RD)
+#define ROLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS)
+
+#define ROLWir(IM, RD) _ROTSHIWir(X86_ROL, IM, RD)
+#define ROLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS)
+#define ROLWrr(RS, RD) _ROTSHIWrr(X86_ROL, RS, RD)
+#define ROLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS)
+
+#define ROLLir(IM, RD) _ROTSHILir(X86_ROL, IM, RD)
+#define ROLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROL, IM, MD, MB, MI, MS)
+#define ROLLrr(RS, RD) _ROTSHILrr(X86_ROL, RS, RD)
+#define ROLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS)
+
+#define ROLQir(IM, RD) _ROTSHIQir(X86_ROL, IM, RD)
+#define ROLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS)
+#define ROLQrr(RS, RD) _ROTSHIQrr(X86_ROL, RS, RD)
+#define ROLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS)
+
+#define RORBir(IM, RD) _ROTSHIBir(X86_ROR, IM, RD) /* ROR family: each macro forwards to the matching _ROTSHI* encoder with OP = X86_ROR */
+#define RORBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS)
+#define RORBrr(RS, RD) _ROTSHIBrr(X86_ROR, RS, RD)
+#define RORBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS)
+
+#define RORWir(IM, RD) _ROTSHIWir(X86_ROR, IM, RD)
+#define RORWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS)
+#define RORWrr(RS, RD) _ROTSHIWrr(X86_ROR, RS, RD)
+#define RORWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROR, RS, MD, MB, MI, MS)
+
+#define RORLir(IM, RD) _ROTSHILir(X86_ROR, IM, RD)
+#define RORLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROR, IM, MD, MB, MI, MS)
+#define RORLrr(RS, RD) _ROTSHILrr(X86_ROR, RS, RD)
+#define RORLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS)
+
+#define RORQir(IM, RD) _ROTSHIQir(X86_ROR, IM, RD)
+#define RORQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS)
+#define RORQrr(RS, RD) _ROTSHIQrr(X86_ROR, RS, RD)
+#define RORQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS)
+
+#define RCLBir(IM, RD) _ROTSHIBir(X86_RCL, IM, RD)
+#define RCLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS)
+#define RCLBrr(RS, RD) _ROTSHIBrr(X86_RCL, RS, RD)
+#define RCLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS)
+
+#define RCLWir(IM, RD) _ROTSHIWir(X86_RCL, IM, RD)
+#define RCLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS)
+#define RCLWrr(RS, RD) _ROTSHIWrr(X86_RCL, RS, RD)
+#define RCLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS)
+
+#define RCLLir(IM, RD) _ROTSHILir(X86_RCL, IM, RD)
+#define RCLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCL, IM, MD, MB, MI, MS)
+#define RCLLrr(RS, RD) _ROTSHILrr(X86_RCL, RS, RD)
+#define RCLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS)
+
+#define RCLQir(IM, RD) _ROTSHIQir(X86_RCL, IM, RD)
+#define RCLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS)
+#define RCLQrr(RS, RD) _ROTSHIQrr(X86_RCL, RS, RD)
+#define RCLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS)
+
+#define RCRBir(IM, RD) _ROTSHIBir(X86_RCR, IM, RD)
+#define RCRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS)
+#define RCRBrr(RS, RD) _ROTSHIBrr(X86_RCR, RS, RD)
+#define RCRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS)
+
+#define RCRWir(IM, RD) _ROTSHIWir(X86_RCR, IM, RD)
+#define RCRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS)
+#define RCRWrr(RS, RD) _ROTSHIWrr(X86_RCR, RS, RD)
+#define RCRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS)
+
+#define RCRLir(IM, RD) _ROTSHILir(X86_RCR, IM, RD)
+#define RCRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCR, IM, MD, MB, MI, MS)
+#define RCRLrr(RS, RD) _ROTSHILrr(X86_RCR, RS, RD)
+#define RCRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS)
+
+#define RCRQir(IM, RD) _ROTSHIQir(X86_RCR, IM, RD)
+#define RCRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS)
+#define RCRQrr(RS, RD) _ROTSHIQrr(X86_RCR, RS, RD)
+#define RCRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS)
+
+#define SHLBir(IM, RD) _ROTSHIBir(X86_SHL, IM, RD)
+#define SHLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS)
+#define SHLBrr(RS, RD) _ROTSHIBrr(X86_SHL, RS, RD)
+#define SHLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS)
+
+#define SHLWir(IM, RD) _ROTSHIWir(X86_SHL, IM, RD)
+#define SHLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS)
+#define SHLWrr(RS, RD) _ROTSHIWrr(X86_SHL, RS, RD)
+#define SHLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS)
+
+#define SHLLir(IM, RD) _ROTSHILir(X86_SHL, IM, RD)
+#define SHLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHL, IM, MD, MB, MI, MS)
+#define SHLLrr(RS, RD) _ROTSHILrr(X86_SHL, RS, RD)
+#define SHLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS)
+
+#define SHLQir(IM, RD) _ROTSHIQir(X86_SHL, IM, RD)
+#define SHLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS)
+#define SHLQrr(RS, RD) _ROTSHIQrr(X86_SHL, RS, RD)
+#define SHLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS)
+
+#define SHRBir(IM, RD) _ROTSHIBir(X86_SHR, IM, RD)
+#define SHRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS)
+#define SHRBrr(RS, RD) _ROTSHIBrr(X86_SHR, RS, RD)
+#define SHRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS)
+
+#define SHRWir(IM, RD) _ROTSHIWir(X86_SHR, IM, RD)
+#define SHRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHR, IM, MD, MB, MI, MS)
+#define SHRWrr(RS, RD) _ROTSHIWrr(X86_SHR, RS, RD)
+#define SHRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS)
+
+#define SHRLir(IM, RD) _ROTSHILir(X86_SHR, IM, RD)
+#define SHRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHR, IM, MD, MB, MI, MS)
+#define SHRLrr(RS, RD) _ROTSHILrr(X86_SHR, RS, RD)
+#define SHRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS)
+
+#define SHRQir(IM, RD) _ROTSHIQir(X86_SHR, IM, RD)
+#define SHRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS)
+#define SHRQrr(RS, RD) _ROTSHIQrr(X86_SHR, RS, RD)
+#define SHRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS)
+
+#define SALBir SHLBir
+#define SALBim SHLBim
+#define SALBrr SHLBrr
+#define SALBrm SHLBrm
+
+#define SALWir SHLWir
+#define SALWim SHLWim
+#define SALWrr SHLWrr
+#define SALWrm SHLWrm
+
+#define SALLir SHLLir
+#define SALLim SHLLim
+#define SALLrr SHLLrr
+#define SALLrm SHLLrm
+
+#define SALQir SHLQir
+#define SALQim SHLQim
+#define SALQrr SHLQrr
+#define SALQrm SHLQrm
+
+#define SARBir(IM, RD) _ROTSHIBir(X86_SAR, IM, RD)
+#define SARBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS)
+#define SARBrr(RS, RD) _ROTSHIBrr(X86_SAR, RS, RD)
+#define SARBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS)
+
+#define SARWir(IM, RD) _ROTSHIWir(X86_SAR, IM, RD)
+#define SARWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS)
+#define SARWrr(RS, RD) _ROTSHIWrr(X86_SAR, RS, RD)
+#define SARWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS)
+
+#define SARLir(IM, RD) _ROTSHILir(X86_SAR, IM, RD)
+#define SARLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SAR, IM, MD, MB, MI, MS)
+#define SARLrr(RS, RD) _ROTSHILrr(X86_SAR, RS, RD)
+#define SARLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS)
+
+#define SARQir(IM, RD) _ROTSHIQir(X86_SAR, IM, RD)
+#define SARQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS)
+#define SARQrr(RS, RD) _ROTSHIQrr(X86_SAR, RS, RD)
+#define SARQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS)
+
+
+/* --- Bit test instructions ----------------------------------------------- */
+
+enum { /* Operation selectors for the _BT* encoders: placed in the ModRM reg field with opcode 0x0fba (immediate forms) and folded into the opcode as 0x0f83|(OP<<3) (register forms). */
+ X86_BT = 4,
+ X86_BTS = 5,
+ X86_BTR = 6,
+ X86_BTC = 7,
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+/* Bit-test encoders for W/L/Q operand sizes. ir/im: bit index is an 8-bit immediate, opcode 0x0fba with OP in the ModRM reg field. rr/rm: bit index is register RS, opcode 0x0f83|(OP<<3). The W forms emit _d16() first; the Q forms use the _REXQ* prefix helpers. */
+#define _BTWir(OP, IM, RD) (_d16(), _REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r2(RD) ,_u8(IM)))
+#define _BTWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
+#define _BTWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD) ))
+#define _BTWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r2(RS) ,MD,MB,MI,MS ))
+
+#define _BTLir(OP, IM, RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r4(RD) ,_u8(IM)))
+#define _BTLim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
+#define _BTLrr(OP, RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD) ))
+#define _BTLrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r4(RS) ,MD,MB,MI,MS ))
+
+#define _BTQir(OP, IM, RD) (_REXQrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r8(RD) ,_u8(IM)))
+#define _BTQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
+#define _BTQrr(OP, RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD) ))
+#define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS ))
+/* Public bit-test emitters: BT, BTC, BTR and BTS for W/L/Q operand sizes in ir/im/rr/rm operand forms. Each forwards to the matching _BT* encoder with its X86_BT* selector. */
+#define BTWir(IM, RD) _BTWir(X86_BT, IM, RD)
+#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS)
+#define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD)
+#define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS)
+
+#define BTLir(IM, RD) _BTLir(X86_BT, IM, RD)
+#define BTLim(IM, MD, MB, MI, MS) _BTLim(X86_BT, IM, MD, MB, MI, MS)
+#define BTLrr(RS, RD) _BTLrr(X86_BT, RS, RD)
+#define BTLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BT, RS, MD, MB, MI, MS)
+
+#define BTQir(IM, RD) _BTQir(X86_BT, IM, RD)
+#define BTQim(IM, MD, MB, MI, MS) _BTQim(X86_BT, IM, MD, MB, MI, MS)
+#define BTQrr(RS, RD) _BTQrr(X86_BT, RS, RD)
+#define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS)
+
+#define BTCWir(IM, RD) _BTWir(X86_BTC, IM, RD)
+#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS)
+#define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD)
+#define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS)
+
+#define BTCLir(IM, RD) _BTLir(X86_BTC, IM, RD)
+#define BTCLim(IM, MD, MB, MI, MS) _BTLim(X86_BTC, IM, MD, MB, MI, MS)
+#define BTCLrr(RS, RD) _BTLrr(X86_BTC, RS, RD)
+#define BTCLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTC, RS, MD, MB, MI, MS)
+
+#define BTCQir(IM, RD) _BTQir(X86_BTC, IM, RD)
+#define BTCQim(IM, MD, MB, MI, MS) _BTQim(X86_BTC, IM, MD, MB, MI, MS)
+#define BTCQrr(RS, RD) _BTQrr(X86_BTC, RS, RD)
+#define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS)
+
+#define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD)
+#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS)
+#define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD)
+#define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS)
+
+#define BTRLir(IM, RD) _BTLir(X86_BTR, IM, RD)
+#define BTRLim(IM, MD, MB, MI, MS) _BTLim(X86_BTR, IM, MD, MB, MI, MS)
+#define BTRLrr(RS, RD) _BTLrr(X86_BTR, RS, RD)
+#define BTRLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTR, RS, MD, MB, MI, MS)
+
+#define BTRQir(IM, RD) _BTQir(X86_BTR, IM, RD)
+#define BTRQim(IM, MD, MB, MI, MS) _BTQim(X86_BTR, IM, MD, MB, MI, MS)
+#define BTRQrr(RS, RD) _BTQrr(X86_BTR, RS, RD)
+#define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS)
+
+#define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD)
+#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS)
+#define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD)
+#define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS)
+
+#define BTSLir(IM, RD) _BTLir(X86_BTS, IM, RD)
+#define BTSLim(IM, MD, MB, MI, MS) _BTLim(X86_BTS, IM, MD, MB, MI, MS)
+#define BTSLrr(RS, RD) _BTLrr(X86_BTS, RS, RD)
+#define BTSLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTS, RS, MD, MB, MI, MS)
+
+#define BTSQir(IM, RD) _BTQir(X86_BTS, IM, RD)
+#define BTSQim(IM, MD, MB, MI, MS) _BTQim(X86_BTS, IM, MD, MB, MI, MS)
+#define BTSQrr(RS, RD) _BTQrr(X86_BTS, RS, RD)
+#define BTSQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTS, RS, MD, MB, MI, MS)
+
+
+/* --- Move instructions --------------------------------------------------- */
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+/* MOV emitters for B/W/L/Q operand sizes: rr = register to register, mr = memory to register, rm = register to memory, ir = immediate to register, im = immediate to memory. MOVQir emits its immediate through _Or_Q, whereas MOVQim uses _O_X_L, the same helper as MOVLim. */
+#define MOVBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) ))
+#define MOVBmr(MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS ))
+#define MOVBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS ))
+#define MOVBir(IM, R) (_REXBrr(0, R), _Or_B (0xb0,_r1(R) ,_su8(IM)))
+#define MOVBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM)))
+
+#define MOVWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) ))
+#define MOVWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS ))
+#define MOVWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS ))
+#define MOVWir(IM, R) (_d16(), _REXLrr(0, R), _Or_W (0xb8,_r2(R) ,_su16(IM)))
+#define MOVWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM)))
+
+#define MOVLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) ))
+#define MOVLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS ))
+#define MOVLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS ))
+#define MOVLir(IM, R) (_REXLrr(0, R), _Or_L (0xb8,_r4(R) ,IM ))
+#define MOVLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ))
+
+#define MOVQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x89 ,_b11,_r8(RS),_r8(RD) ))
+#define MOVQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS ))
+#define MOVQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS ))
+#define MOVQir(IM, R) (_REXQrr(0, R), _Or_Q (0xb8,_r8(R) ,IM ))
+#define MOVQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ))
+
+
+/* --- Unary and Multiply/Divide instructions ------------------------------ */
+
+enum { /* Operation selectors for the _UNARY* encoders: placed in the ModRM reg field of opcodes 0xf6 (byte) and 0xf7 (word/long/quad). */
+ X86_NOT = 2,
+ X86_NEG = 3,
+ X86_MUL = 4,
+ X86_IMUL = 5,
+ X86_DIV = 6,
+ X86_IDIV = 7,
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+/* Single-operand encoders: opcode 0xf6 for byte operands, 0xf7 for W/L/Q, with OP in the ModRM reg field. r = register operand RS, m = memory operand. The W forms emit _d16() first; the Q forms use the _REXQ* prefix helpers. */
+#define _UNARYBr(OP, RS) (_REXBrr(0, RS), _O_Mrm (0xf6 ,_b11,OP ,_r1(RS) ))
+#define _UNARYBm(OP, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xf6 ,OP ,MD,MB,MI,MS ))
+#define _UNARYWr(OP, RS) (_d16(), _REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r2(RS) ))
+#define _UNARYWm(OP, MD, MB, MI, MS) (_d16(), _REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
+#define _UNARYLr(OP, RS) (_REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r4(RS) ))
+#define _UNARYLm(OP, MD, MB, MI, MS) (_REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
+#define _UNARYQr(OP, RS) (_REXQrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r8(RS) ))
+#define _UNARYQm(OP, MD, MB, MI, MS) (_REXQmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
+/* Public single-operand emitters: NOT, NEG, MUL, IMUL, DIV and IDIV for B/W/L/Q operand sizes, on a register (r) or memory (m) operand. Each forwards to the matching _UNARY* encoder with its X86_* selector. */
+#define NOTBr(RS) _UNARYBr(X86_NOT, RS)
+#define NOTBm(MD, MB, MI, MS) _UNARYBm(X86_NOT, MD, MB, MI, MS)
+#define NOTWr(RS) _UNARYWr(X86_NOT, RS)
+#define NOTWm(MD, MB, MI, MS) _UNARYWm(X86_NOT, MD, MB, MI, MS)
+#define NOTLr(RS) _UNARYLr(X86_NOT, RS)
+#define NOTLm(MD, MB, MI, MS) _UNARYLm(X86_NOT, MD, MB, MI, MS)
+#define NOTQr(RS) _UNARYQr(X86_NOT, RS)
+#define NOTQm(MD, MB, MI, MS) _UNARYQm(X86_NOT, MD, MB, MI, MS)
+
+#define NEGBr(RS) _UNARYBr(X86_NEG, RS)
+#define NEGBm(MD, MB, MI, MS) _UNARYBm(X86_NEG, MD, MB, MI, MS)
+#define NEGWr(RS) _UNARYWr(X86_NEG, RS)
+#define NEGWm(MD, MB, MI, MS) _UNARYWm(X86_NEG, MD, MB, MI, MS)
+#define NEGLr(RS) _UNARYLr(X86_NEG, RS)
+#define NEGLm(MD, MB, MI, MS) _UNARYLm(X86_NEG, MD, MB, MI, MS)
+#define NEGQr(RS) _UNARYQr(X86_NEG, RS)
+#define NEGQm(MD, MB, MI, MS) _UNARYQm(X86_NEG, MD, MB, MI, MS)
+
+#define MULBr(RS) _UNARYBr(X86_MUL, RS)
+#define MULBm(MD, MB, MI, MS) _UNARYBm(X86_MUL, MD, MB, MI, MS)
+#define MULWr(RS) _UNARYWr(X86_MUL, RS)
+#define MULWm(MD, MB, MI, MS) _UNARYWm(X86_MUL, MD, MB, MI, MS)
+#define MULLr(RS) _UNARYLr(X86_MUL, RS)
+#define MULLm(MD, MB, MI, MS) _UNARYLm(X86_MUL, MD, MB, MI, MS)
+#define MULQr(RS) _UNARYQr(X86_MUL, RS)
+#define MULQm(MD, MB, MI, MS) _UNARYQm(X86_MUL, MD, MB, MI, MS)
+
+#define IMULBr(RS) _UNARYBr(X86_IMUL, RS)
+#define IMULBm(MD, MB, MI, MS) _UNARYBm(X86_IMUL, MD, MB, MI, MS)
+#define IMULWr(RS) _UNARYWr(X86_IMUL, RS)
+#define IMULWm(MD, MB, MI, MS) _UNARYWm(X86_IMUL, MD, MB, MI, MS)
+#define IMULLr(RS) _UNARYLr(X86_IMUL, RS)
+#define IMULLm(MD, MB, MI, MS) _UNARYLm(X86_IMUL, MD, MB, MI, MS)
+#define IMULQr(RS) _UNARYQr(X86_IMUL, RS)
+#define IMULQm(MD, MB, MI, MS) _UNARYQm(X86_IMUL, MD, MB, MI, MS)
+
+#define DIVBr(RS) _UNARYBr(X86_DIV, RS)
+#define DIVBm(MD, MB, MI, MS) _UNARYBm(X86_DIV, MD, MB, MI, MS)
+#define DIVWr(RS) _UNARYWr(X86_DIV, RS)
+#define DIVWm(MD, MB, MI, MS) _UNARYWm(X86_DIV, MD, MB, MI, MS)
+#define DIVLr(RS) _UNARYLr(X86_DIV, RS)
+#define DIVLm(MD, MB, MI, MS) _UNARYLm(X86_DIV, MD, MB, MI, MS)
+#define DIVQr(RS) _UNARYQr(X86_DIV, RS)
+#define DIVQm(MD, MB, MI, MS) _UNARYQm(X86_DIV, MD, MB, MI, MS)
+
+#define IDIVBr(RS) _UNARYBr(X86_IDIV, RS)
+#define IDIVBm(MD, MB, MI, MS) _UNARYBm(X86_IDIV, MD, MB, MI, MS)
+#define IDIVWr(RS) _UNARYWr(X86_IDIV, RS)
+#define IDIVWm(MD, MB, MI, MS) _UNARYWm(X86_IDIV, MD, MB, MI, MS)
+#define IDIVLr(RS) _UNARYLr(X86_IDIV, RS)
+#define IDIVLm(MD, MB, MI, MS) _UNARYLm(X86_IDIV, MD, MB, MI, MS)
+#define IDIVQr(RS) _UNARYQr(X86_IDIV, RS)
+#define IDIVQm(MD, MB, MI, MS) _UNARYQm(X86_IDIV, MD, MB, MI, MS)
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+/* Two-operand IMUL (rr/mr): opcode 0x0faf with destination RD in the ModRM reg field and the source (RS or memory) in r/m. */
+#define IMULWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) ))
+#define IMULWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS ))
+/* Immediate IMUL (ir/irr/imr): opcode 0x69 with the immediate emitted through the _Os_*_sW / _Os_*_sL helpers. NOTE(review): the irr forms put RS in the ModRM reg field and RD in r/m, the opposite of the rr/mr/imr forms in this group - confirm the intended operand order against callers. */
+#define IMULWirr(IM,RS,RD) (_d16(), _REXLrr(RS, RD), _Os_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) ))
+#define IMULWimr(IM,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _Os_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) ))
+/* IMULLir/IMULQir place RD in both the ModRM reg and r/m fields, so RD is passed as both prefix arguments; passing 0 for the first one left the reg half without its prefix bit for extended registers. */
+#define IMULLir(IM, RD) (_REXLrr(RD, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM ))
+#define IMULLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) ))
+#define IMULLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS ))
+
+#define IMULQir(IM, RD) (_REXQrr(RD, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RD),_r8(RD) ,IM ))
+#define IMULQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r8(RD),_r8(RS) ))
+#define IMULQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0faf ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define IMULLirr(IM,RS,RD) (_REXLrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM ))
+#define IMULLimr(IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM ))
+
+#define IMULQirr(IM,RS,RD) (_REXQrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RS),_r8(RD) ,IM ))
+#define IMULQimr(IM,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r8(RD) ,MD,MB,MI,MS ,IM ))
+
+
+/* --- Control Flow related instructions ----------------------------------- */
+
+enum { /* Condition-code values OR-ed into the opcode by the JCCS* (0x70|CC), JCC* (0x0f80|CC), SETCC* (0x0f90|CC) and CMOV* (0x0f40|CC) emitters. Several names deliberately share one value. */
+ X86_CC_O = 0x0,
+ X86_CC_NO = 0x1,
+ X86_CC_NAE = 0x2,
+ X86_CC_B = 0x2,
+ X86_CC_C = 0x2,
+ X86_CC_AE = 0x3,
+ X86_CC_NB = 0x3,
+ X86_CC_NC = 0x3,
+ X86_CC_E = 0x4,
+ X86_CC_Z = 0x4,
+ X86_CC_NE = 0x5,
+ X86_CC_NZ = 0x5,
+ X86_CC_BE = 0x6,
+ X86_CC_NA = 0x6,
+ X86_CC_A = 0x7,
+ X86_CC_NBE = 0x7,
+ X86_CC_S = 0x8,
+ X86_CC_NS = 0x9,
+ X86_CC_P = 0xa,
+ X86_CC_PE = 0xa,
+ X86_CC_NP = 0xb,
+ X86_CC_PO = 0xb,
+ X86_CC_L = 0xc,
+ X86_CC_NGE = 0xc,
+ X86_CC_GE = 0xd,
+ X86_CC_NL = 0xd,
+ X86_CC_LE = 0xe,
+ X86_CC_NG = 0xe,
+ X86_CC_G = 0xf,
+ X86_CC_NLE = 0xf,
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+/* Call/jump emitters. m = target M emitted through _O_D32 (or _O_D8 for the short JMPSm); sr = target held in register R; sm = target read from memory D,B,I,S. The indirect forms use opcode 0xff with _b010 (call) or _b100 (jump) in the ModRM reg field; CALLsr/JMPsr pick the Q or L variant from X86_TARGET_64BIT. */
+// FIXME: no prefix is available to encode a 32-bit operand size in 64-bit mode
+#define CALLm(M) _O_D32 (0xe8 ,(int)(M) )
+#define _CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) ))
+#define _CALLQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) ))
+#define CALLsr(R) ( X86_TARGET_64BIT ? _CALLQsr(R) : _CALLLsr(R))
+#define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S ))
+
+// FIXME: no prefix is available to encode a 32-bit operand size in 64-bit mode
+#define JMPSm(M) _O_D8 (0xeb ,(int)(M) )
+#define JMPm(M) _O_D32 (0xe9 ,(int)(M) )
+#define _JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) ))
+#define _JMPQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) ))
+#define JMPsr(R) ( X86_TARGET_64BIT ? _JMPQsr(R) : _JMPLsr(R))
+#define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S ))
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ /* Short conditional jumps, opcode 0x70|CC. JCCSii emits D as a byte cast through (_sc)(int) via _O_B; JCCSim hands D to _O_D8. The J<cc>Sm names below bind each X86_CC_* code to JCCSim. */
+#define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) )
+#define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) )
+#define JOSm(D) JCCSim(X86_CC_O, D)
+#define JNOSm(D) JCCSim(X86_CC_NO, D)
+#define JBSm(D) JCCSim(X86_CC_B, D)
+#define JNAESm(D) JCCSim(X86_CC_NAE, D)
+#define JNBSm(D) JCCSim(X86_CC_NB, D)
+#define JAESm(D) JCCSim(X86_CC_AE, D)
+#define JESm(D) JCCSim(X86_CC_E, D)
+#define JZSm(D) JCCSim(X86_CC_Z, D)
+#define JNESm(D) JCCSim(X86_CC_NE, D)
+#define JNZSm(D) JCCSim(X86_CC_NZ, D)
+#define JBESm(D) JCCSim(X86_CC_BE, D)
+#define JNASm(D) JCCSim(X86_CC_NA, D)
+#define JNBESm(D) JCCSim(X86_CC_NBE, D)
+#define JASm(D) JCCSim(X86_CC_A, D)
+#define JSSm(D) JCCSim(X86_CC_S, D)
+#define JNSSm(D) JCCSim(X86_CC_NS, D)
+#define JPSm(D) JCCSim(X86_CC_P, D)
+#define JPESm(D) JCCSim(X86_CC_PE, D)
+#define JNPSm(D) JCCSim(X86_CC_NP, D)
+#define JPOSm(D) JCCSim(X86_CC_PO, D)
+#define JLSm(D) JCCSim(X86_CC_L, D)
+#define JNGESm(D) JCCSim(X86_CC_NGE, D)
+#define JNLSm(D) JCCSim(X86_CC_NL, D)
+#define JGESm(D) JCCSim(X86_CC_GE, D)
+#define JLESm(D) JCCSim(X86_CC_LE, D)
+#define JNGSm(D) JCCSim(X86_CC_NG, D)
+#define JNLESm(D) JCCSim(X86_CC_NLE, D)
+#define JGSm(D) JCCSim(X86_CC_G, D)
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ /* Near conditional jumps, two-byte opcode 0x0f80|CC. JCCii emits D through _OO_L; JCCim hands D to _OO_D32. The J<cc>m names below bind each X86_CC_* code to JCCim. */
+#define JCCii(CC, D) _OO_L (0x0f80|(CC) ,(int)(D) )
+#define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(int)(D) )
+#define JOm(D) JCCim(X86_CC_O, D)
+#define JNOm(D) JCCim(X86_CC_NO, D)
+#define JBm(D) JCCim(X86_CC_B, D)
+#define JNAEm(D) JCCim(X86_CC_NAE, D)
+#define JNBm(D) JCCim(X86_CC_NB, D)
+#define JAEm(D) JCCim(X86_CC_AE, D)
+#define JEm(D) JCCim(X86_CC_E, D)
+#define JZm(D) JCCim(X86_CC_Z, D)
+#define JNEm(D) JCCim(X86_CC_NE, D)
+#define JNZm(D) JCCim(X86_CC_NZ, D)
+#define JBEm(D) JCCim(X86_CC_BE, D)
+#define JNAm(D) JCCim(X86_CC_NA, D)
+#define JNBEm(D) JCCim(X86_CC_NBE, D)
+#define JAm(D) JCCim(X86_CC_A, D)
+#define JSm(D) JCCim(X86_CC_S, D)
+#define JNSm(D) JCCim(X86_CC_NS, D)
+#define JPm(D) JCCim(X86_CC_P, D)
+#define JPEm(D) JCCim(X86_CC_PE, D)
+#define JNPm(D) JCCim(X86_CC_NP, D)
+#define JPOm(D) JCCim(X86_CC_PO, D)
+#define JLm(D) JCCim(X86_CC_L, D)
+#define JNGEm(D) JCCim(X86_CC_NGE, D)
+#define JNLm(D) JCCim(X86_CC_NL, D)
+#define JGEm(D) JCCim(X86_CC_GE, D)
+#define JLEm(D) JCCim(X86_CC_LE, D)
+#define JNGm(D) JCCim(X86_CC_NG, D)
+#define JNLEm(D) JCCim(X86_CC_NLE, D)
+#define JGm(D) JCCim(X86_CC_G, D)
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ /* Set-on-condition into byte register RD: two-byte opcode 0x0f90|CC with _b000 in the ModRM reg field. The SET<cc>r names below bind each X86_CC_* code to SETCCir. */
+#define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) ))
+#define SETOr(RD) SETCCir(X86_CC_O, RD)
+#define SETNOr(RD) SETCCir(X86_CC_NO, RD)
+#define SETBr(RD) SETCCir(X86_CC_B, RD)
+#define SETNAEr(RD) SETCCir(X86_CC_NAE, RD)
+#define SETNBr(RD) SETCCir(X86_CC_NB, RD)
+#define SETAEr(RD) SETCCir(X86_CC_AE, RD)
+#define SETEr(RD) SETCCir(X86_CC_E, RD)
+#define SETZr(RD) SETCCir(X86_CC_Z, RD)
+#define SETNEr(RD) SETCCir(X86_CC_NE, RD)
+#define SETNZr(RD) SETCCir(X86_CC_NZ, RD)
+#define SETBEr(RD) SETCCir(X86_CC_BE, RD)
+#define SETNAr(RD) SETCCir(X86_CC_NA, RD)
+#define SETNBEr(RD) SETCCir(X86_CC_NBE, RD)
+#define SETAr(RD) SETCCir(X86_CC_A, RD)
+#define SETSr(RD) SETCCir(X86_CC_S, RD)
+#define SETNSr(RD) SETCCir(X86_CC_NS, RD)
+#define SETPr(RD) SETCCir(X86_CC_P, RD)
+#define SETPEr(RD) SETCCir(X86_CC_PE, RD)
+#define SETNPr(RD) SETCCir(X86_CC_NP, RD)
+#define SETPOr(RD) SETCCir(X86_CC_PO, RD)
+#define SETLr(RD) SETCCir(X86_CC_L, RD)
+#define SETNGEr(RD) SETCCir(X86_CC_NGE, RD)
+#define SETNLr(RD) SETCCir(X86_CC_NL, RD)
+#define SETGEr(RD) SETCCir(X86_CC_GE, RD)
+#define SETLEr(RD) SETCCir(X86_CC_LE, RD)
+#define SETNGr(RD) SETCCir(X86_CC_NG, RD)
+#define SETNLEr(RD) SETCCir(X86_CC_NLE, RD)
+#define SETGr(RD) SETCCir(X86_CC_G, RD)
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ /* Set-on-condition into a memory byte at MD,MB,MI,MS: two-byte opcode 0x0f90|CC with _b000 in the ModRM reg field. The SET<cc>m names below bind each X86_CC_* code to SETCCim. */
+#define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS ))
+#define SETOm(D, B, I, S) SETCCim(X86_CC_O, D, B, I, S)
+#define SETNOm(D, B, I, S) SETCCim(X86_CC_NO, D, B, I, S)
+#define SETBm(D, B, I, S) SETCCim(X86_CC_B, D, B, I, S)
+#define SETNAEm(D, B, I, S) SETCCim(X86_CC_NAE, D, B, I, S)
+#define SETNBm(D, B, I, S) SETCCim(X86_CC_NB, D, B, I, S)
+#define SETAEm(D, B, I, S) SETCCim(X86_CC_AE, D, B, I, S)
+#define SETEm(D, B, I, S) SETCCim(X86_CC_E, D, B, I, S)
+#define SETZm(D, B, I, S) SETCCim(X86_CC_Z, D, B, I, S)
+#define SETNEm(D, B, I, S) SETCCim(X86_CC_NE, D, B, I, S)
+#define SETNZm(D, B, I, S) SETCCim(X86_CC_NZ, D, B, I, S)
+#define SETBEm(D, B, I, S) SETCCim(X86_CC_BE, D, B, I, S)
+#define SETNAm(D, B, I, S) SETCCim(X86_CC_NA, D, B, I, S)
+#define SETNBEm(D, B, I, S) SETCCim(X86_CC_NBE, D, B, I, S)
+#define SETAm(D, B, I, S) SETCCim(X86_CC_A, D, B, I, S)
+#define SETSm(D, B, I, S) SETCCim(X86_CC_S, D, B, I, S)
+#define SETNSm(D, B, I, S) SETCCim(X86_CC_NS, D, B, I, S)
+#define SETPm(D, B, I, S) SETCCim(X86_CC_P, D, B, I, S)
+#define SETPEm(D, B, I, S) SETCCim(X86_CC_PE, D, B, I, S)
+#define SETNPm(D, B, I, S) SETCCim(X86_CC_NP, D, B, I, S)
+#define SETPOm(D, B, I, S) SETCCim(X86_CC_PO, D, B, I, S)
+#define SETLm(D, B, I, S) SETCCim(X86_CC_L, D, B, I, S)
+#define SETNGEm(D, B, I, S) SETCCim(X86_CC_NGE, D, B, I, S)
+#define SETNLm(D, B, I, S) SETCCim(X86_CC_NL, D, B, I, S)
+#define SETGEm(D, B, I, S) SETCCim(X86_CC_GE, D, B, I, S)
+#define SETLEm(D, B, I, S) SETCCim(X86_CC_LE, D, B, I, S)
+#define SETNGm(D, B, I, S) SETCCim(X86_CC_NG, D, B, I, S)
+#define SETNLEm(D, B, I, S) SETCCim(X86_CC_NLE, D, B, I, S)
+#define SETGm(D, B, I, S) SETCCim(X86_CC_G, D, B, I, S)
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ /* Conditional moves for W/L/Q operand sizes: two-byte opcode 0x0f40|CC, destination RD in the ModRM reg field, source RS (rr) or memory (mr) in r/m. CC is an X86_CC_* value. */
+#define CMOVWrr(CC,RS,RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) ))
+#define CMOVWmr(CC,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r2(RD) ,MD,MB,MI,MS ))
+#define CMOVLrr(CC,RS,RD) (_REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r4(RD),_r4(RS) ))
+#define CMOVLmr(CC,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r4(RD) ,MD,MB,MI,MS ))
+#define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r8(RD),_r8(RS) ))
+#define CMOVQmr(CC,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r8(RD) ,MD,MB,MI,MS ))
+
+
+/* --- Push/Pop instructions ----------------------------------------------- */
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+/* POP emitters: r = register (opcode 0x58 combined with the register through _Or), m = memory (opcode 0x8f with _b000 in the ModRM reg field). W/L forms are wrapped in _m32only and Q forms in _m64only - presumably restricting them to 32-bit / 64-bit targets; confirm against those macros. */
+#define POPWr(RD) _m32only((_d16(), _Or (0x58,_r2(RD) )))
+#define POPWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
+
+#define POPLr(RD) _m32only( _Or (0x58,_r4(RD) ))
+#define POPLm(MD, MB, MI, MS) _m32only( _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))
+
+#define POPQr(RD) _m64only((_REXQr(RD), _Or (0x58,_r8(RD) )))
+#define POPQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
+/* 16-bit PUSH emitters (emit _d16() first, wrapped in _m32only): r = register (opcode 0x50 combined with the register through _Or), m = memory (opcode 0xff with _b110 in the ModRM reg field), i = immediate (opcode 0x68 through _Os_sW). */
+#define PUSHWr(RS) _m32only((_d16(), _Or (0x50,_r2(RS) )))
+#define PUSHWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
+#define PUSHWi(IM) _m32only((_d16(), _Os_sW (0x68 ,IM )))
+/* 32-/64-bit PUSH emitters (same r/m/i opcodes as the 16-bit forms: 0x50, 0xff with _b110, 0x68), followed by the no-operand POPA/POPAD (0x61), PUSHA/PUSHAD (0x60), POPF (0x9d) and PUSHF (0x9c). POPA and PUSHA differ from the AD forms only by the leading _d16(). */
+#define PUSHLr(RS) _m32only( _Or (0x50,_r4(RS) ))
+#define PUSHLm(MD, MB, MI, MS) _m32only( _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))
+#define PUSHLi(IM) _m32only( _Os_sL (0x68 ,IM ))
+
+#define PUSHQr(RS) _m64only((_REXQr(RS), _Or (0x50,_r8(RS) )))
+#define PUSHQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
+#define PUSHQi(IM) _m64only( _Os_sL (0x68 ,IM ))
+
+#define POPA() (_d16(), _O (0x61 ))
+#define POPAD() _O (0x61 )
+
+#define PUSHA() (_d16(), _O (0x60 ))
+#define PUSHAD() _O (0x60 )
+
+#define POPF() _O (0x9d )
+#define PUSHF() _O (0x9c )
+
+
+/* --- Test instructions --------------------------------------------------- */
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+/* TEST emitters for B/W/L/Q operand sizes. rr/rm use opcode 0x84 (byte) or 0x85. ir uses the short opcode 0xa8 (byte) or 0xa9 when X86_OPTIMIZE_ALU is set and RD is X86_AL/X86_AX/X86_EAX/X86_RAX, otherwise 0xf6/0xf7 with _b000 in the ModRM reg field; im always uses the 0xf6/0xf7 form. */
+#define TESTBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) ))
+#define TESTBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS ))
+#define TESTBir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
+ (_REXBrr(0, RD), _O_B (0xa8 ,_u8(IM))) : \
+ (_REXBrr(0, RD), _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))) )
+#define TESTBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM)))
+
+#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ))
+#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ))
+#define TESTWir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
+ (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \
+ (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) )
+#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)))
+
+#define TESTLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) ))
+#define TESTLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS ))
+#define TESTLir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \
+ (_REXLrr(0, RD), _O_L (0xa9 ,IM )) : \
+ (_REXLrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )) )
+#define TESTLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ))
+
+#define TESTQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x85 ,_b11,_r8(RS),_r8(RD) ))
+#define TESTQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x85 ,_r8(RS) ,MD,MB,MI,MS ))
+#define TESTQir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \
+ (_REXQrr(0, RD), _O_L (0xa9 ,IM )) : \
+ (_REXQrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r8(RD) ,IM )) )
+#define TESTQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ))
+
+
+/* --- Exchange instructions ----------------------------------------------- */
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+/* Exchange-style emitters for B/W/L/Q operand sizes, all with RS in the ModRM reg field and RD or memory in r/m: CMPXCHG (0x0fb0 byte, 0x0fb1 otherwise), XADD (0x0fc0 byte, 0x0fc1 otherwise) and XCHG (0x86 byte, 0x87 otherwise). */
+#define CMPXCHGBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) ))
+#define CMPXCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS ))
+
+#define CMPXCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) ))
+#define CMPXCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS ))
+
+#define CMPXCHGLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) ))
+#define CMPXCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS ))
+
+#define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r8(RS),_r8(RD) ))
+#define CMPXCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r8(RS) ,MD,MB,MI,MS ))
+
+#define XADDBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) ))
+#define XADDBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS ))
+
+#define XADDWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) ))
+#define XADDWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS ))
+
+#define XADDLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) ))
+#define XADDLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS ))
+
+#define XADDQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r8(RS),_r8(RD) ))
+#define XADDQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r8(RS) ,MD,MB,MI,MS ))
+
+#define XCHGBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) ))
+#define XCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS ))
+
+#define XCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) ))
+#define XCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS ))
+
+#define XCHGLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) ))
+#define XCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS ))
+
+#define XCHGQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x87 ,_b11,_r8(RS),_r8(RD) ))
+#define XCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x87 ,_r8(RS) ,MD,MB,MI,MS ))
+
+
+/* --- Increment/Decrement instructions ------------------------------------ */
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+/* DEC: opcode 0xfe (byte) or 0xff (word/long/quad) with _b001 in the
+ * ModRM "r" field. "m" forms take a memory operand MD, MB, MI, MS;
+ * "r" forms take a register RD.
+ * DECWr/DECLr use the short 0x48-based _Or form when X86_TARGET_64BIT is
+ * false and fall back to the 0xff ModRM form otherwise (in 64-bit mode
+ * the 0x48..0x4f byte values are REX prefixes). */
+#define DECBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS ))
+#define DECBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) ))
+
+#define DECWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
+#define DECWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x48,_r2(RD) )) : \
+ (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r2(RD) )))
+
+#define DECLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
+#define DECLr(RD) (! X86_TARGET_64BIT ? _Or (0x48,_r4(RD) ) : \
+ (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r4(RD) )))
+
+#define DECQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
+#define DECQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r8(RD) ))
+
+/* INC: opcode 0xfe (byte) or 0xff (word/long/quad) with _b000 in the
+ * ModRM "r" field. "m" forms take a memory operand MD, MB, MI, MS;
+ * "r" forms take a register RD.
+ * INCWr/INCLr use the short 0x40-based _Or form when X86_TARGET_64BIT is
+ * false and fall back to the 0xff ModRM form otherwise (in 64-bit mode
+ * the 0x40..0x47 byte values are REX prefixes). */
+#define INCBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS ))
+#define INCBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) ))
+
+#define INCWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
+#define INCWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x40,_r2(RD) )) : \
+ (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r2(RD) )) )
+
+#define INCLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
+#define INCLr(RD) (! X86_TARGET_64BIT ? _Or (0x40,_r4(RD) ) : \
+ (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r4(RD) )))
+
+#define INCQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
+#define INCQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r8(RD) ))
+
+
+/* --- Misc instructions --------------------------------------------------- */
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+/* Bit scans: BSF is 0x0f 0xbc, BSR is 0x0f 0xbd, in W/L/Q widths.
+ * Unlike XADD/XCHG above, the destination RD goes in the ModRM "r" field
+ * and the source (RS, or the memory operand MD, MB, MI, MS in the "mr"
+ * forms) goes in the "m" field; the REX helpers are called with the
+ * operands in that same (RD, RS) order. */
+#define BSFWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r2(RD),_r2(RS) ))
+#define BSFWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r2(RD) ,MD,MB,MI,MS ))
+#define BSRWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r2(RD),_r2(RS) ))
+#define BSRWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r2(RD) ,MD,MB,MI,MS ))
+
+#define BSFLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r4(RD),_r4(RS) ))
+#define BSFLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r4(RD) ,MD,MB,MI,MS ))
+#define BSRLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r4(RD),_r4(RS) ))
+#define BSRLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r4(RD) ,MD,MB,MI,MS ))
+
+#define BSFQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r8(RD),_r8(RS) ))
+#define BSFQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r8(RD) ,MD,MB,MI,MS ))
+#define BSRQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r8(RD),_r8(RS) ))
+#define BSRQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r8(RD) ,MD,MB,MI,MS ))
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+/* Widening moves, named MOV{S,Z}<src width><dst width>:
+ *   0x0f 0xbe / 0x0f 0xbf  sign-extend from byte / word
+ *   0x0f 0xb6 / 0x0f 0xb7  zero-extend from byte / word
+ *   0x63 (with _REXQ*)     sign-extend long to quad (MOVSLQ)
+ * RD is the destination (ModRM "r" field); the source is RS or the
+ * memory operand MD, MB, MI, MS.
+ * Byte-source rr forms with W/L destinations use _REXBLrr rather than
+ * _REXLrr.
+ * NOTE(review): the word->quad and long->quad forms are wrapped in
+ * _m64only(), but MOVSBQ and MOVZBQ are not -- confirm this is intended. */
+#define MOVSBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) ))
+#define MOVSBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS ))
+#define MOVZBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) ))
+#define MOVZBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS ))
+
+#define MOVSBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) ))
+#define MOVSBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS ))
+#define MOVZBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) ))
+#define MOVZBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS ))
+
+#define MOVSBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r8(RD),_r1(RS) ))
+#define MOVSBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r8(RD) ,MD,MB,MI,MS ))
+#define MOVZBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r8(RD),_r1(RS) ))
+#define MOVZBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r8(RD) ,MD,MB,MI,MS ))
+
+#define MOVSWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) ))
+#define MOVSWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS ))
+#define MOVZWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) ))
+#define MOVZWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS ))
+
+#define MOVSWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) )))
+#define MOVSWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS )))
+#define MOVZWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) )))
+#define MOVZWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS )))
+
+#define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, RS), _O_Mrm (0x63 ,_b11,_r8(RD),_r4(RS) )))
+#define MOVSLQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS )))
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+/* LEA (opcode 0x8d): load the address described by the memory operand
+ * MD, MB, MI, MS into register RD (ModRM "r" field).
+ * LEAQmr is the quad form: it uses the _REXQmr prefix helper and, like
+ * every other Q-form macro in this file, passes RD through _r8() (it
+ * previously used the 32-bit accessor _r4()). */
+#define LEALmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS ))
+#define LEAQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8d ,_r8(RD) ,MD,MB,MI,MS ))
+
+/* BSWAP on register R: two-byte opcode based at 0x0f 0xc8, emitted via
+ * _OOr together with the register (presumably added into the low opcode
+ * bits). The Q form uses the _REXQrr prefix helper and the _r8 accessor. */
+#define BSWAPLr(R) (_REXLrr(0, R), _OOr (0x0fc8,_r4(R) ))
+#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) ))
+
+/* Carry flag: clear / set / complement. */
+#define CLC() _O (0xf8 )
+#define STC() _O (0xf9 )
+#define CMC() _O (0xf5 )
+
+/* Direction flag: clear / set. */
+#define CLD() _O (0xfc )
+#define STD() _O (0xfd )
+
+/* Opcode 0x98 in its 16-bit (_d16), default and REX.W-prefixed forms.
+ * The CLTQ sequence is wrapped in its own parentheses so that _m64only()
+ * receives a single argument, exactly as the MOVSWQ and MOVSLQ macros do;
+ * without them the comma would split it into two macro arguments. */
+#define CBTW() (_d16(), _O (0x98 ))
+#define CWTL() _O (0x98 )
+#define CLTQ() _m64only((_REXQrr(0, 0), _O (0x98 )))
+
+/* Intel-syntax aliases for the AT&T-style names above. */
+#define CBW CBTW
+#define CWDE CWTL
+#define CDQE CLTQ
+
+/* Opcode 0x99 in its 16-bit (_d16), default and REX.W-prefixed forms.
+ * CQTO is parenthesized for _m64only() for the same reason as CLTQ. */
+#define CWTD() (_d16(), _O (0x99 ))
+#define CLTD() _O (0x99 )
+#define CQTO() _m64only((_REXQrr(0, 0), _O (0x99 )))
+
+/* Intel-syntax aliases for the AT&T-style names above. */
+#define CWD CWTD
+#define CDQ CLTD
+#define CQO CQTO
+
+/* Flags <-> AH transfer. */
+#define LAHF() _O (0x9f )
+#define SAHF() _O (0x9e )
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+/* Two-byte 0x0f-escaped opcodes: CPUID is 0x0f 0xa2, RDTSC is 0x0f 0x31.
+ * RDTSC was previously emitted as 0xff 0x31, which is not the RDTSC
+ * encoding; 0xff is the opcode of the INC/DEC/CALL/JMP/PUSH group. */
+#define CPUID() _OO (0x0fa2 )
+#define RDTSC() _OO (0x0f31 )
+
+/* ENTER: opcode 0xc8 followed by a 16-bit and an 8-bit immediate. */
+#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B))
+
+/* LEAVE, near RET, and near RET with a 16-bit immediate. */
+#define LEAVE() _O (0xc9 )
+#define RET() _O (0xc3 )
+#define RETi(IM) _O_W (0xc2 ,_su16(IM))
+
+#define NOP() _O (0x90 )
+
+
+/* --- Media 64-bit instructions ------------------------------------------- */
+
+/* Final opcode byte of each MMX instruction. The macros below OR these
+ * into 0x0f00 (plain two-byte opcodes), or pair them with a 0x38 middle
+ * byte for the entries tagged "2P" (see the _2P_MMXL* wrappers).
+ * Entries ending in "i" are the immediate-count shift forms; several of
+ * them share one opcode and are told apart by the ModRM "r" digit noted
+ * in the trailing comment (/2, /4, /6). "ib" marks an 8-bit immediate. */
+enum {
+ X86_MMX_PABSB = 0x1c, // 2P
+ X86_MMX_PABSW = 0x1d, // 2P
+ X86_MMX_PABSD = 0x1e, // 2P
+ X86_MMX_PACKSSWB = 0x63,
+ X86_MMX_PACKSSDW = 0x6b,
+ X86_MMX_PACKUSWB = 0x67,
+ X86_MMX_PADDB = 0xfc,
+ X86_MMX_PADDW = 0xfd,
+ X86_MMX_PADDD = 0xfe,
+ X86_MMX_PADDQ = 0xd4,
+ X86_MMX_PADDSB = 0xec,
+ X86_MMX_PADDSW = 0xed,
+ X86_MMX_PADDUSB = 0xdc,
+ X86_MMX_PADDUSW = 0xdd,
+ X86_MMX_PAND = 0xdb,
+ X86_MMX_PANDN = 0xdf,
+ X86_MMX_PAVGB = 0xe0,
+ X86_MMX_PAVGW = 0xe3,
+ X86_MMX_PCMPEQB = 0x74,
+ X86_MMX_PCMPEQW = 0x75,
+ X86_MMX_PCMPEQD = 0x76,
+ X86_MMX_PCMPGTB = 0x64,
+ X86_MMX_PCMPGTW = 0x65,
+ X86_MMX_PCMPGTD = 0x66,
+ X86_MMX_PEXTRW = 0xc5, // 64, /r ib
+ X86_MMX_PHADDW = 0x01, // 2P
+ X86_MMX_PHADDD = 0x02, // 2P
+ X86_MMX_PHADDSW = 0x03, // 2P
+ X86_MMX_PHSUBW = 0x05, // 2P
+ X86_MMX_PHSUBD = 0x06, // 2P
+ X86_MMX_PHSUBSW = 0x07, // 2P
+ X86_MMX_PINSRW = 0xc4, // 64, /r ib
+ X86_MMX_PMADDUBSW = 0x04, // 2P
+ X86_MMX_PMADDWD = 0xf5,
+ X86_MMX_PMAXSW = 0xee,
+ X86_MMX_PMAXUB = 0xde,
+ X86_MMX_PMINSW = 0xea,
+ X86_MMX_PMINUB = 0xda,
+ X86_MMX_PMOVMSKB = 0xd7, // 64
+ X86_MMX_PMULHRSW = 0x0b, // 2P
+ X86_MMX_PMULHUW = 0xe4,
+ X86_MMX_PMULHW = 0xe5,
+ X86_MMX_PMULLW = 0xd5,
+ X86_MMX_PMULUDQ = 0xf4,
+ X86_MMX_POR = 0xeb,
+ X86_MMX_PSADBW = 0xf6,
+ X86_MMX_PSHUFB = 0x00, // 2P
+ X86_MMX_PSHUFW = 0x70, // /r ib
+ X86_MMX_PSIGNB = 0x08, // 2P
+ X86_MMX_PSIGNW = 0x09, // 2P
+ X86_MMX_PSIGND = 0x0a, // 2P
+ X86_MMX_PSLLW = 0xf1,
+ X86_MMX_PSLLWi = 0x71, // /6 ib
+ X86_MMX_PSLLD = 0xf2,
+ X86_MMX_PSLLDi = 0x72, // /6 ib
+ X86_MMX_PSLLQ = 0xf3,
+ X86_MMX_PSLLQi = 0x73, // /6 ib
+ X86_MMX_PSRAW = 0xe1,
+ X86_MMX_PSRAWi = 0x71, // /4 ib
+ X86_MMX_PSRAD = 0xe2,
+ X86_MMX_PSRADi = 0x72, // /4 ib
+ X86_MMX_PSRLW = 0xd1,
+ X86_MMX_PSRLWi = 0x71, // /2 ib
+ X86_MMX_PSRLD = 0xd2,
+ X86_MMX_PSRLDi = 0x72, // /2 ib
+ X86_MMX_PSRLQ = 0xd3,
+ X86_MMX_PSRLQi = 0x73, // /2 ib
+ X86_MMX_PSUBB = 0xf8,
+ X86_MMX_PSUBW = 0xf9,
+ X86_MMX_PSUBD = 0xfa,
+ X86_MMX_PSUBQ = 0xfb,
+ X86_MMX_PSUBSB = 0xe8,
+ X86_MMX_PSUBSW = 0xe9,
+ X86_MMX_PSUBUSB = 0xd8,
+ X86_MMX_PSUBUSW = 0xd9,
+ X86_MMX_PUNPCKHBW = 0x68,
+ X86_MMX_PUNPCKHWD = 0x69,
+ X86_MMX_PUNPCKHDQ = 0x6a,
+ X86_MMX_PUNPCKLBW = 0x60,
+ X86_MMX_PUNPCKLWD = 0x61,
+ X86_MMX_PUNPCKLDQ = 0x62,
+ X86_MMX_PXOR = 0xef,
+};
+
+/* Low-level MMX encoders. Naming: L = _REXL* prefix helper, Q = _REXQ*;
+ *   rr  register source RS  -> register RD
+ *   mr  memory MD,MB,MI,MS  -> register RD
+ *   rm  register RS         -> memory MD,MB,MI,MS
+ *   irr / imr  as rr / mr with a trailing 8-bit immediate IM
+ * RSA and RDA are accessor macros (e.g. _rM, _r4, _r8) applied to RS and
+ * RD so each caller chooses the register class of each operand.
+ * The opcode is 0x0f followed by OP. The __MMX1* variants emit 0x0f, then
+ * PX, then OP (three opcode bytes).
+ * The imr forms derive the REX prefix from RD, which is their only
+ * register operand; they previously named RS, which is not a parameter
+ * of those macros. */
+#define __MMXLrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __MMXLmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __MMXLrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+#define __MMXLirr(OP,IM,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM)))
+#define __MMXLimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM)))
+#define __MMXQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __MMXQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __MMXQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+#define __MMXQirr(OP,IM,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM)))
+#define __MMXQimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM)))
+#define __MMX1Lrr(PX,OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _B(0x0f),_OO_Mrm(((PX)<<8)|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __MMX1Lmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __MMX1Lrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+
+/* Convenience wrappers over the __MMX* encoders for the common case in
+ * which every register operand is accessed through _rM.
+ * The _2P_ variants select the three-byte opcode form with 0x38 as the
+ * middle byte. */
+#define _MMXLrr(OP,RS,RD) __MMXLrr(OP,RS,_rM,RD,_rM)
+#define _MMXLmr(OP,MD,MB,MI,MS,RD) __MMXLmr(OP,MD,MB,MI,MS,RD,_rM)
+#define _MMXLrm(OP,RS,MD,MB,MI,MS) __MMXLrm(OP,RS,_rM,MD,MB,MI,MS)
+#define _MMXQrr(OP,RS,RD) __MMXQrr(OP,RS,_rM,RD,_rM)
+#define _MMXQmr(OP,MD,MB,MI,MS,RD) __MMXQmr(OP,MD,MB,MI,MS,RD,_rM)
+#define _MMXQrm(OP,RS,MD,MB,MI,MS) __MMXQrm(OP,RS,_rM,MD,MB,MI,MS)
+#define _2P_MMXLrr(OP,RS,RD) __MMX1Lrr(0x38, OP,RS,_rM,RD,_rM)
+#define _2P_MMXLmr(OP,MD,MB,MI,MS,RD) __MMX1Lmr(0x38, OP,MD,MB,MI,MS,RD,_rM)
+#define _2P_MMXLrm(OP,RS,MD,MB,MI,MS) __MMX1Lrm(0x38, OP,RS,_rM,MD,MB,MI,MS)
+
+/* MMX data moves.
+ * MOVDMD / MOVQMD (opcode 0x0f 0x6e): RS is accessed as _r4 / _r8 and RD
+ * as _rM, i.e. general register -> MMX register.
+ * MOVDMS / MOVQMS (opcode 0x0f 0x7e): RS is accessed as _rM and RD as
+ * _r4 / _r8, i.e. MMX register -> general register. The arguments are
+ * passed swapped to __MMX?rr so the _rM operand lands in the ModRM "r"
+ * field.
+ * MOVD mr/rm and MOVQ rr/mr/rm: load with 0x6e / 0x6f, store with
+ * 0x7e / 0x7f. */
+#define MMX_MOVDMDrr(RS, RD) __MMXLrr(0x6e, RS,_r4, RD,_rM)
+#define MMX_MOVQMDrr(RS, RD) __MMXQrr(0x6e, RS,_r8, RD,_rM)
+#define MMX_MOVDMSrr(RS, RD) __MMXLrr(0x7e, RD,_r4, RS,_rM)
+#define MMX_MOVQMSrr(RS, RD) __MMXQrr(0x7e, RD,_r8, RS,_rM)
+
+#define MMX_MOVDmr(MD, MB, MI, MS, RD) _MMXLmr(0x6e, MD, MB, MI, MS, RD)
+#define MMX_MOVDrm(RS, MD, MB, MI, MS) _MMXLrm(0x7e, RS, MD, MB, MI, MS)
+#define MMX_MOVQrr(RS, RD) _MMXLrr(0x6f, RS, RD)
+#define MMX_MOVQmr(MD, MB, MI, MS, RD) _MMXLmr(0x6f, MD, MB, MI, MS, RD)
+#define MMX_MOVQrm(RS, MD, MB, MI, MS) _MMXLrm(0x7f, RS, MD, MB, MI, MS)
+
+// Original MMX instructions
+// Each instruction gets an "rr" form (MMX register RS -> MMX register RD)
+// and an "mr" form (memory operand MD,MB,MI,MS -> MMX register RD); both
+// forward the matching X86_MMX_* opcode to _MMXLrr / _MMXLmr.
+// Pack and add families:
+#define MMX_PACKSSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKSSWB,RS,RD)
+#define MMX_PACKSSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSWB, MD, MB, MI, MS, RD)
+#define MMX_PACKSSDWrr(RS, RD) _MMXLrr(X86_MMX_PACKSSDW,RS,RD)
+#define MMX_PACKSSDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSDW, MD, MB, MI, MS, RD)
+#define MMX_PACKUSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKUSWB,RS,RD)
+#define MMX_PACKUSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKUSWB, MD, MB, MI, MS, RD)
+#define MMX_PADDBrr(RS, RD) _MMXLrr(X86_MMX_PADDB,RS,RD)
+#define MMX_PADDBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDB, MD, MB, MI, MS, RD)
+#define MMX_PADDWrr(RS, RD) _MMXLrr(X86_MMX_PADDW,RS,RD)
+#define MMX_PADDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDW, MD, MB, MI, MS, RD)
+#define MMX_PADDDrr(RS, RD) _MMXLrr(X86_MMX_PADDD,RS,RD)
+#define MMX_PADDDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDD, MD, MB, MI, MS, RD)
+#define MMX_PADDQrr(RS, RD) _MMXLrr(X86_MMX_PADDQ,RS,RD)
+#define MMX_PADDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDQ, MD, MB, MI, MS, RD)
+#define MMX_PADDSBrr(RS, RD) _MMXLrr(X86_MMX_PADDSB,RS,RD)
+#define MMX_PADDSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSB, MD, MB, MI, MS, RD)
+#define MMX_PADDSWrr(RS, RD) _MMXLrr(X86_MMX_PADDSW,RS,RD)
+#define MMX_PADDSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSW, MD, MB, MI, MS, RD)
+#define MMX_PADDUSBrr(RS, RD) _MMXLrr(X86_MMX_PADDUSB,RS,RD)
+#define MMX_PADDUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSB, MD, MB, MI, MS, RD)
+#define MMX_PADDUSWrr(RS, RD) _MMXLrr(X86_MMX_PADDUSW,RS,RD)
+#define MMX_PADDUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSW, MD, MB, MI, MS, RD)
+// Bitwise AND / AND-NOT, average, and compare families (rr and mr forms,
+// all operands accessed as MMX registers via _MMXLrr / _MMXLmr).
+#define MMX_PANDrr(RS, RD) _MMXLrr(X86_MMX_PAND,RS,RD)
+#define MMX_PANDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAND, MD, MB, MI, MS, RD)
+#define MMX_PANDNrr(RS, RD) _MMXLrr(X86_MMX_PANDN,RS,RD)
+#define MMX_PANDNmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PANDN, MD, MB, MI, MS, RD)
+#define MMX_PAVGBrr(RS, RD) _MMXLrr(X86_MMX_PAVGB,RS,RD)
+#define MMX_PAVGBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGB, MD, MB, MI, MS, RD)
+#define MMX_PAVGWrr(RS, RD) _MMXLrr(X86_MMX_PAVGW,RS,RD)
+#define MMX_PAVGWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGW, MD, MB, MI, MS, RD)
+#define MMX_PCMPEQBrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQB,RS,RD)
+#define MMX_PCMPEQBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQB, MD, MB, MI, MS, RD)
+#define MMX_PCMPEQWrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQW,RS,RD)
+#define MMX_PCMPEQWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQW, MD, MB, MI, MS, RD)
+#define MMX_PCMPEQDrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQD,RS,RD)
+#define MMX_PCMPEQDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQD, MD, MB, MI, MS, RD)
+#define MMX_PCMPGTBrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTB,RS,RD)
+#define MMX_PCMPGTBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTB, MD, MB, MI, MS, RD)
+#define MMX_PCMPGTWrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTW,RS,RD)
+#define MMX_PCMPGTWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTW, MD, MB, MI, MS, RD)
+#define MMX_PCMPGTDrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTD,RS,RD)
+#define MMX_PCMPGTDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTD, MD, MB, MI, MS, RD)
+// Multiply-add, min/max, multiply, OR and sum-of-absolute-differences
+// families (rr and mr forms via _MMXLrr / _MMXLmr).
+#define MMX_PMADDWDrr(RS, RD) _MMXLrr(X86_MMX_PMADDWD,RS,RD)
+#define MMX_PMADDWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMADDWD, MD, MB, MI, MS, RD)
+#define MMX_PMAXSWrr(RS, RD) _MMXLrr(X86_MMX_PMAXSW,RS,RD)
+#define MMX_PMAXSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXSW, MD, MB, MI, MS, RD)
+#define MMX_PMAXUBrr(RS, RD) _MMXLrr(X86_MMX_PMAXUB,RS,RD)
+#define MMX_PMAXUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXUB, MD, MB, MI, MS, RD)
+#define MMX_PMINSWrr(RS, RD) _MMXLrr(X86_MMX_PMINSW,RS,RD)
+#define MMX_PMINSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINSW, MD, MB, MI, MS, RD)
+#define MMX_PMINUBrr(RS, RD) _MMXLrr(X86_MMX_PMINUB,RS,RD)
+#define MMX_PMINUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINUB, MD, MB, MI, MS, RD)
+#define MMX_PMULHUWrr(RS, RD) _MMXLrr(X86_MMX_PMULHUW,RS,RD)
+#define MMX_PMULHUWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHUW, MD, MB, MI, MS, RD)
+#define MMX_PMULHWrr(RS, RD) _MMXLrr(X86_MMX_PMULHW,RS,RD)
+#define MMX_PMULHWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHW, MD, MB, MI, MS, RD)
+#define MMX_PMULLWrr(RS, RD) _MMXLrr(X86_MMX_PMULLW,RS,RD)
+#define MMX_PMULLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULLW, MD, MB, MI, MS, RD)
+#define MMX_PMULUDQrr(RS, RD) _MMXLrr(X86_MMX_PMULUDQ,RS,RD)
+#define MMX_PMULUDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULUDQ, MD, MB, MI, MS, RD)
+#define MMX_PORrr(RS, RD) _MMXLrr(X86_MMX_POR,RS,RD)
+#define MMX_PORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_POR, MD, MB, MI, MS, RD)
+#define MMX_PSADBWrr(RS, RD) _MMXLrr(X86_MMX_PSADBW,RS,RD)
+#define MMX_PSADBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSADBW, MD, MB, MI, MS, RD)
+// Shift families. The rr/mr forms take the shift count from an MMX
+// register or memory operand. The "ir" forms take an 8-bit immediate IM:
+// they reuse __MMXLirr with the target register RD passed in the RS slot
+// (ModRM "m" field, accessed via _rM) and a fixed opcode-extension digit
+// passed in the RD slot (ModRM "r" field, accessed via _rN):
+// _b110 for PSLL*, _b100 for PSRA*, _b010 for PSRL*.
+#define MMX_PSLLWir(IM, RD) __MMXLirr(X86_MMX_PSLLWi, IM, RD,_rM, _b110,_rN)
+#define MMX_PSLLWrr(RS, RD) _MMXLrr(X86_MMX_PSLLW,RS,RD)
+#define MMX_PSLLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLW, MD, MB, MI, MS, RD)
+#define MMX_PSLLDir(IM, RD) __MMXLirr(X86_MMX_PSLLDi, IM, RD,_rM, _b110,_rN)
+#define MMX_PSLLDrr(RS, RD) _MMXLrr(X86_MMX_PSLLD,RS,RD)
+#define MMX_PSLLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLD, MD, MB, MI, MS, RD)
+#define MMX_PSLLQir(IM, RD) __MMXLirr(X86_MMX_PSLLQi, IM, RD,_rM, _b110,_rN)
+#define MMX_PSLLQrr(RS, RD) _MMXLrr(X86_MMX_PSLLQ,RS,RD)
+#define MMX_PSLLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLQ, MD, MB, MI, MS, RD)
+#define MMX_PSRAWir(IM, RD) __MMXLirr(X86_MMX_PSRAWi, IM, RD,_rM, _b100,_rN)
+#define MMX_PSRAWrr(RS, RD) _MMXLrr(X86_MMX_PSRAW,RS,RD)
+#define MMX_PSRAWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAW, MD, MB, MI, MS, RD)
+#define MMX_PSRADir(IM, RD) __MMXLirr(X86_MMX_PSRADi, IM, RD,_rM, _b100,_rN)
+#define MMX_PSRADrr(RS, RD) _MMXLrr(X86_MMX_PSRAD,RS,RD)
+#define MMX_PSRADmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAD, MD, MB, MI, MS, RD)
+#define MMX_PSRLWir(IM, RD) __MMXLirr(X86_MMX_PSRLWi, IM, RD,_rM, _b010,_rN)
+#define MMX_PSRLWrr(RS, RD) _MMXLrr(X86_MMX_PSRLW,RS,RD)
+#define MMX_PSRLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLW, MD, MB, MI, MS, RD)
+#define MMX_PSRLDir(IM, RD) __MMXLirr(X86_MMX_PSRLDi, IM, RD,_rM, _b010,_rN)
+#define MMX_PSRLDrr(RS, RD) _MMXLrr(X86_MMX_PSRLD,RS,RD)
+#define MMX_PSRLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLD, MD, MB, MI, MS, RD)
+#define MMX_PSRLQir(IM, RD) __MMXLirr(X86_MMX_PSRLQi, IM, RD,_rM, _b010,_rN)
+#define MMX_PSRLQrr(RS, RD) _MMXLrr(X86_MMX_PSRLQ,RS,RD)
+#define MMX_PSRLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLQ, MD, MB, MI, MS, RD)
+// Subtract family, including the PSUBS* and PSUBUS* variants (rr and mr
+// forms via _MMXLrr / _MMXLmr).
+#define MMX_PSUBBrr(RS, RD) _MMXLrr(X86_MMX_PSUBB,RS,RD)
+#define MMX_PSUBBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBB, MD, MB, MI, MS, RD)
+#define MMX_PSUBWrr(RS, RD) _MMXLrr(X86_MMX_PSUBW,RS,RD)
+#define MMX_PSUBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBW, MD, MB, MI, MS, RD)
+#define MMX_PSUBDrr(RS, RD) _MMXLrr(X86_MMX_PSUBD,RS,RD)
+#define MMX_PSUBDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBD, MD, MB, MI, MS, RD)
+#define MMX_PSUBQrr(RS, RD) _MMXLrr(X86_MMX_PSUBQ,RS,RD)
+#define MMX_PSUBQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBQ, MD, MB, MI, MS, RD)
+#define MMX_PSUBSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBSB,RS,RD)
+#define MMX_PSUBSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSB, MD, MB, MI, MS, RD)
+#define MMX_PSUBSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBSW,RS,RD)
+#define MMX_PSUBSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSW, MD, MB, MI, MS, RD)
+#define MMX_PSUBUSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSB,RS,RD)
+#define MMX_PSUBUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSB, MD, MB, MI, MS, RD)
+#define MMX_PSUBUSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSW,RS,RD)
+#define MMX_PSUBUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSW, MD, MB, MI, MS, RD)
+// Unpack (high and low halves) and XOR families (rr and mr forms via
+// _MMXLrr / _MMXLmr).
+#define MMX_PUNPCKHBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHBW,RS,RD)
+#define MMX_PUNPCKHBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHBW, MD, MB, MI, MS, RD)
+#define MMX_PUNPCKHWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHWD,RS,RD)
+#define MMX_PUNPCKHWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHWD, MD, MB, MI, MS, RD)
+#define MMX_PUNPCKHDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHDQ,RS,RD)
+#define MMX_PUNPCKHDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHDQ, MD, MB, MI, MS, RD)
+#define MMX_PUNPCKLBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLBW,RS,RD)
+#define MMX_PUNPCKLBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLBW, MD, MB, MI, MS, RD)
+#define MMX_PUNPCKLWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLWD,RS,RD)
+#define MMX_PUNPCKLWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLWD, MD, MB, MI, MS, RD)
+#define MMX_PUNPCKLDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLDQ,RS,RD)
+#define MMX_PUNPCKLDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLDQ, MD, MB, MI, MS, RD)
+#define MMX_PXORrr(RS, RD) _MMXLrr(X86_MMX_PXOR,RS,RD)
+#define MMX_PXORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PXOR, MD, MB, MI, MS, RD)
+
+// MMX instructions that carry an 8-bit immediate IM.
+//   PSHUFW: MMX source (register or memory) -> MMX destination.
+//   PEXTRW: MMX source RS -> general register RD (_r4, or _r8 in the Q form).
+//   PINSRW: general register RS or memory -> MMX destination RD.
+// The destination of every PINSRW form is an MMX register, so RD is
+// accessed through _rM in the memory forms as well (they previously used
+// _r4 / _r8, which describe general registers). PINSRWQirr accesses its
+// general-register source through _r8, matching PEXTRWQirr.
+#define MMX_PSHUFWirr(IM, RS, RD) __MMXLirr(X86_MMX_PSHUFW, IM, RS,_rM, RD,_rM)
+#define MMX_PSHUFWimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PSHUFW, IM, MD, MB, MI, MS, RD,_rM)
+#define MMX_PEXTRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r4)
+#define MMX_PEXTRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r8)
+#define MMX_PINSRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM)
+#define MMX_PINSRWLimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_rM)
+#define MMX_PINSRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PINSRW, IM, RS,_r8, RD,_rM)
+#define MMX_PINSRWQimr(IM, MD, MB, MI, MS, RD) __MMXQimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_rM)
+
+// Additional MMX instructions, introduced by the SSSE3 ISA.
+// All of them go through the _2P_MMXL* wrappers, i.e. they use the
+// three-byte opcode form 0x0f 0x38 <op>, in rr (MMX register source) and
+// mr (memory source) variants.
+#define MMX_PABSBrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSB,RS,RD)
+#define MMX_PABSBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSB, MD, MB, MI, MS, RD)
+#define MMX_PABSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSW,RS,RD)
+#define MMX_PABSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSW, MD, MB, MI, MS, RD)
+#define MMX_PABSDrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSD,RS,RD)
+#define MMX_PABSDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSD, MD, MB, MI, MS, RD)
+#define MMX_PHADDWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDW,RS,RD)
+#define MMX_PHADDWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDW, MD, MB, MI, MS, RD)
+#define MMX_PHADDDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDD,RS,RD)
+#define MMX_PHADDDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDD, MD, MB, MI, MS, RD)
+#define MMX_PHADDSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDSW,RS,RD)
+#define MMX_PHADDSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDSW, MD, MB, MI, MS, RD)
+#define MMX_PHSUBWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBW,RS,RD)
+#define MMX_PHSUBWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBW, MD, MB, MI, MS, RD)
+#define MMX_PHSUBDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBD,RS,RD)
+#define MMX_PHSUBDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBD, MD, MB, MI, MS, RD)
+#define MMX_PHSUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBSW,RS,RD)
+#define MMX_PHSUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBSW, MD, MB, MI, MS, RD)
+#define MMX_PMADDUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMADDUBSW,RS,RD)
+#define MMX_PMADDUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMADDUBSW, MD, MB, MI, MS, RD)
+#define MMX_PMULHRSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMULHRSW,RS,RD)
+#define MMX_PMULHRSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMULHRSW, MD, MB, MI, MS, RD)
+#define MMX_PSHUFBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSHUFB,RS,RD)
+#define MMX_PSHUFBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSHUFB, MD, MB, MI, MS, RD)
+#define MMX_PSIGNBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNB,RS,RD)
+#define MMX_PSIGNBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNB, MD, MB, MI, MS, RD)
+#define MMX_PSIGNWrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNW,RS,RD)
+#define MMX_PSIGNWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNW, MD, MB, MI, MS, RD)
+#define MMX_PSIGNDrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGND,RS,RD)
+#define MMX_PSIGNDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGND, MD, MB, MI, MS, RD)
+
+// EMMS: two-byte opcode 0x0f 0x77, no operands.
+#define EMMS() _OO (0x0f77 )
+
+
+/* --- Media 128-bit instructions ------------------------------------------ */
+
+/* Comparison predicate values for the SSE CMP* macros (the IM argument of
+ * CMPPSrr, CMPSDrr, ...). Only the values 0..7 exist.
+ * The GT/GE/NGT/NGE names are deliberate duplicates of LT/LE/NLT/NLE:
+ * they carry the same value, not a distinct predicate.
+ * NOTE(review): presumably a caller using a "greater" name is expected to
+ * swap the two operands itself -- confirm against the call sites. */
+enum {
+ X86_SSE_CC_EQ = 0,
+ X86_SSE_CC_LT = 1,
+ X86_SSE_CC_GT = 1,
+ X86_SSE_CC_LE = 2,
+ X86_SSE_CC_GE = 2,
+ X86_SSE_CC_U = 3,
+ X86_SSE_CC_NEQ = 4,
+ X86_SSE_CC_NLT = 5,
+ X86_SSE_CC_NGT = 5,
+ X86_SSE_CC_NLE = 6,
+ X86_SSE_CC_NGE = 6,
+ X86_SSE_CC_O = 7
+};
+
+/* Final opcode byte of each SSE/SSE2 instruction; the encoder macros
+ * below OR it into 0x0f00.
+ * Several names intentionally share one value (for example
+ * CVTDQ2PD / CVTPD2DQ / CVTTPD2DQ = 0xe6, or CVTPI2PD / CVTPI2PS /
+ * CVTSI2SD / CVTSI2SS = 0x2a). The opcode byte alone therefore does not
+ * identify the instruction; presumably the prefix byte supplied by the
+ * _SSEPS / _SSEPD / _SSESS / _SSESD wrapper families disambiguates them.
+ * X86_SSSE3_PSHUFB is the last byte of a three-byte opcode -- presumably
+ * intended for the _SSSE3L* encoders. */
+enum {
+ X86_SSE_UCOMI = 0x2e,
+ X86_SSE_COMI = 0x2f,
+ X86_SSE_CMP = 0xc2,
+ X86_SSE_SQRT = 0x51,
+ X86_SSE_RSQRT = 0x52,
+ X86_SSE_RCP = 0x53,
+ X86_SSE_AND = 0x54,
+ X86_SSE_ANDN = 0x55,
+ X86_SSE_OR = 0x56,
+ X86_SSE_XOR = 0x57,
+ X86_SSE_ADD = 0x58,
+ X86_SSE_MUL = 0x59,
+ X86_SSE_SUB = 0x5c,
+ X86_SSE_MIN = 0x5d,
+ X86_SSE_DIV = 0x5e,
+ X86_SSE_MAX = 0x5f,
+ X86_SSE_CVTDQ2PD = 0xe6,
+ X86_SSE_CVTDQ2PS = 0x5b,
+ X86_SSE_CVTPD2DQ = 0xe6,
+ X86_SSE_CVTPD2PI = 0x2d,
+ X86_SSE_CVTPD2PS = 0x5a,
+ X86_SSE_CVTPI2PD = 0x2a,
+ X86_SSE_CVTPI2PS = 0x2a,
+ X86_SSE_CVTPS2DQ = 0x5b,
+ X86_SSE_CVTPS2PD = 0x5a,
+ X86_SSE_CVTPS2PI = 0x2d,
+ X86_SSE_CVTSD2SI = 0x2d,
+ X86_SSE_CVTSD2SS = 0x5a,
+ X86_SSE_CVTSI2SD = 0x2a,
+ X86_SSE_CVTSI2SS = 0x2a,
+ X86_SSE_CVTSS2SD = 0x5a,
+ X86_SSE_CVTSS2SI = 0x2d,
+ X86_SSE_CVTTPD2PI = 0x2c,
+ X86_SSE_CVTTPD2DQ = 0xe6,
+ X86_SSE_CVTTPS2DQ = 0x5b,
+ X86_SSE_CVTTPS2PI = 0x2c,
+ X86_SSE_CVTTSD2SI = 0x2c,
+ X86_SSE_CVTTSS2SI = 0x2c,
+ X86_SSE_MOVMSK = 0x50,
+ X86_SSE_PACKSSDW = 0x6b,
+ X86_SSE_PACKSSWB = 0x63,
+ X86_SSE_PACKUSWB = 0x67,
+ X86_SSE_PADDB = 0xfc,
+ X86_SSE_PADDD = 0xfe,
+ X86_SSE_PADDQ = 0xd4,
+ X86_SSE_PADDSB = 0xec,
+ X86_SSE_PADDSW = 0xed,
+ X86_SSE_PADDUSB = 0xdc,
+ X86_SSE_PADDUSW = 0xdd,
+ X86_SSE_PADDW = 0xfd,
+ X86_SSE_PAND = 0xdb,
+ X86_SSE_PANDN = 0xdf,
+ X86_SSE_PAVGB = 0xe0,
+ X86_SSE_PAVGW = 0xe3,
+ X86_SSE_PCMPEQB = 0x74,
+ X86_SSE_PCMPEQD = 0x76,
+ X86_SSE_PCMPEQW = 0x75,
+ X86_SSE_PCMPGTB = 0x64,
+ X86_SSE_PCMPGTD = 0x66,
+ X86_SSE_PCMPGTW = 0x65,
+ X86_SSE_PMADDWD = 0xf5,
+ X86_SSE_PMAXSW = 0xee,
+ X86_SSE_PMAXUB = 0xde,
+ X86_SSE_PMINSW = 0xea,
+ X86_SSE_PMINUB = 0xda,
+ X86_SSE_PMOVMSKB = 0xd7,
+ X86_SSE_PMULHUW = 0xe4,
+ X86_SSE_PMULHW = 0xe5,
+ X86_SSE_PMULLW = 0xd5,
+ X86_SSE_PMULUDQ = 0xf4,
+ X86_SSE_POR = 0xeb,
+ X86_SSE_PSADBW = 0xf6,
+ X86_SSE_PSLLD = 0xf2,
+ X86_SSE_PSLLQ = 0xf3,
+ X86_SSE_PSLLW = 0xf1,
+ X86_SSE_PSRAD = 0xe2,
+ X86_SSE_PSRAW = 0xe1,
+ X86_SSE_PSRLD = 0xd2,
+ X86_SSE_PSRLQ = 0xd3,
+ X86_SSE_PSRLW = 0xd1,
+ X86_SSE_PSUBB = 0xf8,
+ X86_SSE_PSUBD = 0xfa,
+ X86_SSE_PSUBQ = 0xfb,
+ X86_SSE_PSUBSB = 0xe8,
+ X86_SSE_PSUBSW = 0xe9,
+ X86_SSE_PSUBUSB = 0xd8,
+ X86_SSE_PSUBUSW = 0xd9,
+ X86_SSE_PSUBW = 0xf9,
+ X86_SSE_PUNPCKHBW = 0x68,
+ X86_SSE_PUNPCKHDQ = 0x6a,
+ X86_SSE_PUNPCKHQDQ = 0x6d,
+ X86_SSE_PUNPCKHWD = 0x69,
+ X86_SSE_PUNPCKLBW = 0x60,
+ X86_SSE_PUNPCKLDQ = 0x62,
+ X86_SSE_PUNPCKLQDQ = 0x6c,
+ X86_SSE_PUNPCKLWD = 0x61,
+ X86_SSE_PXOR = 0xef,
+ X86_SSSE3_PSHUFB = 0x00,
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+/* SSSE3 encoders for the 0x66-prefixed three-byte opcode form: the bytes
+ * emitted are 0x66, the REX prefix (if any), 0x0f, OP1, OP2, then the
+ * ModRM operand bytes and, for the "i" forms, an 8-bit immediate IM.
+ * RD goes in the ModRM "r" field; RS or the memory operand MD,MB,MI,MS
+ * goes in the "m" field.
+ * _SSSE3Lrr builds its REX prefix from (RD, RS) like _SSSE3Lirr and the
+ * other rr encoders in this file; it previously passed (RD, RD), which
+ * left the r/m register RS out of the prefix computation. */
+#define _SSSE3Lrr(OP1,OP2,RS,RSA,RD,RDA) (_B(0x66), _REXLrr(RD,RS), _B(0x0f), _OO_Mrm (((OP1)<<8)|(OP2) ,_b11,RDA(RD),RSA(RS) ))
+#define _SSSE3Lmr(OP1,OP2,MD,MB,MI,MS,RD,RDA) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X (((OP1)<<8)|(OP2) ,RDA(RD) ,MD,MB,MI,MS ))
+#define _SSSE3Lirr(OP1,OP2,IM,RS,RD) (_B(0x66), _REXLrr(RD, RS), _B(0x0f), _OO_Mrm_B (((OP1)<<8)|(OP2) ,_b11,_rX(RD),_rX(RS) ,_u8(IM)))
+#define _SSSE3Limr(OP1,OP2,IM,MD,MB,MI,MS,RD) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X_B (((OP1)<<8)|(OP2) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM)))
+
+/* Unprefixed SSE encoders: REX prefix helper, then 0x0f, then OP.
+ *   ir / im    8-bit immediate IM applied to register RD / memory, with
+ *              MO placed in the ModRM "r" field
+ *   rr, mr, rm register/memory forms; RSA and RDA are accessor macros
+ *              applied to RS and RD so callers pick the register class
+ *   irr / imr  as rr / mr with a trailing 8-bit immediate; both
+ *              registers are accessed through _rX
+ * The __SSEQ* variants are identical except that they use the _REXQ*
+ * prefix helpers. */
+#define __SSELir(OP,MO,IM,RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0f00|(OP) ,_b11,MO ,_rX(RD) ,_u8(IM)))
+#define __SSELim(OP,MO,IM,MD,MB,MI,MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0f00|(OP) ,MO ,MD,MB,MI,MS ,_u8(IM)))
+#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+#define __SSELirr(OP,IM,RS,RD) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,_rX(RD),_rX(RS) ,_u8(IM)))
+#define __SSELimr(OP,IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM)))
+
+#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+
+/* Prefixed SSE encoders: emit the single byte PX first, then delegate to
+ * the matching unprefixed __SSEL* / __SSEQ* encoder. The prefix byte is
+ * therefore emitted before any REX prefix the delegate produces. */
+#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSELrr(OP, RS, RSA, RD, RDA))
+#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA))
+#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS))
+#define _SSELir(PX,OP,MO,IM,RD) (_B(PX), __SSELir(OP, MO, IM, RD))
+#define _SSELim(PX,OP,MO,IM,MD,MB,MI,MS) (_B(PX), __SSELim(OP, MO, IM, MD, MB, MI, MS))
+#define _SSELirr(PX,OP,IM,RS,RD) (_B(PX), __SSELirr(OP, IM, RS, RD))
+#define _SSELimr(PX,OP,IM,MD,MB,MI,MS,RD) (_B(PX), __SSELimr(OP, IM, MD, MB, MI, MS, RD))
+
+#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA))
+#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA))
+#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS))
+
+/* Per-data-type encoder families. Every register operand is accessed
+ * through _rX. The suffix selects the prefix byte:
+ *   PS  none (unprefixed __SSEL* encoders)
+ *   PD  0x66
+ *   SS  0xf3
+ *   SD  0xf2
+ * Each family provides rr, mr, rm forms plus irr / imr forms that take an
+ * extra 8-bit immediate IM. */
+#define _SSEPSrr(OP,RS,RD) __SSELrr( OP, RS,_rX, RD,_rX)
+#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr( OP, MD, MB, MI, MS, RD,_rX)
+#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm( OP, RS,_rX, MD, MB, MI, MS)
+#define _SSEPSirr(OP,IM,RS,RD) __SSELirr( OP, IM, RS, RD)
+#define _SSEPSimr(OP,IM,MD,MB,MI,MS,RD) __SSELimr( OP, IM, MD, MB, MI, MS, RD)
+
+#define _SSEPDrr(OP,RS,RD) _SSELrr(0x66, OP, RS,_rX, RD,_rX)
+#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0x66, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSEPDirr(OP,IM,RS,RD) _SSELirr(0x66, OP, IM, RS, RD)
+#define _SSEPDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0x66, OP, IM, MD, MB, MI, MS, RD)
+
+#define _SSESSrr(OP,RS,RD) _SSELrr(0xf3, OP, RS,_rX, RD,_rX)
+#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf3, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSESSirr(OP,IM,RS,RD) _SSELirr(0xf3, OP, IM, RS, RD)
+#define _SSESSimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf3, OP, IM, MD, MB, MI, MS, RD)
+
+#define _SSESDrr(OP,RS,RD) _SSELrr(0xf2, OP, RS,_rX, RD,_rX)
+#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf2, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSESDirr(OP,IM,RS,RD) _SSELirr(0xf2, OP, IM, RS, RD)
+#define _SSESDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf2, OP, IM, MD, MB, MI, MS, RD)
+
+/* SSE add and bitwise AND / AND-NOT. Each operation comes in rr (register
+ * source RS) and mr (memory source MD,MB,MI,MS) forms with register
+ * destination RD, and in the data-type variants that the corresponding
+ * _SSE{PS,PD,SS,SD} wrapper family selects. The bitwise operations are
+ * defined for the PS and PD variants only. */
+#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD)
+#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD)
+#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+
+#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD)
+#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD)
+#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+
+#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD)
+#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
+#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD)
+#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
+
+#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD)
+#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
+#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD)
+#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
+
+/* SSE compares (opcode X86_SSE_CMP). IM is emitted as a trailing 8-bit
+ * immediate; the X86_SSE_CC_* constants are presumably the intended
+ * values for it. Although named "rr" / "mr", these forward to the
+ * immediate-taking irr / imr encoders. */
+#define CMPPSrr(IM, RS, RD) _SSEPSirr(X86_SSE_CMP, IM, RS, RD)
+#define CMPPSmr(IM, MD, MB, MI, MS, RD) _SSEPSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
+#define CMPPDrr(IM, RS, RD) _SSEPDirr(X86_SSE_CMP, IM, RS, RD)
+#define CMPPDmr(IM, MD, MB, MI, MS, RD) _SSEPDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
+
+#define CMPSSrr(IM, RS, RD) _SSESSirr(X86_SSE_CMP, IM, RS, RD)
+#define CMPSSmr(IM, MD, MB, MI, MS, RD) _SSESSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
+#define CMPSDrr(IM, RS, RD) _SSESDirr(X86_SSE_CMP, IM, RS, RD)
+#define CMPSDmr(IM, MD, MB, MI, MS, RD) _SSESDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
+
+/* SSE divide, max, min, multiply and bitwise OR. Same layout as the ADD
+ * macros: rr (register source RS) and mr (memory source MD,MB,MI,MS)
+ * forms with register destination RD, in the PS / PD / SS / SD variants
+ * selected by the wrapper family. OR is defined for PS and PD only. */
+#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD)
+#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD)
+#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+
+#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD)
+#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD)
+#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+
+#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD)
+#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD)
+#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+
+#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD)
+#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD)
+#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+
+#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD)
+#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD)
+#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+
+#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD)
+#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD)
+#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+
+#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD)
+#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD)
+#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+
+#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD)
+#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD)
+#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+
+#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD)
+#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
+#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD)
+#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
+
+#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD)
+#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
+#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD)
+#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
+
+#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD)
+#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
+#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD)
+#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
+
+#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD)
+#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD)
+#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+
+#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD)
+#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD)
+#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+
+#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD)
+#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD)
+#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+
+#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD)
+#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD)
+#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+
+#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD)
+#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
+#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD)
+#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
+
+#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD)
+#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
+#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD)
+#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
+
+#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD)
+#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
+#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD)
+#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
+/*
+ * MOVAPS / MOVAPD, written with literal opcode bytes: 0x28 for the forms
+ * whose destination is a register (rr, mr) and 0x29 for the
+ * register-to-memory form (rm).
+ */
+#define MOVAPSrr(RS, RD) _SSEPSrr(0x28, RS, RD)
+#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr(0x28, MD, MB, MI, MS, RD)
+#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPSrm(0x29, RS, MD, MB, MI, MS)
+
+#define MOVAPDrr(RS, RD) _SSEPDrr(0x28, RS, RD)
+#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr(0x28, MD, MB, MI, MS, RD)
+#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPDrm(0x29, RS, MD, MB, MI, MS)
+/*
+ * Conversion instructions. The _SSELrr/_SSELmr and _SSEQrr/_SSEQmr helpers
+ * take a prefix byte (0x66, 0xf2 or 0xf3) as their first argument; the
+ * double-underscore __SSELrr/__SSELmr variants take none. Each register
+ * operand is followed by a register-class tag: _rX, _rM, _r4 or _r8
+ * (presumably XMM, MMX, 32-bit and 64-bit general-purpose registers --
+ * confirm against the tag definitions). Macros with an "L" or "Q" before the
+ * rr/mr suffix select the _SSEL or _SSEQ helpers; their general-purpose
+ * register operand, where present, is tagged _r4 or _r8 respectively.
+ */
+#define CVTDQ2PDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTDQ2PD, RS,_rX, RD,_rX)
+#define CVTDQ2PDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTDQ2PD, MD, MB, MI, MS, RD,_rX)
+#define CVTDQ2PSrr(RS, RD) __SSELrr( X86_SSE_CVTDQ2PS, RS,_rX, RD,_rX)
+#define CVTDQ2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTDQ2PS, MD, MB, MI, MS, RD,_rX)
+#define CVTPD2DQrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTPD2DQ, RS,_rX, RD,_rX)
+#define CVTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTPD2DQ, MD, MB, MI, MS, RD,_rX)
+#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PI, RS,_rX, RD,_rM)
+#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PI, MD, MB, MI, MS, RD,_rM)
+#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PS, RS,_rX, RD,_rX)
+#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PS, MD, MB, MI, MS, RD,_rX)
+#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPI2PD, RS,_rM, RD,_rX)
+#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPI2PD, MD, MB, MI, MS, RD,_rX)
+#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTPI2PS, RS,_rM, RD,_rX)
+#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPI2PS, MD, MB, MI, MS, RD,_rX)
+#define CVTPS2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPS2DQ, RS,_rX, RD,_rX)
+#define CVTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPS2DQ, MD, MB, MI, MS, RD,_rX)
+#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PD, RS,_rX, RD,_rX)
+#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PD, MD, MB, MI, MS, RD,_rX)
+#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PI, RS,_rX, RD,_rM)
+#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PI, MD, MB, MI, MS, RD,_rM)
+#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r4)
+#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r4)
+#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r8)
+#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r8)
+#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SS, RS,_rX, RD,_rX)
+#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SS, MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI2SD, RS,_r4, RD,_rX)
+#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI2SD, RS,_r8, RD,_rX)
+#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI2SS, RS,_r4, RD,_rX)
+#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI2SS, RS,_r8, RD,_rX)
+#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX)
+#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SD, RS,_rX, RD,_rX)
+#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SD, MD, MB, MI, MS, RD,_rX)
+#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r4)
+#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r4)
+#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r8)
+#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r8)
+#define CVTTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2PI, RS,_rX, RD,_rM)
+#define CVTTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2PI, MD, MB, MI, MS, RD,_rM)
+#define CVTTPD2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2DQ, RS,_rX, RD,_rX)
+#define CVTTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2DQ, MD, MB, MI, MS, RD,_rX)
+#define CVTTPS2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTPS2DQ, RS,_rX, RD,_rX)
+#define CVTTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTPS2DQ, MD, MB, MI, MS, RD,_rX)
+#define CVTTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTTPS2PI, RS,_rX, RD,_rM)
+#define CVTTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTTPS2PI, MD, MB, MI, MS, RD,_rM)
+#define CVTTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r4)
+#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r4)
+#define CVTTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r8)
+#define CVTTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r8)
+#define CVTTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r4)
+#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r4)
+#define CVTTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r8)
+#define CVTTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r8)
+/*
+ * MOVD/MOVQ into an _rX register (prefix 0x66, opcode 0x6e) from a _r4/_r8
+ * register or from memory.
+ */
+#define MOVDXDrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX)
+#define MOVDXDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
+#define MOVQXDrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX)
+#define MOVQXDmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
+/*
+ * MOVD/MOVQ out of an _rX register (prefix 0x66, opcode 0x7e). The rr forms
+ * pass RD in the helper's source slot and RS in its destination slot, i.e.
+ * swapped relative to the 0x6e forms above.
+ */
+#define MOVDXSrr(RS, RD) _SSELrr(0x66, 0x7e, RD,_r4, RS,_rX)
+#define MOVDXSrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
+#define MOVQXSrr(RS, RD) _SSEQrr(0x66, 0x7e, RD,_r8, RS,_rX)
+#define MOVQXSrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
+/* MOVD/MOVQ into an _rM register (no prefix argument, opcode 0x6e). */
+#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM)
+#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM)
+#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM)
+#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM)
+
+/*
+ * MOVD/MOVQ out of an _rM (MMX) register, opcode 0x0f 0x7e. In this encoding
+ * the MMX source sits in the ModRM reg field and the general-purpose
+ * destination in r/m, so the register-to-register forms hand RD and RS to
+ * the helper in swapped slots, exactly as MOVDXSrr and MOVQXSrr do for the
+ * 0x66-prefixed variant of the same opcode. The rm (store) forms already
+ * place RS in the reg field and need no swap.
+ */
+#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RD,_r4, RS,_rM)
+#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS)
+#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RD,_r8, RS,_rM)
+#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS)
+/*
+ * Remaining data-movement emitters, written with literal opcode bytes.
+ * First the register-only forms (MOVDQ2Q, MOVMSKPS/PD, MOVHLPS, MOVLHPS),
+ * then mr/rm pairs: MOVDQA and MOVDQU use 0x6f for rr/mr and 0x7f for rm;
+ * MOVHPx uses 0x16/0x17 and MOVLPx 0x12/0x13 for mr/rm. MOVHLPS and MOVLHPS
+ * reuse the 0x12 and 0x16 opcodes in their register-to-register form.
+ */
+#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM)
+#define MOVMSKPSrr(RS, RD) __SSELrr( 0x50, RS,_rX, RD,_r4)
+#define MOVMSKPDrr(RS, RD) _SSELrr(0x66, 0x50, RS,_rX, RD,_r4)
+
+#define MOVHLPSrr(RS, RD) __SSELrr( 0x12, RS,_rX, RD,_rX)
+#define MOVLHPSrr(RS, RD) __SSELrr( 0x16, RS,_rX, RD,_rX)
+
+#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX)
+#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX)
+#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS)
+
+#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX)
+#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX)
+#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS)
+
+#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x16, MD, MB, MI, MS, RD,_rX)
+#define MOVHPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x17, RS,_rX, MD, MB, MI, MS)
+#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x16, MD, MB, MI, MS, RD,_rX)
+#define MOVHPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x17, RS,_rX, MD, MB, MI, MS)
+
+#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x12, MD, MB, MI, MS, RD,_rX)
+#define MOVLPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x13, RS,_rX, MD, MB, MI, MS)
+#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x12, MD, MB, MI, MS, RD,_rX)
+#define MOVLPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x13, RS,_rX, MD, MB, MI, MS)
+
+
+/* --- Floating-Point instructions ----------------------------------------- */
+/*
+ * x87 opcode constants. The high byte is the first opcode byte. How the low
+ * byte is used depends on the helper macro the constant is passed to (see
+ * _FPU, _FPUm and _FPUr after this enum): _FPU passes the whole value on,
+ * _FPUm uses only the low three bits as its second argument, and _FPUr
+ * clears the low three bits before the register is added. One constant can
+ * therefore serve several operand forms, e.g. X86_FCOM = 0xd8d2 gives 2 for
+ * the memory form and 0xd8d0 as the base of the register form.
+ * The trailing comments list the operand forms each constant is used with.
+ */
+enum {
+ X86_F2XM1 = 0xd9f0,
+ X86_FABS = 0xd9e1,
+ X86_FADD = 0xd8c0, // m32fp, m64fp, sti0, st0i, pst0i
+ X86_FIADD = 0xda00, // m32int, m16int
+ X86_FBLD = 0xdf04, // mem
+ X86_FBSTP = 0xdf06, // mem
+ X86_FCHS = 0xd9e0,
+ X86_FCMOVB = 0xdac0, // sti0
+ X86_FCMOVE = 0xdac8, // sti0
+ X86_FCMOVBE = 0xdad0, // sti0
+ X86_FCMOVU = 0xdad8, // sti0
+ X86_FCMOVNB = 0xdbc0, // sti0
+ X86_FCMOVNE = 0xdbc8, // sti0
+ X86_FCMOVNBE = 0xdbd0, // sti0
+ X86_FCMOVNU = 0xdbd8, // sti0
+ X86_FCOM = 0xd8d2, // m32fp, m64fp, sti
+ X86_FCOMP = 0xd8db, // m32fp, m64fp, sti
+ X86_FCOMPP = 0xded9,
+ X86_FCOMI = 0xdbf0, // sti0
+ X86_FCOMIP = 0xdff0, // sti0
+ X86_FUCOMI = 0xdbe8, // sti0
+ X86_FUCOMIP = 0xdfe8, // sti0
+ X86_FCOS = 0xd9ff,
+ X86_FDECSTP = 0xd9f6,
+ X86_FDIV = 0xd8f6, // m32fp, m64fp, sti0, st0i, pst0i
+ X86_FIDIV = 0xda06, // m32int, m16int
+ X86_FDIVR = 0xd8ff, // m32fp, m64fp, sti0, st0i, pst0i
+ X86_FIDIVR = 0xda07, // m32int, m16int
+ X86_FFREE = 0xddc0, // sti
+ X86_FICOM = 0xda02, // m32int, m16int
+ X86_FICOMP = 0xda03, // m32int, m16int
+ X86_FILD = 0xdb00, // m32int, m16int
+ X86_FILDQ = 0xdf05, // mem
+ X86_FINCSTP = 0xd9f7,
+ X86_FIST = 0xdb02, // m32int, m16int
+ X86_FISTP = 0xdb03, // m32int, m16int
+ X86_FISTPQ = 0xdf07, // mem
+ X86_FISTTP = 0xdb01, // m32int, m16int
+ X86_FISTTPQ = 0xdd01, // mem
+ X86_FLD = 0xd900, // m32fp, m64fp
+ X86_FLDT = 0xdb05, // mem
+ X86_FLD1 = 0xd9e8,
+ X86_FLDL2T = 0xd9e9,
+ X86_FLDL2E = 0xd9ea,
+ X86_FLDPI = 0xd9eb,
+ X86_FLDLG2 = 0xd9ec,
+ X86_FLDLN2 = 0xd9ed,
+ X86_FLDZ = 0xd9ee,
+ X86_FMUL = 0xd8c9, // m32fp, m64fp, sti0, st0i, pst0i
+ X86_FIMUL = 0xda01, // m32int, m16int
+ X86_FNOP = 0xd9d0,
+ X86_FPATAN = 0xd9f3,
+ X86_FPREM = 0xd9f8,
+ X86_FPREM1 = 0xd9f5,
+ X86_FPTAN = 0xd9f2,
+ X86_FRNDINT = 0xd9fc,
+ X86_FSCALE = 0xd9fd,
+ X86_FSIN = 0xd9fe,
+ X86_FSINCOS = 0xd9fb,
+ X86_FSQRT = 0xd9fa,
+ X86_FSTS = 0xd902, // mem
+ X86_FSTD = 0xdd02, // mem
+ X86_FST = 0xddd0, // sti
+ X86_FSTPS = 0xd903, // mem
+ X86_FSTPD = 0xdd03, // mem
+ X86_FSTPT = 0xdb07, // mem
+ X86_FSTP = 0xddd8, // sti
+ X86_FSUB = 0xd8e4, // m32fp, m64fp, sti0, st0i, pst0i
+ X86_FISUB = 0xda04, // m32int, m16int
+ X86_FSUBR = 0xd8ed, // m32fp, m64fp, sti0, st0i, pst0i
+ X86_FISUBR = 0xda05, // m32int, m16int
+ X86_FTST = 0xd9e4,
+ X86_FUCOM = 0xdde0, // sti
+ X86_FUCOMP = 0xdde8, // sti
+ X86_FUCOMPP = 0xdae9,
+ X86_FXAM = 0xd9e5,
+ X86_FXCH = 0xd9c8, // sti
+ X86_FXTRACT = 0xd9f4,
+ X86_FYL2X = 0xd9f1,
+ X86_FYL2XP1 = 0xd9f9,
+};
+/*
+ * x87 encoding helpers (OP is one of the X86_F* constants above):
+ *   _FPU     no operands; OP is passed unchanged to _OO.
+ *   _FPUm    memory operand; (OP)>>8 and (OP)&7 are passed to _O_r_X as its
+ *            first two arguments.
+ *   _FPUSm, _FPULm   identical to _FPUm.
+ *   _FPUDm, _FPUWm   _FPUm with 0x400 ORed into OP, which turns the high
+ *            byte 0xd8 into 0xdc, 0xd9 into 0xdd, 0xda into 0xde and 0xdb
+ *            into 0xdf.
+ *   _FPUr    register operand; the low three bits of OP are cleared and the
+ *            register is passed as _rF(RR).
+ *   _FPU0r   _FPUr with 0x400 ORed into OP; _FPUr0 is _FPUr with OP as is.
+ *   _FPUrr   picks _FPU0r when RS satisfies _rST0P, otherwise _FPUr0 when RD
+ *            does, otherwise evaluates x86_emit_failure().
+ *   _FPUP0r  _FPU0r with an additional 0x200 ORed into OP.
+ */
+#define _FPU(OP) _OO(OP)
+#define _FPUm(OP, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X((OP)>>8, (OP)&7, MD, MB, MI, MS))
+#define _FPUSm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS)
+#define _FPUDm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS)
+#define _FPULm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS)
+#define _FPUWm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS)
+#define _FPUr(OP, RR) _OOr((OP)&0xfff8, _rF(RR))
+#define _FPU0r(OP, RD) _FPUr((OP)|0x400, RD)
+#define _FPUr0(OP, RS) _FPUr((OP) , RS)
+#define _FPUrr(OP, RS, RD) (_rST0P(RS) ? _FPU0r(OP, RD) : (_rST0P(RD) ? _FPUr0(OP, RS) : x86_emit_failure("FPU instruction without st0")))
+#define _FPUP0r(OP, RD) _FPU0r((OP)|0x200, RD)
+/*
+ * x87 instruction emitters, one per mnemonic and operand form. The suffix
+ * selects the helper: () -> _FPU, Sm and Dm -> _FPUSm and _FPUDm, Wm and Lm
+ * -> _FPUWm and _FPULm, plain m -> _FPUm, r -> _FPUr, r0 and 0r -> _FPUr0
+ * and _FPU0r, rr -> _FPUrr, P0r -> _FPUP0r. The FST and FSTP memory forms
+ * are the exception: they have one opcode constant per operand size and go
+ * through _FPUm directly.
+ *
+ * NOTE(review): by the helper arithmetic, FSUB0r and FSUBP0r use opcode
+ * bases 0xdce0 and 0xdee0, FDIV0r and FDIVP0r use 0xdcf0 and 0xdef0 (their
+ * FSUBR and FDIVR counterparts get 0xdce8, 0xdee8, 0xdcf8 and 0xdef8). The
+ * Intel manual names DC E0+i "FSUBR ST(i),ST(0)" and DC E8+i
+ * "FSUB ST(i),ST(0)" (likewise for DE and for FDIV, FDIVR), so for the forms
+ * whose destination is ST(i) these macros follow the reversed AT&T-style
+ * naming. Confirm that callers expect this before relying on them.
+ */
+#define F2XM1() _FPU(X86_F2XM1)
+#define FABS() _FPU(X86_FABS)
+#define FADDSm(MD, MB, MI, MS) _FPUSm(X86_FADD, MD, MB, MI, MS)
+#define FADDDm(MD, MB, MI, MS) _FPUDm(X86_FADD, MD, MB, MI, MS)
+#define FADDP0r(RD) _FPUP0r(X86_FADD, RD)
+#define FADDrr(RS, RD) _FPUrr(X86_FADD, RS, RD)
+#define FADD0r(RD) _FPU0r(X86_FADD, RD)
+#define FADDr0(RS) _FPUr0(X86_FADD, RS)
+#define FIADDWm(MD, MB, MI, MS) _FPUWm(X86_FIADD, MD, MB, MI, MS)
+#define FIADDLm(MD, MB, MI, MS) _FPULm(X86_FIADD, MD, MB, MI, MS)
+#define FBLDm(MD, MB, MI, MS) _FPUm(X86_FBLD, MD, MB, MI, MS)
+#define FBSTPm(MD, MB, MI, MS) _FPUm(X86_FBSTP, MD, MB, MI, MS)
+#define FCHS() _FPU(X86_FCHS)
+#define FCMOVBr0(RS) _FPUr0(X86_FCMOVB, RS)
+#define FCMOVEr0(RS) _FPUr0(X86_FCMOVE, RS)
+#define FCMOVBEr0(RS) _FPUr0(X86_FCMOVBE, RS)
+#define FCMOVUr0(RS) _FPUr0(X86_FCMOVU, RS)
+#define FCMOVNBr0(RS) _FPUr0(X86_FCMOVNB, RS)
+#define FCMOVNEr0(RS) _FPUr0(X86_FCMOVNE, RS)
+#define FCMOVNBEr0(RS) _FPUr0(X86_FCMOVNBE, RS)
+#define FCMOVNUr0(RS) _FPUr0(X86_FCMOVNU, RS)
+#define FCOMSm(MD, MB, MI, MS) _FPUSm(X86_FCOM, MD, MB, MI, MS)
+#define FCOMDm(MD, MB, MI, MS) _FPUDm(X86_FCOM, MD, MB, MI, MS)
+#define FCOMr(RD) _FPUr(X86_FCOM, RD)
+#define FCOMPSm(MD, MB, MI, MS) _FPUSm(X86_FCOMP, MD, MB, MI, MS)
+#define FCOMPDm(MD, MB, MI, MS) _FPUDm(X86_FCOMP, MD, MB, MI, MS)
+#define FCOMPr(RD) _FPUr(X86_FCOMP, RD)
+#define FCOMPP() _FPU(X86_FCOMPP)
+#define FCOMIr0(RS) _FPUr0(X86_FCOMI, RS)
+#define FCOMIPr0(RS) _FPUr0(X86_FCOMIP, RS)
+#define FUCOMIr0(RS) _FPUr0(X86_FUCOMI, RS)
+#define FUCOMIPr0(RS) _FPUr0(X86_FUCOMIP, RS)
+#define FCOS() _FPU(X86_FCOS)
+#define FDECSTP() _FPU(X86_FDECSTP)
+#define FDIVSm(MD, MB, MI, MS) _FPUSm(X86_FDIV, MD, MB, MI, MS)
+#define FDIVDm(MD, MB, MI, MS) _FPUDm(X86_FDIV, MD, MB, MI, MS)
+#define FDIVP0r(RD) _FPUP0r(X86_FDIV, RD)
+#define FDIVrr(RS, RD) _FPUrr(X86_FDIV, RS, RD)
+#define FDIV0r(RD) _FPU0r(X86_FDIV, RD)
+#define FDIVr0(RS) _FPUr0(X86_FDIV, RS)
+#define FIDIVWm(MD, MB, MI, MS) _FPUWm(X86_FIDIV, MD, MB, MI, MS)
+#define FIDIVLm(MD, MB, MI, MS) _FPULm(X86_FIDIV, MD, MB, MI, MS)
+#define FDIVRSm(MD, MB, MI, MS) _FPUSm(X86_FDIVR, MD, MB, MI, MS)
+#define FDIVRDm(MD, MB, MI, MS) _FPUDm(X86_FDIVR, MD, MB, MI, MS)
+#define FDIVRP0r(RD) _FPUP0r(X86_FDIVR, RD)
+#define FDIVRrr(RS, RD) _FPUrr(X86_FDIVR, RS, RD)
+#define FDIVR0r(RD) _FPU0r(X86_FDIVR, RD)
+#define FDIVRr0(RS) _FPUr0(X86_FDIVR, RS)
+#define FIDIVRWm(MD, MB, MI, MS) _FPUWm(X86_FIDIVR, MD, MB, MI, MS)
+#define FIDIVRLm(MD, MB, MI, MS) _FPULm(X86_FIDIVR, MD, MB, MI, MS)
+#define FFREEr(RD) _FPUr(X86_FFREE, RD)
+#define FICOMWm(MD, MB, MI, MS) _FPUWm(X86_FICOM, MD, MB, MI, MS)
+#define FICOMLm(MD, MB, MI, MS) _FPULm(X86_FICOM, MD, MB, MI, MS)
+#define FICOMPWm(MD, MB, MI, MS) _FPUWm(X86_FICOMP, MD, MB, MI, MS)
+#define FICOMPLm(MD, MB, MI, MS) _FPULm(X86_FICOMP, MD, MB, MI, MS)
+#define FILDWm(MD, MB, MI, MS) _FPUWm(X86_FILD, MD, MB, MI, MS)
+#define FILDLm(MD, MB, MI, MS) _FPULm(X86_FILD, MD, MB, MI, MS)
+#define FILDQm(MD, MB, MI, MS) _FPUm(X86_FILDQ, MD, MB, MI, MS)
+#define FINCSTP() _FPU(X86_FINCSTP)
+#define FISTWm(MD, MB, MI, MS) _FPUWm(X86_FIST, MD, MB, MI, MS)
+#define FISTLm(MD, MB, MI, MS) _FPULm(X86_FIST, MD, MB, MI, MS)
+#define FISTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTP, MD, MB, MI, MS)
+#define FISTPLm(MD, MB, MI, MS) _FPULm(X86_FISTP, MD, MB, MI, MS)
+#define FISTPQm(MD, MB, MI, MS) _FPUm(X86_FISTPQ, MD, MB, MI, MS)
+#define FISTTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTTP, MD, MB, MI, MS)
+#define FISTTPLm(MD, MB, MI, MS) _FPULm(X86_FISTTP, MD, MB, MI, MS)
+#define FISTTPQm(MD, MB, MI, MS) _FPUm(X86_FISTTPQ, MD, MB, MI, MS)
+#define FLDSm(MD, MB, MI, MS) _FPUSm(X86_FLD, MD, MB, MI, MS)
+#define FLDDm(MD, MB, MI, MS) _FPUDm(X86_FLD, MD, MB, MI, MS)
+#define FLDTm(MD, MB, MI, MS) _FPUm(X86_FLDT, MD, MB, MI, MS)
+#define FLD1() _FPU(X86_FLD1)
+#define FLDL2T() _FPU(X86_FLDL2T)
+#define FLDL2E() _FPU(X86_FLDL2E)
+#define FLDPI() _FPU(X86_FLDPI)
+#define FLDLG2() _FPU(X86_FLDLG2)
+#define FLDLN2() _FPU(X86_FLDLN2)
+#define FLDZ() _FPU(X86_FLDZ)
+#define FMULSm(MD, MB, MI, MS) _FPUSm(X86_FMUL, MD, MB, MI, MS)
+#define FMULDm(MD, MB, MI, MS) _FPUDm(X86_FMUL, MD, MB, MI, MS)
+#define FMULP0r(RD) _FPUP0r(X86_FMUL, RD)
+#define FMULrr(RS, RD) _FPUrr(X86_FMUL, RS, RD)
+#define FMUL0r(RD) _FPU0r(X86_FMUL, RD)
+#define FMULr0(RS) _FPUr0(X86_FMUL, RS)
+#define FIMULWm(MD, MB, MI, MS) _FPUWm(X86_FIMUL, MD, MB, MI, MS)
+#define FIMULLm(MD, MB, MI, MS) _FPULm(X86_FIMUL, MD, MB, MI, MS)
+#define FNOP() _FPU(X86_FNOP)
+#define FPATAN() _FPU(X86_FPATAN)
+#define FPREM() _FPU(X86_FPREM)
+#define FPREM1() _FPU(X86_FPREM1)
+#define FPTAN() _FPU(X86_FPTAN)
+#define FRNDINT() _FPU(X86_FRNDINT)
+#define FSCALE() _FPU(X86_FSCALE)
+#define FSIN() _FPU(X86_FSIN)
+#define FSINCOS() _FPU(X86_FSINCOS)
+#define FSQRT() _FPU(X86_FSQRT)
+#define FSTSm(MD, MB, MI, MS) _FPUm(X86_FSTS, MD, MB, MI, MS)
+#define FSTDm(MD, MB, MI, MS) _FPUm(X86_FSTD, MD, MB, MI, MS)
+#define FSTr(RD) _FPUr(X86_FST, RD)
+#define FSTPSm(MD, MB, MI, MS) _FPUm(X86_FSTPS, MD, MB, MI, MS)
+#define FSTPDm(MD, MB, MI, MS) _FPUm(X86_FSTPD, MD, MB, MI, MS)
+#define FSTPTm(MD, MB, MI, MS) _FPUm(X86_FSTPT, MD, MB, MI, MS)
+#define FSTPr(RD) _FPUr(X86_FSTP, RD)
+#define FSUBSm(MD, MB, MI, MS) _FPUSm(X86_FSUB, MD, MB, MI, MS)
+#define FSUBDm(MD, MB, MI, MS) _FPUDm(X86_FSUB, MD, MB, MI, MS)
+#define FSUBP0r(RD) _FPUP0r(X86_FSUB, RD)
+#define FSUBrr(RS, RD) _FPUrr(X86_FSUB, RS, RD)
+#define FSUB0r(RD) _FPU0r(X86_FSUB, RD)
+#define FSUBr0(RS) _FPUr0(X86_FSUB, RS)
+#define FISUBWm(MD, MB, MI, MS) _FPUWm(X86_FISUB, MD, MB, MI, MS)
+#define FISUBLm(MD, MB, MI, MS) _FPULm(X86_FISUB, MD, MB, MI, MS)
+#define FSUBRSm(MD, MB, MI, MS) _FPUSm(X86_FSUBR, MD, MB, MI, MS)
+#define FSUBRDm(MD, MB, MI, MS) _FPUDm(X86_FSUBR, MD, MB, MI, MS)
+#define FSUBRP0r(RD) _FPUP0r(X86_FSUBR, RD)
+#define FSUBRrr(RS, RD) _FPUrr(X86_FSUBR, RS, RD)
+#define FSUBR0r(RD) _FPU0r(X86_FSUBR, RD)
+#define FSUBRr0(RS) _FPUr0(X86_FSUBR, RS)
+#define FISUBRWm(MD, MB, MI, MS) _FPUWm(X86_FISUBR, MD, MB, MI, MS)
+#define FISUBRLm(MD, MB, MI, MS) _FPULm(X86_FISUBR, MD, MB, MI, MS)
+#define FTST() _FPU(X86_FTST)
+#define FUCOMr(RD) _FPUr(X86_FUCOM, RD)
+#define FUCOMPr(RD) _FPUr(X86_FUCOMP, RD)
+#define FUCOMPP() _FPU(X86_FUCOMPP)
+#define FXAM() _FPU(X86_FXAM)
+#define FXCHr(RD) _FPUr(X86_FXCH, RD)
+#define FXTRACT() _FPU(X86_FXTRACT)
+#define FYL2X() _FPU(X86_FYL2X)
+#define FYL2XP1() _FPU(X86_FYL2XP1)
+
+#endif /* X86_RTASM_H */
--- /dev/null
+/*
+ * compiler/compemu.h - Public interface and definitions
+ *
+ * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
+ *
+ * Adaptation for Basilisk II and improvements, copyright 2000-2005
+ * Gwenole Beauchesne
+ *
+ * Basilisk II (C) 1997-2008 Christian Bauer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef COMPEMU_H
+#define COMPEMU_H
+
+#include "newcpu.h"
+
+#if USE_JIT
+
+#if defined __i386__ || defined __x86_64__
+#include "flags_x86.h"
+#else
+#error "Unsupported JIT compiler for this architecture"
+#endif
+
+#if JIT_DEBUG
+/* Dump some information (m68k block, x86 block addresses) about the compiler
+ * state. Only declared when JIT_DEBUG is nonzero. */
+extern void compiler_dumpstate(void);
+#endif
+
+/* Now that we do block chaining, and also have linked lists on each tag,
+ TAGMASK can be much smaller and still do its job. Saves several megs
+ of memory! */
+#define TAGMASK 0x0000ffff
+#define TAGSIZE (TAGMASK+1)
+#define MAXRUN 1024
+/* Reduce an address to its low TAGMASK bits (presumably the index into
+ cache_tags[] -- confirm at the call sites). The argument is parenthesized
+ before the cast so that an expression such as cacheline(p + n) converts
+ the whole expression and not just its first operand. */
+#define cacheline(x) (((uintptr)(x))&TAGMASK)
+/* NOTE(review): presumably the host pointer and 68k address of the start of
+ * the block being compiled -- confirm where they are defined. */
+extern uae_u8* start_pc_p;
+extern uae_u32 start_pc;
+/* Forward declaration: this header only needs pointers to blockinfo_t. */
+struct blockinfo_t;
+/* One history entry: a pointer to 16-bit data. */
+struct cpu_history {
+ uae_u16 * location;
+};
+/*
+ * Element type of cache_tags[]: either a handler function pointer or a
+ * pointer to a blockinfo_t. The union shares its name with the function-like
+ * macro cacheline(x); the macro is only expanded when the name is followed
+ * by '(', so the two do not clash in the declarations here.
+ */
+union cacheline {
+ cpuop_func * handler;
+ blockinfo_t * bi;
+};
+
+/* Use new spill/reload strategy when calling external functions.
+ * Setting this to 1 currently stops the build with the #error below. */
+#define USE_OPTIMIZED_CALLS 0
+#if USE_OPTIMIZED_CALLS
+#error implementation in progress
+#endif
+
+/* (gb) When on, this option can save up to 30% compilation time
+ * when many lazy flushes occur (e.g. apps in MacOS 8.x).
+ */
+#define USE_SEPARATE_BIA 1
+
+/* Use chain of checksum_info_t to compute the block checksum */
+#define USE_CHECKSUM_INFO 1
+
+/* Use code inlining, aka follow-up of constant jumps */
+#define USE_INLINING 1
+
+/* Inlining requires the chained checksumming information, so
+ * USE_CHECKSUM_INFO is forced to 1 whenever USE_INLINING is on. */
+#if USE_INLINING
+#undef USE_CHECKSUM_INFO
+#define USE_CHECKSUM_INFO 1
+#endif
+
+/* Does flush_icache_range() only check for blocks falling in the requested range? */
+#define LAZY_FLUSH_ICACHE_RANGE 0
+/* NOTE(review): the purpose of USE_F_ALIAS and USE_OFFSET is not evident from
+ * this header. COMP_DEBUG controls the Dif() macro below. */
+#define USE_F_ALIAS 1
+#define USE_OFFSET 1
+#define COMP_DEBUG 1
+/* Dif(x) introduces a debug-only check: it expands to "if (x)" when
+ * COMP_DEBUG is nonzero and to "if (0)" otherwise, so the statement that
+ * follows it is never executed in non-debug builds. */
+#if COMP_DEBUG
+#define Dif(x) if (x)
+#else
+#define Dif(x) if (0)
+#endif
+/* NOTE(review): the meaning of SCALE is not evident from this header. */
+#define SCALE 2
+
+#define BYTES_PER_INST 10240 /* paranoid ;-) */
+#define LONGEST_68K_INST 16 /* The number of bytes the longest possible
+ 68k instruction takes */
+#define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums
+ for. Anything larger will be flushed
+ unconditionally even with SOFT_FLUSH */
+#define MAX_HOLD_BI 3 /* One for the current block, and up to two
+ for jump targets */
+/* Condition-code bit masks. The "#if 1" branch is the layout in use
+ * (X=0x10, N=0x08, Z=0x04, V=0x02, C=0x01); the "#else" branch keeps the
+ * alternative layout with the order of the five bits reversed. */
+#define INDIVIDUAL_INST 0
+#if 1
+// gb-- my format from readcpu.cpp is not the same
+#define FLAG_X 0x0010
+#define FLAG_N 0x0008
+#define FLAG_Z 0x0004
+#define FLAG_V 0x0002
+#define FLAG_C 0x0001
+#else
+#define FLAG_C 0x0010
+#define FLAG_V 0x0008
+#define FLAG_Z 0x0004
+#define FLAG_N 0x0002
+#define FLAG_X 0x0001
+#endif
+#define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V)
+#define FLAG_ZNV (FLAG_Z | FLAG_N | FLAG_V)
+
+#define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */
+/* Native register counts. N_REGS sizes bigstate.nat[] and smallstate.nat[];
+ * N_FREGS sizes bigstate.fat[] and bigstate.spos[]. */
+#if defined(__x86_64__)
+#define N_REGS 16 /* really only 15, but they are numbered 0-3,5-15 */
+#else
+#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
+#endif
+#define N_FREGS 6 /* That leaves us two positions on the stack to play with */
+
+/* Functions exposed to newcpu, or to what was moved from newcpu.c to
+ * compemu_support.c. Note that flush_icache is a function pointer, not a
+ * function, so it can be re-targeted at run time. */
+extern void compiler_init(void);
+extern void compiler_exit(void);
+extern bool compiler_use_jit(void);
+extern void init_comp(void);
+extern void flush(int save_regs);
+extern void small_flush(int save_regs);
+extern void set_target(uae_u8* t);
+extern uae_u8* get_target(void);
+extern void freescratch(void);
+extern void build_comp(void);
+extern void set_cache_state(int enabled);
+extern int get_cache_state(void);
+extern uae_u32 get_jitted_size(void);
+extern void (*flush_icache)(int n);
+extern void alloc_cache(void);
+extern int check_for_cache_miss(void);
+
+/* JIT FPU compilation */
+extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
+extern void comp_fbcc_opp (uae_u32 opcode);
+extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
+/* Compiler state shared with other translation units. cache_tags[] is an
+ * array of the cacheline union declared earlier in this header. */
+extern uae_u32 needed_flags;
+extern cacheline cache_tags[];
+extern uae_u8* comp_pc_p;
+extern void* pushall_call_handler;
+/* Sizes of the virtual integer and FP register files (bigstate.state[] and
+ * bigstate.fate[]). */
+#define VREGS 32
+#define VFREGS 16
+/* Status codes. NOTE(review): presumably the values stored in
+ * reg_status.status and freg_status.status -- confirm. */
+#define INMEM 1
+#define CLEAN 2
+#define DIRTY 3
+#define UNDEF 4
+#define ISCONST 5
+/* Bookkeeping record for one virtual integer register; bigstate.state[]
+ * holds VREGS of them. */
+typedef struct {
+ uae_u32* mem;
+ uae_u32 val;
+ uae_u8 is_swapped;
+ uae_u8 status;
+ uae_s8 realreg; /* gb-- realreg can hold -1 */
+ uae_u8 realind; /* The index in the holds[] array */
+ uae_u8 needflush;
+ uae_u8 validsize;
+ uae_u8 dirtysize;
+ uae_u8 dummy;
+} reg_status;
+/* Bookkeeping record for one virtual FP register; bigstate.fate[] holds
+ * VFREGS of them. */
+typedef struct {
+ uae_u32* mem;
+ double val;
+ uae_u8 status;
+ uae_s8 realreg; /* gb-- realreg can hold -1 */
+ uae_u8 realind;
+ uae_u8 needflush;
+} freg_status;
+/* Named slots 16..31. NOTE(review): presumably indices into the VREGS-sized
+ * integer register file, with S1..S12 serving as scratch slots -- confirm. */
+#define PC_P 16
+#define FLAGX 17
+#define FLAGTMP 18
+#define NEXT_HANDLER 19
+#define S1 20
+#define S2 21
+#define S3 22
+#define S4 23
+#define S5 24
+#define S6 25
+#define S7 26
+#define S8 27
+#define S9 28
+#define S10 29
+#define S11 30
+#define S12 31
+/* Named slots 8..11. NOTE(review): presumably indices into the VFREGS-sized
+ * FP register file -- confirm. */
+#define FP_RESULT 8
+#define FS1 9
+#define FS2 10
+#define FS3 11
+/* State of one native integer register; bigstate.nat[] holds N_REGS of
+ * them. holds[] has room for VREGS entries and nholds is its companion
+ * count (reg_status.realind is documented as an index into holds[]). */
+typedef struct {
+ uae_u32 touched;
+ uae_s8 holds[VREGS];
+ uae_u8 nholds;
+ uae_u8 canbyte;
+ uae_u8 canword;
+ uae_u8 locked;
+} n_status;
+/* State of one native FP register; bigstate.fat[] holds N_FREGS of them.
+ * Same layout idea as n_status, with holds[] sized by VFREGS. */
+typedef struct {
+ uae_u32 touched;
+ uae_s8 holds[VFREGS];
+ uae_u8 nholds;
+ uae_u8 locked;
+} fn_status;
+
+/* For flag handling: three distinct state codes (1..3) */
+#define NADA 1
+#define TRASH 2
+#define VALID 3
+
+/* needflush values (see the needflush field of reg_status and freg_status) */
+#define NF_SCRATCH 0
+#define NF_TOMEM 1
+#define NF_HANDLER 2
+/* Complete register-allocator state: one reg_status per virtual integer
+ * register, one n_status per native register, the flag-tracking words, and
+ * the matching FP tables. The type of the global "live" declared below. */
+typedef struct {
+ /* Integer part */
+ reg_status state[VREGS];
+ n_status nat[N_REGS];
+ uae_u32 flags_on_stack;
+ uae_u32 flags_in_flags;
+ uae_u32 flags_are_important;
+ /* FPU part */
+ freg_status fate[VFREGS];
+ fn_status fat[N_FREGS];
+
+ /* x86 FPU part */
+ uae_s8 spos[N_FREGS];
+ uae_s8 onstack[6];
+ uae_s8 tos;
+} bigstate;
+/* Compact state with one char per virtual and per native integer register;
+ * it has no FP part. */
+typedef struct {
+ /* Integer part */
+ char virt[VREGS];
+ char nat[N_REGS];
+} smallstate;
+/* Globals defined elsewhere. */
+extern bigstate live;
+extern int touchcnt;
+
+/*
+ * Operand annotations used in the mid-layer prototypes. IMM expands to
+ * uae_s32 and every other name to uae_u32, so the compiler does not enforce
+ * them; they only document how an operand is used. NOTE(review): presumably
+ * R = read, W = written, RW = both, the digit is the operand width in bytes,
+ * and MEMR, MEMW, MEMRW are the memory-operand equivalents -- confirm.
+ */
+#define IMM uae_s32
+#define R1 uae_u32
+#define R2 uae_u32
+#define R4 uae_u32
+#define W1 uae_u32
+#define W2 uae_u32
+#define W4 uae_u32
+#define RW1 uae_u32
+#define RW2 uae_u32
+#define RW4 uae_u32
+#define MEMR uae_u32
+#define MEMW uae_u32
+#define MEMRW uae_u32
+/* The same kind of annotation for FP operands; all expand to uae_u32. */
+#define FW uae_u32
+#define FR uae_u32
+#define FRW uae_u32
+/* MIDFUNC opens a function definition as "void func args" and ignores
+ * nargs; MENDFUNC expands to nothing; COMPCALL(func) is just func. */
+#define MIDFUNC(nargs,func,args) void func args
+#define MENDFUNC(nargs,func,args)
+#define COMPCALL(func) func
+/* LOWFUNC opens a "static __inline__ void" definition and ignores flags,
+ * mem and nargs; LENDFUNC expands to nothing. */
+#define LOWFUNC(flags,mem,nargs,func,args) static __inline__ void func args
+#define LENDFUNC(flags,mem,nargs,func,args)
+
+/* What we expose to the outside.
+ *
+ * DECLARE_MIDFUNC takes the function name together with its parenthesized
+ * parameter list and expands to "extern void name(params)". The parameter
+ * types are the operand-annotation macros defined above (IMM, R1..R4,
+ * W1..W4, RW1..RW4); all of them expand to plain integer types, so they
+ * document operand usage without affecting the prototype.
+ *
+ * The rotated operand of the ror_* functions is annotated RW, matching the
+ * rol_*, shll_*, shrl_* and shra_* declarations.
+ */
+#define DECLARE_MIDFUNC(func) extern void func
+DECLARE_MIDFUNC(bt_l_ri(R4 r, IMM i));
+DECLARE_MIDFUNC(bt_l_rr(R4 r, R4 b));
+DECLARE_MIDFUNC(btc_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(btc_l_rr(RW4 r, R4 b));
+DECLARE_MIDFUNC(bts_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(bts_l_rr(RW4 r, R4 b));
+DECLARE_MIDFUNC(btr_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(btr_l_rr(RW4 r, R4 b));
+DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s));
+DECLARE_MIDFUNC(call_r(R4 r));
+DECLARE_MIDFUNC(sub_l_mi(IMM d, IMM s));
+DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s));
+DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s));
+DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s));
+DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i));
+DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i));
+DECLARE_MIDFUNC(rol_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(rol_l_rr(RW4 d, R1 r));
+DECLARE_MIDFUNC(rol_w_rr(RW2 d, R1 r));
+DECLARE_MIDFUNC(rol_b_rr(RW1 d, R1 r));
+DECLARE_MIDFUNC(shll_l_rr(RW4 d, R1 r));
+DECLARE_MIDFUNC(shll_w_rr(RW2 d, R1 r));
+DECLARE_MIDFUNC(shll_b_rr(RW1 d, R1 r));
+DECLARE_MIDFUNC(ror_b_ri(RW1 r, IMM i));
+DECLARE_MIDFUNC(ror_w_ri(RW2 r, IMM i));
+DECLARE_MIDFUNC(ror_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(ror_l_rr(RW4 d, R1 r));
+DECLARE_MIDFUNC(ror_w_rr(RW2 d, R1 r));
+DECLARE_MIDFUNC(ror_b_rr(RW1 d, R1 r));
+DECLARE_MIDFUNC(shrl_l_rr(RW4 d, R1 r));
+DECLARE_MIDFUNC(shrl_w_rr(RW2 d, R1 r));
+DECLARE_MIDFUNC(shrl_b_rr(RW1 d, R1 r));
+DECLARE_MIDFUNC(shra_l_rr(RW4 d, R1 r));
+DECLARE_MIDFUNC(shra_w_rr(RW2 d, R1 r));
+DECLARE_MIDFUNC(shra_b_rr(RW1 d, R1 r));
+DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i));
+DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i));
+DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i));
+DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i));
+DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i));
+DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i));
+DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i));
+DECLARE_MIDFUNC(setcc(W1 d, IMM cc));
+DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc));
+DECLARE_MIDFUNC(cmov_b_rr(RW1 d, R1 s, IMM cc));
+DECLARE_MIDFUNC(cmov_w_rr(RW2 d, R2 s, IMM cc));
+DECLARE_MIDFUNC(cmov_l_rr(RW4 d, R4 s, IMM cc));
+DECLARE_MIDFUNC(cmov_l_rm(RW4 d, IMM s, IMM cc));
+DECLARE_MIDFUNC(bsf_l_rr(W4 d, R4 s));
+DECLARE_MIDFUNC(pop_m(IMM d));
+DECLARE_MIDFUNC(push_m(IMM d));
+DECLARE_MIDFUNC(pop_l(W4 d));
+DECLARE_MIDFUNC(push_l_i(IMM i));
+DECLARE_MIDFUNC(push_l(R4 s));
+DECLARE_MIDFUNC(clear_16(RW4 r));
+DECLARE_MIDFUNC(clear_8(RW4 r));
+DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, R2 s));
+DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, R1 s));
+DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, R2 s));
+DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, R1 s));
+DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s));
+DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s));
+DECLARE_MIDFUNC(imul_32_32(RW4 d, R4 s));
+DECLARE_MIDFUNC(mul_32_32(RW4 d, R4 s));
+DECLARE_MIDFUNC(mov_b_rr(W1 d, R1 s));
+DECLARE_MIDFUNC(mov_w_rr(W2 d, R2 s));
+DECLARE_MIDFUNC(mov_l_rrm_indexed(W4 d,R4 baser, R4 index, IMM factor));
+DECLARE_MIDFUNC(mov_w_rrm_indexed(W2 d, R4 baser, R4 index, IMM factor));
+DECLARE_MIDFUNC(mov_b_rrm_indexed(W1 d, R4 baser, R4 index, IMM factor));
+DECLARE_MIDFUNC(mov_l_mrr_indexed(R4 baser, R4 index, IMM factor, R4 s));
+DECLARE_MIDFUNC(mov_w_mrr_indexed(R4 baser, R4 index, IMM factor, R2 s));
+DECLARE_MIDFUNC(mov_b_mrr_indexed(R4 baser, R4 index, IMM factor, R1 s));
+DECLARE_MIDFUNC(mov_l_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R4 s));
+DECLARE_MIDFUNC(mov_w_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R2 s));
+DECLARE_MIDFUNC(mov_b_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R1 s));
+DECLARE_MIDFUNC(mov_l_brrm_indexed(W4 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE_MIDFUNC(mov_w_brrm_indexed(W2 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE_MIDFUNC(mov_b_brrm_indexed(W1 d, IMM base, R4 baser, R4 index, IMM factor));
+DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, R4 index, IMM factor));
+DECLARE_MIDFUNC(mov_l_rR(W4 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(mov_w_rR(W2 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(mov_b_rR(W1 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(mov_l_brR(W4 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(mov_w_brR(W2 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(mov_b_brR(W1 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(mov_l_Ri(R4 d, IMM i, IMM offset));
+DECLARE_MIDFUNC(mov_w_Ri(R4 d, IMM i, IMM offset));
+DECLARE_MIDFUNC(mov_b_Ri(R4 d, IMM i, IMM offset));
+DECLARE_MIDFUNC(mov_l_Rr(R4 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(mov_w_Rr(R4 d, R2 s, IMM offset));
+DECLARE_MIDFUNC(mov_b_Rr(R4 d, R1 s, IMM offset));
+DECLARE_MIDFUNC(lea_l_brr(W4 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, R4 s, R4 index, IMM factor, IMM offset));
+DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, R4 s, R4 index, IMM factor));
+DECLARE_MIDFUNC(mov_l_bRr(R4 d, R4 s, IMM offset));
+DECLARE_MIDFUNC(mov_w_bRr(R4 d, R2 s, IMM offset));
+DECLARE_MIDFUNC(mov_b_bRr(R4 d, R1 s, IMM offset));
+DECLARE_MIDFUNC(bswap_32(RW4 r));
+DECLARE_MIDFUNC(bswap_16(RW2 r));
+DECLARE_MIDFUNC(mov_l_rr(W4 d, R4 s));
+DECLARE_MIDFUNC(mov_l_mr(IMM d, R4 s));
+DECLARE_MIDFUNC(mov_w_mr(IMM d, R2 s));
+DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s));
+DECLARE_MIDFUNC(mov_b_mr(IMM d, R1 s));
+DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s));
+DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s));
+DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s));
+DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s));
+DECLARE_MIDFUNC(add_l_mi(IMM d, IMM s) );
+DECLARE_MIDFUNC(add_w_mi(IMM d, IMM s) );
+DECLARE_MIDFUNC(add_b_mi(IMM d, IMM s) );
+DECLARE_MIDFUNC(test_l_ri(R4 d, IMM i));
+DECLARE_MIDFUNC(test_l_rr(R4 d, R4 s));
+DECLARE_MIDFUNC(test_w_rr(R2 d, R2 s));
+DECLARE_MIDFUNC(test_b_rr(R1 d, R1 s));
+DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i));
+DECLARE_MIDFUNC(and_l(RW4 d, R4 s));
+DECLARE_MIDFUNC(and_w(RW2 d, R2 s));
+DECLARE_MIDFUNC(and_b(RW1 d, R1 s));
+DECLARE_MIDFUNC(or_l_rm(RW4 d, IMM s));
+DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i));
+DECLARE_MIDFUNC(or_l(RW4 d, R4 s));
+DECLARE_MIDFUNC(or_w(RW2 d, R2 s));
+DECLARE_MIDFUNC(or_b(RW1 d, R1 s));
+DECLARE_MIDFUNC(adc_l(RW4 d, R4 s));
+DECLARE_MIDFUNC(adc_w(RW2 d, R2 s));
+DECLARE_MIDFUNC(adc_b(RW1 d, R1 s));
+DECLARE_MIDFUNC(add_l(RW4 d, R4 s));
+DECLARE_MIDFUNC(add_w(RW2 d, R2 s));
+DECLARE_MIDFUNC(add_b(RW1 d, R1 s));
+DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i));
+DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i));
+DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i));
+DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i));
+DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i));
+DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i));
+DECLARE_MIDFUNC(sbb_l(RW4 d, R4 s));
+DECLARE_MIDFUNC(sbb_w(RW2 d, R2 s));
+DECLARE_MIDFUNC(sbb_b(RW1 d, R1 s));
+DECLARE_MIDFUNC(sub_l(RW4 d, R4 s));
+DECLARE_MIDFUNC(sub_w(RW2 d, R2 s));
+DECLARE_MIDFUNC(sub_b(RW1 d, R1 s));
+DECLARE_MIDFUNC(cmp_l(R4 d, R4 s));
+DECLARE_MIDFUNC(cmp_l_ri(R4 r, IMM i));
+DECLARE_MIDFUNC(cmp_w(R2 d, R2 s));
+DECLARE_MIDFUNC(cmp_b(R1 d, R1 s));
+DECLARE_MIDFUNC(xor_l(RW4 d, R4 s));
+DECLARE_MIDFUNC(xor_w(RW2 d, R2 s));
+DECLARE_MIDFUNC(xor_b(RW1 d, R1 s));
+DECLARE_MIDFUNC(live_flags(void));
+DECLARE_MIDFUNC(dont_care_flags(void));
+DECLARE_MIDFUNC(duplicate_carry(void));
+DECLARE_MIDFUNC(restore_carry(void));
+DECLARE_MIDFUNC(start_needflags(void));
+DECLARE_MIDFUNC(end_needflags(void));
+DECLARE_MIDFUNC(make_flags_live(void));
+DECLARE_MIDFUNC(call_r_11(R4 r, W4 out1, R4 in1, IMM osize, IMM isize));
+DECLARE_MIDFUNC(call_r_02(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2));
+DECLARE_MIDFUNC(forget_about(W4 r));
+DECLARE_MIDFUNC(nop(void));
+
+DECLARE_MIDFUNC(f_forget_about(FW r));
+DECLARE_MIDFUNC(fmov_pi(FW r));
+DECLARE_MIDFUNC(fmov_log10_2(FW r));
+DECLARE_MIDFUNC(fmov_log2_e(FW r));
+DECLARE_MIDFUNC(fmov_loge_2(FW r));
+DECLARE_MIDFUNC(fmov_1(FW r));
+DECLARE_MIDFUNC(fmov_0(FW r));
+DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m));
+DECLARE_MIDFUNC(fmovi_rm(FW r, MEMR m));
+DECLARE_MIDFUNC(fmovi_mr(MEMW m, FR r));
+DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m));
+DECLARE_MIDFUNC(fmovs_mr(MEMW m, FR r));
+DECLARE_MIDFUNC(fmov_mr(MEMW m, FR r));
+DECLARE_MIDFUNC(fmov_ext_mr(MEMW m, FR r));
+DECLARE_MIDFUNC(fmov_ext_rm(FW r, MEMR m));
+DECLARE_MIDFUNC(fmov_rr(FW d, FR s));
+DECLARE_MIDFUNC(fldcw_m_indexed(R4 index, IMM base));
+DECLARE_MIDFUNC(ftst_r(FR r));
+DECLARE_MIDFUNC(dont_care_fflags(void));
+DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s));
+DECLARE_MIDFUNC(fabs_rr(FW d, FR s));
+DECLARE_MIDFUNC(frndint_rr(FW d, FR s));
+DECLARE_MIDFUNC(fsin_rr(FW d, FR s));
+DECLARE_MIDFUNC(fcos_rr(FW d, FR s));
+DECLARE_MIDFUNC(ftwotox_rr(FW d, FR s));
+DECLARE_MIDFUNC(fetox_rr(FW d, FR s));
+DECLARE_MIDFUNC(flog2_rr(FW d, FR s));
+DECLARE_MIDFUNC(fneg_rr(FW d, FR s));
+DECLARE_MIDFUNC(fadd_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fsub_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fmul_rr(FRW d, FR s));
+DECLARE_MIDFUNC(frem_rr(FRW d, FR s));
+DECLARE_MIDFUNC(frem1_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s));
+DECLARE_MIDFUNC(fcmp_rr(FR d, FR s));
+DECLARE_MIDFUNC(fflags_into_flags(W2 tmp));
+#undef DECLARE_MIDFUNC
+
+/* Global failure flag word; it becomes non-zero once FAIL() has been called
+   with a non-zero argument.  NOTE(review): presumably inspected by the
+   compiler driver to abandon the native translation -- confirm. */
+extern int failure;
+/* OR the bits of x into `failure`.  The argument is parenthesized so the
+   macro expands predictably for any expression passed to it. */
+#define FAIL(x) do { failure |= (x); } while (0)
+
+/* Convenience functions exposed to gencomp */
+/* NOTE(review): the int parameters below look like virtual register numbers
+   (compemu_fpp.cpp calls e.g. readlong(ad,S2,S3) with scratch registers) --
+   confirm against their definitions. */
+/* Byte offset into the instruction stream; callers advance it as they
+   consume operands (see the comp_get_iword((m68k_pc_offset+=2)-2) idiom). */
+extern uae_u32 m68k_pc_offset;
+extern void readbyte(int address, int dest, int tmp);
+extern void readword(int address, int dest, int tmp);
+extern void readlong(int address, int dest, int tmp);
+extern void writebyte(int address, int source, int tmp);
+extern void writeword(int address, int source, int tmp);
+extern void writelong(int address, int source, int tmp);
+/* NOTE(review): the _clobber variants presumably may destroy their source
+   register -- confirm before reusing `source` after the call. */
+extern void writeword_clobber(int address, int source, int tmp);
+extern void writelong_clobber(int address, int source, int tmp);
+extern void get_n_addr(int address, int dest, int tmp);
+extern void get_n_addr_jmp(int address, int dest, int tmp);
+extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
+/* Set native Z flag only if register is zero */
+extern void set_zero(int r, int tmp);
+extern int kill_rodent(int r);
+extern void sync_m68k_pc(void);
+/* get_const() is used by compemu_fpp.cpp to read back the value of a
+   register that was just loaded with an immediate. */
+extern uae_u32 get_const(int r);
+extern int is_const(int r);
+extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond);
+
+/* Fetch an instruction-stream operand located o bytes past comp_pc_p.
+   comp_get_ibyte reads the byte at o + 1, i.e. the second byte of the 16-bit
+   word at o (presumably the low-order byte of a big-endian extension word --
+   TODO confirm).  The argument is evaluated exactly once in each macro. */
+#define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1))
+#define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o)))
+#define comp_get_ilong(o) do_get_mem_long((uae_u32 *)(comp_pc_p + (o)))
+
+struct blockinfo_t;
+
+/* Link record connecting two blockinfo_t objects (source and target).
+   Records are chained through next/prev_p.  NOTE(review): the roles given
+   below are inferred from the field names and types only -- confirm against
+   the code that creates and removes dependencies. */
+typedef struct dep_t {
+ /* NOTE(review): presumably the location of a jump offset to patch -- confirm */
+ uae_u32* jmp_off;
+ struct blockinfo_t* target;
+ struct blockinfo_t* source;
+ /* NOTE(review): presumably the address of the pointer that refers to this
+    record, so it can be unlinked without walking the list -- confirm */
+ struct dep_t** prev_p;
+ struct dep_t* next;
+} dependency;
+
+/* Singly linked list node describing one memory range by start pointer and
+   length.  NOTE(review): presumably the ranges are the 68k code covered by a
+   block's checksum (blockinfo holds a `csi` list when USE_CHECKSUM_INFO is
+   set) -- confirm. */
+typedef struct checksum_info_t {
+ uae_u8 *start_p;
+ uae_u32 length;
+ struct checksum_info_t *next;
+} checksum_info;
+
+/* Per-block bookkeeping record of the JIT.  NOTE(review): the field roles
+   noted below are inferred from names, types and the pre-existing comments;
+   confirm against the code that fills them in. */
+typedef struct blockinfo_t {
+ uae_s32 count;
+ cpuop_func* direct_handler_to_use;
+ cpuop_func* handler_to_use;
+ /* The direct handler does not check for the correct address */
+
+ cpuop_func* handler;
+ cpuop_func* direct_handler;
+
+ cpuop_func* direct_pen;
+ cpuop_func* direct_pcc;
+
+ uae_u8* pc_p;
+
+ /* NOTE(review): c1/c2 look like checksum words -- confirm */
+ uae_u32 c1;
+ uae_u32 c2;
+#if USE_CHECKSUM_INFO
+ /* Head of a checksum_info list (one node per memory range) */
+ checksum_info *csi;
+#else
+ uae_u32 len;
+ uae_u32 min_pcp;
+#endif
+
+ /* Two independent doubly linked chains run through every record: the
+    "same_cl" chain and the plain next/prev_p chain. */
+ struct blockinfo_t* next_same_cl;
+ struct blockinfo_t** prev_same_cl_p;
+ struct blockinfo_t* next;
+ struct blockinfo_t** prev_p;
+
+ uae_u8 optlevel;
+ uae_u8 needed_flags;
+ /* NOTE(review): presumably one of the BI_* values defined after this
+    struct -- confirm */
+ uae_u8 status;
+ uae_u8 havestate;
+
+ dependency dep[2]; /* Holds things we depend on */
+ dependency* deplist; /* List of things that depend on this */
+ smallstate env;
+
+#if JIT_DEBUG
+ /* (gb) size of the compiled block (direct handler) */
+ uae_u32 direct_handler_size;
+#endif
+} blockinfo;
+
+/* Block state codes, numbered 0..6.  NOTE(review): presumably the values
+   stored in blockinfo.status -- confirm against the users of that field. */
+#define BI_INVALID 0
+#define BI_ACTIVE 1
+#define BI_NEED_RECOMP 2
+#define BI_NEED_CHECK 3
+#define BI_CHECKING 4
+#define BI_COMPILING 5
+#define BI_FINALIZING 6
+
+void execute_normal(void);
+void exec_nostats(void);
+void do_nothing(void);
+
+#else
+
+/* Empty stand-ins for flush_icache() and build_comp().  They sit on the #else
+   branch that the "!USE_JIT" #endif below closes, so callers compile
+   unchanged when that branch is selected; both do nothing. */
+static __inline__ void flush_icache(int) { }
+static __inline__ void build_comp() { }
+
+#endif /* !USE_JIT */
+
+#endif /* COMPEMU_H */
--- /dev/null
+/*
+ * compiler/compemu_fpp.cpp - Dynamic translation of FPU instructions
+ *
+ * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
+ *
+ * Adaptation for Basilisk II and improvements, copyright 2000-2005
+ * Gwenole Beauchesne
+ *
+ * Basilisk II (C) 1997-2008 Christian Bauer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * UAE - The Un*x Amiga Emulator
+ *
+ * MC68881 emulation
+ *
+ * Copyright 1996 Herman ten Brugge
+ * Adapted for JIT compilation (c) Bernd Meyer, 2000
+ */
+
+#include "sysdeps.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "memory.h"
+#include "readcpu.h"
+#include "newcpu.h"
+#include "main.h"
+#include "compiler/compemu.h"
+#include "fpu/fpu.h"
+#include "fpu/flags.h"
+#include "fpu/exceptions.h"
+#include "fpu/rounding.h"
+
+#define DEBUG 0
+#include "debug.h"
+
+// gb-- WARNING: get_fpcr() and set_fpcr() support is experimental
+#define HANDLE_FPCR 0
+
+// - IEEE-based fpu core must be used
+#if defined(FPU_IEEE)
+# define CAN_HANDLE_FPCR
+#endif
+
+// - Generic rounding mode and precision modes are supported if set together
+#if defined(FPU_USE_GENERIC_ROUNDING_MODE) && defined(FPU_USE_GENERIC_ROUNDING_PRECISION)
+# define CAN_HANDLE_FPCR
+#endif
+
+// - X86 rounding mode and precision modes are *not* supported but might work (?!)
+#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION)
+# define CAN_HANDLE_FPCR
+#endif
+
+#if HANDLE_FPCR && !defined(CAN_HANDLE_FPCR)
+# warning "Can't handle FPCR, will FAIL(1) at runtime"
+# undef HANDLE_FPCR
+# define HANDLE_FPCR 0
+#endif
+
+#define STATIC_INLINE static inline
+#define MAKE_FPSR(r) do { fmov_rr(FP_RESULT,r); } while (0)
+
+/* Emit two nop() mid-layer operations.  In this file the macros are placed
+   between a store to temp_fp and the reload of the same memory through the
+   other register file.  Each expansion is wrapped in do { } while (0), like
+   MAKE_FPSR above, so that "delay;" is always a single statement and cannot
+   be split by an enclosing unbraced if/else or loop. */
+#define delay	do { nop(); nop(); } while (0)
+#define delay2	do { nop(); nop(); } while (0)
+
+/* Value passed as `extra` by callers that have no FPU extension word. */
+#define UNKNOWN_EXTRA 0xFFFFFFFF
+
+/*
+ * Report an FPU instruction that cannot be handled.
+ *
+ * opcode: 68k instruction word, forwarded unchanged to op_illg().
+ * extra:  FPU extension word, or UNKNOWN_EXTRA.  It is accepted so call
+ *         sites can pass it for diagnostics but is currently unused.
+ */
+static void fpuop_illg(uae_u32 opcode, uae_u32 extra)
+{
+	(void)extra;
+	op_illg(opcode);
+}
+
+/* Scratch memory shared by the emitted code: a value is stored here through
+   one register file and reloaded through the other.  Extended-precision
+   transfers in this file use the first 12 bytes (offsets 0, 4 and 8). */
+static uae_s32 temp_fp[4]; /* To convert between FP/integer */
+
+/*
+ * Emit code that makes the source operand of an FPU instruction available in
+ * an FP register.
+ *
+ * opcode: 68k instruction word; bits 5-3 select the addressing mode and
+ *         bits 2-0 the register.
+ * extra:  FPU extension word.  When bit 14 is clear the operand is the FP
+ *         register named by bits 12-10 and nothing is emitted.  Otherwise
+ *         bits 12-10 are the operand size code:
+ *           0 = 32-bit integer          (loaded with fmovi_rm)
+ *           1 = 4-byte value            (loaded with fmovs_rm)
+ *           2 = 12-byte extended value  (loaded with fmov_ext_rm)
+ *           3 = packed                  (not supported)
+ *           4 = 16-bit integer, sign-extended
+ *           5 = 8-byte value            (loaded with fmov_rm)
+ *           6 = 8-bit integer, sign-extended
+ *
+ * Returns the FP register number holding the operand (FS1 whenever code was
+ * emitted), or -1 when the addressing mode or size cannot be translated.
+ * All checks that can fail are made before any postincrement/predecrement
+ * code is emitted, so a -1 return leaves no side effects on address
+ * registers.  m68k_pc_offset is advanced past any extension words consumed.
+ *
+ * The order of the emitter calls below is the order of the generated code
+ * and must not be changed.
+ */
+STATIC_INLINE int get_fp_value (uae_u32 opcode, uae_u16 extra)
+{
+	/* Operand size in bytes per size code.  sz2 differs only for byte
+	   operands (2 instead of 1); it is used for A7 and for immediates. */
+	static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
+	static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
+	int size;
+	int mode;
+	int reg;
+	uae_u32 ad = 0;
+
+	if ((extra & 0x4000) == 0) {
+		/* Register-to-register form: the source is already an FP register */
+		return ((extra >> 10) & 7);
+	}
+
+	mode = (opcode >> 3) & 7;
+	reg = opcode & 7;
+	size = (extra >> 10) & 7;
+	switch (mode) {
+	case 0:
+		/* Data register direct: go through temp_fp to reach the FP side */
+		switch (size) {
+		case 6:
+			sign_extend_8_rr(S1,reg);
+			mov_l_mr((uintptr)temp_fp,S1);
+			delay2;
+			fmovi_rm(FS1,(uintptr)temp_fp);
+			return FS1;
+		case 4:
+			sign_extend_16_rr(S1,reg);
+			mov_l_mr((uintptr)temp_fp,S1);
+			delay2;
+			fmovi_rm(FS1,(uintptr)temp_fp);
+			return FS1;
+		case 0:
+			mov_l_mr((uintptr)temp_fp,reg);
+			delay2;
+			fmovi_rm(FS1,(uintptr)temp_fp);
+			return FS1;
+		case 1:
+			mov_l_mr((uintptr)temp_fp,reg);
+			delay2;
+			fmovs_rm(FS1,(uintptr)temp_fp);
+			return FS1;
+		default:
+			return -1;
+		}
+		return -1; /* Should be unreachable */
+	case 1:
+		return -1; /* Genuine invalid instruction */
+	default:
+		break;
+	}
+
+	/* OK, we *will* have to load something from an address. Let's make
+	   sure we know how to handle that, or quit early --- i.e. *before*
+	   we do any postincrement/predecrement that we may regret */
+	if (size == 3 || size == 7)
+		return -1;
+
+	/* Compute the effective address into scratch register S1 */
+	switch (mode) {
+	case 2:
+		ad=S1; /* We will change it, anyway ;-) */
+		mov_l_rr(ad,reg+8);
+		break;
+	case 3:
+		/* Postincrement: take the address first, then bump An */
+		ad=S1;
+		mov_l_rr(ad,reg+8);
+		lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size]));
+		break;
+	case 4:
+		/* Predecrement: lower An first, then take the address */
+		ad=S1;
+		lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size]));
+		mov_l_rr(ad,reg+8);
+		break;
+	case 5:
+		{
+			uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+			ad=S1;
+			mov_l_rr(ad,reg+8);
+			lea_l_brr(ad,ad,off);
+			break;
+		}
+	case 6:
+		{
+			uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2);
+			ad=S1;
+			calc_disp_ea_020(reg+8,dp,ad,S2);
+			break;
+		}
+	case 7:
+		switch (reg) {
+		case 0:
+			{
+				/* Absolute short: sign-extended 16-bit address */
+				uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+				ad=S1;
+				mov_l_ri(ad,off);
+				break;
+			}
+		case 1:
+			{
+				/* Absolute long */
+				uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4);
+				ad=S1;
+				mov_l_ri(ad,off);
+				break;
+			}
+		case 2:
+			{
+				/* PC-relative: the base is the address of the
+				   displacement word, so it is computed before
+				   m68k_pc_offset is advanced */
+				uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+					m68k_pc_offset;
+				uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+				ad=S1;
+				mov_l_ri(ad,address+PC16off);
+				break;
+			}
+		case 4:
+			{
+				/* Immediate: the operand is read from the
+				   instruction stream itself */
+				uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ m68k_pc_offset;
+				ad=S1;
+				// Immediate addressing mode && Operation Length == Byte ->
+				// Use the low-order byte of the extension word.
+				if (size == 6) address++;
+				mov_l_ri(ad,address);
+				m68k_pc_offset+=sz2[size];
+				break;
+			}
+		case 3:	/* PC-relative indexed: not translated */
+		default:
+			return -1;
+		}
+		break;
+	}
+
+	/* Read the operand from `ad` and hand it to the FP side via temp_fp */
+	switch (size) {
+	case 0:
+		readlong(ad,S2,S3);
+		mov_l_mr((uintptr)temp_fp,S2);
+		delay2;
+		fmovi_rm(FS1,(uintptr)temp_fp);
+		break;
+	case 1:
+		readlong(ad,S2,S3);
+		mov_l_mr((uintptr)temp_fp,S2);
+		delay2;
+		fmovs_rm(FS1,(uintptr)temp_fp);
+		break;
+	case 2:
+		/* The word at ad+0 goes to temp_fp+8 and the longs at ad+4 and
+		   ad+8 go to temp_fp+4 and temp_fp+0: the three pieces are
+		   stored in reverse order for fmov_ext_rm */
+		readword(ad,S2,S3);
+		mov_w_mr(((uintptr)temp_fp)+8,S2);
+		add_l_ri(ad,4);
+		readlong(ad,S2,S3);
+		mov_l_mr((uintptr)(temp_fp)+4,S2);
+		add_l_ri(ad,4);
+		readlong(ad,S2,S3);
+		mov_l_mr((uintptr)(temp_fp),S2);
+		delay2;
+		fmov_ext_rm(FS1,(uintptr)(temp_fp));
+		break;
+	case 4:
+		readword(ad,S2,S3);
+		sign_extend_16_rr(S2,S2);
+		mov_l_mr((uintptr)temp_fp,S2);
+		delay2;
+		fmovi_rm(FS1,(uintptr)temp_fp);
+		break;
+	case 5:
+		/* The long at ad+0 goes to temp_fp+4, the long at ad+4 to temp_fp+0 */
+		readlong(ad,S2,S3);
+		mov_l_mr(((uintptr)temp_fp)+4,S2);
+		add_l_ri(ad,4);
+		readlong(ad,S2,S3);
+		mov_l_mr((uintptr)(temp_fp),S2);
+		delay2;
+		fmov_rm(FS1,(uintptr)(temp_fp));
+		break;
+	case 6:
+		readbyte(ad,S2,S3);
+		sign_extend_8_rr(S2,S2);
+		mov_l_mr((uintptr)temp_fp,S2);
+		delay2;
+		fmovi_rm(FS1,(uintptr)temp_fp);
+		break;
+	default:
+		/* Sizes 3 and 7 were rejected above */
+		return -1;
+	}
+	return FS1;
+}
+
+/*
+ * Emit code that stores FP register `val` to the destination of an FPU
+ * instruction.
+ *
+ * val:    FP register number holding the value to store.
+ * opcode: 68k instruction word; bits 5-3 select the addressing mode and
+ *         bits 2-0 the register.
+ * extra:  FPU extension word.  When bit 14 is clear the destination is the
+ *         FP register named by bits 12-10 (FP_RESULT is updated as well, via
+ *         MAKE_FPSR).  Otherwise bits 12-10 are the size code, with the same
+ *         meaning as in get_fp_value(); size 3 (packed) is not supported.
+ *
+ * Returns 0 on success and -1 when the destination cannot be translated.
+ * The size is validated before any postincrement/predecrement code is
+ * emitted.  m68k_pc_offset is advanced past any extension words consumed.
+ *
+ * The order of the emitter calls below is the order of the generated code
+ * and must not be changed.
+ *
+ * NOTE(review): addressing modes 7/2 (PC-relative) and 7/4 (immediate) are
+ * accepted as destinations exactly as before; they are presumably never
+ * encoded for a store -- confirm before relying on them.
+ */
+STATIC_INLINE int put_fp_value (int val, uae_u32 opcode, uae_u16 extra)
+{
+	/* Operand size in bytes per size code.  sz2 differs only for byte
+	   operands (2 instead of 1); it is used for A7 and for immediates. */
+	static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
+	static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
+	int size;
+	int mode;
+	int reg;
+	uae_u32 ad;
+
+	if ((extra & 0x4000) == 0) {
+		const int dest_reg = (extra >> 10) & 7;
+		fmov_rr(dest_reg, val);
+		// gb-- status register is affected
+		MAKE_FPSR(dest_reg);
+		return 0;
+	}
+
+	mode = (opcode >> 3) & 7;
+	reg = opcode & 7;
+	size = (extra >> 10) & 7;
+	ad = (uae_u32)-1;
+	switch (mode) {
+	case 0:
+		/* Data register direct: convert into temp_fp, then load Dn */
+		switch (size) {
+		case 6:
+			fmovi_mr((uintptr)temp_fp,val);
+			delay;
+			mov_b_rm(reg,(uintptr)temp_fp);
+			return 0;
+		case 4:
+			fmovi_mr((uintptr)temp_fp,val);
+			delay;
+			mov_w_rm(reg,(uintptr)temp_fp);
+			return 0;
+		case 0:
+			fmovi_mr((uintptr)temp_fp,val);
+			delay;
+			mov_l_rm(reg,(uintptr)temp_fp);
+			return 0;
+		case 1:
+			fmovs_mr((uintptr)temp_fp,val);
+			delay;
+			mov_l_rm(reg,(uintptr)temp_fp);
+			return 0;
+		default:
+			return -1;
+		}
+	case 1:
+		return -1; /* genuine invalid instruction */
+	default:
+		break;
+	}
+
+	/* Let's make sure we get out *before* doing something silly if
+	   we can't handle the size */
+	if (size == 3 || size == 7)
+		return -1;
+
+	/* Compute the effective address into scratch register S1 */
+	switch (mode) {
+	case 2:
+		ad=S1;
+		mov_l_rr(ad,reg+8);
+		break;
+	case 3:
+		/* Postincrement: take the address first, then bump An */
+		ad=S1;
+		mov_l_rr(ad,reg+8);
+		lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size]));
+		break;
+	case 4:
+		/* Predecrement: lower An first, then take the address */
+		ad=S1;
+		lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size]));
+		mov_l_rr(ad,reg+8);
+		break;
+	case 5:
+		{
+			uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+			ad=S1;
+			mov_l_rr(ad,reg+8);
+			add_l_ri(ad,off);
+			break;
+		}
+	case 6:
+		{
+			uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2);
+			ad=S1;
+			calc_disp_ea_020(reg+8,dp,ad,S2);
+			break;
+		}
+	case 7:
+		switch (reg) {
+		case 0:
+			{
+				/* Absolute short: sign-extended 16-bit address */
+				uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+				ad=S1;
+				mov_l_ri(ad,off);
+				break;
+			}
+		case 1:
+			{
+				/* Absolute long */
+				uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4);
+				ad=S1;
+				mov_l_ri(ad,off);
+				break;
+			}
+		case 2:
+			{
+				/* The base is the address of the displacement
+				   word, so it is computed before m68k_pc_offset
+				   is advanced */
+				uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+					m68k_pc_offset;
+				uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+				ad=S1;
+				mov_l_ri(ad,address+PC16off);
+				break;
+			}
+		case 4:
+			{
+				uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
+					m68k_pc_offset;
+				ad=S1;
+				mov_l_ri(ad,address);
+				m68k_pc_offset+=sz2[size];
+				break;
+			}
+		case 3:	/* PC-relative indexed: not translated */
+		default:
+			return -1;
+		}
+		break;
+	}
+
+	/* Convert into temp_fp, then write the pieces out to `ad` */
+	switch (size) {
+	case 0:
+		fmovi_mr((uintptr)temp_fp,val);
+		delay;
+		mov_l_rm(S2,(uintptr)temp_fp);
+		writelong_clobber(ad,S2,S3);
+		break;
+	case 1:
+		fmovs_mr((uintptr)temp_fp,val);
+		delay;
+		mov_l_rm(S2,(uintptr)temp_fp);
+		writelong_clobber(ad,S2,S3);
+		break;
+	case 2:
+		/* temp_fp+8 (word) is written to ad+0, temp_fp+4 to ad+4 and
+		   temp_fp+0 to ad+8: the three pieces go out in reverse order */
+		fmov_ext_mr((uintptr)temp_fp,val);
+		delay;
+		mov_w_rm(S2,(uintptr)temp_fp+8);
+		writeword_clobber(ad,S2,S3);
+		add_l_ri(ad,4);
+		mov_l_rm(S2,(uintptr)temp_fp+4);
+		writelong_clobber(ad,S2,S3);
+		add_l_ri(ad,4);
+		mov_l_rm(S2,(uintptr)temp_fp);
+		writelong_clobber(ad,S2,S3);
+		break;
+	case 4:
+		fmovi_mr((uintptr)temp_fp,val);
+		delay;
+		mov_l_rm(S2,(uintptr)temp_fp);
+		writeword_clobber(ad,S2,S3);
+		break;
+	case 5:
+		/* temp_fp+4 is written to ad+0, temp_fp+0 to ad+4 */
+		fmov_mr((uintptr)temp_fp,val);
+		delay;
+		mov_l_rm(S2,(uintptr)temp_fp+4);
+		writelong_clobber(ad,S2,S3);
+		add_l_ri(ad,4);
+		mov_l_rm(S2,(uintptr)temp_fp);
+		writelong_clobber(ad,S2,S3);
+		break;
+	case 6:
+		fmovi_mr((uintptr)temp_fp,val);
+		delay;
+		mov_l_rm(S2,(uintptr)temp_fp);
+		writebyte(ad,S2,S3);
+		break;
+	default:
+		/* Sizes 3 and 7 were rejected above */
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Emit code that loads the effective address of an FPU memory operand into
+ * scratch register S1.
+ *
+ * opcode: 68k instruction word; bits 5-3 select the addressing mode and
+ *         bits 2-0 the register.
+ * ad:     kept for interface compatibility; it is never written.
+ *
+ * Supported modes: (An), (An)+ and -(An) (all three simply copy An; the
+ * caller applies any increment/decrement), (d16,An), absolute short and
+ * absolute long.  Register direct, indexed and PC-relative modes are
+ * rejected.
+ *
+ * Returns S1 on success and -1 on failure.  The result must be held in a
+ * signed int for the failure test to work.  m68k_pc_offset is advanced past
+ * any extension words consumed.
+ */
+STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad)
+{
+	int mode;
+	int reg;
+	uae_s32 off;
+
+	(void)ad;
+
+	mode = (opcode >> 3) & 7;
+	reg = opcode & 7;
+	switch (mode) {
+	case 0:
+	case 1:
+		return -1;
+	case 2:
+	case 3:
+	case 4:
+		mov_l_rr(S1,8+reg);
+		return S1;
+	case 5:
+		off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+		mov_l_rr(S1,8+reg);
+		add_l_ri(S1,off);
+		return S1;
+	case 6:
+		return -1;
+	case 7:
+		switch (reg) {
+		case 0:
+			off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+			mov_l_ri(S1,off);
+			return S1;
+		case 1:
+			off=comp_get_ilong((m68k_pc_offset+=4)-4);
+			mov_l_ri(S1,off);
+			return S1;
+		case 2:	/* (d16,PC): deliberately not translated */
+		case 3:	/* PC-relative indexed: not translated */
+		default:
+			return -1;
+		}
+	}
+	/* Every mode value 0..7 returns above */
+	abort();
+}
+
+/* FDBcc is not translated: the only effect is to raise failure bit 1 via
+   FAIL(1).  Both arguments are ignored. */
+void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
+{
+	(void)opcode;
+	(void)extra;
+	FAIL(1);
+}
+
+/*
+ * Translate FScc with a data register destination.
+ *
+ * opcode: 68k instruction word; bits 5-3 must be 0 (data register direct),
+ *         bits 2-0 name the destination register.
+ * extra:  extension word; bits 3-0 select the condition.  Bit 5 set means an
+ *         undefined condition.  Bit 4 is ignored.
+ *
+ * The emitted code loads 255 into S1 and 0 into S4, conditionally moves S1
+ * into S4 according to the condition, and finally copies the low byte of S4
+ * into the destination register.  The numeric cmov condition codes are the
+ * x86 ones listed in the "Floating point conditions" comment further down
+ * in this file.
+ *
+ * Calls FAIL(1) and emits nothing for undefined conditions and for any
+ * destination other than a data register.
+ */
+void comp_fscc_opp (uae_u32 opcode, uae_u16 extra)
+{
+	int reg;
+
+#if DEBUG_FPP
+	printf ("fscc_opp at %08lx\n", m68k_getpc ());
+	fflush (stdout);
+#endif
+
+	if (extra&0x20) { /* only cc from 00 to 1f are defined */
+		FAIL(1);
+		return;
+	}
+	if ((opcode & 0x38) != 0) { /* We can only do to integer register */
+		FAIL(1);
+		return;
+	}
+
+	fflags_into_flags(S2);
+	reg=(opcode&7);
+
+	mov_l_ri(S1,255);
+	mov_l_ri(S4,0);
+	switch(extra&0x0f) { /* according to fpp.c, the 0x10 bit is ignored */
+	case 0: break; /* set never */
+	case 1:
+		mov_l_rr(S2,S4);
+		cmov_l_rr(S4,S1,4);
+		cmov_l_rr(S4,S2,10);
+		break;
+	case 2: cmov_l_rr(S4,S1,7); break;
+	case 3: cmov_l_rr(S4,S1,3); break;
+	case 4:
+		mov_l_rr(S2,S4);
+		cmov_l_rr(S4,S1,2);
+		cmov_l_rr(S4,S2,10);
+		break;
+	case 5:
+		mov_l_rr(S2,S4);
+		cmov_l_rr(S4,S1,6);
+		cmov_l_rr(S4,S2,10);
+		break;
+	case 6: cmov_l_rr(S4,S1,5); break;
+	case 7: cmov_l_rr(S4,S1,11); break;
+	case 8: cmov_l_rr(S4,S1,10); break;
+	case 9: cmov_l_rr(S4,S1,4); break;
+	case 10: cmov_l_rr(S4,S1,10); cmov_l_rr(S4,S1,7); break;
+	case 11: cmov_l_rr(S4,S1,4); cmov_l_rr(S4,S1,3); break;
+	case 12: cmov_l_rr(S4,S1,2); break;
+	case 13: cmov_l_rr(S4,S1,6); break;
+	case 14: cmov_l_rr(S4,S1,5); cmov_l_rr(S4,S1,10); break;
+	case 15: mov_l_rr(S4,S1); break;
+	}
+
+	/* The destination is always a data register here: every other
+	   addressing mode was rejected above */
+	mov_b_rr(reg,S4);
+}
+
+/* FTRAPcc is not translated: the only effect is to raise failure bit 1 via
+   FAIL(1).  Both arguments are ignored. */
+void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc)
+{
+	(void)opcode;
+	(void)oldpc;
+	FAIL(1);
+}
+
+/*
+ * Translate FBcc (conditional branch on FPU condition).
+ *
+ * opcode: 68k instruction word.  Bits 3-0 select the condition, bit 4 is
+ *         ignored, bit 5 set means an undefined condition (FAIL(1)), and
+ *         bit 6 selects a 32-bit displacement instead of a sign-extended
+ *         16-bit one.
+ *
+ * After the two add_l_ri() calls PC_P holds comp_pc_p + m68k_pc_offset (the
+ * fall-through address) and S1 holds comp_pc_p + start_68k_offset + off,
+ * i.e. the displacement is applied to the address of the displacement word
+ * itself.  m68k_pc_offset is then reset to 0 because it has been folded
+ * into both values.
+ *
+ * Conditions that map onto a single x86 condition code are handed to
+ * register_branch(); the others select the target with cmov sequences
+ * directly in PC_P.  The numeric condition codes are the x86 ones listed in
+ * the "Floating point conditions" comment that follows this function.
+ */
+void comp_fbcc_opp (uae_u32 opcode)
+{
+	uae_u32 start_68k_offset=m68k_pc_offset;
+	uae_u32 off;
+	uae_u32 v1;
+	uae_u32 v2;
+	int cc;
+
+	// comp_pc_p is expected to be bound to 32-bit addresses
+	assert((uintptr)comp_pc_p <= 0xffffffffUL);
+
+	if (opcode&0x20) { /* only cc from 00 to 1f are defined */
+		FAIL(1);
+		return;
+	}
+	if ((opcode&0x40)==0) {
+		off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+	}
+	else {
+		off=comp_get_ilong((m68k_pc_offset+=4)-4);
+	}
+	mov_l_ri(S1,(uintptr)
+		(comp_pc_p+off-(m68k_pc_offset-start_68k_offset)));
+	mov_l_ri(PC_P,(uintptr)comp_pc_p);
+
+	/* Now they are both constant. Might as well fold in m68k_pc_offset */
+	add_l_ri(S1,m68k_pc_offset);
+	add_l_ri(PC_P,m68k_pc_offset);
+	m68k_pc_offset=0;
+
+	/* according to fpp.c, the 0x10 bit is ignored
+	   (it handles exception handling, which we don't
+	   do, anyway ;-) */
+	cc=opcode&0x0f;
+	v1=get_const(PC_P);
+	v2=get_const(S1);
+	fflags_into_flags(S2);
+
+	switch(cc) {
+	case 0: break; /* jump never */
+	case 1:
+		mov_l_rr(S2,PC_P);
+		cmov_l_rr(PC_P,S1,4);
+		cmov_l_rr(PC_P,S2,10); break;
+	case 2: register_branch(v1,v2,7); break;
+	case 3: register_branch(v1,v2,3); break;
+	case 4:
+		mov_l_rr(S2,PC_P);
+		cmov_l_rr(PC_P,S1,2);
+		cmov_l_rr(PC_P,S2,10); break;
+	case 5:
+		mov_l_rr(S2,PC_P);
+		cmov_l_rr(PC_P,S1,6);
+		cmov_l_rr(PC_P,S2,10); break;
+	case 6: register_branch(v1,v2,5); break;
+	case 7: register_branch(v1,v2,11); break;
+	case 8: register_branch(v1,v2,10); break;
+	case 9: register_branch(v1,v2,4); break;
+	case 10:
+		cmov_l_rr(PC_P,S1,10);
+		cmov_l_rr(PC_P,S1,7); break;
+	case 11:
+		cmov_l_rr(PC_P,S1,4);
+		cmov_l_rr(PC_P,S1,3); break;
+	case 12: register_branch(v1,v2,2); break;
+	case 13: register_branch(v1,v2,6); break;
+	case 14:
+		cmov_l_rr(PC_P,S1,5);
+		cmov_l_rr(PC_P,S1,10); break;
+	case 15: mov_l_rr(PC_P,S1); break;
+	}
+}
+
+ /* Floating point conditions
+ The "NotANumber" part could be problematic; however, when NaN is
+ encountered, the ftst instruction sets both N and Z to 1 on the x87,
+ so quite often things just fall into place. This is probably not
+ accurate wrt the 68k FPU, but it is *as* accurate as this was before.
+ However, some more thought should go into fixing this stuff up so
+ it accurately emulates the 68k FPU.
+>=<U
+0000 0x00: 0 --- Never jump
+0101 0x01: Z --- jump if zero (x86: 4)
+1000 0x02: !(NotANumber || Z || N) --- Neither Z nor N set (x86: 7)
+1101 0x03: Z || !(NotANumber || N); --- Z or !N (x86: 4 and 3)
+0010 0x04: N && !(NotANumber || Z); --- N and !Z (x86: hard!)
+0111 0x05: Z || (N && !NotANumber); --- Z or N (x86: 6)
+1010 0x06: !(NotANumber || Z); --- not Z (x86: 5)
+1110 0x07: !NotANumber; --- not NaN (x86: 11, not parity)
+0001 0x08: NotANumber; --- NaN (x86: 10)
+0101 0x09: NotANumber || Z; --- Z (x86: 4)
+1001 0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
+1101 0x0b: NotANumber || Z || !N; --- Z or !N (x86: 4 and 3)
+0011 0x0c: NotANumber || (N && !Z); --- N (x86: 2)
+0111 0x0d: NotANumber || Z || N; --- Z or N (x86: 6)
+1010 0x0e: !Z; --- not Z (x86: 5)
+1111 0x0f: 1; --- always
+
+This is not how the 68k handles things, though --- it sets Z to 0 and N
+to the NaN's sign.... ('o' and 'i' denote differences from the above
+table)
+
+>=<U
+0000 0x00: 0 --- Never jump
+010o 0x01: Z --- jump if zero (x86: 4, not 10)
+1000 0x02: !(NotANumber || Z || N) --- Neither Z nor N set (x86: 7)
+110o 0x03: Z || !(NotANumber || N); --- Z or !N (x86: 3)
+0010 0x04: N && !(NotANumber || Z); --- N and !Z (x86: 2, not 10)
+011o 0x05: Z || (N && !NotANumber); --- Z or N (x86: 6, not 10)
+1010 0x06: !(NotANumber || Z); --- not Z (x86: 5)
+1110 0x07: !NotANumber; --- not NaN (x86: 11, not parity)
+0001 0x08: NotANumber; --- NaN (x86: 10)
+0101 0x09: NotANumber || Z; --- Z (x86: 4)
+1001 0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
+1101 0x0b: NotANumber || Z || !N; --- Z or !N (x86: 4 and 3)
+0011 0x0c: NotANumber || (N && !Z); --- N (x86: 2)
+0111 0x0d: NotANumber || Z || N; --- Z or N (x86: 6)
+101i 0x0e: !Z; --- not Z (x86: 5 and 10)
+1111 0x0f: 1; --- always
+
+Of course, this *still* doesn't mean that the x86 and 68k conditions are
+equivalent --- the handling of infinities is different, for one thing.
+On the 68k, +infinity minus +infinity is NotANumber (as it should be). On
+the x86, it is +infinity, and some exception is raised (which I suspect
+is promptly ignored) STUPID!
+The more I learn about their CPUs, the more I detest Intel....
+
+You can see this in action if you have "Benoit" (see Aminet) and
+set the exponent to 16. Wait for a long time, and marvel at the extra black
+areas outside the center one. That's where Benoit expects NaN, and the x86
+gives +infinity. [Ooops --- that must have been some kind of bug in my code.
+it no longer happens, and the resulting graphic looks much better, too]
+
+x86 conditions
+0011 : 2
+1100 : 3
+0101 : 4
+1010 : 5
+0111 : 6
+1000 : 7
+0001 : 10
+1110 : 11
+ */
+/*
+ * FSAVE is not translated: the only effect is to raise failure bit 1 via
+ * FAIL(1).  The argument is ignored.
+ *
+ * The previous body continued after an unconditional "FAIL(1); return;" with
+ * code that could never run.  That code wrote the state frame at translation
+ * time instead of emitting code, and tested get_fp_ad() against 0 although
+ * that function reports failure as -1, so it has been removed rather than
+ * kept as a template.
+ */
+void comp_fsave_opp (uae_u32 opcode)
+{
+	(void)opcode;
+	FAIL(1);
+}
+
+/*
+ * FRESTORE is not translated: the only effect is to raise failure bit 1 via
+ * FAIL(1).  The argument is ignored.
+ *
+ * The previous body continued after an unconditional "FAIL(1); return;" with
+ * code that could never run.  That code read the state frame at translation
+ * time instead of emitting code, and tested get_fp_ad() against 0 although
+ * that function reports failure as -1, so it has been removed rather than
+ * kept as a template.
+ */
+void comp_frestore_opp (uae_u32 opcode)
+{
+	(void)opcode;
+	FAIL(1);
+}
+
+/* Mathematical constants in fpu_register precision: e, log10(e) and ln(10).
+   The USE_LONG_DOUBLE variants carry more digits and the L suffix. */
+#if USE_LONG_DOUBLE
+static const fpu_register const_e = 2.7182818284590452353602874713526625L;
+static const fpu_register const_log10_e = 0.4342944819032518276511289189166051L;
+static const fpu_register const_loge_10 = 2.3025850929940456840179914546843642L;
+#else
+static const fpu_register const_e = 2.7182818284590452354;
+static const fpu_register const_log10_e = 0.43429448190325182765;
+static const fpu_register const_loge_10 = 2.30258509299404568402;
+#endif
+
+/* Table of 10^0 followed by 10^(2^k) for k = 0..8 (k = 0..12 when
+   USE_LONG_DOUBLE is set).  The long-double-only entries carry the L suffix:
+   without it they are double constants, and 1e512 and above lie outside the
+   range of an IEEE double. */
+static const fpu_register power10[] = {
+	1e0, 1e1, 1e2, 1e4, 1e8, 1e16, 1e32, 1e64, 1e128, 1e256
+#if USE_LONG_DOUBLE
+	, 1e512L, 1e1024L, 1e2048L, 1e4096L
+#endif
+};
+
+/* 128 words, indexed through the low byte of the 68k fpu control word */
+/* Each row holds eight identical entries and is tagged pNrM by its trailing
+   comment (presumably 68k precision N, rounding mode M).  Decoding the
+   values with the x87 control-word layout (precision control in bits 8-9,
+   rounding control in bits 10-11):
+     p0 and p3 -> PC=11, p1 -> PC=00, p2 -> PC=10
+     r0 -> RC=00 (nearest), r1 -> RC=11 (toward zero),
+     r2 -> RC=01 (down),    r3 -> RC=10 (up)
+   The low byte 0x7f has bits 0-5 set, which masks all six x87 exceptions.
+   NOTE(review): with 128 entries the index cannot be the raw low byte
+   (0..255); the scaling is applied by whoever indexes the table -- confirm. */
+static uae_u16 x86_fpucw[]={
+ 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p0r0 */
+ 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p0r1 */
+ 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p0r2 */
+ 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, /* p0r3 */
+
+ 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, /* p1r0 */
+ 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, /* p1r1 */
+ 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, /* p1r2 */
+ 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, /* p1r3 */
+
+ 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, /* p2r0 */
+ 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, /* p2r1 */
+ 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, /* p2r2 */
+ 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, /* p2r3 */
+
+ 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p3r0 */
+ 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p3r1 */
+ 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p3r2 */
+ 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f /* p3r3 */
+};
+
+/*
+ * Translate one 68k FPU general instruction into native code.
+ *
+ * opcode is the 68k opcode word and extra its extension word. The
+ * function dispatches on bits 13-15 of extra:
+ *   3    - store FP register (extra >> 7) & 7 through put_fp_value
+ *   6, 7 - FMOVEM of FP registers, static register lists only
+ *   4, 5 - moves to/from the FPU control registers
+ *   0, 2 - FMOVECR constants (when (extra & 0xfc00) == 0x5c00) and the
+ *          arithmetic operations selected by extra & 0x7f
+ *
+ * Every encoding this translator does not handle invokes FAIL(1) and
+ * returns. Many cases keep statements after an unconditional
+ * "FAIL(1); return;"; those statements are unreachable.
+ */
+void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
+{
+ int reg;
+ int src;
+
+ switch ((extra >> 13) & 0x7) {
+ case 3: /* 2nd most common */
+ if (put_fp_value ((extra >> 7)&7 , opcode, extra) < 0) {
+ FAIL(1);
+ return;
+
+ }
+ return;
+ case 6:
+ case 7:
+ {
+ uae_u32 ad, list = 0;
+ int incr = 0;
+ if (extra & 0x2000) {
+ /* FMOVEM FPP->memory */
+ switch ((extra >> 11) & 3) { /* Get out early if failure */
+ case 0:
+ case 2:
+ break;
+ case 1:
+ case 3:
+ default:
+ FAIL(1); return;
+ }
+ ad=get_fp_ad (opcode, &ad);
+ /* NOTE(review): ad is a uae_u32, so this test can never be true as
+ * written and the branch below is effectively dead. Confirm what
+ * get_fp_ad returns on failure before changing it. */
+ if (ad<0) {
+ abort();
+ m68k_setpc (m68k_getpc () - 4);
+ fpuop_illg (opcode,extra);
+ return;
+ }
+ switch ((extra >> 11) & 3) {
+ case 0: /* static pred */
+ list = extra & 0xff;
+ incr = -1;
+ break;
+ case 2: /* static postinc */
+ list = extra & 0xff;
+ incr = 1;
+ break;
+ case 1: /* dynamic pred */
+ case 3: /* dynamic postinc */
+ abort();
+ }
+ /* Each selected register is spilled to temp_fp in extended format and
+ * then copied to 68k memory as two longs and one word. */
+ if (incr < 0) { /* Predecrement */
+ for (reg = 7; reg >= 0; reg--) {
+ if (list & 0x80) {
+ fmov_ext_mr((uintptr)temp_fp,reg);
+ delay;
+ sub_l_ri(ad,4);
+ mov_l_rm(S2,(uintptr)temp_fp);
+ writelong_clobber(ad,S2,S3);
+ sub_l_ri(ad,4);
+ mov_l_rm(S2,(uintptr)temp_fp+4);
+ writelong_clobber(ad,S2,S3);
+ sub_l_ri(ad,4);
+ mov_w_rm(S2,(uintptr)temp_fp+8);
+ writeword_clobber(ad,S2,S3);
+ }
+ list <<= 1;
+ }
+ }
+ else { /* Postincrement */
+ for (reg = 0; reg < 8; reg++) {
+ if (list & 0x80) {
+ fmov_ext_mr((uintptr)temp_fp,reg);
+ delay;
+ mov_w_rm(S2,(uintptr)temp_fp+8);
+ writeword_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ mov_l_rm(S2,(uintptr)temp_fp+4);
+ writelong_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ mov_l_rm(S2,(uintptr)temp_fp);
+ writelong_clobber(ad,S2,S3);
+ add_l_ri(ad,4);
+ }
+ list <<= 1;
+ }
+ }
+ /* (An)+ and -(An) addressing: write the updated address back to An */
+ if ((opcode & 0x38) == 0x18)
+ mov_l_rr((opcode & 7)+8,ad);
+ if ((opcode & 0x38) == 0x20)
+ mov_l_rr((opcode & 7)+8,ad);
+ } else {
+ /* FMOVEM memory->FPP */
+
+ switch ((extra >> 11) & 3) { /* Get out early if failure */
+ case 0:
+ case 2:
+ break;
+ case 1:
+ case 3:
+ default:
+ FAIL(1); return;
+ }
+ ad=get_fp_ad (opcode, &ad);
+ /* NOTE(review): same always-false unsigned comparison as in the
+ * FPP->memory direction above. */
+ if (ad<0) {
+ abort();
+ m68k_setpc (m68k_getpc () - 4);
+ write_log("no ad\n");
+ fpuop_illg (opcode,extra);
+ return;
+ }
+ switch ((extra >> 11) & 3) {
+ case 0: /* static pred */
+ list = extra & 0xff;
+ incr = -1;
+ break;
+ case 2: /* static postinc */
+ list = extra & 0xff;
+ incr = 1;
+ break;
+ case 1: /* dynamic pred */
+ case 3: /* dynamic postinc */
+ abort();
+ }
+
+ if (incr < 0) {
+ // not reached
+ for (reg = 7; reg >= 0; reg--) {
+ if (list & 0x80) {
+ sub_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uintptr)(temp_fp),S2);
+ sub_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uintptr)(temp_fp)+4,S2);
+ sub_l_ri(ad,4);
+ readword(ad,S2,S3);
+ mov_w_mr(((uintptr)temp_fp)+8,S2);
+ delay2;
+ fmov_ext_rm(reg,(uintptr)(temp_fp));
+ }
+ list <<= 1;
+ }
+ }
+ else {
+ for (reg = 0; reg < 8; reg++) {
+ if (list & 0x80) {
+ readword(ad,S2,S3);
+ mov_w_mr(((uintptr)temp_fp)+8,S2);
+ add_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uintptr)(temp_fp)+4,S2);
+ add_l_ri(ad,4);
+ readlong(ad,S2,S3);
+ mov_l_mr((uintptr)(temp_fp),S2);
+ add_l_ri(ad,4);
+ delay2;
+ fmov_ext_rm(reg,(uintptr)(temp_fp));
+ }
+ list <<= 1;
+ }
+ }
+ /* (An)+ and -(An) addressing: write the updated address back to An */
+ if ((opcode & 0x38) == 0x18)
+ mov_l_rr((opcode & 7)+8,ad);
+ if ((opcode & 0x38) == 0x20)
+ mov_l_rr((opcode & 7)+8,ad);
+ }
+ }
+ return;
+
+ case 4:
+ case 5: /* rare */
+ if ((opcode & 0x30) == 0) {
+ if (extra & 0x2000) {
+ /* NOTE(review): when only the 0x1000 bit is set this branch emits
+ * code and then falls out of the if-chain into the trailing
+ * FAIL(1) below -- confirm whether a return is missing here. */
+ if (extra & 0x1000) {
+#if HANDLE_FPCR
+ mov_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_mode);
+ or_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_precision);
+#else
+ FAIL(1);
+ return;
+#endif
+ }
+ if (extra & 0x0800) {
+ FAIL(1);
+ return;
+ }
+ if (extra & 0x0400) {
+ mov_l_rm(opcode & 15,(uintptr)&fpu.instruction_address);
+ return;
+ }
+ } else {
+ // gb-- moved here so that we may FAIL() without generating any code
+ if (extra & 0x0800) {
+ // set_fpsr(m68k_dreg (regs, opcode & 15));
+ FAIL(1);
+ return;
+ }
+ if (extra & 0x1000) {
+#if HANDLE_FPCR
+#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION)
+ FAIL(1);
+ return;
+#endif
+ mov_l_rr(S1,opcode & 15);
+ mov_l_rr(S2,opcode & 15);
+ and_l_ri(S1,FPCR_ROUNDING_PRECISION);
+ and_l_ri(S2,FPCR_ROUNDING_MODE);
+ mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1);
+ mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2);
+#else
+ FAIL(1);
+ return;
+#endif
+// return; gb-- FMOVEM could also operate on fpiar
+ }
+ if (extra & 0x0400) {
+ mov_l_mr((uintptr)&fpu.instruction_address,opcode & 15);
+// return; gb-- we have to process all FMOVEM bits before returning
+ }
+ return;
+ }
+ } else if ((opcode & 0x3f) == 0x3c) {
+ /* Immediate source operand: values are fetched from the instruction
+ * stream with comp_get_ilong */
+ if ((extra & 0x2000) == 0) {
+ // gb-- moved here so that we may FAIL() without generating any code
+ if (extra & 0x0800) {
+ FAIL(1);
+ return;
+ }
+ if (extra & 0x1000) {
+ uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4);
+#if HANDLE_FPCR
+#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION)
+ FAIL(1);
+ return;
+#endif
+// mov_l_mi((uintptr)&regs.fpcr,val);
+ mov_l_ri(S1,val);
+ mov_l_ri(S2,val);
+ and_l_ri(S1,FPCR_ROUNDING_PRECISION);
+ and_l_ri(S2,FPCR_ROUNDING_MODE);
+ mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1);
+ mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2);
+#else
+ FAIL(1);
+ return;
+#endif
+// return; gb-- FMOVEM could also operate on fpiar
+ }
+ if (extra & 0x0400) {
+ uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4);
+ mov_l_mi((uintptr)&fpu.instruction_address,val);
+// return; gb-- we have to process all FMOVEM bits before returning
+ }
+ return;
+ }
+ FAIL(1);
+ return;
+ } else if (extra & 0x2000) {
+ FAIL(1);
+ return;
+ } else {
+ FAIL(1);
+ return;
+ }
+ FAIL(1);
+ return;
+
+ case 0:
+ case 2: /* Extremely common */
+ reg = (extra >> 7) & 7;
+ if ((extra & 0xfc00) == 0x5c00) {
+ /* FMOVECR: load a constant selected by extra & 0x7f into reg */
+ switch (extra & 0x7f) {
+ case 0x00:
+ fmov_pi(reg);
+ break;
+ case 0x0b:
+ fmov_log10_2(reg);
+ break;
+ case 0x0c:
+#if USE_LONG_DOUBLE
+ fmov_ext_rm(reg,(uintptr)&const_e);
+#else
+ fmov_rm(reg,(uintptr)&const_e);
+#endif
+ break;
+ case 0x0d:
+ fmov_log2_e(reg);
+ break;
+ case 0x0e:
+#if USE_LONG_DOUBLE
+ fmov_ext_rm(reg,(uintptr)&const_log10_e);
+#else
+ fmov_rm(reg,(uintptr)&const_log10_e);
+#endif
+ break;
+ case 0x0f:
+ fmov_0(reg);
+ break;
+ case 0x30:
+ fmov_loge_2(reg);
+ break;
+ case 0x31:
+#if USE_LONG_DOUBLE
+ fmov_ext_rm(reg,(uintptr)&const_loge_10);
+#else
+ fmov_rm(reg,(uintptr)&const_loge_10);
+#endif
+ break;
+ case 0x32:
+ fmov_1(reg);
+ break;
+ /* Powers of ten come from power10[]; offsets 0x3c..0x3f exist
+ * only in the long double build */
+ case 0x33:
+ case 0x34:
+ case 0x35:
+ case 0x36:
+ case 0x37:
+ case 0x38:
+ case 0x39:
+ case 0x3a:
+ case 0x3b:
+#if USE_LONG_DOUBLE
+ case 0x3c:
+ case 0x3d:
+ case 0x3e:
+ case 0x3f:
+ fmov_ext_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32));
+#else
+ fmov_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32));
+#endif
+ break;
+ default:
+ /* This is not valid, so we fail */
+ FAIL(1);
+ return;
+ }
+ return;
+ }
+
+ switch (extra & 0x7f) {
+ case 0x00: /* FMOVE */
+ case 0x40: /* Explicit rounding. This is just a quick fix. Same
+ * for all other cases that have three choices */
+ case 0x44:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmov_rr(reg,src);
+ MAKE_FPSR (src);
+ break;
+ case 0x01: /* FINT */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ case 0x02: /* FSINH */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x03: /* FINTRZ */
+#if USE_X86_FPUCW
+ /* If we have control over the CW, we can do this */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ mov_l_ri(S1,16); /* Switch to "round to zero" mode */
+ fldcw_m_indexed(S1,(uintptr)x86_fpucw);
+
+ frndint_rr(reg,src);
+
+ /* restore control word */
+ mov_l_rm(S1,(uintptr)&regs.fpcr);
+ and_l_ri(S1,0x000000f0);
+ fldcw_m_indexed(S1,(uintptr)x86_fpucw);
+
+ MAKE_FPSR (reg);
+ break;
+#endif
+ FAIL(1);
+ return;
+ break;
+ case 0x04: /* FSQRT */
+ case 0x41:
+ case 0x45:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fsqrt_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x06: /* FLOGNP1 */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x08: /* FETOXM1 */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x09: /* FTANH */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x0a: /* FATAN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x0c: /* FASIN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x0d: /* FATANH */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x0e: /* FSIN */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fsin_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x0f: /* FTAN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x10: /* FETOX */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fetox_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x11: /* FTWOTOX */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ ftwotox_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x12: /* FTENTOX */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x14: /* FLOGN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x15: /* FLOG10 */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x16: /* FLOG2 */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ flog2_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x18: /* FABS */
+ case 0x58:
+ case 0x5c:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fabs_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x19: /* FCOSH */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x1a: /* FNEG */
+ case 0x5a:
+ case 0x5e:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fneg_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x1c: /* FACOS */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x1d: /* FCOS */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fcos_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x1e: /* FGETEXP */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x1f: /* FGETMAN */
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x20: /* FDIV */
+ case 0x60:
+ case 0x64:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fdiv_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x21: /* FMOD */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ frem_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x22: /* FADD */
+ case 0x62:
+ case 0x66:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fadd_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x23: /* FMUL */
+ case 0x63:
+ case 0x67:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmul_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x24: /* FSGLDIV */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fdiv_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x25: /* FREM */
+ // gb-- disabled because the quotient byte must be computed
+ // otherwise, free rotation in ClarisWorks doesn't work.
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ frem1_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x26: /* FSCALE */
+ dont_care_fflags();
+ FAIL(1);
+ return;
+ break;
+ case 0x27: /* FSGLMUL */
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmul_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x28: /* FSUB */
+ case 0x68:
+ case 0x6c:
+ dont_care_fflags();
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fsub_rr(reg,src);
+ MAKE_FPSR (reg);
+ break;
+ case 0x30: /* FSINCOS */
+ case 0x31:
+ case 0x32:
+ case 0x33:
+ case 0x34:
+ case 0x35:
+ case 0x36:
+ case 0x37:
+ FAIL(1);
+ return;
+ dont_care_fflags();
+ break;
+ case 0x38: /* FCMP */
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmov_rr(FP_RESULT,reg);
+ fsub_rr(FP_RESULT,src); /* Right way? */
+ break;
+ case 0x3a: /* FTST */
+ src=get_fp_value (opcode, extra);
+ if (src < 0) {
+ FAIL(1); /* Illegal instruction */
+ return;
+ }
+ fmov_rr(FP_RESULT,src);
+ break;
+ default:
+ FAIL(1);
+ return;
+ break;
+ }
+ return;
+ }
+ /* Only reached for extension-word class 1, which has no case above */
+ m68k_setpc (m68k_getpc () - 4);
+ fpuop_illg (opcode,extra);
+}
--- /dev/null
+/*
+ * compiler/compemu_support.cpp - Core dynamic translation engine
+ *
+ * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
+ *
+ * Adaptation for Basilisk II and improvements, copyright 2000-2005
+ * Gwenole Beauchesne
+ *
+ * Basilisk II (C) 1997-2008 Christian Bauer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#if !REAL_ADDRESSING && !DIRECT_ADDRESSING
+#error "Only Real or Direct Addressing is supported with the JIT Compiler"
+#endif
+
+#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
+#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
+#endif
+
+/* NOTE: support for AMD64 assumes translation cache and other code
+ * buffers are allocated into a 32-bit address space because (i) B2/JIT
+ * code is not 64-bit clean and (ii) it's faster to resolve branches
+ * that way.
+ */
+#if !defined(__i386__) && !defined(__x86_64__)
+#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
+#endif
+
+#define USE_MATCH 0
+
+/* kludge for Brian, so he can compile under MSVC++ */
+#define USE_NORMAL_CALLING_CONVENTION 0
+
+#ifndef WIN32
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "sysdeps.h"
+#include "cpu_emulation.h"
+#include "main.h"
+#include "prefs.h"
+#include "user_strings.h"
+#include "vm_alloc.h"
+
+#include "m68k.h"
+#include "memory.h"
+#include "readcpu.h"
+#include "newcpu.h"
+#include "comptbl.h"
+#include "compiler/compemu.h"
+#include "fpu/fpu.h"
+#include "fpu/flags.h"
+
+#define DEBUG 1
+#include "debug.h"
+
+#ifdef ENABLE_MON
+#include "mon.h"
+#endif
+
+#ifndef WIN32
+#define PROFILE_COMPILE_TIME 1
+#define PROFILE_UNTRANSLATED_INSNS 1
+#endif
+
+#if defined(__x86_64__) && 0
+#define RECORD_REGISTER_USAGE 1
+#endif
+
+#ifdef WIN32
+#undef write_log
+#define write_log dummy_write_log
+/* Does nothing; write_log is redirected here by the #define just above. */
+static void dummy_write_log(const char *, ...)
+{
+}
+#endif
+
+#if JIT_DEBUG
+/*
+ * With JIT_DEBUG enabled, replace abort() by a macro that prints the
+ * source file and line of the call to stderr and then terminates through
+ * exit(EXIT_FAILURE).
+ */
+#undef abort
+#define abort() do { \
+ fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
+ exit(EXIT_FAILURE); \
+} while (0)
+#endif
+
+#if RECORD_REGISTER_USAGE
+static uint64 reg_count[16];
+static int reg_count_local[16];
+
+/*
+ * qsort()-style comparator over int indices into reg_count[].
+ *
+ * Orders the indices by descending reg_count value: the result is
+ * positive when *bp has the larger count, negative when *ap has the
+ * larger count, and zero when both counts are equal.
+ */
+static int reg_count_compare(const void *ap, const void *bp)
+{
+    const int a = *((const int *)ap);
+    const int b = *((const int *)bp);
+    const uint64 count_a = reg_count[a];
+    const uint64 count_b = reg_count[b];
+
+    /* Compare rather than subtract: the difference of two 64-bit unsigned
+     * counters truncated to int can come out with the wrong sign. */
+    return (count_b > count_a) - (count_b < count_a);
+}
+#endif
+
+#if PROFILE_COMPILE_TIME
+#include <time.h>
+static uae_u32 compile_count = 0;
+static clock_t compile_time = 0;
+static clock_t emul_start_time = 0;
+static clock_t emul_end_time = 0;
+#endif
+
+#if PROFILE_UNTRANSLATED_INSNS
+const int untranslated_top_ten = 20;
+static uae_u32 raw_cputbl_count[65536] = { 0, };
+static uae_u16 opcode_nums[65536];
+
+/*
+ * qsort()-style comparator over uae_u16 opcode numbers.
+ *
+ * Orders opcodes by descending raw_cputbl_count value. The result is
+ * positive when e1 has the smaller count, negative when it has the larger
+ * count, and zero when both are equal. The previous version returned only
+ * 0 or 1, which does not describe a consistent ordering to a sort routine.
+ */
+static int untranslated_compfn(const void *e1, const void *e2)
+{
+    const uae_u32 count1 = raw_cputbl_count[*(const uae_u16 *)e1];
+    const uae_u32 count2 = raw_cputbl_count[*(const uae_u16 *)e2];
+
+    return (count1 < count2) - (count1 > count2);
+}
+#endif
+
+static compop_func *compfunctbl[65536];
+static compop_func *nfcompfunctbl[65536];
+static cpuop_func *nfcpufunctbl[65536];
+uae_u8* comp_pc_p;
+
+// From newcpu.cpp
+extern bool quit_program;
+
+// gb-- Extra data for Basilisk II/JIT
+#if JIT_DEBUG
+static bool JITDebug = false; // Enable runtime disassemblers through mon?
+#else
+const bool JITDebug = false; // Don't use JIT debug mode at all
+#endif
+#if USE_INLINING
+static bool follow_const_jumps = true; // Flag: translation through constant jumps
+#else
+const bool follow_const_jumps = false;
+#endif
+
+const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
+static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
+static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
+static bool lazy_flush = true; // Flag: lazy translation cache invalidation
+static bool avoid_fpu = true; // Flag: compile FPU instructions ?
+static bool have_cmov = false; // target has CMOV instructions ?
+static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
+static bool have_rat_stall = true; // target has partial register stalls ?
+const bool tune_alignment = true; // Tune code alignments for running CPU ?
+const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
+static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
+static int align_loops = 32; // Align the start of loops
+static int align_jumps = 32; // Align the start of jumps
+static int optcount[10] = {
+ 10, // How often a block has to be executed before it is translated
+ 0, // How often to use naive translation
+ 0, 0, 0, 0,
+ -1, -1, -1, -1
+};
+
+/*
+ * Per-opcode properties; the prop[] table below holds one entry for each
+ * of the 65536 opcode values.
+ */
+struct op_properties {
+ uae_u8 use_flags; /* presumably the flags the opcode reads -- TODO confirm */
+ uae_u8 set_flags; /* presumably the flags the opcode writes -- TODO confirm */
+ uae_u8 is_addx; /* NOTE(review): meaning not visible here; confirm at the table setup */
+ uae_u8 cflow; /* control-flow bits, tested against fl_end_block, fl_const_jump and fl_trap */
+};
+static op_properties prop[65536];
+
+/* Nonzero when the opcode's control-flow bits include fl_end_block. */
+static inline int end_block(uae_u32 opcode)
+{
+    const op_properties &info = prop[opcode];
+
+    return info.cflow & fl_end_block;
+}
+
+/* True when the opcode's control-flow value is exactly fl_const_jump. */
+static inline bool is_const_jump(uae_u32 opcode)
+{
+    const op_properties &info = prop[opcode];
+
+    return fl_const_jump == info.cflow;
+}
+
+/* True when the opcode's control-flow bits include fl_trap. */
+static inline bool may_trap(uae_u32 opcode)
+{
+    const op_properties &info = prop[opcode];
+
+    return (info.cflow & fl_trap) != 0;
+}
+
+/*
+ * Map an opcode value to its table index. With HAVE_GET_WORD_UNSWAPPED
+ * defined, the two low bytes of f are exchanged (higher bits are
+ * dropped); otherwise f is returned unchanged.
+ */
+static inline unsigned int cft_map (unsigned int f)
+{
+#ifdef HAVE_GET_WORD_UNSWAPPED
+    const unsigned int low = (f >> 8) & 255;
+    const unsigned int high = (f & 255) << 8;
+
+    return low | high;
+#else
+    return f;
+#endif
+}
+
+uae_u8* start_pc_p;
+uae_u32 start_pc;
+uae_u32 current_block_pc_p;
+static uintptr current_block_start_target;
+uae_u32 needed_flags;
+static uintptr next_pc_p;
+static uintptr taken_pc_p;
+static int branch_cc;
+static int redo_current_block;
+
+int segvcount=0;
+int soft_flush_count=0;
+int hard_flush_count=0;
+int checksum_count=0;
+static uae_u8* current_compile_p=NULL;
+static uae_u8* max_compile_start;
+static uae_u8* compiled_code=NULL;
+static uae_s32 reg_alloc_run;
+const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
+static uae_u8* popallspace=NULL;
+
+void* pushall_call_handler=NULL;
+static void* popall_do_nothing=NULL;
+static void* popall_exec_nostats=NULL;
+static void* popall_execute_normal=NULL;
+static void* popall_cache_miss=NULL;
+static void* popall_recompile_block=NULL;
+static void* popall_check_checksum=NULL;
+
+/* The 68k only ever executes from even addresses. So right now, we
+ * waste half the entries in this array
+ * UPDATE: We now use those entries to store the start of the linked
+ * lists that we maintain for each hash result.
+ */
+cacheline cache_tags[TAGSIZE];
+int letit=0;
+blockinfo* hold_bi[MAX_HOLD_BI];
+blockinfo* active;
+blockinfo* dormant;
+
+/* 68040 */
+extern struct cputbl op_smalltbl_0_nf[];
+extern struct comptbl op_smalltbl_0_comp_nf[];
+extern struct comptbl op_smalltbl_0_comp_ff[];
+
+/* 68020 + 68881 */
+extern struct cputbl op_smalltbl_1_nf[];
+
+/* 68020 */
+extern struct cputbl op_smalltbl_2_nf[];
+
+/* 68010 */
+extern struct cputbl op_smalltbl_3_nf[];
+
+/* 68000 */
+extern struct cputbl op_smalltbl_4_nf[];
+
+/* 68000 slow but compatible. */
+extern struct cputbl op_smalltbl_5_nf[];
+
+static void flush_icache_hard(int n);
+static void flush_icache_lazy(int n);
+static void flush_icache_none(int n);
+void (*flush_icache)(int n) = flush_icache_none;
+
+
+
+bigstate live;
+smallstate empty_ss;
+smallstate default_ss;
+static int optlev;
+
+static int writereg(int r, int size);
+static void unlock2(int r);
+static void setlock(int r);
+static int readreg_specific(int r, int size, int spec);
+static int writereg_specific(int r, int size, int spec);
+static void prepare_for_call_1(void);
+static void prepare_for_call_2(void);
+static void align_target(uae_u32 a);
+
+static uae_s32 nextused[VREGS];
+
+uae_u32 m68k_pc_offset;
+
+/* Some arithmetic operations can be optimized away if the operands
+ * are known to be constant. But that's only a good idea when the
+ * side effects they would have on the flags are not important. This
+ * variable indicates whether we need the side effects or not.
+ */
+uae_u32 needflags=0;
+
+/* Flag handling is complicated.
+ *
+ * x86 instructions create flags, which quite often are exactly what we
+ * want. So at times, the "68k" flags are actually in the x86 flags.
+ *
+ * Then again, sometimes we do x86 instructions that clobber the x86
+ * flags, but don't represent a corresponding m68k instruction. In that
+ * case, we have to save them.
+ *
+ * We used to save them to the stack, but now store them back directly
+ * into the regflags.cznv of the traditional emulation. Thus some odd
+ * names.
+ *
+ * So flags can be in either of two places (used to be three; boy were
+ * things complicated back then!); And either place can contain either
+ * valid flags or invalid trash (and on the stack, there was also the
+ * option of "nothing at all", now gone). A couple of variables keep
+ * track of the respective states.
+ *
+ * To make things worse, we might or might not be interested in the flags.
+ * By default, we are, but a call to dont_care_flags can change that
+ * until the next call to live_flags. If we are not, pretty much whatever
+ * is in the register and/or the native flags is seen as valid.
+ */
+
+/*
+ * Return the head of the blockinfo list for cache line cl. The list head
+ * lives in the cache_tags entry that follows the line's own entry.
+ */
+static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
+{
+    const uae_u32 head_slot = cl + 1;
+
+    return cache_tags[head_slot].bi;
+}
+
+/*
+ * Look up the blockinfo whose pc_p equals addr by walking the list of the
+ * cache line addr maps to. Returns NULL when no block matches.
+ */
+static __inline__ blockinfo* get_blockinfo_addr(void* addr)
+{
+    for (blockinfo* candidate = get_blockinfo(cacheline(addr));
+         candidate;
+         candidate = candidate->next_same_cl) {
+        if (candidate->pc_p == addr)
+            return candidate;
+    }
+    return NULL;
+}
+
+
+/*******************************************************************
+ * All sorts of list related functions for all of the lists *
+ *******************************************************************/
+
+/*
+ * Unlink bi from the list of its cache line, then repoint the line's
+ * handler: to the new list head's handler_to_use, or to
+ * popall_execute_normal when the list is now empty.
+ */
+static __inline__ void remove_from_cl_list(blockinfo* bi)
+{
+    const uae_u32 line = cacheline(bi->pc_p);
+
+    if (bi->prev_same_cl_p)
+        *(bi->prev_same_cl_p) = bi->next_same_cl;
+    if (bi->next_same_cl)
+        bi->next_same_cl->prev_same_cl_p = bi->prev_same_cl_p;
+
+    /* Read the head only after unlinking: bi may have been the head. */
+    blockinfo* head = cache_tags[line + 1].bi;
+    cache_tags[line].handler = head ? head->handler_to_use
+                                    : (cpuop_func *)popall_execute_normal;
+}
+
+/* Unlink bi from the next/prev_p chain it is currently on. */
+static __inline__ void remove_from_list(blockinfo* bi)
+{
+    blockinfo** const back_link = bi->prev_p;
+
+    if (back_link)
+        *back_link = bi->next;
+    if (bi->next)
+        bi->next->prev_p = back_link;
+}
+
+/* Take bi off both its next/prev_p chain and its cache-line list. */
+static __inline__ void remove_from_lists(blockinfo* bi)
+{
+    remove_from_list(bi);       /* next/prev_p chain first */
+    remove_from_cl_list(bi);    /* then the cache-line list */
+}
+
+/*
+ * Insert bi at the head of the list of the cache line its pc_p maps to,
+ * and make the line's handler bi's handler_to_use.
+ */
+static __inline__ void add_to_cl_list(blockinfo* bi)
+{
+    const uae_u32 line = cacheline(bi->pc_p);
+    blockinfo* const old_head = cache_tags[line + 1].bi;
+
+    if (old_head)
+        old_head->prev_same_cl_p = &(bi->next_same_cl);
+    bi->next_same_cl = old_head;
+
+    cache_tags[line + 1].bi = bi;
+    bi->prev_same_cl_p = &(cache_tags[line + 1].bi);
+
+    cache_tags[line].handler = bi->handler_to_use;
+}
+
+/* Move bi to the head of its cache-line list by unlinking and re-adding it. */
+static __inline__ void raise_in_cl_list(blockinfo* bi)
+{
+    remove_from_cl_list(bi);
+    add_to_cl_list(bi);     /* insertion always happens at the head */
+}
+
+/* Push bi onto the front of the global "active" list. */
+static __inline__ void add_to_active(blockinfo* bi)
+{
+    blockinfo* const old_head = active;
+
+    if (old_head)
+        old_head->prev_p = &(bi->next);
+    bi->next = old_head;
+
+    bi->prev_p = &active;
+    active = bi;
+}
+
+/* Push bi onto the front of the global "dormant" list. */
+static __inline__ void add_to_dormant(blockinfo* bi)
+{
+    blockinfo* const old_head = dormant;
+
+    if (old_head)
+        old_head->prev_p = &(bi->next);
+    bi->next = old_head;
+
+    bi->prev_p = &dormant;
+    dormant = bi;
+}
+
+/*
+ * Unlink dependency d from whatever dependency list it is on and clear
+ * its links. Safe to call on an entry that is not linked.
+ */
+static __inline__ void remove_dep(dependency* d)
+{
+    dependency** const back_link = d->prev_p;
+    dependency* const successor = d->next;
+
+    if (back_link)
+        *back_link = successor;
+    if (successor)
+        successor->prev_p = back_link;
+
+    d->next = NULL;
+    d->prev_p = NULL;
+}
+
+/* The code of this block is about to be discarded, so neither of its two
+   dependency slots may stay linked into another block's list */
+static __inline__ void remove_deps(blockinfo* bi)
+{
+    for (int slot = 0; slot < 2; slot++)
+        remove_dep(&(bi->dep[slot]));
+}
+
+/*
+ * Patch the 32-bit displacement stored at d->jmp_off so that it targets
+ * handler a. The displacement is taken relative to the address just past
+ * the 4-byte field itself.
+ */
+static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
+{
+    const uintptr field_end = (uintptr)d->jmp_off + 4;
+
+    *(d->jmp_off) = (uintptr)a - field_end;
+}
+
+/********************************************************************
+ * Soft flush handling support functions *
+ ********************************************************************/
+
+/*
+ * Change bi's direct_handler_to_use to dh. When the value actually
+ * changes, every dependency on bi's deplist that has a recorded jump
+ * offset is re-patched to point at dh first.
+ */
+static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
+{
+    if (dh == bi->direct_handler_to_use)
+        return;
+
+    for (dependency* dep = bi->deplist; dep; dep = dep->next) {
+        if (dep->jmp_off)
+            adjust_jmpdep(dep, dh);
+    }
+    bi->direct_handler_to_use = dh;
+}
+
+/*
+ * Reset bi to the "invalid" state: optimisation level 0, execution count
+ * restarted from optcount[0]-1, no compiled handlers, all flags needed,
+ * and both dependency slots cleared and unlinked. Callers jumping
+ * directly into the block are redirected to bi->direct_pen.
+ */
+static __inline__ void invalidate_block(blockinfo* bi)
+{
+    bi->optlevel = 0;
+    bi->count = optcount[0] - 1;
+    bi->handler = NULL;
+    bi->handler_to_use = (cpuop_func *)popall_execute_normal;
+    bi->direct_handler = NULL;
+    set_dhtu(bi, bi->direct_pen);
+    bi->needed_flags = 0xff;
+    bi->status = BI_INVALID;
+
+    for (int slot = 0; slot < 2; ++slot) {
+        dependency* dep = &bi->dep[slot];
+
+        dep->jmp_off = NULL;
+        dep->target = NULL;
+    }
+    remove_deps(bi);
+}
+
+/*
+ * Record in dependency slot i of bi that the jump displacement at jmpaddr
+ * refers to the block found at address target, and push that slot onto
+ * the front of the target block's deplist.
+ */
+static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
+{
+    blockinfo* tbi = get_blockinfo_addr((void*)(uintptr)target);
+    dependency* const dep = &(bi->dep[i]);
+
+    Dif(!tbi) {
+        write_log("Could not create jmpdep!\n");
+        abort();
+    }
+
+    dep->jmp_off = jmpaddr;
+    dep->source = bi;
+    dep->target = tbi;
+
+    /* link in ahead of the current list head */
+    dep->next = tbi->deplist;
+    if (dep->next)
+        dep->next->prev_p = &(dep->next);
+    dep->prev_p = &(tbi->deplist);
+    tbi->deplist = dep;
+}
+
+/*
+ * Flag bi as BI_NEED_RECOMP. Direct jumps into the block go to
+ * bi->direct_pen again, and its handlers are reset to
+ * popall_execute_normal; the same is done for the cache line's handler
+ * when bi is the head of that line's list.
+ */
+static __inline__ void block_need_recompile(blockinfo * bi)
+{
+    const uae_u32 line = cacheline(bi->pc_p);
+    cpuop_func * const fallback = (cpuop_func *)popall_execute_normal;
+
+    set_dhtu(bi, bi->direct_pen);
+    bi->direct_handler = bi->direct_pen;
+
+    bi->handler_to_use = fallback;
+    bi->handler = fallback;
+    if (cache_tags[line + 1].bi == bi)
+        cache_tags[line].handler = fallback;
+
+    bi->status = BI_NEED_RECOMP;
+}
+
+/*
+ * Walk the dependencies recorded on bi and, for each one that has a jump
+ * offset, handle its source block:
+ *   - BI_ACTIVE or BI_NEED_CHECK: flag it for recompilation and recurse
+ *     into its own callers;
+ *   - BI_COMPILING: set redo_current_block;
+ *   - anything else: leave it alone.
+ */
+static __inline__ void mark_callers_recompile(blockinfo * bi)
+{
+    dependency *following;
+
+    for (dependency *dep = bi->deplist; dep; dep = following) {
+        /* Fetch the successor first: flagging a block for recompilation
+         * takes entries off the lists */
+        following = dep->next;
+
+        if (!dep->jmp_off)
+            continue;
+
+        blockinfo *caller = dep->source;
+
+        Dif(caller->status == BI_INVALID) {
+            // write_log("invalid block in dependency list\n"); // FIXME?
+            // abort();
+        }
+
+        if (caller->status == BI_ACTIVE || caller->status == BI_NEED_CHECK) {
+            block_need_recompile(caller);
+            mark_callers_recompile(caller);
+        }
+        else if (caller->status == BI_COMPILING) {
+            redo_current_block = 1;
+        }
+        /* BI_NEED_RECOMP and every other status: nothing to do */
+    }
+}
+
+/*
+ * Return the blockinfo for addr, creating one when none exists yet.
+ *
+ * An existing block is returned unchanged. Otherwise the first free entry
+ * of hold_bi[] is taken, bound to addr, invalidated, and linked into the
+ * active list and the cache-line list. Aborts when hold_bi[] has no entry
+ * left.
+ *
+ * setstate is accepted for interface compatibility; this function does
+ * not use it.
+ */
+static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
+{
+    blockinfo* bi = get_blockinfo_addr(addr);
+
+    (void)setstate;
+
+    if (!bi) {
+        for (int i = 0; i < MAX_HOLD_BI; i++) {
+            if (!hold_bi[i])
+                continue;
+
+            bi = hold_bi[i];
+            hold_bi[i] = NULL;
+            bi->pc_p = (uae_u8 *)addr;
+            invalidate_block(bi);
+            add_to_active(bi);
+            add_to_cl_list(bi);
+            break;
+        }
+    }
+    if (!bi) {
+        write_log("Looking for blockinfo, can't find free one\n");
+        abort();
+    }
+    return bi;
+}
+
+static void prepare_block(blockinfo* bi);
+
+/* Management of blockinfos.
+
+ A blockinfo struct is allocated whenever a new block has to be
+ compiled. If the list of free blockinfos is empty, we allocate a new
+ pool of blockinfos and link the newly created blockinfos together
+ into the list of free blockinfos. Otherwise, we simply pop a structure
+ off the free list.
+
+ Blockinfos are lazily deallocated, i.e. chained together in the
+ list of free blockinfos whenever a translation cache flush (hard or
+ soft) request occurs.
+*/
+
+/*
+ * Pool allocator for T objects. Memory is obtained in pools of kPoolSize
+ * chunks; free chunks are kept on a singly linked list threaded through
+ * each chunk's own "next" member, so T must provide such a member.
+ */
+template< class T >
+class LazyBlockAllocator
+{
+    enum {
+        kPoolSize = 1 + 4096 / sizeof(T)    /* chunks per pool */
+    };
+
+    struct Pool {
+        T chunk[kPoolSize];
+        Pool * next;        /* next pool in the chain of all pools */
+    };
+
+    Pool * mPools;          /* every pool allocated so far */
+    T * mChunks;            /* head of the free-chunk list */
+
+public:
+    LazyBlockAllocator()
+        : mPools(0)
+        , mChunks(0)
+    { }
+    ~LazyBlockAllocator();
+
+    T * acquire();
+    void release(T * const);
+};
+
+/* Free every pool this allocator has obtained. */
+template< class T >
+LazyBlockAllocator<T>::~LazyBlockAllocator()
+{
+    for (Pool * pool = mPools; pool; ) {
+        Pool * const following = pool->next;   /* read before freeing */
+        free(pool);
+        pool = following;
+    }
+}
+
+/*
+ * Hand out one chunk from the free list. When the list is empty a new
+ * pool is allocated first and all of its chunks are pushed onto the list.
+ *
+ * Aborts when the pool cannot be allocated; the callers in this file
+ * dereference the result unconditionally, so a NULL return could not be
+ * handled downstream.
+ */
+template< class T >
+T * LazyBlockAllocator<T>::acquire()
+{
+    if (!mChunks) {
+        // There is no chunk left, allocate a new pool and link the
+        // chunks into the free list
+        Pool * newPool = (Pool *)malloc(sizeof(Pool));
+        if (!newPool) {
+            write_log("LazyBlockAllocator: out of memory\n");
+            abort();
+        }
+        for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
+            chunk->next = mChunks;
+            mChunks = chunk;
+        }
+        newPool->next = mPools;
+        mPools = newPool;
+    }
+    T * chunk = mChunks;
+    mChunks = chunk->next;
+    return chunk;
+}
+
+/* Return a chunk to the allocator by pushing it onto the free list. */
+template< class T >
+void LazyBlockAllocator<T>::release(T * const chunk)
+{
+    T * const old_head = mChunks;
+
+    chunk->next = old_head;
+    mChunks = chunk;
+}
+
+/*
+ * Allocator that carves T objects directly out of the region addressed by
+ * current_compile_p, advancing that pointer by sizeof(T) per object.
+ * release() does nothing.
+ */
+template< class T >
+class HardBlockAllocator
+{
+public:
+    T * acquire()
+    {
+        T * const object = (T *)current_compile_p;
+
+        current_compile_p += sizeof(T);
+        return object;
+    }
+
+    void release(T * const)
+    {
+        // Deallocated on invalidation
+    }
+};
+
+#if USE_SEPARATE_BIA
+static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
+static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
+#else
+static HardBlockAllocator<blockinfo> BlockInfoAllocator;
+static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
+#endif
+
+/* Obtain a checksum_info from ChecksumInfoAllocator with next cleared. */
+static __inline__ checksum_info *alloc_checksum_info(void)
+{
+    checksum_info * const fresh = ChecksumInfoAllocator.acquire();
+
+    fresh->next = NULL;
+    return fresh;
+}
+
+/* Clear csi's next link and hand it back to ChecksumInfoAllocator. */
+static __inline__ void free_checksum_info(checksum_info *csi)
+{
+    csi->next = NULL;   /* detach before release */
+    ChecksumInfoAllocator.release(csi);
+}
+
+/* Release every checksum_info on the chain starting at csi (may be NULL). */
+static __inline__ void free_checksum_info_chain(checksum_info *csi)
+{
+    for (checksum_info *following; csi != NULL; csi = following) {
+        /* free_checksum_info clears the link, so save it first */
+        following = csi->next;
+        free_checksum_info(csi);
+    }
+}
+
+/*
+ * Obtain a blockinfo from BlockInfoAllocator. With USE_CHECKSUM_INFO the
+ * block starts with an empty checksum chain.
+ */
+static __inline__ blockinfo *alloc_blockinfo(void)
+{
+    blockinfo * const fresh = BlockInfoAllocator.acquire();
+
+#if USE_CHECKSUM_INFO
+    fresh->csi = NULL;
+#endif
+    return fresh;
+}
+
+/*
+ * Hand bi back to BlockInfoAllocator. With USE_CHECKSUM_INFO its checksum
+ * chain is released first and the chain pointer cleared.
+ */
+static __inline__ void free_blockinfo(blockinfo *bi)
+{
+#if USE_CHECKSUM_INFO
+    checksum_info * const chain = bi->csi;
+
+    free_checksum_info_chain(chain);
+    bi->csi = NULL;
+#endif
+    BlockInfoAllocator.release(bi);
+}
+
+/*
+ * Refill hold_bi[] from the front: each empty slot gets a freshly
+ * allocated, prepared blockinfo. Stops at the first slot that is already
+ * occupied.
+ */
+static __inline__ void alloc_blockinfos(void)
+{
+    int slot = 0;
+
+    while (slot < MAX_HOLD_BI) {
+        if (hold_bi[slot])
+            return;
+
+        blockinfo* fresh = alloc_blockinfo();
+        hold_bi[slot] = fresh;
+        prepare_block(fresh);
+        slot++;
+    }
+}
+
+/********************************************************************
+ * Functions to emit data into memory, and other general support *
+ ********************************************************************/
+
+/* Write cursor used by the emit_*() helpers; each emit advances it. */
+static uae_u8* target;
+
+/* Emitter initialisation hook; currently does nothing. */
+static void emit_init(void)
+{
+}
+
+/* Store one byte at the write cursor and advance the cursor by 1. */
+static __inline__ void emit_byte(uae_u8 x)
+{
+    target[0] = x;
+    target += 1;
+}
+
+/*
+ * Store a 16-bit value in host byte order at the write cursor and advance
+ * the cursor by 2. memcpy is used instead of a cast store because the
+ * cursor is a byte pointer with no alignment guarantee.
+ */
+static __inline__ void emit_word(uae_u16 x)
+{
+    memcpy(target, &x, sizeof x);
+    target += 2;
+}
+
+/*
+ * Store a 32-bit value in host byte order at the write cursor and advance
+ * the cursor by 4. memcpy is used instead of a cast store because the
+ * cursor is a byte pointer with no alignment guarantee.
+ */
+static __inline__ void emit_long(uae_u32 x)
+{
+    memcpy(target, &x, sizeof x);
+    target += 4;
+}
+
+/*
+ * Store a 64-bit value in host byte order at the write cursor and advance
+ * the cursor by 8. memcpy is used instead of a cast store because the
+ * cursor is a byte pointer with no alignment guarantee.
+ */
+static __inline__ void emit_quad(uae_u64 x)
+{
+    memcpy(target, &x, sizeof x);
+    target += 8;
+}
+
+/* Copy blocklen bytes from block to the write cursor and advance the cursor. */
+static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
+{
+ memcpy((uae_u8 *)target,block,blocklen);
+ target+=blocklen;
+}
+
+/*
+ * Return v with its four bytes in reverse order. The active branch
+ * delegates to do_byteswap_32(); the #else branch is the equivalent
+ * open-coded form and is compiled out.
+ */
+static __inline__ uae_u32 reverse32(uae_u32 v)
+{
+#if 1
+ // gb-- We have specialized byteswapping functions, just use them
+ return do_byteswap_32(v);
+#else
+ return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
+#endif
+}
+
+/********************************************************************
+ * Getting the information about the target CPU *
+ ********************************************************************/
+
+#include "codegen_x86.cpp"
+
+/* Point the emit write cursor at t. */
+void set_target(uae_u8* t)
+{
+ target=t;
+}
+
+/* Return the current emit write cursor. */
+static __inline__ uae_u8* get_target_noopt(void)
+{
+ return target;
+}
+
+/* Externally visible accessor for the emit write cursor; forwards to get_target_noopt(). */
+__inline__ uae_u8* get_target(void)
+{
+ return get_target_noopt();
+}
+
+
+/********************************************************************
+ * Flags status handling. EMIT TIME! *
+ ********************************************************************/
+
+static void bt_l_ri_noclobber(R4 r, IMM i);
+
+/*
+ * Make live.flags_in_flags VALID.
+ * Nothing is emitted when it already is. Otherwise, if the saved copy
+ * (live.flags_on_stack) is VALID, FLAGTMP is read into FLAG_NREG2 and
+ * transferred with raw_reg_to_flags(). Any other combination is reported
+ * and aborts. The TRASH check is a Dif debug check (Dif is defined outside
+ * this chunk).
+ */
+static void make_flags_live_internal(void)
+{
+ if (live.flags_in_flags==VALID)
+ return;
+ Dif (live.flags_on_stack==TRASH) {
+ write_log("Want flags, got something on stack, but it is TRASH\n");
+ abort();
+ }
+ if (live.flags_on_stack==VALID) {
+ int tmp;
+ tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
+ raw_reg_to_flags(tmp);
+ unlock2(tmp);
+
+ live.flags_in_flags=VALID;
+ return;
+ }
+ write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
+ live.flags_in_flags,live.flags_on_stack);
+ abort();
+}
+
+/*
+ * Make live.flags_on_stack VALID.
+ * When the flags are not important, the saved copy is simply declared
+ * valid and nothing is emitted. Otherwise FLAGTMP is written through
+ * FLAG_NREG1 using raw_flags_to_reg().
+ */
+static void flags_to_stack(void)
+{
+ if (live.flags_on_stack==VALID)
+ return;
+ if (!live.flags_are_important) {
+ live.flags_on_stack=VALID;
+ return;
+ }
+ /* NOTE: the else below pairs with the Dif check, so the save is only
+    emitted on the path where that check does not abort. */
+ Dif (live.flags_in_flags!=VALID)
+ abort();
+ else {
+ int tmp;
+ tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
+ raw_flags_to_reg(tmp);
+ unlock2(tmp);
+ }
+ live.flags_on_stack=VALID;
+}
+
+/*
+ * Declare the in-flags copy TRASH. If it is currently the only valid
+ * copy, it is saved with flags_to_stack() first.
+ */
+static __inline__ void clobber_flags(void)
+{
+    const bool only_copy_is_in_flags =
+        live.flags_in_flags == VALID && live.flags_on_stack != VALID;
+
+    if (only_copy_is_in_flags)
+        flags_to_stack();
+    live.flags_in_flags = TRASH;
+}
+
+/* Prepare for leaving the compiled code: make sure the saved flags copy is valid. */
+static __inline__ void flush_flags(void)
+{
+    flags_to_stack();
+}
+
+/* Running counter; its value is stamped into the .touched field of a
+   native register each time that register is handed out. */
+int touchcnt;
+
+/********************************************************************
+ * Partial register flushing for optimized calls *
+ ********************************************************************/
+
+/*
+ * Per-instruction register usage, one bit per register number
+ * (see ru_set()/ru_get()).
+ */
+struct regusage {
+ uae_u16 rmask; /* registers read */
+ uae_u16 wmask; /* registers written */
+};
+
+/* Set bit reg in *mask. Compiles to nothing unless USE_OPTIMIZED_CALLS is set. */
+static inline void ru_set(uae_u16 *mask, int reg)
+{
+#if USE_OPTIMIZED_CALLS
+    *mask = *mask | (1 << reg);
+#endif
+}
+
+/*
+ * Test bit reg in *mask. Without USE_OPTIMIZED_CALLS every register is
+ * reported as used.
+ */
+static inline bool ru_get(const uae_u16 *mask, int reg)
+{
+#if USE_OPTIMIZED_CALLS
+    return (*mask & (1 << reg)) != 0;
+#else
+    /* Default: the instruction reads and writes the register */
+    return true;
+#endif
+}
+
+/* Record that register reg is read (sets its bit in ru->rmask). */
+static inline void ru_set_read(regusage *ru, int reg)
+{
+ ru_set(&ru->rmask, reg);
+}
+
+/* Record that register reg is written (sets its bit in ru->wmask). */
+static inline void ru_set_write(regusage *ru, int reg)
+{
+ ru_set(&ru->wmask, reg);
+}
+
+/* Predicate: is register reg recorded as read in ru? */
+static inline bool ru_read_p(const regusage *ru, int reg)
+{
+ return ru_get(&ru->rmask, reg);
+}
+
+/* Predicate: is register reg recorded as written in ru? */
+static inline bool ru_write_p(const regusage *ru, int reg)
+{
+ return ru_get(&ru->wmask, reg);
+}
+
+/*
+ * Account for one effective address in ru and step m68k_pc_offset over
+ * the extension words that the addressing mode consumes.
+ *
+ * reg        register field of the EA (address registers are recorded
+ *            as reg+8)
+ * mode       addressing mode
+ * size       operand size; only consulted for immi
+ * write_mode non-zero if a Dreg/Areg operand is written, zero if read.
+ *            Registers that merely form an address are always recorded
+ *            as read.
+ *
+ * Modes not listed in the switch leave ru and m68k_pc_offset untouched.
+ */
+static void ru_fill_ea(regusage *ru, int reg, amodes mode,
+ wordsizes size, int write_mode)
+{
+ switch (mode) {
+ case Areg:
+ reg += 8;
+ /* fall through */
+ case Dreg:
+ ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
+ break;
+ case Ad16:
+ /* skip displacement */
+ m68k_pc_offset += 2;
+ /* fall through */
+ case Aind:
+ case Aipi:
+ case Apdi:
+ ru_set_read(ru, reg+8);
+ break;
+ case Ad8r:
+ ru_set_read(ru, reg+8);
+ /* fall through */
+ case PC8r: {
+ uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
+ /* index register number, taken from bits 15..12 of the extension word */
+ reg = (dp >> 12) & 15;
+ ru_set_read(ru, reg);
+ /* NOTE(review): for a 68020 full-format extension word the base
+    displacement (bits 5-4 = 2/3) takes 2/4 bytes and the outer
+    displacement (bits 1-0 = 2/3) takes 2/4 bytes; this expression
+    yields 4/6 for each -- confirm against the CPU manual. */
+ if (dp & 0x100)
+ m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
+ break;
+ }
+ case PC16:
+ case absw:
+ case imm0:
+ case imm1:
+ m68k_pc_offset += 2;
+ break;
+ case absl:
+ case imm2:
+ m68k_pc_offset += 4;
+ break;
+ case immi:
+ m68k_pc_offset += (size == sz_long) ? 4 : 2;
+ break;
+ }
+}
+
+/*
+ * Fill ru with the registers read and written by the instruction whose
+ * opcode word is given, and advance m68k_pc_offset past the opcode word
+ * and the extension words inspected here.
+ *
+ * Bits 0..7 of the masks stand for data registers; address registers are
+ * recorded at bit reg+8 (see ru_fill_ea()). Unless an instruction clears
+ * rw_dest, every written register is also reported as read.
+ *
+ * Special cases visible below:
+ *  - opcodes 0xAxxx are accepted with whatever masks were collected;
+ *  - opcodes 0x71xx report all registers read and none written;
+ *  - if may_trap(opcode) is true, all registers are read and written;
+ *  - an instruction that is covered by none of the above and uses neither
+ *    a source nor a destination EA is logged and aborts.
+ *
+ * TODO: split into a static initialization part and a dynamic one
+ * (instructions depending on extension words)
+ */
+static void ru_fill(regusage *ru, uae_u32 opcode)
+{
+    m68k_pc_offset += 2;
+
+    /* Default: no register is used or written to */
+    ru->rmask = 0;
+    ru->wmask = 0;
+
+    uae_u32 real_opcode = cft_map(opcode);
+    struct instr *dp = &table68k[real_opcode];
+
+    bool rw_dest = true;
+    bool handled = false;
+
+    /* Handle some instructions specifically */
+    uae_u16 ext;
+    switch (dp->mnemo) {
+    case i_BFCHG:
+    case i_BFCLR:
+    case i_BFEXTS:
+    case i_BFEXTU:
+    case i_BFFFO:
+    case i_BFINS:
+    case i_BFSET:
+    case i_BFTST:
+        ext = comp_get_iword((m68k_pc_offset+=2)-2);
+        /* offset and/or width may be supplied in data registers */
+        if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
+        if (ext & 0x020) ru_set_read(ru, ext & 7);
+        ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
+        if (dp->dmode == Dreg)
+            ru_set_read(ru, dp->dreg);
+        switch (dp->mnemo) {
+        case i_BFEXTS:
+        case i_BFEXTU:
+        case i_BFFFO:
+            ru_set_write(ru, (ext >> 12) & 7);
+            break;
+        case i_BFINS:
+            ru_set_read(ru, (ext >> 12) & 7);
+            /* fall through */
+        case i_BFCHG:
+        case i_BFCLR:
+        case i_BFSET:   /* was i_BSET, which can never occur in this group */
+            if (dp->dmode == Dreg)
+                ru_set_write(ru, dp->dreg);
+            break;
+        }
+        handled = true;
+        rw_dest = false;
+        break;
+
+    case i_BTST:
+        rw_dest = false;
+        break;
+
+    case i_CAS:
+    {
+        ext = comp_get_iword((m68k_pc_offset+=2)-2);
+        int Du = ext & 7;
+        ru_set_read(ru, Du);
+        int Dc = (ext >> 6) & 7;
+        ru_set_read(ru, Dc);
+        ru_set_write(ru, Dc);
+        break;
+    }
+    case i_CAS2:
+    {
+        int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
+        ext = comp_get_iword((m68k_pc_offset+=2)-2);
+        Rn1 = (ext >> 12) & 15;
+        Du1 = (ext >> 6) & 7;
+        Dc1 = ext & 7;
+        ru_set_read(ru, Rn1);
+        ru_set_read(ru, Du1);
+        ru_set_read(ru, Dc1);
+        ru_set_write(ru, Dc1);
+        ext = comp_get_iword((m68k_pc_offset+=2)-2);
+        Rn2 = (ext >> 12) & 15;
+        Du2 = (ext >> 6) & 7;
+        Dc2 = ext & 7;
+        ru_set_read(ru, Rn2);
+        ru_set_read(ru, Du2);
+        /* Recorded explicitly to mirror Dc1; rw_dest would add it anyway. */
+        ru_set_read(ru, Dc2);
+        ru_set_write(ru, Dc2);
+        break;
+    }
+    case i_DIVL: case i_MULL:
+        m68k_pc_offset += 2;
+        break;
+    case i_LEA:
+    case i_MOVE: case i_MOVEA: case i_MOVE16:
+        rw_dest = false;
+        break;
+    case i_PACK: case i_UNPK:
+        rw_dest = false;
+        m68k_pc_offset += 2;
+        break;
+    case i_TRAPcc:
+        m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
+        break;
+    case i_RTR:
+        /* do nothing, just for coverage debugging */
+        break;
+    /* TODO: handle EXG instruction */
+    }
+
+    /* Handle A-Traps better */
+    if ((real_opcode & 0xf000) == 0xa000) {
+        handled = true;
+    }
+
+    /* Handle EmulOps better */
+    if ((real_opcode & 0xff00) == 0x7100) {
+        handled = true;
+        ru->rmask = 0xffff;
+        ru->wmask = 0;
+    }
+
+    if (dp->suse && !handled)
+        ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
+
+    if (dp->duse && !handled)
+        ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
+
+    /* A destination that is modified in place is also a source. */
+    if (rw_dest)
+        ru->rmask |= ru->wmask;
+
+    handled = handled || dp->suse || dp->duse;
+
+    /* Mark all registers as used/written if the instruction may trap */
+    if (may_trap(opcode)) {
+        handled = true;
+        ru->rmask = 0xffff;
+        ru->wmask = 0xffff;
+    }
+
+    if (!handled) {
+        write_log("ru_fill: %04x = { %04x, %04x }\n",
+                  real_opcode, ru->rmask, ru->wmask);
+        abort();
+    }
+}
+
+/********************************************************************
+ * register allocation per block logging *
+ ********************************************************************/
+
+/*
+ * Per-block allocation log.
+ *  vstate[v]   - L_* code for virtual register v
+ *  vwritten[v] - non-zero once v has been written back (see log_vwrite())
+ *  nstate[n]   - L_* code for native register n, or a virtual register
+ *                number (>= 0) recorded by log_isreg()
+ */
+static uae_s8 vstate[VREGS];
+static uae_s8 vwritten[VREGS];
+static uae_s8 nstate[N_REGS];
+
+/* Log codes; all are negative so they cannot collide with register numbers. */
+#define L_UNKNOWN -127
+#define L_UNAVAIL -1
+#define L_NEEDED -2
+#define L_UNNEEDED -3
+
+/*
+ * Snapshot the current allocation log (vstate[] / nstate[]) into *s.
+ * The bigstate argument is not consulted.
+ */
+static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
+{
+    for (int v = 0; v < VREGS; v++)
+        s->virt[v] = vstate[v];
+
+    for (int n = 0; n < N_REGS; n++)
+        s->nat[n] = nstate[n];
+}
+
+/*
+ * Compare the current allocation log with the snapshot in *s.
+ *
+ * Returns 1 immediately if a virtual register is not UNNEEDED now but was
+ * in the snapshot, or if a native register currently mapped (>= 0)
+ * differs from the snapshot. Differences in the opposite direction are
+ * only counted; two or more of them also yield 1 when USE_MATCH is set,
+ * since it might be worth recompiling the callers. Otherwise returns 0.
+ * The bigstate argument is not consulted.
+ */
+static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
+{
+    int relaxed = 0;
+
+    for (int v = 0; v < VREGS; v++) {
+        const bool needed_now  = vstate[v] != L_UNNEEDED;
+        const bool needed_then = s->virt[v] != L_UNNEEDED;
+
+        if (needed_now && !needed_then)
+            return 1;
+        if (!needed_now && needed_then)
+            relaxed++;
+    }
+
+    for (int n = 0; n < N_REGS; n++) {
+        if (nstate[n] >= 0) {
+            if (nstate[n] != s->nat[n])
+                return 1;
+        }
+        else if (s->nat[n] >= 0) {
+            relaxed++;
+        }
+    }
+
+    return (relaxed >= 2 && USE_MATCH) ? 1 : 0;
+}
+
+/* Reset the allocation log: everything UNKNOWN, nothing written yet. */
+static __inline__ void log_startblock(void)
+{
+    for (int n = 0; n < N_REGS; n++)
+        nstate[n] = L_UNKNOWN;
+
+    for (int v = 0; v < VREGS; v++) {
+        vwritten[v] = 0;
+        vstate[v] = L_UNKNOWN;
+    }
+}
+
+/* Using an n-reg for a temp variable: an UNKNOWN native register n
+   becomes UNAVAIL; any other logged state is left alone. */
+static __inline__ void log_isused(int n)
+{
+ if (nstate[n] == L_UNKNOWN)
+ nstate[n] = L_UNAVAIL;
+}
+
+/* Log that virtual register r is used: UNKNOWN becomes NEEDED. */
+static __inline__ void log_visused(int r)
+{
+ if (vstate[r] == L_UNKNOWN)
+ vstate[r] = L_NEEDED;
+}
+
+/*
+ * Emit the load of virtual register r into native register n. FLAGTMP and
+ * FLAGX use their dedicated raw loaders; everything else is a 32-bit load
+ * from the register's memory slot.
+ */
+static __inline__ void do_load_reg(int n, int r)
+{
+ if (r == FLAGTMP)
+ raw_load_flagreg(n, r);
+ else if (r == FLAGX)
+ raw_load_flagx(n, r);
+ else
+ raw_mov_l_rm(n, (uintptr) live.state[r].mem);
+}
+
+/* Emit a plain 32-bit load of r's memory slot into native register n
+   (no special handling of FLAGTMP/FLAGX, unlike do_load_reg()). */
+static __inline__ void check_load_reg(int n, int r)
+{
+ raw_mov_l_rm(n, (uintptr) live.state[r].mem);
+}
+
+/* Log that virtual register r has been written in this block. */
+static __inline__ void log_vwrite(int r)
+{
+ vwritten[r] = 1;
+}
+
+/*
+ * Using an n-reg to hold a v-reg.
+ *
+ * If nothing is known yet about native register n, r is one of the first
+ * 16 virtual registers, r has not been written in this block and
+ * USE_MATCH is set, the pairing is only recorded in nstate[] and no load
+ * is emitted. Otherwise the load is emitted and an UNKNOWN n becomes
+ * UNAVAIL. In both cases an UNKNOWN r becomes NEEDED.
+ */
+static __inline__ void log_isreg(int n, int r)
+{
+    if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
+        nstate[n] = r;
+    else {
+        do_load_reg(n, r);
+        if (nstate[n] == L_UNKNOWN)
+            nstate[n] = L_UNAVAIL;
+    }
+    if (vstate[r] == L_UNKNOWN)
+        vstate[r] = L_NEEDED;
+}
+
+/* Log that virtual register r is overwritten before being used:
+   UNKNOWN becomes UNNEEDED. */
+static __inline__ void log_clobberreg(int r)
+{
+ if (vstate[r] == L_UNKNOWN)
+ vstate[r] = L_UNNEEDED;
+}
+
+/*
+ * This ends all possibility of clever register allocation: every entry
+ * that is still UNKNOWN is pinned (virtual -> NEEDED, native -> UNAVAIL).
+ */
+static __inline__ void log_flush(void)
+{
+    for (int v = 0; v < VREGS; v++) {
+        if (vstate[v] == L_UNKNOWN)
+            vstate[v] = L_NEEDED;
+    }
+    for (int n = 0; n < N_REGS; n++) {
+        if (nstate[n] == L_UNKNOWN)
+            nstate[n] = L_UNAVAIL;
+    }
+}
+
+/*
+ * Debug dump of the allocation log. The dump is currently disabled by
+ * the unconditional return at the top; everything after it is unreachable.
+ */
+static __inline__ void log_dump(void)
+{
+ int i;
+
+ /* dump disabled */
+ return;
+
+ write_log("----------------------\n");
+ for (i = 0; i < N_REGS; i++) {
+ switch (nstate[i]) {
+ case L_UNKNOWN:
+ write_log("Nat %d : UNKNOWN\n", i);
+ break;
+ case L_UNAVAIL:
+ write_log("Nat %d : UNAVAIL\n", i);
+ break;
+ default:
+ write_log("Nat %d : %d\n", i, nstate[i]);
+ break;
+ }
+ }
+ for (i = 0; i < VREGS; i++) {
+ if (vstate[i] == L_UNNEEDED)
+ write_log("Virt %d: UNNEEDED\n", i);
+ }
+}
+
+/********************************************************************
+ * register status handling. EMIT TIME! *
+ ********************************************************************/
+
+/* Set the status of virtual register r. Becoming ISCONST is also logged
+   as a clobber of r. */
+static __inline__ void set_status(int r, int status)
+{
+ if (status == ISCONST)
+ log_clobberreg(r);
+ live.state[r].status=status;
+}
+
+/* Non-zero if virtual register r currently has status CLEAN or DIRTY. */
+static __inline__ int isinreg(int r)
+{
+ return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
+}
+
+/* Emit "r = r + val" via raw_lea_l_brr; nothing is emitted for val == 0. */
+static __inline__ void adjust_nreg(int r, uae_u32 val)
+{
+    if (val)
+        raw_lea_l_brr(r,r,val);
+}
+
+/*
+ * Write virtual register r back to its memory slot if it is DIRTY; r
+ * stays in its native register and becomes CLEAN.
+ *
+ * A pending offset (live.state[r].val) is folded into the native register
+ * first, but only when that register holds nothing but r and is not
+ * locked; doing so makes r DIRTY with dirtysize 4.
+ */
+static void tomem(int r)
+{
+ int rr=live.state[r].realreg;
+
+ if (isinreg(r)) {
+ if (live.state[r].val && live.nat[rr].nholds==1
+ && !live.nat[rr].locked) {
+ // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
+ // live.state[r].val,r,rr,target);
+ adjust_nreg(rr,live.state[r].val);
+ live.state[r].val=0;
+ live.state[r].dirtysize=4;
+ set_status(r,DIRTY);
+ }
+ }
+
+ if (live.state[r].status==DIRTY) {
+ /* store exactly as many bytes as are dirty */
+ switch (live.state[r].dirtysize) {
+ case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
+ case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
+ case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
+ default: abort();
+ }
+ log_vwrite(r);
+ set_status(r,CLEAN);
+ live.state[r].dirtysize=0;
+ }
+}
+
+/* Non-zero if virtual register r currently has status ISCONST. */
+static __inline__ int isconst(int r)
+{
+ return live.state[r].status==ISCONST;
+}
+
+/* Externally visible wrapper around isconst(). */
+int is_const(int r)
+{
+ return isconst(r);
+}
+
+/*
+ * If r is a known constant, emit a store of that constant to r's memory
+ * slot and turn r into an INMEM register with no pending value.
+ * Does nothing for non-constant registers.
+ */
+static __inline__ void writeback_const(int r)
+{
+ if (!isconst(r))
+ return;
+ Dif (live.state[r].needflush==NF_HANDLER) {
+ write_log("Trying to write back constant NF_HANDLER!\n");
+ abort();
+ }
+
+ raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
+ log_vwrite(r);
+ live.state[r].val=0;
+ set_status(r,INMEM);
+}
+
+/* Write r back to memory, whether it is a constant or lives in a register. */
+static __inline__ void tomem_c(int r)
+{
+    if (!isconst(r)) {
+        tomem(r);
+        return;
+    }
+    writeback_const(r);
+}
+
+/*
+ * Remove virtual register r from its native register: write it back with
+ * tomem(), drop it from the native register's holds[] list and mark it
+ * INMEM. Does nothing if r is not in a register.
+ */
+static void evict(int r)
+{
+ int rr;
+
+ if (!isinreg(r))
+ return;
+ tomem(r);
+ rr=live.state[r].realreg;
+
+ Dif (live.nat[rr].locked &&
+ live.nat[rr].nholds==1) {
+ write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
+ abort();
+ }
+
+ live.nat[rr].nholds--;
+ /* keep holds[] dense: move the last entry into the vacated slot */
+ if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
+ int topreg=live.nat[rr].holds[live.nat[rr].nholds];
+ int thisind=live.state[r].realind;
+
+ live.nat[rr].holds[thisind]=topreg;
+ live.state[topreg].realind=thisind;
+ }
+ live.state[r].realreg=-1;
+ set_status(r,INMEM);
+}
+
+/*
+ * Evict every virtual register held by native register r, walking the
+ * holds[] list from its last entry down to the first.
+ */
+static __inline__ void free_nreg(int r)
+{
+    for (int idx = live.nat[r].nholds - 1; idx >= 0; idx--)
+        evict(live.nat[r].holds[idx]);
+
+    Dif (live.nat[r].nholds!=0) {
+        write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
+        abort();
+    }
+}
+
+/* Use with care! Declares r (if it is in a register) fully valid, not
+   dirty and without pending offset -- no code is emitted, so any unsaved
+   register contents are simply forgotten. */
+static __inline__ void isclean(int r)
+{
+ if (!isinreg(r))
+ return;
+ live.state[r].validsize=4;
+ live.state[r].dirtysize=0;
+ live.state[r].val=0;
+ set_status(r,CLEAN);
+}
+
+/* Detach r from its native register without writing it back: r is
+   declared clean first, so the evict() emits no store. */
+static __inline__ void disassociate(int r)
+{
+ isclean(r);
+ evict(r);
+}
+
+/* Turn r into the compile-time constant val; any register copy of r is
+   dropped without write-back. */
+static __inline__ void set_const(int r, uae_u32 val)
+{
+ disassociate(r);
+ live.state[r].val=val;
+ set_status(r,ISCONST);
+}
+
+/* Return the value currently stored in live.state[r].val (the pending
+   offset of r, or its value when r is a constant). */
+static __inline__ uae_u32 get_offset(int r)
+{
+ return live.state[r].val;
+}
+
+/*
+ * Pick a native register for virtual register r and bind r to it.
+ *
+ * r           virtual register
+ * size        access size in bytes (1/2/4); restricts the candidates to
+ *             native registers with canbyte/canword for sizes 1/2
+ * willclobber non-zero if the caller overwrites the register, so the old
+ *             value need not be loaded
+ * hint        preferred native register, or negative for none
+ *
+ * Candidates must be unlocked. The one with the lowest "badness" wins:
+ * badness is the register's touched stamp, 0 if it holds nothing, and the
+ * hinted register gets a large bonus. Whatever the winner holds is
+ * evicted.
+ *
+ * Returns the native register now associated with r. In the special case
+ * of widening a register whose validsize is 2 to size 4 this is r's
+ * existing register; the chosen one is only used as scratch.
+ */
+static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
+{
+ int bestreg;
+ uae_s32 when;
+ int i;
+ uae_s32 badness=0; /* to shut up gcc */
+ bestreg=-1;
+ when=2000000000;
+
+ /* XXX use a regalloc_order table? */
+ for (i=0;i<N_REGS;i++) {
+ badness=live.nat[i].touched;
+ if (live.nat[i].nholds==0)
+ badness=0;
+ if (i==hint)
+ badness-=200000000;
+ if (!live.nat[i].locked && badness<when) {
+ if ((size==1 && live.nat[i].canbyte) ||
+ (size==2 && live.nat[i].canword) ||
+ (size==4)) {
+ bestreg=i;
+ when=badness;
+ /* an empty register cannot be beaten unless a hint is pending */
+ if (live.nat[i].nholds==0 && hint<0)
+ break;
+ if (i==hint)
+ break;
+ }
+ }
+ }
+ Dif (bestreg==-1)
+ abort();
+
+ if (live.nat[bestreg].nholds>0) {
+ free_nreg(bestreg);
+ }
+ if (isinreg(r)) {
+ int rr=live.state[r].realreg;
+ /* This will happen if we read a partially dirty register at a
+ bigger size */
+ Dif (willclobber || live.state[r].validsize>=size)
+ abort();
+ Dif (live.nat[rr].nholds!=1)
+ abort();
+ if (size==4 && live.state[r].validsize==2) {
+ /* Combine the valid low 16 bits in rr with the upper 16 bits
+    loaded from memory into bestreg; r stays in rr. */
+ log_isused(bestreg);
+ log_visused(r);
+ raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
+ raw_bswap_32(bestreg);
+ raw_zero_extend_16_rr(rr,rr);
+ raw_zero_extend_16_rr(bestreg,bestreg);
+ raw_bswap_32(bestreg);
+ raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
+ live.state[r].validsize=4;
+ live.nat[rr].touched=touchcnt++;
+ return rr;
+ }
+ if (live.state[r].validsize==1) {
+ /* Nothing yet */
+ }
+ evict(r);
+ }
+
+ if (!willclobber) {
+ if (live.state[r].status!=UNDEF) {
+ if (isconst(r)) {
+ /* materialise the constant; memory is now stale, hence DIRTY */
+ raw_mov_l_ri(bestreg,live.state[r].val);
+ live.state[r].val=0;
+ live.state[r].dirtysize=4;
+ set_status(r,DIRTY);
+ log_isused(bestreg);
+ }
+ else {
+ log_isreg(bestreg, r); /* This will also load it! */
+ live.state[r].dirtysize=0;
+ set_status(r,CLEAN);
+ }
+ }
+ else {
+ live.state[r].val=0;
+ live.state[r].dirtysize=0;
+ set_status(r,CLEAN);
+ log_isused(bestreg);
+ }
+ live.state[r].validsize=4;
+ }
+ else { /* this is the easiest way, but not optimal. FIXME! */
+ /* Now it's trickier, but hopefully still OK */
+ if (!isconst(r) || size==4) {
+ live.state[r].validsize=size;
+ live.state[r].dirtysize=size;
+ live.state[r].val=0;
+ set_status(r,DIRTY);
+ if (size == 4) {
+ log_clobberreg(r);
+ log_isused(bestreg);
+ }
+ else {
+ log_visused(r);
+ log_isused(bestreg);
+ }
+ }
+ else {
+ /* partial overwrite of a constant: load the full constant first */
+ if (live.state[r].status!=UNDEF)
+ raw_mov_l_ri(bestreg,live.state[r].val);
+ live.state[r].val=0;
+ live.state[r].validsize=4;
+ live.state[r].dirtysize=4;
+ set_status(r,DIRTY);
+ log_isused(bestreg);
+ }
+ }
+ /* register the new association in both directions */
+ live.state[r].realreg=bestreg;
+ live.state[r].realind=live.nat[bestreg].nholds;
+ live.nat[bestreg].touched=touchcnt++;
+ live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
+ live.nat[bestreg].nholds++;
+
+ return bestreg;
+}
+
+/* alloc_reg_hinted() without a preferred native register. */
+static int alloc_reg(int r, int size, int willclobber)
+{
+    const int no_hint = -1;
+    return alloc_reg_hinted(r, size, willclobber, no_hint);
+}
+
+/* Drop one lock from native register r (the lock is a counter). */
+static void unlock2(int r)
+{
+ Dif (!live.nat[r].locked)
+ abort();
+ live.nat[r].locked--;
+}
+
+/* Add one lock to native register r. */
+static void setlock(int r)
+{
+ live.nat[r].locked++;
+}
+
+
+/*
+ * Move everything held by native register s into native register d:
+ * d is emptied first, a register-to-register move is emitted, and all
+ * virtual registers of s are re-homed to d. s ends up holding nothing.
+ * No-op when s == d.
+ */
+static void mov_nregs(int d, int s)
+{
+ int ns=live.nat[s].nholds;
+ int nd=live.nat[d].nholds;
+ int i;
+
+ if (s==d)
+ return;
+
+ if (nd>0)
+ free_nreg(d);
+
+ log_isused(d);
+ raw_mov_l_rr(d,s);
+
+ for (i=0;i<live.nat[s].nholds;i++) {
+ int vs=live.nat[s].holds[i];
+
+ live.state[vs].realreg=d;
+ live.state[vs].realind=i;
+ live.nat[d].holds[i]=vs;
+ }
+ live.nat[d].nholds=live.nat[s].nholds;
+
+ live.nat[s].nholds=0;
+}
+
+
+/*
+ * Ensure that virtual register r does not share its native register with
+ * any other virtual register.
+ *
+ * r    virtual register
+ * size number of bytes the caller is about to overwrite (0 = none);
+ *      if this covers all valid bytes the old contents are not copied
+ * spec hint for the replacement native register, passed to
+ *      alloc_reg_hinted()
+ *
+ * If the co-residents are all clean and offset-free and the register is
+ * not locked, they are evicted and r keeps the register. Otherwise r is
+ * moved to a freshly allocated register.
+ */
+static __inline__ void make_exclusive(int r, int size, int spec)
+{
+ int clobber;
+ reg_status oldstate;
+ int rr=live.state[r].realreg;
+ int nr;
+ int nind;
+ int ndirt=0;
+ int i;
+
+ if (!isinreg(r))
+ return;
+ if (live.nat[rr].nholds==1)
+ return;
+ /* count co-residents that would need code to be evicted */
+ for (i=0;i<live.nat[rr].nholds;i++) {
+ int vr=live.nat[rr].holds[i];
+ if (vr!=r &&
+ (live.state[vr].status==DIRTY || live.state[vr].val))
+ ndirt++;
+ }
+ if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
+ /* Everything else is clean, so let's keep this register */
+ for (i=0;i<live.nat[rr].nholds;i++) {
+ int vr=live.nat[rr].holds[i];
+ if (vr!=r) {
+ evict(vr);
+ i--; /* Try that index again! */
+ }
+ }
+ Dif (live.nat[rr].nholds!=1) {
+ write_log("natreg %d holds %d vregs, %d not exclusive\n",
+ rr,live.nat[rr].nholds,r);
+ abort();
+ }
+ return;
+ }
+
+ /* We have to split the register */
+ oldstate=live.state[r];
+
+ setlock(rr); /* Make sure this doesn't go away */
+ /* Forget about r being in the register rr */
+ disassociate(r);
+ /* Get a new register, that we will clobber completely */
+ if (oldstate.status==DIRTY) {
+ /* If dirtysize is <4, we need a register that can handle the
+ eventual smaller memory store! Thanks to Quake68k for exposing
+ this detail ;-) */
+ nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
+ }
+ else {
+ nr=alloc_reg_hinted(r,4,1,spec);
+ }
+ nind=live.state[r].realind;
+ live.state[r]=oldstate; /* Keep all the old state info */
+ live.state[r].realreg=nr;
+ live.state[r].realind=nind;
+
+ /* copy the old value only if part of it survives the upcoming write */
+ if (size<live.state[r].validsize) {
+ if (live.state[r].val) {
+ /* Might as well compensate for the offset now */
+ raw_lea_l_brr(nr,rr,oldstate.val);
+ live.state[r].val=0;
+ live.state[r].dirtysize=4;
+ set_status(r,DIRTY);
+ }
+ else
+ raw_mov_l_rr(nr,rr); /* Make another copy */
+ }
+ unlock2(rr);
+}
+
+/* Add off to the pending offset of r; no code is emitted. */
+static __inline__ void add_offset(int r, uae_u32 off)
+{
+ live.state[r].val+=off;
+}
+
+/*
+ * Fold the pending offset of r (live.state[r].val) into its native
+ * register so that the offset becomes 0. Constants and registers without
+ * an offset are left alone. r is loaded into a register (hint: spec) and
+ * made exclusive if necessary; it ends up DIRTY with dirtysize 4.
+ * Aborts if the offset cannot be removed.
+ */
+static __inline__ void remove_offset(int r, int spec)
+{
+ reg_status oldstate;
+ int rr;
+
+ if (isconst(r))
+ return;
+ if (live.state[r].val==0)
+ return;
+ /* a partially valid copy cannot take a 32-bit adjustment; reload it */
+ if (isinreg(r) && live.state[r].validsize<4)
+ evict(r);
+
+ if (!isinreg(r))
+ alloc_reg_hinted(r,4,0,spec);
+
+ Dif (live.state[r].validsize!=4) {
+ write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
+ abort();
+ }
+ make_exclusive(r,0,-1);
+ /* make_exclusive might have done the job already */
+ if (live.state[r].val==0)
+ return;
+
+ rr=live.state[r].realreg;
+
+ if (live.nat[rr].nholds==1) {
+ //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
+ // live.state[r].val,r,rr,target);
+ adjust_nreg(rr,live.state[r].val);
+ live.state[r].dirtysize=4;
+ live.state[r].val=0;
+ set_status(r,DIRTY);
+ return;
+ }
+ write_log("Failed in remove_offset\n");
+ abort();
+}
+
+/* Apply remove_offset() to every virtual register, without a register hint. */
+static __inline__ void remove_all_offsets(void)
+{
+    const int no_hint = -1;
+
+    for (int vreg = 0; vreg < VREGS; vreg++)
+        remove_offset(vreg, no_hint);
+}
+
+/*
+ * With RECORD_REGISTER_USAGE: for each of the first 16 registers with a
+ * non-zero local count, emit an add of that count to the corresponding
+ * 8-byte slot of reg_count. Otherwise compiles to nothing.
+ */
+static inline void flush_reg_count(void)
+{
+#if RECORD_REGISTER_USAGE
+ for (int r = 0; r < 16; r++)
+ if (reg_count_local[r])
+ ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
+#endif
+}
+
+/* With RECORD_REGISTER_USAGE: count one use of register r (only r < 16
+   is tracked). Otherwise compiles to nothing. */
+static inline void record_register(int r)
+{
+#if RECORD_REGISTER_USAGE
+ if (r < 16)
+ reg_count_local[r]++;
+#endif
+}
+
+/*
+ * Obtain a native register holding virtual register r for reading.
+ *
+ * r          virtual register
+ * size       number of valid bytes required (1/2/4)
+ * spec       native register that must be used, or negative for any
+ * can_offset non-zero if the caller copes with a pending offset (see
+ *            get_offset()); otherwise the offset is removed first
+ *
+ * Returns the native register, with its lock count incremented; the
+ * caller releases it with unlock2().
+ */
+static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
+{
+ int n;
+ int answer=-1;
+
+ record_register(r);
+ if (live.state[r].status==UNDEF) {
+ write_log("WARNING: Unexpected read of undefined register %d\n",r);
+ }
+ if (!can_offset)
+ remove_offset(r,spec);
+
+ if (isinreg(r) && live.state[r].validsize>=size) {
+ n=live.state[r].realreg;
+ /* the current register is usable if it supports the access size,
+    or if the value will be moved to spec anyway */
+ switch(size) {
+ case 1:
+ if (live.nat[n].canbyte || spec>=0) {
+ answer=n;
+ }
+ break;
+ case 2:
+ if (live.nat[n].canword || spec>=0) {
+ answer=n;
+ }
+ break;
+ case 4:
+ answer=n;
+ break;
+ default: abort();
+ }
+ if (answer<0)
+ evict(r);
+ }
+ /* either the value was in memory to start with, or it was evicted and
+ is in memory now */
+ if (answer<0) {
+ answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
+ }
+
+ if (spec>=0 && spec!=answer) {
+ /* Too bad */
+ mov_nregs(spec,answer);
+ answer=spec;
+ }
+ live.nat[answer].locked++;
+ live.nat[answer].touched=touchcnt++;
+ return answer;
+}
+
+
+
+/* Read access to r in any native register; a pending offset is removed first. */
+static int readreg(int r, int size)
+{
+    const int any_nreg = -1;
+    const int can_offset = 0;
+    return readreg_general(r, size, any_nreg, can_offset);
+}
+
+/* Read access to r in native register spec; a pending offset is removed first. */
+static int readreg_specific(int r, int size, int spec)
+{
+    const int can_offset = 0;
+    return readreg_general(r, size, spec, can_offset);
+}
+
+/* Read access to r in any native register; a pending offset is left in
+   place for the caller to handle. */
+static int readreg_offset(int r, int size)
+{
+    const int any_nreg = -1;
+    const int can_offset = 1;
+    return readreg_general(r, size, any_nreg, can_offset);
+}
+
+/* writereg_general(r, size, spec)
+ *
+ * INPUT
+ * - r : mid-layer register
+ * - size : requested size (1/2/4)
+ * - spec : -1 if find or make a register free, otherwise specifies
+ * the physical register to use in any case
+ *
+ * OUTPUT
+ * - hard (physical, x86 here) register allocated to virtual register r
+ *
+ * The returned register has its lock count incremented (release it with
+ * unlock2()) and r is marked DIRTY. For size < 4 a pending offset is
+ * removed first; a full-size write simply discards it.
+ */
+static __inline__ int writereg_general(int r, int size, int spec)
+{
+ int n;
+ int answer=-1;
+
+ record_register(r);
+ if (size<4) {
+ remove_offset(r,spec);
+ }
+
+ make_exclusive(r,size,spec);
+ if (isinreg(r)) {
+ /* valid/dirty sizes can only grow through a write */
+ int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
+ int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
+ n=live.state[r].realreg;
+
+ Dif (live.nat[n].nholds!=1)
+ abort();
+ switch(size) {
+ case 1:
+ if (live.nat[n].canbyte || spec>=0) {
+ live.state[r].dirtysize=ndsize;
+ live.state[r].validsize=nvsize;
+ answer=n;
+ }
+ break;
+ case 2:
+ if (live.nat[n].canword || spec>=0) {
+ live.state[r].dirtysize=ndsize;
+ live.state[r].validsize=nvsize;
+ answer=n;
+ }
+ break;
+ case 4:
+ live.state[r].dirtysize=ndsize;
+ live.state[r].validsize=nvsize;
+ answer=n;
+ break;
+ default: abort();
+ }
+ if (answer<0)
+ evict(r);
+ }
+ /* either the value was in memory to start with, or it was evicted and
+ is in memory now */
+ if (answer<0) {
+ answer=alloc_reg_hinted(r,size,1,spec);
+ }
+ if (spec>=0 && spec!=answer) {
+ mov_nregs(spec,answer);
+ answer=spec;
+ }
+ if (live.state[r].status==UNDEF)
+ live.state[r].validsize=4;
+ live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
+ live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
+
+ live.nat[answer].locked++;
+ live.nat[answer].touched=touchcnt++;
+ if (size==4) {
+ live.state[r].val=0;
+ }
+ else {
+ Dif (live.state[r].val) {
+ write_log("Problem with val\n");
+ abort();
+ }
+ }
+ set_status(r,DIRTY);
+ return answer;
+}
+
+/* Write access to r in any native register. */
+static int writereg(int r, int size)
+{
+    const int any_nreg = -1;
+    return writereg_general(r, size, any_nreg);
+}
+
+/* Write access to r, forced into native register spec. */
+static int writereg_specific(int r, int size, int spec)
+{
+    const int nreg = writereg_general(r, size, spec);
+    return nreg;
+}
+
+/*
+ * Obtain a native register holding virtual register r for a
+ * read-modify-write access.
+ *
+ * r     virtual register
+ * wsize number of bytes written (1/2/4); must not be smaller than rsize
+ * rsize number of bytes that must be valid for reading (1/2/4)
+ * spec  native register that must be used, or negative for any
+ *
+ * Any pending offset is removed and r is made exclusive. Returns the
+ * native register with its lock count incremented (release it with
+ * unlock2()); r is marked DIRTY.
+ */
+static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
+{
+ int n;
+ int answer=-1;
+
+ record_register(r);
+ if (live.state[r].status==UNDEF) {
+ write_log("WARNING: Unexpected read of undefined register %d\n",r);
+ }
+ remove_offset(r,spec);
+ make_exclusive(r,0,spec);
+
+ Dif (wsize<rsize) {
+ write_log("Cannot handle wsize<rsize in rmw_general()\n");
+ abort();
+ }
+ if (isinreg(r) && live.state[r].validsize>=rsize) {
+ n=live.state[r].realreg;
+ Dif (live.nat[n].nholds!=1)
+ abort();
+
+ switch(rsize) {
+ case 1:
+ if (live.nat[n].canbyte || spec>=0) {
+ answer=n;
+ }
+ break;
+ case 2:
+ if (live.nat[n].canword || spec>=0) {
+ answer=n;
+ }
+ break;
+ case 4:
+ answer=n;
+ break;
+ default: abort();
+ }
+ if (answer<0)
+ evict(r);
+ }
+ /* either the value was in memory to start with, or it was evicted and
+ is in memory now */
+ if (answer<0) {
+ answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
+ }
+
+ if (spec>=0 && spec!=answer) {
+ /* Too bad */
+ mov_nregs(spec,answer);
+ answer=spec;
+ }
+ if (wsize>live.state[r].dirtysize)
+ live.state[r].dirtysize=wsize;
+ if (wsize>live.state[r].validsize)
+ live.state[r].validsize=wsize;
+ set_status(r,DIRTY);
+
+ live.nat[answer].locked++;
+ live.nat[answer].touched=touchcnt++;
+
+ Dif (live.state[r].val) {
+ write_log("Problem with val(rmw)\n");
+ abort();
+ }
+ return answer;
+}
+
+/* Read-modify-write access to r in any native register. */
+static int rmw(int r, int wsize, int rsize)
+{
+    const int any_nreg = -1;
+    return rmw_general(r, wsize, rsize, any_nreg);
+}
+
+/* Read-modify-write access to r, forced into native register spec. */
+static int rmw_specific(int r, int wsize, int rsize, int spec)
+{
+    const int nreg = rmw_general(r, wsize, rsize, spec);
+    return nreg;
+}
+
+
+/* needed for restoring the carry flag on non-P6 cores.
+   Emits a bit test of bit i of virtual register r without going through
+   clobber_flags(). Only 2 valid bytes are requested when i < 16,
+   4 otherwise. */
+static void bt_l_ri_noclobber(R4 r, IMM i)
+{
+ int size=4;
+ if (i<16)
+ size=2;
+ r=readreg(r,size);
+ raw_bt_l_ri(r,i);
+ unlock2(r);
+}
+
+/********************************************************************
+ * FPU register status handling. EMIT TIME! *
+ ********************************************************************/
+
+/*
+ * Write FPU virtual register r back to memory if it is DIRTY; it stays in
+ * its native register and becomes CLEAN. USE_LONG_DOUBLE selects the
+ * extended-precision store.
+ */
+static void f_tomem(int r)
+{
+ if (live.fate[r].status==DIRTY) {
+#if USE_LONG_DOUBLE
+ raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
+#else
+ raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
+#endif
+ live.fate[r].status=CLEAN;
+ }
+}
+
+/*
+ * Like f_tomem(), but uses the *_drop store variants and leaves r with
+ * status INMEM instead of CLEAN. Does nothing unless r is DIRTY.
+ */
+static void f_tomem_drop(int r)
+{
+ if (live.fate[r].status==DIRTY) {
+#if USE_LONG_DOUBLE
+ raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
+#else
+ raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
+#endif
+ live.fate[r].status=INMEM;
+ }
+}
+
+
+/* Non-zero if FPU virtual register r currently has status CLEAN or DIRTY. */
+static __inline__ int f_isinreg(int r)
+{
+ return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
+}
+
+/*
+ * Remove FPU virtual register r from its native register: write it back
+ * if necessary, drop it from the holds[] list and mark it INMEM.
+ * Does nothing if r is not in a register.
+ */
+static void f_evict(int r)
+{
+ int rr;
+
+ if (!f_isinreg(r))
+ return;
+ rr=live.fate[r].realreg;
+ /* the dropping store is only used when r is the sole occupant */
+ if (live.fat[rr].nholds==1)
+ f_tomem_drop(r);
+ else
+ f_tomem(r);
+
+ Dif (live.fat[rr].locked &&
+ live.fat[rr].nholds==1) {
+ write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
+ abort();
+ }
+
+ live.fat[rr].nholds--;
+ /* keep holds[] dense: move the last entry into the vacated slot */
+ if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
+ int topreg=live.fat[rr].holds[live.fat[rr].nholds];
+ int thisind=live.fate[r].realind;
+ live.fat[rr].holds[thisind]=topreg;
+ live.fate[topreg].realind=thisind;
+ }
+ live.fate[r].status=INMEM;
+ live.fate[r].realreg=-1;
+}
+
+/*
+ * Evict every FPU virtual register held by native FPU register r, walking
+ * the holds[] list from its last entry down to the first.
+ */
+static __inline__ void f_free_nreg(int r)
+{
+    for (int idx = live.fat[r].nholds - 1; idx >= 0; idx--)
+        f_evict(live.fat[r].holds[idx]);
+
+    Dif (live.fat[r].nholds!=0) {
+        write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
+        abort();
+    }
+}
+
+
+/* Use with care! Declares FPU register r CLEAN (if it is in a register)
+   without emitting a store, so unsaved contents are forgotten. */
+static __inline__ void f_isclean(int r)
+{
+ if (!f_isinreg(r))
+ return;
+ live.fate[r].status=CLEAN;
+}
+
+/* Detach FPU register r from its native register without writing it
+   back: r is declared clean first, so f_evict() emits no store. */
+static __inline__ void f_disassociate(int r)
+{
+ f_isclean(r);
+ f_evict(r);
+}
+
+
+
+/*
+ * Pick a native FPU register for FPU virtual register r and bind r to it.
+ *
+ * r           FPU virtual register
+ * willclobber non-zero if the caller overwrites the register; r then
+ *             becomes DIRTY and nothing is loaded. Otherwise r is loaded
+ *             from memory (unless UNDEF) and becomes CLEAN.
+ *
+ * Candidates are scanned from the highest index downwards; an unlocked
+ * register holding nothing wins immediately, otherwise the unlocked one
+ * with the smallest touched stamp is emptied and reused.
+ *
+ * Returns the chosen native FPU register.
+ */
+static int f_alloc_reg(int r, int willclobber)
+{
+ int bestreg;
+ uae_s32 when;
+ int i;
+ uae_s32 badness;
+ bestreg=-1;
+ when=2000000000;
+ for (i=N_FREGS;i--;) {
+ badness=live.fat[i].touched;
+ if (live.fat[i].nholds==0)
+ badness=0;
+
+ if (!live.fat[i].locked && badness<when) {
+ bestreg=i;
+ when=badness;
+ if (live.fat[i].nholds==0)
+ break;
+ }
+ }
+ Dif (bestreg==-1)
+ abort();
+
+ if (live.fat[bestreg].nholds>0) {
+ f_free_nreg(bestreg);
+ }
+ if (f_isinreg(r)) {
+ f_evict(r);
+ }
+
+ if (!willclobber) {
+ if (live.fate[r].status!=UNDEF) {
+#if USE_LONG_DOUBLE
+ raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
+#else
+ raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
+#endif
+ }
+ live.fate[r].status=CLEAN;
+ }
+ else {
+ live.fate[r].status=DIRTY;
+ }
+ /* register the new association in both directions */
+ live.fate[r].realreg=bestreg;
+ live.fate[r].realind=live.fat[bestreg].nholds;
+ live.fat[bestreg].touched=touchcnt++;
+ live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
+ live.fat[bestreg].nholds++;
+
+ return bestreg;
+}
+
+/* Drop one lock from native FPU register r (the lock is a counter). */
+static void f_unlock(int r)
+{
+ Dif (!live.fat[r].locked)
+ abort();
+ live.fat[r].locked--;
+}
+
+/* Add one lock to native FPU register r. */
+static void f_setlock(int r)
+{
+ live.fat[r].locked++;
+}
+
+/*
+ * Obtain a native FPU register holding FPU virtual register r for
+ * reading, loading it if it is not in a register. The returned register
+ * has its lock count incremented; release it with f_unlock().
+ */
+static __inline__ int f_readreg(int r)
+{
+    int answer = -1;
+
+    if (f_isinreg(r))
+        answer = live.fate[r].realreg;
+
+    /* either the value was in memory to start with, or it was evicted and
+       is in memory now */
+    if (answer < 0)
+        answer = f_alloc_reg(r, 0);
+
+    live.fat[answer].locked++;
+    live.fat[answer].touched = touchcnt++;
+    return answer;
+}
+
+/*
+ * Ensure that FPU virtual register r does not share its native FPU
+ * register with any other virtual register.
+ *
+ * r       FPU virtual register
+ * clobber non-zero if the caller overwrites r completely, in which case
+ *         the old value is not copied when r has to move
+ *
+ * If the co-residents are all non-dirty and the register is not locked,
+ * they are evicted and r keeps the register; otherwise r is moved to a
+ * freshly allocated register.
+ */
+static __inline__ void f_make_exclusive(int r, int clobber)
+{
+ freg_status oldstate;
+ int rr=live.fate[r].realreg;
+ int nr;
+ int nind;
+ int ndirt=0;
+ int i;
+
+ if (!f_isinreg(r))
+ return;
+ if (live.fat[rr].nholds==1)
+ return;
+ for (i=0;i<live.fat[rr].nholds;i++) {
+ int vr=live.fat[rr].holds[i];
+ if (vr!=r && live.fate[vr].status==DIRTY)
+ ndirt++;
+ }
+ if (!ndirt && !live.fat[rr].locked) {
+ /* Everything else is clean, so let's keep this register */
+ for (i=0;i<live.fat[rr].nholds;i++) {
+ int vr=live.fat[rr].holds[i];
+ if (vr!=r) {
+ f_evict(vr);
+ i--; /* Try that index again! */
+ }
+ }
+ Dif (live.fat[rr].nholds!=1) {
+ write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
+ for (i=0;i<live.fat[rr].nholds;i++) {
+ write_log(" %d(%d,%d)",live.fat[rr].holds[i],
+ live.fate[live.fat[rr].holds[i]].realreg,
+ live.fate[live.fat[rr].holds[i]].realind);
+ }
+ write_log("\n");
+ abort();
+ }
+ return;
+ }
+
+ /* We have to split the register */
+ oldstate=live.fate[r];
+
+ f_setlock(rr); /* Make sure this doesn't go away */
+ /* Forget about r being in the register rr */
+ f_disassociate(r);
+ /* Get a new register, that we will clobber completely */
+ nr=f_alloc_reg(r,1);
+ nind=live.fate[r].realind;
+ if (!clobber)
+ raw_fmov_rr(nr,rr); /* Make another copy */
+ live.fate[r]=oldstate; /* Keep all the old state info */
+ live.fate[r].realreg=nr;
+ live.fate[r].realind=nind;
+ f_unlock(rr);
+}
+
+
+/*
+ * Obtain a native FPU register for overwriting FPU virtual register r.
+ * r is made exclusive and marked DIRTY. The returned register has its
+ * lock count incremented; release it with f_unlock().
+ */
+static __inline__ int f_writereg(int r)
+{
+    int answer = -1;
+
+    f_make_exclusive(r, 1);
+    if (f_isinreg(r))
+        answer = live.fate[r].realreg;
+    if (answer < 0)
+        answer = f_alloc_reg(r, 1);
+
+    live.fate[r].status = DIRTY;
+    live.fat[answer].locked++;
+    live.fat[answer].touched = touchcnt++;
+    return answer;
+}
+
+/*
+ * Obtain a native FPU register holding FPU virtual register r for a
+ * read-modify-write access. r is made exclusive (keeping its value),
+ * loaded if necessary and marked DIRTY. The returned register has its
+ * lock count incremented; release it with f_unlock().
+ */
+static int f_rmw(int r)
+{
+ int n;
+
+ f_make_exclusive(r,0);
+ if (f_isinreg(r)) {
+ n=live.fate[r].realreg;
+ }
+ else
+ n=f_alloc_reg(r,0);
+ live.fate[r].status=DIRTY;
+ live.fat[n].locked++;
+ live.fat[n].touched=touchcnt++;
+ return n;
+}
+
+/*
+ * Emit the transfer of the FPU condition of FP_RESULT into the flags via
+ * raw_fflags_into_flags(), then declare the flags live.
+ *
+ * tmp is a virtual register used as scratch: when
+ * FFLAG_NREG_CLOBBER_CONDITION holds it is bound to FFLAG_NREG around the
+ * raw call and forgotten afterwards.
+ */
+static void fflags_into_flags_internal(uae_u32 tmp)
+{
+ int r;
+
+ clobber_flags();
+ r=f_readreg(FP_RESULT);
+ if (FFLAG_NREG_CLOBBER_CONDITION) {
+ int tmp2=tmp;
+ tmp=writereg_specific(tmp,4,FFLAG_NREG);
+ raw_fflags_into_flags(r);
+ unlock2(tmp);
+ forget_about(tmp2);
+ }
+ else
+ raw_fflags_into_flags(r);
+ f_unlock(r);
+ live_flags();
+}
+
+
+
+
+/********************************************************************
+ * CPU functions exposed to gencomp. Both CREATE and EMIT time *
+ ********************************************************************/
+
+/*
+ * RULES FOR HANDLING REGISTERS:
+ *
+ * * In the function headers, order the parameters
+ * - 1st registers written to
+ * - 2nd read/modify/write registers
+ * - 3rd registers read from
+ * * Before calling raw_*, you must call readreg, writereg or rmw for
+ * each register
+ * * The order for this is
+ * - 1st call remove_offset for all registers written to with size<4
+ * - 2nd call readreg for all registers read without offset
+ * - 3rd call rmw for all rmw registers
+ * - 4th call readreg_offset for all registers that can handle offsets
+ * - 5th call get_offset for all the registers from the previous step
+ * - 6th call writereg for all written-to registers
+ * - 7th call raw_*
+ * - 8th unlock2 all registers that were locked
+ */
+
+/*
+ * Record the flag bookkeeping state: the stack copy is TRASH, the copy in
+ * the native flags is VALID, and the flags are marked as important.
+ */
+MIDFUNC(0,live_flags,(void))
+{
+ live.flags_on_stack=TRASH;
+ live.flags_in_flags=VALID;
+ live.flags_are_important=1;
+}
+MENDFUNC(0,live_flags,(void))
+
+/* Clear live.flags_are_important; no other flag state is touched here. */
+MIDFUNC(0,dont_care_flags,(void))
+{
+ live.flags_are_important=0;
+}
+MENDFUNC(0,dont_care_flags,(void))
+
+
+/*
+ * Store a condition result into the memory slot of FLAGX.
+ * FLAGX is evicted first, the flags are made live, and setcc_m then writes
+ * directly to live.state[FLAGX].mem using condition code 2
+ * (presumably "carry" — confirm against raw_setcc_m).
+ */
+MIDFUNC(0,duplicate_carry,(void))
+{
+ evict(FLAGX);
+ make_flags_live_internal();
+ COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
+ log_vwrite(FLAGX);
+}
+MENDFUNC(0,duplicate_carry,(void))
+
+/*
+ * Reload the native carry from FLAGX.
+ * Two strategies, selected by have_rat_stall:
+ *  - set (P6 core): rotate the byte of FLAGX by 8 and mark FLAGX clean;
+ *    this avoids the partial stall the bit test creates, but is slow on
+ *    non-P6 cores.
+ *  - clear (not a P6 core, i.e. no partial stalls): bit-test bit 0 of FLAGX.
+ */
+MIDFUNC(0,restore_carry,(void))
+{
+    if (have_rat_stall) {
+        COMPCALL(rol_b_ri)(FLAGX,8);
+        isclean(FLAGX);
+        return;
+    }
+    bt_l_ri_noclobber(FLAGX,0);
+}
+MENDFUNC(0,restore_carry,(void))
+
+/*
+ * Set needflags.  While it is set, the *_ri shift/rotate wrappers in this
+ * file no longer skip zero-count operations or fold constants.
+ */
+MIDFUNC(0,start_needflags,(void))
+{
+ needflags=1;
+}
+MENDFUNC(0,start_needflags,(void))
+
+/* Clear needflags, the counterpart of start_needflags. */
+MIDFUNC(0,end_needflags,(void))
+{
+ needflags=0;
+}
+MENDFUNC(0,end_needflags,(void))
+
+/* MIDFUNC entry point that simply forwards to make_flags_live_internal(). */
+MIDFUNC(0,make_flags_live,(void))
+{
+ make_flags_live_internal();
+}
+MENDFUNC(0,make_flags_live,(void))
+
+/*
+ * MIDFUNC entry point for fflags_into_flags_internal(); tmp is passed on
+ * unchanged as the scratch register.
+ */
+MIDFUNC(1,fflags_into_flags,(W2 tmp))
+{
+ clobber_flags(); /* the internal helper calls clobber_flags() again itself */
+ fflags_into_flags_internal(tmp);
+}
+MENDFUNC(1,fflags_into_flags,(W2 tmp))
+
+
+/*
+ * Bit-test bit i of register r (immediate bit number).
+ * r is only read; it is locked, tested via raw_bt_l_ri and unlocked.
+ */
+MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
+{
+    /* For bit numbers below 16 only 2 bytes of r are requested. */
+    const int size=(i<16) ? 2 : 4;
+
+    CLOBBER_BT;
+    r=readreg(r,size);
+    raw_bt_l_ri(r,i);
+    unlock2(r);
+}
+MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
+
+MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
+{
+ CLOBBER_BT;
+ r=readreg(r,4);
+ b=readreg(b,4);
+ raw_bt_l_rr(r,b);
+ unlock2(r);
+ unlock2(b);
+}
+MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
+
+/*
+ * Apply raw_btc_l_ri to register r with immediate bit number i.
+ * r is locked read-modify-write for the duration of the raw call.
+ */
+MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
+{
+    /* For bit numbers below 16 only 2 bytes of r are requested. */
+    const int size=(i<16) ? 2 : 4;
+
+    CLOBBER_BT;
+    r=rmw(r,size,size);
+    raw_btc_l_ri(r,i);
+    unlock2(r);
+}
+MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
+{
+ CLOBBER_BT;
+ b=readreg(b,4);
+ r=rmw(r,4,4);
+ raw_btc_l_rr(r,b);
+ unlock2(r);
+ unlock2(b);
+}
+MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
+
+
+/*
+ * Apply raw_btr_l_ri to register r with immediate bit number i.
+ * r is locked read-modify-write for the duration of the raw call.
+ */
+MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
+{
+    /* For bit numbers below 16 only 2 bytes of r are requested. */
+    const int size=(i<16) ? 2 : 4;
+
+    CLOBBER_BT;
+    r=rmw(r,size,size);
+    raw_btr_l_ri(r,i);
+    unlock2(r);
+}
+MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
+{
+ CLOBBER_BT;
+ b=readreg(b,4);
+ r=rmw(r,4,4);
+ raw_btr_l_rr(r,b);
+ unlock2(r);
+ unlock2(b);
+}
+MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
+
+
+/*
+ * Apply raw_bts_l_ri to register r with immediate bit number i.
+ * r is locked read-modify-write for the duration of the raw call.
+ */
+MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
+{
+    /* For bit numbers below 16 only 2 bytes of r are requested. */
+    const int size=(i<16) ? 2 : 4;
+
+    CLOBBER_BT;
+    r=rmw(r,size,size);
+    raw_bts_l_ri(r,i);
+    unlock2(r);
+}
+MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
+{
+ CLOBBER_BT;
+ b=readreg(b,4);
+ r=rmw(r,4,4);
+ raw_bts_l_rr(r,b);
+ unlock2(r);
+ unlock2(b);
+}
+MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
+
+MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,4);
+ raw_mov_l_rm(d,s);
+ unlock2(d);
+}
+MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
+
+
+MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
+{
+ r=readreg(r,4);
+ raw_call_r(r);
+ unlock2(r);
+}
+MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
+
+MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
+{
+ CLOBBER_SUB;
+ raw_sub_l_mi(d,s) ;
+}
+MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
+
+MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
+{
+ CLOBBER_MOV;
+ raw_mov_l_mi(d,s) ;
+}
+MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
+
+MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
+{
+ CLOBBER_MOV;
+ raw_mov_w_mi(d,s) ;
+}
+MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
+
+MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
+{
+ CLOBBER_MOV;
+ raw_mov_b_mi(d,s) ;
+}
+MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
+
+MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROL;
+ r=rmw(r,1,1);
+ raw_rol_b_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
+
+MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROL;
+ r=rmw(r,2,2);
+ raw_rol_w_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
+
+MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_ROL;
+ r=rmw(r,4,4);
+ raw_rol_l_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
+
+/*
+ * Rotate-left of the long register d by the count held in register r.
+ * A constant count is forwarded to rol_l_ri (truncated to 8 bits).
+ * Otherwise r is locked into SHIFTCOUNT_NREG first, then d is locked
+ * read-modify-write, raw_rol_l_rr is emitted and both are unlocked.
+ */
+MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
+{
+    if (isconst(r)) {
+        COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
+        return;
+    }
+    CLOBBER_ROL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    Dif (r!=1) {
+        /* Name the long-sized primitive this wrapper actually emits. */
+        write_log("Illegal register %d in raw_rol_l\n",r);
+        abort();
+    }
+    raw_rol_l_rr(d,r);
+    unlock2(r);
+    unlock2(d);
+}
+MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
+
+/*
+ * Rotate-left of the word register d by the count held in register r.
+ * A constant count is forwarded to rol_w_ri (truncated to 8 bits).
+ * Otherwise r is locked into SHIFTCOUNT_NREG first, then d is locked
+ * read-modify-write, raw_rol_w_rr is emitted and both are unlocked.
+ */
+MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+    if (isconst(r)) {
+        COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
+        return;
+    }
+    CLOBBER_ROL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    Dif (r!=1) {
+        /* Name the word-sized primitive this wrapper actually emits. */
+        write_log("Illegal register %d in raw_rol_w\n",r);
+        abort();
+    }
+    raw_rol_w_rr(d,r);
+    unlock2(r);
+    unlock2(d);
+}
+MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
+
+MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_ROL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ Dif (r!=1) {
+ write_log("Illegal register %d in raw_rol_b\n",r);
+ abort();
+ }
+ raw_rol_b_rr(d,r) ;
+ unlock2(r);
+ unlock2(d);
+}
+MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
+
+
+/*
+ * Logical shift-left of the long register d by the count held in r.
+ * A constant count is forwarded to shll_l_ri (truncated to 8 bits).
+ * Otherwise r is locked into SHIFTCOUNT_NREG first, then d is locked
+ * read-modify-write, raw_shll_l_rr is emitted and both are unlocked.
+ */
+MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
+{
+    if (isconst(r)) {
+        COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
+        return;
+    }
+    CLOBBER_SHLL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    Dif (r!=1) {
+        /* Previously reported "raw_rol_b", which is a different primitive. */
+        write_log("Illegal register %d in raw_shll_l\n",r);
+        abort();
+    }
+    raw_shll_l_rr(d,r);
+    unlock2(r);
+    unlock2(d);
+}
+MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
+
+/*
+ * Logical shift-left of the word register d by the count held in r.
+ * A constant count is forwarded to shll_w_ri (truncated to 8 bits).
+ * Otherwise r is locked into SHIFTCOUNT_NREG first, then d is locked
+ * read-modify-write, raw_shll_w_rr is emitted and both are unlocked.
+ */
+MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+    if (isconst(r)) {
+        COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
+        return;
+    }
+    CLOBBER_SHLL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    Dif (r!=1) {
+        /* Previously reported the byte-sized "raw_shll_b". */
+        write_log("Illegal register %d in raw_shll_w\n",r);
+        abort();
+    }
+    raw_shll_w_rr(d,r);
+    unlock2(r);
+    unlock2(d);
+}
+MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
+
+MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_SHLL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ Dif (r!=1) {
+ write_log("Illegal register %d in raw_shll_b\n",r);
+ abort();
+ }
+ raw_shll_b_rr(d,r) ;
+ unlock2(r);
+ unlock2(d);
+}
+MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
+
+
+/*
+ * Emit raw_ror_b_ri on register r (size 1) with immediate count i.
+ * NOTE(review): r is locked with rmw(), i.e. read and written, although the
+ * signature annotates it R1 — presumably RW1 was intended (compare
+ * rol_b_ri); confirm against the header declaration.
+ */
+MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
+{
+ if (!i && !needflags)
+ return; /* a zero count is only emitted when flags are needed */
+ CLOBBER_ROR;
+ r=rmw(r,1,1);
+ raw_ror_b_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
+
+/*
+ * Emit raw_ror_w_ri on register r (size 2) with immediate count i.
+ * NOTE(review): r is locked with rmw(), i.e. read and written, although the
+ * signature annotates it R2 — presumably RW2 was intended (compare
+ * rol_w_ri); confirm against the header declaration.
+ */
+MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
+{
+ if (!i && !needflags)
+ return; /* a zero count is only emitted when flags are needed */
+ CLOBBER_ROR;
+ r=rmw(r,2,2);
+ raw_ror_w_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
+
+/*
+ * Emit raw_ror_l_ri on register r (size 4) with immediate count i.
+ * NOTE(review): r is locked with rmw(), i.e. read and written, although the
+ * signature annotates it R4 — presumably RW4 was intended (compare
+ * rol_l_ri); confirm against the header declaration.
+ */
+MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
+{
+ if (!i && !needflags)
+ return; /* a zero count is only emitted when flags are needed */
+ CLOBBER_ROR;
+ r=rmw(r,4,4);
+ raw_ror_l_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
+
+/*
+ * Emit raw_ror_l_rr on long register d with the count held in register r.
+ * A constant count is forwarded to ror_l_ri (truncated to 8 bits);
+ * otherwise r is locked into SHIFTCOUNT_NREG before d is locked.
+ * NOTE(review): d is locked with rmw() although annotated R4 — presumably
+ * RW4 was intended.  Unlike the rol/shift *_rr wrappers there is no Dif
+ * sanity check on r here.
+ */
+MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_ROR;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,4,4);
+ raw_ror_l_rr(d,r) ;
+ unlock2(r);
+ unlock2(d);
+}
+MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
+
+/*
+ * Emit raw_ror_w_rr on word register d with the count held in register r.
+ * A constant count is forwarded to ror_w_ri (truncated to 8 bits);
+ * otherwise r is locked into SHIFTCOUNT_NREG before d is locked.
+ * NOTE(review): d is locked with rmw() although annotated R2 — presumably
+ * RW2 was intended.
+ */
+MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+ CLOBBER_ROR;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,2,2);
+ raw_ror_w_rr(d,r) ;
+ unlock2(r);
+ unlock2(d);
+}
+MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
+
+/*
+ * Emit raw_ror_b_rr on byte register d with the count held in register r.
+ * A constant count is forwarded to ror_b_ri (truncated to 8 bits);
+ * otherwise r is locked into SHIFTCOUNT_NREG before d is locked.
+ * NOTE(review): d is locked with rmw() although annotated R1 — presumably
+ * RW1 was intended.
+ */
+MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
+{
+ if (isconst(r)) {
+ COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_ROR;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ raw_ror_b_rr(d,r) ;
+ unlock2(r);
+ unlock2(d);
+}
+MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
+
+/*
+ * Logical shift-right of the long register d by the count held in r.
+ * A constant count is forwarded to shrl_l_ri (truncated to 8 bits).
+ * Otherwise r is locked into SHIFTCOUNT_NREG first, then d is locked
+ * read-modify-write, raw_shrl_l_rr is emitted and both are unlocked.
+ */
+MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
+{
+    if (isconst(r)) {
+        COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
+        return;
+    }
+    CLOBBER_SHRL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    Dif (r!=1) {
+        /* Previously reported "raw_rol_b", which is a different primitive. */
+        write_log("Illegal register %d in raw_shrl_l\n",r);
+        abort();
+    }
+    raw_shrl_l_rr(d,r);
+    unlock2(r);
+    unlock2(d);
+}
+MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
+
+/*
+ * Logical shift-right of the word register d by the count held in r.
+ * A constant count is forwarded to shrl_w_ri (truncated to 8 bits).
+ * Otherwise r is locked into SHIFTCOUNT_NREG first, then d is locked
+ * read-modify-write, raw_shrl_w_rr is emitted and both are unlocked.
+ */
+MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+    if (isconst(r)) {
+        COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
+        return;
+    }
+    CLOBBER_SHRL;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    Dif (r!=1) {
+        /* Previously reported the byte-sized "raw_shrl_b". */
+        write_log("Illegal register %d in raw_shrl_w\n",r);
+        abort();
+    }
+    raw_shrl_w_rr(d,r);
+    unlock2(r);
+    unlock2(d);
+}
+MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
+
+MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_SHRL;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ Dif (r!=1) {
+ write_log("Illegal register %d in raw_shrl_b\n",r);
+ abort();
+ }
+ raw_shrl_b_rr(d,r) ;
+ unlock2(r);
+ unlock2(d);
+}
+MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
+
+
+
+/*
+ * Logical shift-left of the long register r by the immediate count i.
+ *  - A zero count is skipped unless flags are needed.
+ *  - If r currently holds a known constant and flags are not needed, the
+ *    shift is folded into that constant and no code is emitted.
+ *  - Otherwise r is locked read-modify-write and raw_shll_l_ri is emitted.
+ */
+MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
+{
+    if (!i && !needflags)
+        return;
+    if (isconst(r) && !needflags) {
+        /* shll_l_rr forwards counts of up to 255.  Shifting a 32-bit value
+         * by 32 or more is undefined in C, whereas the emitted x86 shift
+         * only looks at the low five bits of its count; mask so the folded
+         * result matches what the generated code would have produced. */
+        live.state[r].val<<=(i&31);
+        return;
+    }
+    CLOBBER_SHLL;
+    r=rmw(r,4,4);
+    raw_shll_l_ri(r,i);
+    unlock2(r);
+}
+MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHLL;
+ r=rmw(r,2,2);
+ raw_shll_w_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
+
+MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHLL;
+ r=rmw(r,1,1);
+ raw_shll_b_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
+
+/*
+ * Logical shift-right of the long register r by the immediate count i.
+ *  - A zero count is skipped unless flags are needed.
+ *  - If r currently holds a known constant and flags are not needed, the
+ *    shift is folded into that constant and no code is emitted.
+ *  - Otherwise r is locked read-modify-write and raw_shrl_l_ri is emitted.
+ */
+MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
+{
+    if (!i && !needflags)
+        return;
+    if (isconst(r) && !needflags) {
+        /* shrl_l_rr forwards counts of up to 255.  Shifting a 32-bit value
+         * by 32 or more is undefined in C, whereas the emitted x86 shift
+         * only looks at the low five bits of its count; mask so the folded
+         * result matches what the generated code would have produced. */
+        live.state[r].val>>=(i&31);
+        return;
+    }
+    CLOBBER_SHRL;
+    r=rmw(r,4,4);
+    raw_shrl_l_ri(r,i);
+    unlock2(r);
+}
+MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRL;
+ r=rmw(r,2,2);
+ raw_shrl_w_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
+
+MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRL;
+ r=rmw(r,1,1);
+ raw_shrl_b_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
+
+MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRA;
+ r=rmw(r,4,4);
+ raw_shra_l_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
+
+MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRA;
+ r=rmw(r,2,2);
+ raw_shra_w_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
+
+MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
+{
+ if (!i && !needflags)
+ return;
+ CLOBBER_SHRA;
+ r=rmw(r,1,1);
+ raw_shra_b_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
+
+/*
+ * Arithmetic shift-right of the long register d by the count held in r.
+ * A constant count is forwarded to shra_l_ri (truncated to 8 bits).
+ * Otherwise r is locked into SHIFTCOUNT_NREG first, then d is locked
+ * read-modify-write, raw_shra_l_rr is emitted and both are unlocked.
+ */
+MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
+{
+    if (isconst(r)) {
+        COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
+        return;
+    }
+    CLOBBER_SHRA;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,4,4);
+    Dif (r!=1) {
+        /* Previously reported "raw_rol_b", which is a different primitive. */
+        write_log("Illegal register %d in raw_shra_l\n",r);
+        abort();
+    }
+    raw_shra_l_rr(d,r);
+    unlock2(r);
+    unlock2(d);
+}
+MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
+
+/*
+ * Arithmetic shift-right of the word register d by the count held in r.
+ * A constant count is forwarded to shra_w_ri (truncated to 8 bits).
+ * Otherwise r is locked into SHIFTCOUNT_NREG first, then d is locked
+ * read-modify-write, raw_shra_w_rr is emitted and both are unlocked.
+ */
+MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+    if (isconst(r)) {
+        COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
+        return;
+    }
+    CLOBBER_SHRA;
+    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+    d=rmw(d,2,2);
+    Dif (r!=1) {
+        /* Previously reported the byte-sized "raw_shra_b". */
+        write_log("Illegal register %d in raw_shra_w\n",r);
+        abort();
+    }
+    raw_shra_w_rr(d,r);
+    unlock2(r);
+    unlock2(d);
+}
+MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
+
+MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
+{ /* Can only do this with r==1, i.e. cl */
+
+ if (isconst(r)) {
+ COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
+ return;
+ }
+
+ CLOBBER_SHRA;
+ r=readreg_specific(r,1,SHIFTCOUNT_NREG);
+ d=rmw(d,1,1);
+ Dif (r!=1) {
+ write_log("Illegal register %d in raw_shra_b\n",r);
+ abort();
+ }
+ raw_shra_b_rr(d,r) ;
+ unlock2(r);
+ unlock2(d);
+}
+MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
+
+
+MIDFUNC(2,setcc,(W1 d, IMM cc))
+{
+ CLOBBER_SETCC;
+ d=writereg(d,1);
+ raw_setcc(d,cc);
+ unlock2(d);
+}
+MENDFUNC(2,setcc,(W1 d, IMM cc))
+
+MIDFUNC(2,setcc_m,(IMM d, IMM cc))
+{
+ CLOBBER_SETCC;
+ raw_setcc_m(d,cc);
+}
+MENDFUNC(2,setcc_m,(IMM d, IMM cc))
+
+MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
+{
+ if (d==s)
+ return;
+ CLOBBER_CMOV;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+ raw_cmov_b_rr(d,s,cc);
+ unlock2(s);
+ unlock2(d);
+}
+MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
+
+MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
+{
+ if (d==s)
+ return;
+ CLOBBER_CMOV;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+ raw_cmov_w_rr(d,s,cc);
+ unlock2(s);
+ unlock2(d);
+}
+MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
+
+MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
+{
+ if (d==s)
+ return;
+ CLOBBER_CMOV;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+ raw_cmov_l_rr(d,s,cc);
+ unlock2(s);
+ unlock2(d);
+}
+MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
+
+MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
+{
+ CLOBBER_CMOV;
+ d=rmw(d,4,4);
+ raw_cmov_l_rm(d,s,cc);
+ unlock2(d);
+}
+MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
+
+/*
+ * Emit raw_bsf_l_rr(d, s): s is locked for reading, d for writing.
+ * NOTE(review): s is annotated W4 in the signature but is only read here
+ * (readreg) — presumably R4 was intended; confirm against the header.
+ */
+MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
+{
+ CLOBBER_BSF;
+ s = readreg(s, 4);
+ d = writereg(d, 4);
+ raw_bsf_l_rr(d, s);
+ unlock2(s);
+ unlock2(d);
+}
+MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
+
+/* Set the Z flag depending on the value in s. Note that the
+ value has to be 0 or -1 (or, more precisely, for non-zero
+ values, bit 14 must be set)! */
+MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
+{
+ CLOBBER_BSF;
+ s=rmw_specific(s,4,4,FLAG_NREG3);
+ tmp=writereg(tmp,4);
+ raw_flags_set_zero(s, tmp);
+ unlock2(tmp);
+ unlock2(s);
+}
+MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
+
+MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
+{
+ CLOBBER_MUL;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+ raw_imul_32_32(d,s);
+ unlock2(s);
+ unlock2(d);
+}
+MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
+
+MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
+{
+ CLOBBER_MUL;
+ s=rmw_specific(s,4,4,MUL_NREG2);
+ d=rmw_specific(d,4,4,MUL_NREG1);
+ raw_imul_64_32(d,s);
+ unlock2(s);
+ unlock2(d);
+}
+MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
+
+MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
+{
+ CLOBBER_MUL;
+ s=rmw_specific(s,4,4,MUL_NREG2);
+ d=rmw_specific(d,4,4,MUL_NREG1);
+ raw_mul_64_32(d,s);
+ unlock2(s);
+ unlock2(d);
+}
+MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
+
+MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
+{
+ CLOBBER_MUL;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+ raw_mul_32_32(d,s);
+ unlock2(s);
+ unlock2(d);
+}
+MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
+
+#if SIZEOF_VOID_P == 8
+/*
+ * d = s, sign-extended from 32 bits (only built when SIZEOF_VOID_P == 8).
+ * A constant s is folded into a constant d without emitting code.
+ * NOTE(review): s is annotated R2 but is read with size 4 below —
+ * presumably R4 was intended.
+ */
+MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
+{
+ int isrmw;
+
+ if (isconst(s)) {
+ set_const(d,(uae_s32)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SE32;
+ isrmw=(s==d);
+ if (!isrmw) {
+ s=readreg(s,4);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+ are in trouble! */
+ s=d=rmw(s,4,4);
+ }
+ raw_sign_extend_32_rr(d,s);
+ if (!isrmw) {
+ unlock2(d);
+ unlock2(s);
+ }
+ else {
+ unlock2(s); /* s and d are the same native register: unlock once */
+ }
+}
+MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
+#endif
+
+/*
+ * d = the low 16 bits of s, sign-extended to 32 bits.
+ * A constant s is folded into a constant d without emitting code.
+ * When s and d are the same register a single rmw lock is taken instead
+ * of separate read and write locks.
+ */
+MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
+{
+ int isrmw;
+
+ if (isconst(s)) {
+ set_const(d,(uae_s32)(uae_s16)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SE16;
+ isrmw=(s==d);
+ if (!isrmw) {
+ s=readreg(s,2);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+ are in trouble! */
+ s=d=rmw(s,4,2);
+ }
+ raw_sign_extend_16_rr(d,s);
+ if (!isrmw) {
+ unlock2(d);
+ unlock2(s);
+ }
+ else {
+ unlock2(s); /* s and d are the same native register: unlock once */
+ }
+}
+MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
+
+/*
+ * d = the low 8 bits of s, sign-extended to 32 bits.
+ * A constant s is folded into a constant d without emitting code.
+ * When s and d are the same register a single rmw lock is taken instead
+ * of separate read and write locks.
+ */
+MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
+{
+ int isrmw;
+
+ if (isconst(s)) {
+ set_const(d,(uae_s32)(uae_s8)live.state[s].val);
+ return;
+ }
+
+ isrmw=(s==d);
+ CLOBBER_SE8;
+ if (!isrmw) {
+ s=readreg(s,1);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+ are in trouble! */
+ s=d=rmw(s,4,1);
+ }
+
+ raw_sign_extend_8_rr(d,s);
+
+ if (!isrmw) {
+ unlock2(d);
+ unlock2(s);
+ }
+ else {
+ unlock2(s); /* s and d are the same native register: unlock once */
+ }
+}
+MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
+
+
+/*
+ * d = the low 16 bits of s, zero-extended to 32 bits.
+ * A constant s is folded into a constant d without emitting code.
+ * When s and d are the same register a single rmw lock is taken instead
+ * of separate read and write locks.
+ */
+MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
+{
+ int isrmw;
+
+ if (isconst(s)) {
+ set_const(d,(uae_u32)(uae_u16)live.state[s].val);
+ return;
+ }
+
+ isrmw=(s==d);
+ CLOBBER_ZE16;
+ if (!isrmw) {
+ s=readreg(s,2);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+ are in trouble! */
+ s=d=rmw(s,4,2);
+ }
+ raw_zero_extend_16_rr(d,s);
+ if (!isrmw) {
+ unlock2(d);
+ unlock2(s);
+ }
+ else {
+ unlock2(s); /* s and d are the same native register: unlock once */
+ }
+}
+MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
+
+/*
+ * d = the low 8 bits of s, zero-extended to 32 bits.
+ * A constant s is folded into a constant d without emitting code.
+ * When s and d are the same register a single rmw lock is taken instead
+ * of separate read and write locks.
+ */
+MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
+{
+ int isrmw;
+ if (isconst(s)) {
+ set_const(d,(uae_u32)(uae_u8)live.state[s].val);
+ return;
+ }
+
+ isrmw=(s==d);
+ CLOBBER_ZE8;
+ if (!isrmw) {
+ s=readreg(s,1);
+ d=writereg(d,4);
+ }
+ else { /* If we try to lock this twice, with different sizes, we
+ are in trouble! */
+ s=d=rmw(s,4,1);
+ }
+
+ raw_zero_extend_8_rr(d,s);
+
+ if (!isrmw) {
+ unlock2(d);
+ unlock2(s);
+ }
+ else {
+ unlock2(s); /* s and d are the same native register: unlock once */
+ }
+}
+MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
+
+MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
+{
+ if (d==s)
+ return;
+ if (isconst(s)) {
+ COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ d=writereg(d,1);
+ raw_mov_b_rr(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
+
+MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
+{
+ if (d==s)
+ return;
+ if (isconst(s)) {
+ COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,2);
+ d=writereg(d,2);
+ raw_mov_w_rr(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
+
+
+MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ d=writereg(d,4);
+
+ raw_mov_l_rrm_indexed(d,baser,index,factor);
+ unlock2(d);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
+
+MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ d=writereg(d,2);
+
+ raw_mov_w_rrm_indexed(d,baser,index,factor);
+ unlock2(d);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
+
+MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ d=writereg(d,1);
+
+ raw_mov_b_rrm_indexed(d,baser,index,factor);
+
+ unlock2(d);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
+
+
+/*
+ * Emit raw_mov_l_mrr_indexed(baser,index,factor,s) — presumably a store of
+ * the long in s to the address baser+index*factor (confirm against the raw
+ * primitive).  All three registers are only read.
+ */
+MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ s=readreg(s,4);
+
+ /* Dif-guarded sanity check: the source must not share a native register
+ with the base or the index. */
+ Dif (baser==s || index==s)
+ abort();
+
+
+ raw_mov_l_mrr_indexed(baser,index,factor,s);
+ unlock2(s);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
+
+MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+{
+ CLOBBER_MOV;
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+ s=readreg(s,2);
+
+ raw_mov_w_mrr_indexed(baser,index,factor,s);
+ unlock2(s);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
+
+MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+{
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ baser=readreg(baser,4);
+ index=readreg(index,4);
+
+ raw_mov_b_mrr_indexed(baser,index,factor,s);
+ unlock2(s);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
+
+
+MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+
+ raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
+ unlock2(s);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
+
+MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ s=readreg(s,2);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+
+ raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
+ unlock2(s);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
+
+MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+
+ raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
+ unlock2(s);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
+
+
+
+/* Read a long from base+baser+factor*index */
+MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+ d=writereg(d,4);
+ raw_mov_l_brrm_indexed(d,base,baser,index,factor);
+ unlock2(d);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
+
+
+MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ remove_offset(d,-1);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+ d=writereg(d,2);
+ raw_mov_w_brrm_indexed(d,base,baser,index,factor);
+ unlock2(d);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
+
+
+MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+{
+ int basereg=baser;
+ int indexreg=index;
+
+ CLOBBER_MOV;
+ remove_offset(d,-1);
+ baser=readreg_offset(baser,4);
+ index=readreg_offset(index,4);
+ base+=get_offset(basereg);
+ base+=factor*get_offset(indexreg);
+ d=writereg(d,1);
+ raw_mov_b_brrm_indexed(d,base,baser,index,factor);
+ unlock2(d);
+ unlock2(baser);
+ unlock2(index);
+}
+MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
+
+/* Read a long from base+factor*index */
+MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+{
+ int indexreg=index;
+
+ if (isconst(index)) {
+ COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
+ return;
+ }
+
+ CLOBBER_MOV;
+ index=readreg_offset(index,4);
+ base+=get_offset(indexreg)*factor;
+ d=writereg(d,4);
+
+ raw_mov_l_rm_indexed(d,base,index,factor);
+ unlock2(index);
+ unlock2(d);
+}
+MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
+
+
+/* read the long at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=writereg(d,4);
+
+ raw_mov_l_rR(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
+
+/* read the word at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=writereg(d,2);
+
+ raw_mov_w_rR(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
+
+/* read the byte at the address contained in s+offset and store in d.
+ A constant s turns this into an absolute-address load via mov_b_rm. */
+MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=writereg(d,1);
+
+ raw_mov_b_rR(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
+
+/* read the long at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
+{
+ int sreg=s;
+ if (isconst(s)) {
+ COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg_offset(s,4);
+ offset+=get_offset(sreg);
+ d=writereg(d,4);
+
+ raw_mov_l_brR(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
+
+/* read the word at the address contained in s+offset and store in d */
+MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
+{
+ int sreg=s;
+ if (isconst(s)) {
+ COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ remove_offset(d,-1);
+ s=readreg_offset(s,4);
+ offset+=get_offset(sreg);
+ d=writereg(d,2);
+
+ raw_mov_w_brR(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
+
+/* read the byte at the address contained in s+offset and store in d.
+ s is locked with readreg_offset, so its pending offset (get_offset) is
+ added to the displacement instead of being applied to the register. */
+MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
+{
+ int sreg=s; /* s is overwritten by the lock below; keep it for get_offset */
+ if (isconst(s)) {
+ COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
+ return;
+ }
+ CLOBBER_MOV;
+ remove_offset(d,-1);
+ s=readreg_offset(s,4);
+ offset+=get_offset(sreg);
+ d=writereg(d,1);
+
+ raw_mov_b_brR(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
+
+MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
+ return;
+ }
+
+ CLOBBER_MOV;
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_l_Ri(d,i,offset);
+ unlock2(d);
+}
+MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
+
+MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
+ return;
+ }
+
+ CLOBBER_MOV;
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_w_Ri(d,i,offset);
+ unlock2(d);
+}
+MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
+
+MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
+ return;
+ }
+
+ CLOBBER_MOV;
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_b_Ri(d,i,offset);
+ unlock2(d);
+}
+MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
+
+ /* Warning! OFFSET is byte sized only! */
+MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
+{
+ if (isconst(d)) {
+ COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
+ return;
+ }
+ if (isconst(s)) {
+ COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=readreg(d,4);
+
+ raw_mov_l_Rr(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
+
+MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
+{
+ if (isconst(d)) {
+ COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
+ return;
+ }
+ if (isconst(s)) {
+ COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,2);
+ d=readreg(d,4);
+ raw_mov_w_Rr(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
+
+MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
+{
+ if (isconst(d)) {
+ COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
+ return;
+ }
+ if (isconst(s)) {
+ COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ d=readreg(d,4);
+ raw_mov_b_Rr(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
+
+/*
+ * d = s + offset.
+ *  - A constant s folds into a constant load of d (mov_l_ri).
+ *  - With USE_OFFSET and d==s, the offset is only recorded via add_offset()
+ *    and no raw instruction is emitted here.
+ *  - Otherwise raw_lea_l_brr is emitted with s read-locked and d
+ *    write-locked.
+ */
+MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
+{
+ if (isconst(s)) {
+ COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
+ return;
+ }
+#if USE_OFFSET
+ if (d==s) {
+ add_offset(d,offset);
+ return;
+ }
+#endif
+ CLOBBER_LEA;
+ s=readreg(s,4);
+ d=writereg(d,4);
+ raw_lea_l_brr(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
+
+MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+{
+ if (!offset) {
+ COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
+ return;
+ }
+ CLOBBER_LEA;
+ s=readreg(s,4);
+ index=readreg(index,4);
+ d=writereg(d,4);
+
+ raw_lea_l_brr_indexed(d,s,index,factor,offset);
+ unlock2(d);
+ unlock2(index);
+ unlock2(s);
+}
+MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
+
+MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+{
+ CLOBBER_LEA;
+ s=readreg(s,4);
+ index=readreg(index,4);
+ d=writereg(d,4);
+
+ raw_lea_l_rr_indexed(d,s,index,factor);
+ unlock2(d);
+ unlock2(index);
+ unlock2(s);
+}
+MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
+
+/* write the long in s to the address contained in d+offset.
+ d is locked with readreg_offset, so its pending offset (get_offset) is
+ added to the displacement. */
+MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
+{
+ int dreg=d; /* d is overwritten by the lock below; keep it for get_offset */
+ if (isconst(d)) {
+ COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,4);
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+
+ raw_mov_l_bRr(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
+
+/* write the word in s to the address contained in d+offset.
+ d is locked with readreg_offset, so its pending offset (get_offset) is
+ added to the displacement. */
+MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
+{
+ int dreg=d; /* d is overwritten by the lock below; keep it for get_offset */
+
+ if (isconst(d)) {
+ COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,2);
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_w_bRr(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
+{
+ int dreg=d;
+ if (isconst(d)) {
+ COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+ d=readreg_offset(d,4);
+ offset+=get_offset(dreg);
+ raw_mov_b_bRr(d,s,offset);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+/*
+ * Byte-swap the long register r.
+ * If r currently holds a known constant the swap is applied to that
+ * constant with reverse32() and no code is emitted; otherwise r is locked
+ * read-modify-write and raw_bswap_32 is emitted.
+ */
+MIDFUNC(1,bswap_32,(RW4 r))
+{
+    if (isconst(r)) {
+        uae_u32 oldv=live.state[r].val;
+        live.state[r].val=reverse32(oldv);
+        return;
+    }
+
+    CLOBBER_SW32;
+    r=rmw(r,4,4);
+    raw_bswap_32(r);
+    unlock2(r);
+}
+MENDFUNC(1,bswap_32,(RW4 r))
+
+/*
+ * Swap the two low bytes of register r.
+ * If r currently holds a known constant the swap is applied to that
+ * constant and no code is emitted; otherwise r is locked read-modify-write
+ * with size 2 and raw_bswap_16 is emitted.
+ */
+MIDFUNC(1,bswap_16,(RW2 r))
+{
+ if (isconst(r)) {
+ uae_u32 oldv=live.state[r].val;
+ /* exchange bits 0-7 with bits 8-15; the upper 16 bits are kept as-is */
+ live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
+ (oldv&0xffff0000);
+ return;
+ }
+
+ CLOBBER_SW16;
+ r=rmw(r,2,2);
+
+ raw_bswap_16(r);
+ unlock2(r);
+}
+MENDFUNC(1,bswap_16,(RW2 r))
+
+
+
+/*
+ * d = s (long), implemented as register aliasing rather than a copy.
+ * No raw_* move is issued in this function: after s has been locked into a
+ * native register, d is recorded as an additional register held by that
+ * same native register and marked DIRTY.
+ * A constant s is turned into a constant load of d instead.
+ */
+MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
+{
+ int olds;
+
+ if (d==s) { /* How pointless! */
+ return;
+ }
+ if (isconst(s)) {
+ COMPCALL(mov_l_ri)(d,live.state[s].val);
+ return;
+ }
+ olds=s; /* s becomes the native register below; keep the original index */
+ disassociate(d);
+ s=readreg_offset(s,4);
+ /* Attach d to native register s as its newest holder. */
+ live.state[d].realreg=s;
+ live.state[d].realind=live.nat[s].nholds;
+ live.state[d].val=live.state[olds].val;
+ live.state[d].validsize=4;
+ live.state[d].dirtysize=4;
+ set_status(d,DIRTY);
+
+ /* NOTE(review): no bounds check on holds[] is visible here — presumably
+ guaranteed elsewhere; confirm. */
+ live.nat[s].holds[live.nat[s].nholds]=d;
+ live.nat[s].nholds++;
+ log_clobberreg(d);
+ /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
+ d,s,live.state[d].realind,live.nat[s].nholds); */
+ unlock2(s);
+}
+MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
+/* Emit raw_mov_l_mr(d,s) with s fetched via readreg(s,4); d is an immediate (presumably an absolute address). */
+MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
+{
+ if (isconst(s)) { /* constant source: forward to mov_l_mi */
+ COMPCALL(mov_l_mi)(d,live.state[s].val);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,4);
+
+ raw_mov_l_mr(d,s);
+ unlock2(s);
+}
+MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
+
+/* Emit raw_mov_w_mr(d,s) with s fetched via readreg(s,2); d is an immediate (presumably an absolute address). */
+MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
+{
+ if (isconst(s)) { /* constant source: forward its low 16 bits to mov_w_mi */
+ COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
+ return;
+ }
+ CLOBBER_MOV;
+ s=readreg(s,2);
+
+ raw_mov_w_mr(d,s);
+ unlock2(s);
+}
+MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
+/* Emit raw_mov_w_rm(d,s) with d obtained via writereg(d,2); s is an immediate (presumably the source address). */
+MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,2);
+
+ raw_mov_w_rm(d,s);
+ unlock2(d);
+}
+MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
+/* Emit raw_mov_b_mr(d,s) with s fetched via readreg(s,1); d is an immediate (presumably an absolute address). */
+MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
+{
+ if (isconst(s)) { /* constant source: forward its low 8 bits to mov_b_mi */
+ COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_MOV;
+ s=readreg(s,1);
+
+ raw_mov_b_mr(d,s);
+ unlock2(s);
+}
+MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
+/* Emit raw_mov_b_rm(d,s) with d obtained via writereg(d,1); s is an immediate (presumably the source address). */
+MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,1);
+
+ raw_mov_b_rm(d,s);
+ unlock2(d);
+}
+MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
+/* Make d the constant s via set_const(); no raw_ emitter is called here. */
+MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
+{
+ set_const(d,s);
+ return;
+}
+MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
+/* Emit raw_mov_w_ri(d,s) with d obtained via writereg(d,2); unlike mov_l_ri this always emits code. */
+MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,2);
+
+ raw_mov_w_ri(d,s);
+ unlock2(d);
+}
+MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
+/* Emit raw_mov_b_ri(d,s) with d obtained via writereg(d,1); unlike mov_l_ri this always emits code. */
+MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
+{
+ CLOBBER_MOV;
+ d=writereg(d,1);
+
+ raw_mov_b_ri(d,s);
+ unlock2(d);
+}
+MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
+
+/* Emit raw_add_l_mi(d,s); both operands are immediates (d is presumably a memory address), so no vreg is touched. */
+MIDFUNC(2,add_l_mi,(IMM d, IMM s))
+{
+ CLOBBER_ADD;
+ raw_add_l_mi(d,s) ;
+}
+MENDFUNC(2,add_l_mi,(IMM d, IMM s))
+/* Emit raw_add_w_mi(d,s); both operands are immediates (d is presumably a memory address), so no vreg is touched. */
+MIDFUNC(2,add_w_mi,(IMM d, IMM s))
+{
+ CLOBBER_ADD;
+ raw_add_w_mi(d,s) ;
+}
+MENDFUNC(2,add_w_mi,(IMM d, IMM s))
+/* Emit raw_add_b_mi(d,s); both operands are immediates (d is presumably a memory address), so no vreg is touched. */
+MIDFUNC(2,add_b_mi,(IMM d, IMM s))
+{
+ CLOBBER_ADD;
+ raw_add_b_mi(d,s) ;
+}
+MENDFUNC(2,add_b_mi,(IMM d, IMM s))
+
+/* Emit raw_test_l_ri(d,i) on d fetched with readreg(d,4); d is only read. */
+MIDFUNC(2,test_l_ri,(R4 d, IMM i))
+{
+ CLOBBER_TEST;
+ d=readreg(d,4);
+
+ raw_test_l_ri(d,i);
+ unlock2(d);
+}
+MENDFUNC(2,test_l_ri,(R4 d, IMM i))
+/* Emit raw_test_l_rr(d,s); both operands are fetched with readreg (4 bytes) and only read. */
+MIDFUNC(2,test_l_rr,(R4 d, R4 s))
+{
+ CLOBBER_TEST;
+ d=readreg(d,4);
+ s=readreg(s,4);
+
+ raw_test_l_rr(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,test_l_rr,(R4 d, R4 s))
+/* Emit raw_test_w_rr(d,s); both operands are fetched with readreg (2 bytes) and only read. */
+MIDFUNC(2,test_w_rr,(R2 d, R2 s))
+{
+ CLOBBER_TEST;
+ d=readreg(d,2);
+ s=readreg(s,2);
+
+ raw_test_w_rr(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,test_w_rr,(R2 d, R2 s))
+/* Emit raw_test_b_rr(d,s); both operands are fetched with readreg (1 byte) and only read. */
+MIDFUNC(2,test_b_rr,(R1 d, R1 s))
+{
+ CLOBBER_TEST;
+ d=readreg(d,1);
+ s=readreg(s,1);
+
+ raw_test_b_rr(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,test_b_rr,(R1 d, R1 s))
+
+/* d &= i. Folded into the tracked constant when d is constant and flags are not needed; else emits raw_and_l_ri. */
+MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
+{
+ if (isconst(d) && !needflags) {
+ live.state[d].val &= i;
+ return;
+ }
+
+ CLOBBER_AND;
+ d=rmw(d,4,4);
+
+ raw_and_l_ri(d,i);
+ unlock2(d);
+}
+MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
+/* Emit raw_and_l(d,s): s is fetched with readreg, d with rmw (4 bytes); no constant folding is attempted. */
+MIDFUNC(2,and_l,(RW4 d, R4 s))
+{
+ CLOBBER_AND;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_and_l(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,and_l,(RW4 d, R4 s))
+/* Emit raw_and_w(d,s): s is fetched with readreg, d with rmw (2 bytes). */
+MIDFUNC(2,and_w,(RW2 d, R2 s))
+{
+ CLOBBER_AND;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_and_w(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,and_w,(RW2 d, R2 s))
+/* Emit raw_and_b(d,s): s is fetched with readreg, d with rmw (1 byte). */
+MIDFUNC(2,and_b,(RW1 d, R1 s))
+{
+ CLOBBER_AND;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_and_b(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,and_b,(RW1 d, R1 s))
+/* Emit raw_or_l_rm(d,s) on d fetched with rmw(d,4,4); s is an immediate (presumably the operand's address). */
+// gb-- used for making an fpcr value in compemu_fpp.cpp
+MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
+{
+ CLOBBER_OR;
+ d=rmw(d,4,4);
+
+ raw_or_l_rm(d,s);
+ unlock2(d);
+}
+MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
+/* d |= i. Folded into the tracked constant when d is constant and flags are not needed; else emits raw_or_l_ri. */
+MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
+{
+ if (isconst(d) && !needflags) {
+ live.state[d].val|=i;
+ return;
+ }
+ CLOBBER_OR;
+ d=rmw(d,4,4);
+
+ raw_or_l_ri(d,i);
+ unlock2(d);
+}
+MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
+/* d |= s. Folded at translation time only when both are constant and flags are not needed; else emits raw_or_l. */
+MIDFUNC(2,or_l,(RW4 d, R4 s))
+{
+ if (isconst(d) && isconst(s) && !needflags) {
+ live.state[d].val|=live.state[s].val;
+ return;
+ }
+ CLOBBER_OR;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_or_l(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,or_l,(RW4 d, R4 s))
+/* Emit raw_or_w(d,s): s is fetched with readreg, d with rmw (2 bytes). */
+MIDFUNC(2,or_w,(RW2 d, R2 s))
+{
+ CLOBBER_OR;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_or_w(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,or_w,(RW2 d, R2 s))
+/* Emit raw_or_b(d,s): s is fetched with readreg, d with rmw (1 byte). */
+MIDFUNC(2,or_b,(RW1 d, R1 s))
+{
+ CLOBBER_OR;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_or_b(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,or_b,(RW1 d, R1 s))
+/* Emit raw_adc_l(d,s): s is fetched with readreg, d with rmw (4 bytes); no constant folding is attempted. */
+MIDFUNC(2,adc_l,(RW4 d, R4 s))
+{
+ CLOBBER_ADC;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_adc_l(d,s);
+
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,adc_l,(RW4 d, R4 s))
+/* Emit raw_adc_w(d,s): s is fetched with readreg, d with rmw (2 bytes). */
+MIDFUNC(2,adc_w,(RW2 d, R2 s))
+{
+ CLOBBER_ADC;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_adc_w(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,adc_w,(RW2 d, R2 s))
+/* Emit raw_adc_b(d,s): s is fetched with readreg, d with rmw (1 byte). */
+MIDFUNC(2,adc_b,(RW1 d, R1 s))
+{
+ CLOBBER_ADC;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_adc_b(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,adc_b,(RW1 d, R1 s))
+/* d += s (long). A constant s is forwarded to add_l_ri; otherwise raw_add_l(d,s) is emitted. */
+MIDFUNC(2,add_l,(RW4 d, R4 s))
+{
+ if (isconst(s)) {
+ COMPCALL(add_l_ri)(d,live.state[s].val);
+ return;
+ }
+
+ CLOBBER_ADD;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_add_l(d,s);
+
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,add_l,(RW4 d, R4 s))
+/* d += s (word). A constant s is forwarded (low 16 bits) to add_w_ri; otherwise raw_add_w(d,s) is emitted. */
+MIDFUNC(2,add_w,(RW2 d, R2 s))
+{
+ if (isconst(s)) {
+ COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_ADD;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_add_w(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,add_w,(RW2 d, R2 s))
+/* d += s (byte). A constant s is forwarded (low 8 bits) to add_b_ri; otherwise raw_add_b(d,s) is emitted. */
+MIDFUNC(2,add_b,(RW1 d, R1 s))
+{
+ if (isconst(s)) {
+ COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_ADD;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_add_b(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,add_b,(RW1 d, R1 s))
+/* d -= i (long). The three shortcuts below apply only when flags are not needed; otherwise raw_sub_l_ri is emitted. */
+MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
+{
+ if (!i && !needflags) /* subtracting zero: nothing to do */
+ return;
+ if (isconst(d) && !needflags) { /* constant d: fold into the tracked value */
+ live.state[d].val-=i;
+ return;
+ }
+#if USE_OFFSET
+ if (!needflags) { /* record the adjustment via add_offset() instead of emitting code */
+ add_offset(d,-i);
+ return;
+ }
+#endif
+
+ CLOBBER_SUB;
+ d=rmw(d,4,4);
+
+ raw_sub_l_ri(d,i);
+ unlock2(d);
+}
+MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
+/* d -= i (word). No-op when i is zero and flags are not needed; otherwise emits raw_sub_w_ri. */
+MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+
+ CLOBBER_SUB;
+ d=rmw(d,2,2);
+
+ raw_sub_w_ri(d,i);
+ unlock2(d);
+}
+MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
+/* d -= i (byte). No-op when i is zero and flags are not needed; otherwise emits raw_sub_b_ri. */
+MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+
+ CLOBBER_SUB;
+ d=rmw(d,1,1);
+
+ raw_sub_b_ri(d,i);
+
+ unlock2(d);
+}
+MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
+/* d += i (long). The three shortcuts below apply only when flags are not needed; otherwise raw_add_l_ri is emitted. */
+MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
+{
+ if (!i && !needflags) /* adding zero: nothing to do */
+ return;
+ if (isconst(d) && !needflags) { /* constant d: fold into the tracked value */
+ live.state[d].val+=i;
+ return;
+ }
+#if USE_OFFSET
+ if (!needflags) { /* record the adjustment via add_offset() instead of emitting code */
+ add_offset(d,i);
+ return;
+ }
+#endif
+ CLOBBER_ADD;
+ d=rmw(d,4,4);
+ raw_add_l_ri(d,i);
+ unlock2(d);
+}
+MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
+/* d += i (word). No-op when i is zero and flags are not needed; otherwise emits raw_add_w_ri. */
+MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+
+ CLOBBER_ADD;
+ d=rmw(d,2,2);
+
+ raw_add_w_ri(d,i);
+ unlock2(d);
+}
+MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
+/* d += i (byte). No-op when i is zero and flags are not needed; otherwise emits raw_add_b_ri. */
+MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
+{
+ if (!i && !needflags)
+ return;
+
+ CLOBBER_ADD;
+ d=rmw(d,1,1);
+
+ raw_add_b_ri(d,i);
+
+ unlock2(d);
+}
+MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
+/* Emit raw_sbb_l(d,s): s is fetched with readreg, d with rmw (4 bytes); no constant folding is attempted. */
+MIDFUNC(2,sbb_l,(RW4 d, R4 s))
+{
+ CLOBBER_SBB;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_sbb_l(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,sbb_l,(RW4 d, R4 s))
+/* Emit raw_sbb_w(d,s): s is fetched with readreg, d with rmw (2 bytes). */
+MIDFUNC(2,sbb_w,(RW2 d, R2 s))
+{
+ CLOBBER_SBB;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_sbb_w(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,sbb_w,(RW2 d, R2 s))
+/* Emit raw_sbb_b(d,s): s is fetched with readreg, d with rmw (1 byte). */
+MIDFUNC(2,sbb_b,(RW1 d, R1 s))
+{
+ CLOBBER_SBB;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_sbb_b(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,sbb_b,(RW1 d, R1 s))
+/* d -= s (long). A constant s is forwarded to sub_l_ri; otherwise raw_sub_l(d,s) is emitted. */
+MIDFUNC(2,sub_l,(RW4 d, R4 s))
+{
+ if (isconst(s)) {
+ COMPCALL(sub_l_ri)(d,live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SUB;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_sub_l(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,sub_l,(RW4 d, R4 s))
+/* d -= s (word). A constant s is forwarded (low 16 bits) to sub_w_ri; otherwise raw_sub_w(d,s) is emitted. */
+MIDFUNC(2,sub_w,(RW2 d, R2 s))
+{
+ if (isconst(s)) {
+ COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SUB;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_sub_w(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,sub_w,(RW2 d, R2 s))
+/* d -= s (byte). A constant s is forwarded (low 8 bits) to sub_b_ri; otherwise raw_sub_b(d,s) is emitted. */
+MIDFUNC(2,sub_b,(RW1 d, R1 s))
+{
+ if (isconst(s)) {
+ COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
+ return;
+ }
+
+ CLOBBER_SUB;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_sub_b(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,sub_b,(RW1 d, R1 s))
+/* Emit raw_cmp_l(d,s); both operands are fetched with readreg (4 bytes) and only read. */
+MIDFUNC(2,cmp_l,(R4 d, R4 s))
+{
+ CLOBBER_CMP;
+ s=readreg(s,4);
+ d=readreg(d,4);
+
+ raw_cmp_l(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,cmp_l,(R4 d, R4 s))
+/* Emit raw_cmp_l_ri(r,i) on r fetched with readreg(r,4); r is only read. */
+MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
+{
+ CLOBBER_CMP;
+ r=readreg(r,4);
+
+ raw_cmp_l_ri(r,i);
+ unlock2(r);
+}
+MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
+/* Emit raw_cmp_w(d,s); both operands are fetched with readreg (2 bytes) and only read. */
+MIDFUNC(2,cmp_w,(R2 d, R2 s))
+{
+ CLOBBER_CMP;
+ s=readreg(s,2);
+ d=readreg(d,2);
+
+ raw_cmp_w(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,cmp_w,(R2 d, R2 s))
+/* Emit raw_cmp_b(d,s); both operands are fetched with readreg (1 byte) and only read. */
+MIDFUNC(2,cmp_b,(R1 d, R1 s))
+{
+ CLOBBER_CMP;
+ s=readreg(s,1);
+ d=readreg(d,1);
+
+ raw_cmp_b(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,cmp_b,(R1 d, R1 s))
+
+/* Emit raw_xor_l(d,s): s is fetched with readreg, d with rmw (4 bytes); no constant folding is attempted. */
+MIDFUNC(2,xor_l,(RW4 d, R4 s))
+{
+ CLOBBER_XOR;
+ s=readreg(s,4);
+ d=rmw(d,4,4);
+
+ raw_xor_l(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,xor_l,(RW4 d, R4 s))
+/* Emit raw_xor_w(d,s): s is fetched with readreg, d with rmw (2 bytes). */
+MIDFUNC(2,xor_w,(RW2 d, R2 s))
+{
+ CLOBBER_XOR;
+ s=readreg(s,2);
+ d=rmw(d,2,2);
+
+ raw_xor_w(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,xor_w,(RW2 d, R2 s))
+/* Emit raw_xor_b(d,s): s is fetched with readreg, d with rmw (1 byte). */
+MIDFUNC(2,xor_b,(RW1 d, R1 s))
+{
+ CLOBBER_XOR;
+ s=readreg(s,1);
+ d=rmw(d,1,1);
+
+ raw_xor_b(d,s);
+ unlock2(d);
+ unlock2(s);
+}
+MENDFUNC(2,xor_b,(RW1 d, R1 s))
+/* Call the routine whose address is in r with one argument in1 (isize bytes); REG_RESULT is then bound to out1 (osize bytes). */
+MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
+{
+ clobber_flags();
+ remove_all_offsets();
+ if (osize==4) {
+ if (out1!=in1 && out1!=r) { /* out1 is fully overwritten and not an input: its old value can be dropped */
+ COMPCALL(forget_about)(out1);
+ }
+ }
+ else {
+ tomem_c(out1);
+ }
+
+ in1=readreg_specific(in1,isize,REG_PAR1); /* argument must live in REG_PAR1 */
+ r=readreg(r,4);
+ prepare_for_call_1(); /* This should ensure that there won't be
+ any need for swapping nregs in prepare_for_call_2
+ */
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_push_l_r(in1);
+#endif
+ unlock2(in1);
+ unlock2(r);
+
+ prepare_for_call_2();
+ raw_call_r(r);
+
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_inc_sp(4); /* matches the single raw_push_l_r above */
+#endif
+
+
+ live.nat[REG_RESULT].holds[0]=out1; /* from here on REG_RESULT holds exactly one vreg: out1 */
+ live.nat[REG_RESULT].nholds=1;
+ live.nat[REG_RESULT].touched=touchcnt++;
+
+ live.state[out1].realreg=REG_RESULT;
+ live.state[out1].realind=0;
+ live.state[out1].val=0;
+ live.state[out1].validsize=osize;
+ live.state[out1].dirtysize=osize;
+ set_status(out1,DIRTY);
+}
+MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
+/* Call the routine whose address is in r with two arguments (in1 in REG_PAR1, in2 in REG_PAR2); no result is bound. */
+MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
+{
+ clobber_flags();
+ remove_all_offsets();
+ in1=readreg_specific(in1,isize1,REG_PAR1);
+ in2=readreg_specific(in2,isize2,REG_PAR2);
+ r=readreg(r,4);
+ prepare_for_call_1(); /* This should ensure that there won't be
+ any need for swapping nregs in prepare_for_call_2
+ */
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_push_l_r(in2); /* in2 is pushed before in1 */
+ raw_push_l_r(in1);
+#endif
+ unlock2(r);
+ unlock2(in1);
+ unlock2(in2);
+ prepare_for_call_2();
+ raw_call_r(r);
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_inc_sp(8); /* matches the two raw_push_l_r calls above */
+#endif
+}
+MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
+
+/* forget_about() takes a mid-layer register: it is detached from any native register, its val is zeroed and it becomes UNDEF */
+MIDFUNC(1,forget_about,(W4 r))
+{
+ if (isinreg(r))
+ disassociate(r);
+ live.state[r].val=0;
+ set_status(r,UNDEF);
+}
+MENDFUNC(1,forget_about,(W4 r))
+/* Emit raw_nop(); no register state is touched. */
+MIDFUNC(0,nop,(void))
+{
+ raw_nop();
+}
+MENDFUNC(0,nop,(void))
+
+/* FP counterpart of forget_about(): detach r if it is held in a register and mark it UNDEF. */
+MIDFUNC(1,f_forget_about,(FW r))
+{
+ if (f_isinreg(r))
+ f_disassociate(r);
+ live.fate[r].status=UNDEF;
+}
+MENDFUNC(1,f_forget_about,(FW r))
+/* Emit raw_fmov_pi(r) on r obtained via f_writereg (presumably loads the constant pi into r). */
+MIDFUNC(1,fmov_pi,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_pi(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_pi,(FW r))
+/* Emit raw_fmov_log10_2(r) on r obtained via f_writereg (presumably loads the constant log10(2)). */
+MIDFUNC(1,fmov_log10_2,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_log10_2(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_log10_2,(FW r))
+/* Emit raw_fmov_log2_e(r) on r obtained via f_writereg (presumably loads the constant log2(e)). */
+MIDFUNC(1,fmov_log2_e,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_log2_e(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_log2_e,(FW r))
+/* Emit raw_fmov_loge_2(r) on r obtained via f_writereg (presumably loads the constant ln(2)). */
+MIDFUNC(1,fmov_loge_2,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_loge_2(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_loge_2,(FW r))
+/* Emit raw_fmov_1(r) on r obtained via f_writereg (presumably loads the constant 1.0). */
+MIDFUNC(1,fmov_1,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_1(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_1,(FW r))
+/* Emit raw_fmov_0(r) on r obtained via f_writereg (presumably loads the constant 0.0). */
+MIDFUNC(1,fmov_0,(FW r))
+{
+ r=f_writereg(r);
+ raw_fmov_0(r);
+ f_unlock(r);
+}
+MENDFUNC(1,fmov_0,(FW r))
+/* Emit raw_fmov_rm(r,m): r is obtained via f_writereg, m is passed through as the memory source operand. */
+MIDFUNC(2,fmov_rm,(FW r, MEMR m))
+{
+ r=f_writereg(r);
+ raw_fmov_rm(r,m);
+ f_unlock(r);
+}
+MENDFUNC(2,fmov_rm,(FW r, MEMR m))
+/* Emit raw_fmovi_rm(r,m): r via f_writereg, m is the memory source (the 'i' variant presumably reads an integer). */
+MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
+{
+ r=f_writereg(r);
+ raw_fmovi_rm(r,m);
+ f_unlock(r);
+}
+MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
+/* Emit raw_fmovi_mr(m,r): r via f_readreg, m is the memory destination (the 'i' variant presumably stores an integer). */
+MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
+{
+ r=f_readreg(r);
+ raw_fmovi_mr(m,r);
+ f_unlock(r);
+}
+MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
+/* Emit raw_fmovs_rm(r,m): r via f_writereg, m is the memory source (the 's' variant presumably reads single precision). */
+MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
+{
+ r=f_writereg(r);
+ raw_fmovs_rm(r,m);
+ f_unlock(r);
+}
+MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
+/* Emit raw_fmovs_mr(m,r): r via f_readreg, m is the memory destination (the 's' variant presumably stores single precision). */
+MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
+{
+ r=f_readreg(r);
+ raw_fmovs_mr(m,r);
+ f_unlock(r);
+}
+MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
+/* Emit raw_fmov_ext_mr(m,r): r via f_readreg, m is the memory destination (the 'ext' variant presumably stores extended precision). */
+MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
+{
+ r=f_readreg(r);
+ raw_fmov_ext_mr(m,r);
+ f_unlock(r);
+}
+MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
+/* Emit raw_fmov_mr(m,r): r is fetched via f_readreg, m is passed through as the memory destination operand. */
+MIDFUNC(2,fmov_mr,(MEMW m, FR r))
+{
+ r=f_readreg(r);
+ raw_fmov_mr(m,r);
+ f_unlock(r);
+}
+MENDFUNC(2,fmov_mr,(MEMW m, FR r))
+/* Emit raw_fmov_ext_rm(r,m): r via f_writereg, m is the memory source (the 'ext' variant presumably reads extended precision). */
+MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
+{
+ r=f_writereg(r);
+ raw_fmov_ext_rm(r,m);
+ f_unlock(r);
+}
+MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
+/* FP move d=s. With USE_F_ALIAS d is aliased onto the register holding s (no code emitted); else raw_fmov_rr is emitted. */
+MIDFUNC(2,fmov_rr,(FW d, FR s))
+{
+ if (d==s) { /* How pointless! */
+ return;
+ }
+#if USE_F_ALIAS
+ f_disassociate(d);
+ s=f_readreg(s);
+ live.fate[d].realreg=s;
+ live.fate[d].realind=live.fat[s].nholds; /* d takes the next free slot of holds[] */
+ live.fate[d].status=DIRTY;
+ live.fat[s].holds[live.fat[s].nholds]=d;
+ live.fat[s].nholds++;
+ f_unlock(s);
+#else
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fmov_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+#endif
+}
+MENDFUNC(2,fmov_rr,(FW d, FR s))
+/* Emit raw_fldcw_m_indexed(index,base) with index fetched via readreg(index,4); base is passed through as an immediate. */
+MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
+{
+ index=readreg(index,4);
+
+ raw_fldcw_m_indexed(index,base);
+ unlock2(index);
+}
+MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
+/* Emit raw_ftst_r(r) on r fetched with f_readreg; r is only read. */
+MIDFUNC(1,ftst_r,(FR r))
+{
+ r=f_readreg(r);
+ raw_ftst_r(r);
+ f_unlock(r);
+}
+MENDFUNC(1,ftst_r,(FR r))
+/* Detach FP_RESULT via f_disassociate(); no raw_ emitter is called here. */
+MIDFUNC(0,dont_care_fflags,(void))
+{
+ f_disassociate(FP_RESULT);
+}
+MENDFUNC(0,dont_care_fflags,(void))
+/* Emit raw_fsqrt_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,fsqrt_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fsqrt_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fsqrt_rr,(FW d, FR s))
+/* Emit raw_fabs_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,fabs_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fabs_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fabs_rr,(FW d, FR s))
+/* Emit raw_fsin_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,fsin_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fsin_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fsin_rr,(FW d, FR s))
+/* Emit raw_fcos_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,fcos_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fcos_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fcos_rr,(FW d, FR s))
+/* Emit raw_ftwotox_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,ftwotox_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_ftwotox_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,ftwotox_rr,(FW d, FR s))
+/* Emit raw_fetox_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,fetox_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fetox_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fetox_rr,(FW d, FR s))
+/* Emit raw_frndint_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,frndint_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_frndint_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,frndint_rr,(FW d, FR s))
+/* Emit raw_flog2_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,flog2_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_flog2_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,flog2_rr,(FW d, FR s))
+/* Emit raw_fneg_rr(d,s): s is fetched with f_readreg, d with f_writereg (d is write-only). */
+MIDFUNC(2,fneg_rr,(FW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_writereg(d);
+ raw_fneg_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fneg_rr,(FW d, FR s))
+/* Emit raw_fadd_rr(d,s): s is fetched with f_readreg, d with f_rmw (d is both read and written). */
+MIDFUNC(2,fadd_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fadd_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fadd_rr,(FRW d, FR s))
+/* Emit raw_fsub_rr(d,s): s is fetched with f_readreg, d with f_rmw (d is both read and written). */
+MIDFUNC(2,fsub_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fsub_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fsub_rr,(FRW d, FR s))
+/* Emit raw_fcmp_rr(d,s); both operands are fetched with f_readreg and only read. */
+MIDFUNC(2,fcmp_rr,(FR d, FR s))
+{
+ d=f_readreg(d);
+ s=f_readreg(s);
+ raw_fcmp_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fcmp_rr,(FR d, FR s))
+/* Emit raw_fdiv_rr(d,s): s is fetched with f_readreg, d with f_rmw (d is both read and written). */
+MIDFUNC(2,fdiv_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fdiv_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fdiv_rr,(FRW d, FR s))
+/* Emit raw_frem_rr(d,s): s is fetched with f_readreg, d with f_rmw (d is both read and written). */
+MIDFUNC(2,frem_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_frem_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,frem_rr,(FRW d, FR s))
+/* Emit raw_frem1_rr(d,s): s is fetched with f_readreg, d with f_rmw (d is both read and written). */
+MIDFUNC(2,frem1_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_frem1_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,frem1_rr,(FRW d, FR s))
+/* Emit raw_fmul_rr(d,s): s is fetched with f_readreg, d with f_rmw (d is both read and written). */
+MIDFUNC(2,fmul_rr,(FRW d, FR s))
+{
+ s=f_readreg(s);
+ d=f_rmw(d);
+ raw_fmul_rr(d,s);
+ f_unlock(s);
+ f_unlock(d);
+}
+MENDFUNC(2,fmul_rr,(FRW d, FR s))
+
+/********************************************************************
+ * Support functions exposed to gencomp. CREATE time *
+ ********************************************************************/
+/* Run bsf_l_rr(r,r) when setzflg_uses_bsf is set, otherwise simulate_bsf(tmp,r); tmp is only used by the latter. */
+void set_zero(int r, int tmp)
+{
+ if (!setzflg_uses_bsf) {
+ simulate_bsf(tmp,r);
+ return;
+ }
+ bsf_l_rr(r,r);
+}
+/* Non-zero when KILLTHERAT and have_rat_stall are both set and vreg r is INMEM, CLEAN, ISCONST or has dirtysize 4. */
+int kill_rodent(int r)
+{
+ if (!(KILLTHERAT && have_rat_stall))
+ return 0;
+ if (live.state[r].status==INMEM || live.state[r].status==CLEAN)
+ return 1;
+ return live.state[r].status==ISCONST ||
+ live.state[r].dirtysize==4;
+}
+/* Return the tracked value of vreg r; inside the Dif check a non-constant r is logged and abort() is called. */
+uae_u32 get_const(int r)
+{
+ Dif (!isconst(r)) {
+ write_log("Register %d should be constant, but isn't\n",r);
+ abort();
+ }
+ return live.state[r].val;
+}
+/* Apply the accumulated m68k_pc_offset to PC_P and comp_pc_p, then reset it; does nothing when it is already zero. */
+void sync_m68k_pc(void)
+{
+ if (m68k_pc_offset) {
+ add_l_ri(PC_P,m68k_pc_offset);
+ comp_pc_p+=m68k_pc_offset;
+ m68k_pc_offset=0;
+ }
+}
+
+/********************************************************************
+ * Scratch registers management *
+ ********************************************************************/
+/* Memory slots for virtual registers: init_comp() points vregs >= 16 and the non-68k FP vregs at these arrays. */
+struct scratch_t {
+ uae_u32 regs[VREGS]; /* one slot per integer vreg, indexed by vreg number */
+ fpu_register fregs[VFREGS]; /* one slot per FP vreg, indexed by vreg number */
+};
+
+static scratch_t scratch;
+
+/********************************************************************
+ * Support functions exposed to newcpu *
+ ********************************************************************/
+/* Map a boolean to the literal "on" or "off" used in the start-up log messages. */
+static inline const char *str_on_off(bool b)
+{
+ if (b) return "on"; else return "off";
+}
+/* One-time JIT setup: reads the jit* prefs, probes the host CPU, picks the cache-flush routine and builds the compiler tables. */
+void compiler_init(void)
+{
+ static bool initialized = false; // one-shot guard: later calls return immediately
+ if (initialized)
+ return;
+
+#if JIT_DEBUG
+ // JIT debug mode ?
+ JITDebug = PrefsFindBool("jitdebug");
+#endif
+ write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
+
+#ifdef USE_JIT_FPU
+ // Use JIT compiler for FPU instructions ?
+ avoid_fpu = !PrefsFindBool("jitfpu");
+#else
+ // JIT FPU is always disabled
+ avoid_fpu = true;
+#endif
+ write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
+
+ // Get size of the translation cache (in KB)
+ cache_size = PrefsFindInt32("jitcachesize");
+ write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
+
+ // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
+ raw_init_cpu();
+ setzflg_uses_bsf = target_check_bsf();
+ write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
+ write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
+ write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
+
+ // Translation cache flush mechanism
+ lazy_flush = PrefsFindBool("jitlazyflush");
+ write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
+ flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
+
+ // Compiler features
+ write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
+ write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
+ write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
+#if USE_INLINING
+ follow_const_jumps = PrefsFindBool("jitinline");
+#endif
+ write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
+ write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
+
+ // Build compiler tables
+ build_comp();
+
+ initialized = true;
+
+#if PROFILE_UNTRANSLATED_INSNS
+ write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
+#endif
+
+#if PROFILE_COMPILE_TIME
+ write_log("<JIT compiler> : gather statistics on translation time\n");
+ emul_start_time = clock(); // paired with emul_end_time taken in compiler_exit()
+#endif
+}
+/* Release the translation cache and popallspace, then print whichever profiling statistics were compiled in. */
+void compiler_exit(void)
+{
+#if PROFILE_COMPILE_TIME
+ emul_end_time = clock();
+#endif
+
+ // Deallocate translation cache
+ if (compiled_code) {
+ vm_release(compiled_code, cache_size * 1024);
+ compiled_code = 0;
+ }
+
+ // Deallocate popallspace
+ if (popallspace) {
+ vm_release(popallspace, POPALLSPACE_SIZE);
+ popallspace = 0;
+ }
+
+#if PROFILE_COMPILE_TIME
+ write_log("### Compile Block statistics\n");
+ write_log("Number of calls to compile_block : %d\n", compile_count);
+ uae_u32 emul_time = emul_end_time - emul_start_time;
+ write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
+ write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
+ 100.0*double(compile_time)/double(emul_time));
+ write_log("\n");
+#endif
+
+#if PROFILE_UNTRANSLATED_INSNS
+ uae_u64 untranslated_count = 0;
+ for (int i = 0; i < 65536; i++) {
+ opcode_nums[i] = i;
+ untranslated_count += raw_cputbl_count[i];
+ }
+ write_log("Sorting out untranslated instructions count...\n");
+ qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
+ write_log("\nRank Opc Count Name\n");
+ for (int i = 0; i < untranslated_top_ten; i++) {
+ uae_u32 count = raw_cputbl_count[opcode_nums[i]];
+ struct instr *dp;
+ struct mnemolookup *lookup;
+ if (!count) // list is sorted, so the first zero count ends the report
+ break;
+ dp = table68k + opcode_nums[i];
+ for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
+ ;
+ // count is a uae_u32: cast so the argument really matches the %lu conversion
+ write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], (unsigned long)count, lookup->name);
+ }
+#endif
+
+#if RECORD_REGISTER_USAGE
+ int reg_count_ids[16];
+ uint64 tot_reg_count = 0;
+ for (int i = 0; i < 16; i++) {
+ reg_count_ids[i] = i;
+ tot_reg_count += reg_count[i];
+ }
+ qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
+ uint64 cum_reg_count = 0;
+ for (int i = 0; i < 16; i++) {
+ int r = reg_count_ids[i];
+ cum_reg_count += reg_count[r];
+ // NOTE(review): %16ld assumes reg_count[] elements are long-sized; confirm against its declaration
+ printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
+ reg_count[r],
+ 100.0*double(reg_count[r])/double(tot_reg_count),
+ 100.0*double(cum_reg_count)/double(tot_reg_count));
+ }
+#endif
+}
+/* True when the JIT should be used: "jit" pref set, cache size >= MIN_CACHE_SIZE KB, and CPUType >= 2. */
+bool compiler_use_jit(void)
+{
+ // Check for the "jit" prefs item
+ if (!PrefsFindBool("jit"))
+ return false;
+
+ // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
+ if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
+ write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
+ return false;
+ }
+
+ // Enable JIT for 68020+ emulation only
+ if (CPUType < 2) {
+ write_log("<JIT compiler> : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType);
+ return false;
+ }
+
+ return true;
+}
+/* Reset the allocator state in `live`: bind vregs to their memory homes, clear native-register bookkeeping, reset flag/PC tracking. */
+void init_comp(void)
+{
+ int i;
+ uae_s8* cb=can_byte;
+ uae_s8* cw=can_word;
+ uae_s8* au=always_used;
+
+#if RECORD_REGISTER_USAGE
+ for (i=0;i<16;i++)
+ reg_count_local[i] = 0;
+#endif
+
+ for (i=0;i<VREGS;i++) {
+ live.state[i].realreg=-1;
+ live.state[i].needflush=NF_SCRATCH;
+ live.state[i].val=0;
+ set_status(i,UNDEF);
+ }
+
+ for (i=0;i<VFREGS;i++) {
+ live.fate[i].status=UNDEF;
+ live.fate[i].realreg=-1;
+ live.fate[i].needflush=NF_SCRATCH;
+ }
+
+ for (i=0;i<VREGS;i++) {
+ if (i<16) { /* First 16 registers map to 68k registers */
+ live.state[i].mem=((uae_u32*)&regs)+i;
+ live.state[i].needflush=NF_TOMEM;
+ set_status(i,INMEM);
+ }
+ else
+ live.state[i].mem=scratch.regs+i;
+ }
+ live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
+ live.state[PC_P].needflush=NF_TOMEM;
+ set_const(PC_P,(uintptr)comp_pc_p);
+
+ live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
+ live.state[FLAGX].needflush=NF_TOMEM;
+ set_status(FLAGX,INMEM);
+
+ live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
+ live.state[FLAGTMP].needflush=NF_TOMEM;
+ set_status(FLAGTMP,INMEM);
+
+ live.state[NEXT_HANDLER].needflush=NF_HANDLER;
+ set_status(NEXT_HANDLER,UNDEF);
+
+ for (i=0;i<VFREGS;i++) {
+ if (i<8) { /* First 8 registers map to 68k FPU registers */
+ live.fate[i].mem=(uae_u32*)fpu_register_address(i);
+ live.fate[i].needflush=NF_TOMEM;
+ live.fate[i].status=INMEM;
+ }
+ else if (i==FP_RESULT) {
+ live.fate[i].mem=(uae_u32*)(&fpu.result);
+ live.fate[i].needflush=NF_TOMEM;
+ live.fate[i].status=INMEM;
+ }
+ else
+ live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
+ }
+
+
+ for (i=0;i<N_REGS;i++) {
+ live.nat[i].touched=0;
+ live.nat[i].nholds=0;
+ live.nat[i].locked=0;
+ if (*cb==i) { /* can_byte, can_word and always_used are walked in step with i */
+ live.nat[i].canbyte=1; cb++;
+ } else live.nat[i].canbyte=0;
+ if (*cw==i) {
+ live.nat[i].canword=1; cw++;
+ } else live.nat[i].canword=0;
+ if (*au==i) {
+ live.nat[i].locked=1; au++;
+ }
+ }
+
+ for (i=0;i<N_FREGS;i++) {
+ live.fat[i].touched=0;
+ live.fat[i].nholds=0;
+ live.fat[i].locked=0;
+ }
+
+ touchcnt=1;
+ m68k_pc_offset=0;
+ live.flags_in_flags=TRASH;
+ live.flags_on_stack=VALID;
+ live.flags_are_important=1;
+
+ raw_fp_init();
+}
+/* Write cached state back: flags, the pending PC offset and, when save_regs is non-zero, every vreg marked NF_TOMEM. */
+/* Only do this if you really mean it! The next call should be to init!*/
+void flush(int save_regs)
+{
+ int i;
+
+ log_flush();
+ flush_flags(); /* low level */
+ sync_m68k_pc(); /* mid level */
+
+ if (save_regs) {
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_SCRATCH ||
+ live.fate[i].status==CLEAN) {
+ f_disassociate(i);
+ }
+ }
+ for (i=0;i<VREGS;i++) {
+ if (live.state[i].needflush==NF_TOMEM) {
+ switch(live.state[i].status) {
+ case INMEM:
+ if (live.state[i].val) { /* non-zero val: add it to the in-memory copy, then clear it */
+ raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
+ log_vwrite(i);
+ live.state[i].val=0;
+ }
+ break;
+ case CLEAN:
+ case DIRTY:
+ remove_offset(i,-1); tomem(i); break;
+ case ISCONST:
+ if (i!=PC_P) /* a constant PC_P is deliberately not written back here */
+ writeback_const(i);
+ break;
+ default: break;
+ }
+ Dif (live.state[i].val && i!=PC_P) {
+ write_log("Register %d still has val %x\n",
+ i,live.state[i].val);
+ }
+ }
+ }
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_TOMEM &&
+ live.fate[i].status==DIRTY) {
+ f_evict(i);
+ }
+ }
+ raw_fp_cleanup_drop();
+ }
+ if (needflags) {
+ write_log("Warning! flush with needflags=1!\n");
+ }
+}
+/* Register write-back like flush(1), but without flush_flags()/sync_m68k_pc() and without touching INMEM vregs. */
+static void flush_keepflags(void)
+{
+ int i;
+
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_SCRATCH ||
+ live.fate[i].status==CLEAN) {
+ f_disassociate(i);
+ }
+ }
+ for (i=0;i<VREGS;i++) {
+ if (live.state[i].needflush==NF_TOMEM) {
+ switch(live.state[i].status) {
+ case INMEM:
+ /* Can't adjust the offset here --- that needs "add" */
+ break;
+ case CLEAN:
+ case DIRTY:
+ remove_offset(i,-1); tomem(i); break;
+ case ISCONST:
+ if (i!=PC_P) /* a constant PC_P is deliberately not written back here */
+ writeback_const(i);
+ break;
+ default: break;
+ }
+ }
+ }
+ for (i=0;i<VFREGS;i++) {
+ if (live.fate[i].needflush==NF_TOMEM &&
+ live.fate[i].status==DIRTY) {
+ f_evict(i);
+ }
+ }
+ raw_fp_cleanup_drop();
+}
+/* Forget every integer and FP vreg marked NF_SCRATCH; first warns about native registers that are still locked. */
+void freescratch(void)
+{
+ int i;
+ for (i=0;i<N_REGS;i++)
+ if (live.nat[i].locked && i!=4) /* NOTE(review): native register 4 is exempt, presumably the stack pointer; confirm */
+ write_log("Warning! %d is locked\n",i);
+
+ for (i=0;i<VREGS;i++)
+ if (live.state[i].needflush==NF_SCRATCH) {
+ forget_about(i);
+ }
+
+ for (i=0;i<VFREGS;i++)
+ if (live.fate[i].needflush==NF_SCRATCH) {
+ f_forget_about(i);
+ }
+}
+
+/********************************************************************
+ * Support functions, internal *
+ ********************************************************************/
+
+/* Pad the code pointer `target` up to a multiple of a (a power of two; 0 = no alignment). Emits nothing if already aligned. */
+static void align_target(uae_u32 a)
+{
+ if (!a)
+ return;
+
+ if (tune_nop_fillers) {
+ /* Previously an already-aligned target still received a full `a` bytes of filler. */
+ if ((uintptr)target & (a - 1)) raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
+ } else {
+ /* Fill with NOPs --- makes debugging with gdb easier */
+ while ((uintptr)target&(a-1))
+ *target++=0x90;
+ }
+}
+/* Non-zero when addr lies in the half-open range [ROMBaseHost, ROMBaseHost + ROMSize). */
+static __inline__ int isinrom(uintptr addr)
+{
+ const uintptr rom_start = (uintptr)ROMBaseHost;
+ return (addr < rom_start) ? 0 : (addr < rom_start + ROMSize); }
+/* Store every DIRTY vreg whose native register is not call_saved, and evict every FP vreg currently held in a register. */
+static void flush_all(void)
+{
+ int i;
+
+ log_flush();
+ for (i=0;i<VREGS;i++)
+ if (live.state[i].status==DIRTY) {
+ if (!call_saved[live.state[i].realreg]) { /* vregs in call_saved registers are left alone */
+ tomem(i);
+ }
+ }
+ for (i=0;i<VFREGS;i++)
+ if (f_isinreg(i))
+ f_evict(i);
+ raw_fp_cleanup_drop();
+}
+
+/* Make sure all registers that will get clobbered by a call are
+ safe and sound in memory */
+static void prepare_for_call_1(void)
+{
+ flush_all(); /* If there are registers that don't get clobbered,
+ * we should be a bit more selective here */
+}
+
+/* We will call a C routine in a moment. That will clobber all registers,
+ so we need to disassociate everything */
+static void prepare_for_call_2(void)
+{
+ int i;
+ for (i=0;i<N_REGS;i++)
+ if (!call_saved[i] && live.nat[i].nholds>0) /* call_saved native registers keep their vregs */
+ free_nreg(i);
+
+ for (i=0;i<N_FREGS;i++)
+ if (live.fat[i].nholds>0) /* every occupied FP native register is freed */
+ f_free_nreg(i);
+
+ live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
+ flags at the very start of the call_r
+ functions! */
+}
+
+/********************************************************************
+ * Memory access and related functions, CREATE time *
+ ********************************************************************/
+/* Record both successor PCs and the condition code in next_pc_p, taken_pc_p and branch_cc; no code is emitted here. */
+void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
+{
+ next_pc_p=not_taken;
+ taken_pc_p=taken;
+ branch_cc=cond;
+}
+
+/* Return the address of the direct_handler_to_use field of the blockinfo found (or created) for addr. */
+static uae_u32 get_handler_address(uae_u32 addr)
+{
+ uae_u32 cl=cacheline(addr); /* NOTE(review): cl is never used below */
+ blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
+ return (uintptr)&(bi->direct_handler_to_use); /* NOTE(review): pointer is narrowed to uae_u32; confirm on 64-bit hosts */
+}
+/* Return the current value of direct_handler_to_use for the blockinfo found (or created) for addr. */
+static uae_u32 get_handler(uae_u32 addr)
+{
+ uae_u32 cl=cacheline(addr); /* NOTE(review): cl is never used below */
+ blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
+ return (uintptr)bi->direct_handler_to_use; /* NOTE(review): pointer is narrowed to uae_u32; confirm on 64-bit hosts */
+}
+/* Emit a long load into reg from the location returned by get_handler_address(addr). */
+static void load_handler(int reg, uae_u32 addr)
+{
+ mov_l_rm(reg,get_handler_address(addr));
+}
+
+/* This version assumes that it is writing *real* memory, and *will* fail
+ * if that assumption is wrong! No branches, no second chances, just
+ * straight go-for-it attitude */
+/* size is 1, 2 or 4 (other values emit nothing); clobber!=0 lets the byte swap be done in source itself instead of tmp. */
+static void writemem_real(int address, int source, int size, int tmp, int clobber)
+{
+ int f=tmp; /* register that receives the byte-swapped copy */
+
+ if (clobber)
+ f=source;
+
+ switch(size) {
+ case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
+ case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
+ case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
+ }
+ forget_about(tmp);
+ forget_about(f); /* with clobber set this forgets source as well */
+}
+/* Store the byte in source at the address in address: writemem_real with size 1 and clobber 0. */
+void writebyte(int address, int source, int tmp)
+{
+ writemem_real(address,source,1,tmp,0);
+}
+/* Word store helper: writemem_real with size 2; clobber is passed through unchanged. */
+static __inline__ void writeword_general(int address, int source, int tmp,
+ int clobber)
+{
+ writemem_real(address,source,2,tmp,clobber);
+}
+/* Word store with clobber=1: source itself is byte-swapped and forgotten by writemem_real. */
+void writeword_clobber(int address, int source, int tmp)
+{
+ writeword_general(address,source,tmp,1);
+}
+/* Word store with clobber=0: the byte swap is done in tmp. */
+void writeword(int address, int source, int tmp)
+{
+ writeword_general(address,source,tmp,0);
+}
+/* Long store helper: writemem_real with size 4; clobber is passed through unchanged. */
+static __inline__ void writelong_general(int address, int source, int tmp,
+ int clobber)
+{
+ writemem_real(address,source,4,tmp,clobber);
+}
+/* Long store with clobber=1: source itself is byte-swapped and forgotten by writemem_real. */
+void writelong_clobber(int address, int source, int tmp)
+{
+ writelong_general(address,source,tmp,1);
+}
+/* Long store with clobber=0: the byte swap is done in tmp. */
+void writelong(int address, int source, int tmp)
+{
+ writelong_general(address,source,tmp,0);
+}
+
+
+
+/* This version assumes that it is reading *real* memory, and *will* fail
+ * if that assumption is wrong! No branches, no second chances, just
+ * straight go-for-it attitude */
+
+static void readmem_real(int address, int dest, int size, int tmp)
+{
+ /* Loads from [address+MEMBaseDiff] straight into dest; 16- and 32-bit
+ * values are then byte-swapped in dest. Sizes other than 1, 2 or 4
+ * emit nothing. tmp is never loaded into; it is only forgotten at
+ * the end. */
+
+ switch(size) {
+ case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
+ case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
+ case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
+ }
+ forget_about(tmp);
+}
+
+/* Byte load: forwards to readmem_real() with size 1. */
+void readbyte(int address, int dest, int tmp)
+{
+ readmem_real(address,dest,1,tmp);
+}
+
+/* Word load: forwards to readmem_real() with size 2. */
+void readword(int address, int dest, int tmp)
+{
+ readmem_real(address,dest,2,tmp);
+}
+
+/* Long load: forwards to readmem_real() with size 4. */
+void readlong(int address, int dest, int tmp)
+{
+ readmem_real(address,dest,4,tmp);
+}
+
+/* Put into `dest` the address derived from the emulated address held in
+ * `address`:
+ *   - REAL_ADDRESSING:   a plain register copy;
+ *   - DIRECT_ADDRESSING: address + MEMBaseDiff.
+ * If neither macro is set, nothing is emitted.  `tmp` is not needed for
+ * the computation and is only forgotten at the end.  (Two locals that
+ * were assigned but never read have been removed.) */
+void get_n_addr(int address, int dest, int tmp)
+{
+#if REAL_ADDRESSING
+    mov_l_rr(dest, address);
+#elif DIRECT_ADDRESSING
+    lea_l_brr(dest,address,MEMBaseDiff);
+#endif
+    forget_about(tmp);
+}
+
+/* Jump-target variant of get_n_addr(); currently identical to it. */
+void get_n_addr_jmp(int address, int dest, int tmp)
+{
+ /* For this, we need to get the same address as the rest of UAE
+ would --- otherwise we end up translating everything twice */
+ get_n_addr(address,dest,tmp);
+}
+
+
+/* Compute an indexed effective address into `target`.
+ *
+ * base is a register, but dp is an actual value.
+ target is a register, as is tmp
+ *
+ * Fields decoded from dp in this function:
+ *   bits 12-15  index register number (reg)
+ *   bit  11     0: index is sign-extended from 16 bits, 1: full 32 bits
+ *   bits 9-10   index shift count (regd_shift)
+ *   bit  8      1: full-format extension word, 0: brief ("68000") format
+ *   bit  7      full format: ignore base register
+ *   bit  6      full format: ignore index register
+ *   bits 4-5    full format: 2 = word base displacement, 3 = long
+ *   bit  2      full format: 0 = index added before the memory fetch,
+ *               1 = index added after it
+ *   bits 0-1    full format: non-zero = fetch a long through the
+ *               intermediate address; 2 = word outer displacement, 3 = long
+ *   bits 0-7    brief format: signed 8-bit displacement
+ *
+ * Displacement words are fetched from the instruction stream and
+ * m68k_pc_offset is advanced past them.  tmp is forgotten on exit. */
+void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
+{
+ int reg = (dp >> 12) & 15;
+ int regd_shift=(dp >> 9) & 3;
+
+ if (dp & 0x100) {
+ int ignorebase=(dp&0x80);
+ int ignorereg=(dp&0x40);
+ int addbase=0;
+ int outer=0;
+
+ /* Base displacement: word (sign-extended) or long */
+ if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
+
+ /* Outer displacement: word (sign-extended) or long */
+ if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
+ if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
+
+ if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
+ if (!ignorereg) {
+ if ((dp & 0x800) == 0)
+ sign_extend_16_rr(target,reg);
+ else
+ mov_l_rr(target,reg);
+ shll_l_ri(target,regd_shift);
+ }
+ else
+ mov_l_ri(target,0);
+
+ /* target is now regd */
+ if (!ignorebase)
+ add_l(target,base);
+ add_l_ri(target,addbase);
+ if (dp&0x03) readlong(target,target,tmp);
+ } else { /* do the getlong first, then add regd */
+ if (!ignorebase) {
+ mov_l_rr(target,base);
+ add_l_ri(target,addbase);
+ }
+ else
+ mov_l_ri(target,addbase);
+ if (dp&0x03) readlong(target,target,tmp);
+
+ if (!ignorereg) {
+ if ((dp & 0x800) == 0)
+ sign_extend_16_rr(tmp,reg);
+ else
+ mov_l_rr(tmp,reg);
+ shll_l_ri(tmp,regd_shift);
+ /* tmp is now regd */
+ add_l(target,tmp);
+ }
+ }
+ add_l_ri(target,outer);
+ }
+ else { /* 68000 version */
+ if ((dp & 0x800) == 0) { /* Sign extend */
+ sign_extend_16_rr(target,reg);
+ lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
+ }
+ else {
+ lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
+ }
+ }
+ forget_about(tmp);
+}
+
+
+
+
+
+/* Set the `letit` flag to `enabled`.  When the value actually changes,
+ * the translation cache is hard-flushed first. */
+void set_cache_state(int enabled)
+{
+    const int state_changes = (enabled != letit);
+
+    if (state_changes) {
+        flush_icache_hard(77);
+    }
+    letit = enabled;
+}
+
+/* Return the current value of the `letit` flag set by set_cache_state(). */
+int get_cache_state(void)
+{
+ return letit;
+}
+
+/* Return the distance from the start of the translation cache to the
+ * current compile position, or 0 when no cache is allocated. */
+uae_u32 get_jitted_size(void)
+{
+    if (!compiled_code) {
+        return 0;
+    }
+    return current_compile_p-compiled_code;
+}
+
+/* Tuning values for do_alloc_code(); they are referenced only by its
+ * disabled Linux-specific branch. */
+const int CODE_ALLOC_MAX_ATTEMPTS = 10;
+const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
+
+/* Allocate `size` bytes for generated code.  Returns NULL on failure.
+ * The Linux-specific branch below is compiled out (`&& 0`), so the active
+ * path is a single vm_acquire() call and `depth` is unused. */
+static uint8 *do_alloc_code(uint32 size, int depth)
+{
+#if defined(__linux__) && 0
+ /*
+ This is a really awful hack that is known to work on Linux at
+ least.
+
+ The trick here is to make sure the allocated cache is nearby
+ code segment, and more precisely in the positive half of a
+ 32-bit address space. i.e. addr < 0x80000000. Actually, it
+ turned out that a 32-bit binary run on AMD64 yields a cache
+ allocated around 0xa0000000, thus causing some troubles when
+ translating addresses from m68k to x86.
+ */
+ static uint8 * code_base = NULL;
+ if (code_base == NULL) {
+ uintptr page_size = getpagesize();
+ uintptr boundaries = CODE_ALLOC_BOUNDARIES;
+ if (boundaries < page_size)
+ boundaries = page_size;
+ code_base = (uint8 *)sbrk(0);
+ /* Probe upward from the program break in `boundaries`-sized steps */
+ for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
+ if (vm_acquire_fixed(code_base, size) == 0) {
+ uint8 *code = code_base;
+ code_base += size;
+ return code;
+ }
+ code_base += boundaries;
+ }
+ return NULL;
+ }
+
+ if (vm_acquire_fixed(code_base, size) == 0) {
+ uint8 *code = code_base;
+ code_base += size;
+ return code;
+ }
+
+ if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
+ return NULL;
+
+ return do_alloc_code(size, depth + 1);
+#else
+ /* Map the VM layer's failure sentinel to NULL */
+ uint8 *code = (uint8 *)vm_acquire(size);
+ return code == VM_MAP_FAILED ? NULL : code;
+#endif
+}
+
+/* Allocate `size` bytes of code space via do_alloc_code(size, 0).
+ * Returns NULL on failure; asserts that the returned address fits in
+ * 32 bits. */
+static inline uint8 *alloc_code(uint32 size)
+{
+ uint8 *ptr = do_alloc_code(size, 0);
+ /* allocated code must fit in 32-bit boundaries */
+ assert((uintptr)ptr <= 0xffffffff);
+ return ptr;
+}
+
+/* (Re)allocate the translation cache.
+ *
+ * An existing cache is hard-flushed and released first.  If cache_size
+ * (in KB) is 0 nothing is allocated.  Otherwise allocation is attempted at
+ * cache_size KB and the size is halved after each failure until it either
+ * succeeds or reaches 0.  On success the region is made readable,
+ * writable and executable and the compile pointers are reset to its start.
+ *
+ * If every attempt fails, compiled_code stays NULL and cache_size is 0. */
+void alloc_cache(void)
+{
+    if (compiled_code) {
+        flush_icache_hard(6);
+        vm_release(compiled_code, cache_size * 1024);
+        compiled_code = 0;
+    }
+
+    if (cache_size == 0)
+        return;
+
+    while (!compiled_code && cache_size) {
+        if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
+            compiled_code = 0;
+            cache_size /= 2;
+        }
+    }
+
+    /* Nothing could be allocated: do not touch page protections for a
+       NULL region. */
+    if (!compiled_code)
+        return;
+
+    vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
+
+    /* alloc_code() asserts the address fits in 32 bits, so the cast is
+       lossless and matches the %08X conversion. */
+    write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, (unsigned int)(uintptr)compiled_code);
+    max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
+    current_compile_p = compiled_code;
+    current_cache_size = 0;
+}
+
+
+
+/* Defined elsewhere; build_comp() installs it as the default entry of
+ * nfcpufunctbl for every opcode. */
+extern void op_illg_1 (uae_u32 opcode) REGPARAM;
+
+/* Compute two checksums over the 68k source range(s) of block `bi`:
+ * *c1 receives the 32-bit sum and *c2 the XOR of all covered longwords.
+ *
+ * With USE_CHECKSUM_INFO the ranges come from the bi->csi chain;
+ * otherwise the single range is bi->min_pcp / bi->len.  Each range start
+ * is rounded down to a 4-byte boundary (the length grows to compensate).
+ * A range whose adjusted length is negative or above MAX_CHECKSUM_LEN
+ * contributes nothing. */
+static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
+{
+ uae_u32 k1 = 0;
+ uae_u32 k2 = 0;
+
+#if USE_CHECKSUM_INFO
+ checksum_info *csi = bi->csi;
+ Dif(!csi) abort();
+ while (csi) {
+ uae_s32 len = csi->length;
+ uintptr tmp = (uintptr)csi->start_p;
+#else
+ uae_s32 len = bi->len;
+ uintptr tmp = (uintptr)bi->min_pcp;
+#endif
+ uae_u32*pos;
+
+ /* Align the start down to 4 bytes and extend the length to match */
+ len += (tmp & 3);
+ tmp &= ~((uintptr)3);
+ pos = (uae_u32 *)tmp;
+
+ if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
+ while (len > 0) {
+ k1 += *pos;
+ k2 ^= *pos;
+ pos++;
+ len -= 4;
+ }
+ }
+
+#if USE_CHECKSUM_INFO
+ csi = csi->next;
+ }
+#endif
+
+ *c1 = k1;
+ *c2 = k2;
+}
+
+#if 0
+/* Disabled debugging aid: logs every longword covered by checksum range
+ * `csi`, using the same alignment rule as calc_checksum().
+ * NOTE(review): `tmp` is uae_u32 here, so the address would be truncated
+ * on a 64-bit host if this were ever re-enabled. */
+static void show_checksum(CSI_TYPE* csi)
+{
+ uae_u32 k1=0;
+ uae_u32 k2=0;
+ uae_s32 len=CSI_LENGTH(csi);
+ uae_u32 tmp=(uintptr)CSI_START_P(csi);
+ uae_u32* pos;
+
+ len+=(tmp&3);
+ tmp&=(~3);
+ pos=(uae_u32*)tmp;
+
+ if (len<0 || len>MAX_CHECKSUM_LEN) {
+ return;
+ }
+ else {
+ while (len>0) {
+ write_log("%08x ",*pos);
+ pos++;
+ len-=4;
+ }
+ write_log(" bla\n");
+ }
+}
+#endif
+
+
+/* Check whether the block for the current PC exists but is not the one
+ * registered in its cache line.  If so, raise it in the cache-line list
+ * and return 1; in every other case return 0. */
+int check_for_cache_miss(void)
+{
+    blockinfo* wanted=get_blockinfo_addr(regs.pc_p);
+
+    if (!wanted)
+        return 0;
+
+    int line=cacheline(regs.pc_p);
+    if (wanted==cache_tags[line+1].bi)
+        return 0;
+
+    raise_in_cl_list(wanted);
+    return 1;
+}
+
+
+/* Entered when an existing block's countdown has expired.  The block is
+ * raised in its cache-line list first so that execute_normal() does not
+ * refuse to recompile because of a perceived cache miss. */
+static void recompile_block(void)
+{
+    blockinfo* expired=get_blockinfo_addr(regs.pc_p);
+
+    Dif (!expired) {
+        abort();
+    }
+    raise_in_cl_list(expired);
+    execute_normal();
+}
+/* Handle a cache-line miss for the current PC.  If no block exists for
+ * the PC it is compiled via execute_normal(); otherwise the existing block
+ * is raised in its cache-line list.  Under Dif() checking, a miss with no
+ * resident block, or with the wanted block already resident, is fatal. */
+static void cache_miss(void)
+{
+    blockinfo* wanted=get_blockinfo_addr(regs.pc_p);
+    uae_u32 line=cacheline(regs.pc_p);
+    blockinfo* resident=get_blockinfo(line);
+
+    if (wanted) {
+        Dif (!resident || wanted==resident) {
+            write_log("Unexplained cache miss %p %p\n",wanted,resident);
+            abort();
+        }
+        raise_in_cl_list(wanted);
+    }
+    else {
+        execute_normal(); /* Compile this block now */
+    }
+}
+
+/* Forward declaration: block_check_checksum() and called_check_checksum()
+ * call each other. */
+static int called_check_checksum(blockinfo* bi);
+
+/* Re-validate a block that is in the BI_NEED_CHECK state.
+ *
+ * Returns 1 immediately for blocks in any other state.  Otherwise the
+ * stored checksums (bi->c1, bi->c2) are compared against freshly computed
+ * ones; a block with both stored checksums zero is treated as a mismatch.
+ * On a match, the block's handlers are restored, its status is set to
+ * BI_CHECKING and its (up to two) jump targets are checked recursively via
+ * called_check_checksum().  If everything matches, the block is moved to
+ * the active list and marked BI_ACTIVE; otherwise it is invalidated.
+ * In both cases it is raised in its cache-line list.
+ *
+ * Returns non-zero when the block is still valid. */
+static inline int block_check_checksum(blockinfo* bi)
+{
+ uae_u32 c1,c2;
+ bool isgood;
+
+ if (bi->status!=BI_NEED_CHECK)
+ return 1; /* This block is in a checked state */
+
+ checksum_count++;
+
+ if (bi->c1 || bi->c2)
+ calc_checksum(bi,&c1,&c2);
+ else {
+ c1=c2=1; /* Make sure it doesn't match */
+ }
+
+ isgood=(c1==bi->c1 && c2==bi->c2);
+
+ if (isgood) {
+ /* This block is still OK. So we reactivate. Of course, that
+ means we have to move it into the needs-to-be-flushed list */
+ bi->handler_to_use=bi->handler;
+ set_dhtu(bi,bi->direct_handler);
+ /* Status leaves BI_NEED_CHECK before recursing, so a dependency
+ cycle back to this block returns early above */
+ bi->status=BI_CHECKING;
+ isgood=called_check_checksum(bi);
+ }
+ if (isgood) {
+ /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
+ c1,c2,bi->c1,bi->c2);*/
+ remove_from_list(bi);
+ add_to_active(bi);
+ raise_in_cl_list(bi);
+ bi->status=BI_ACTIVE;
+ }
+ else {
+ /* This block actually changed. We need to invalidate it,
+ and set it up to be recompiled */
+ /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
+ c1,c2,bi->c1,bi->c2); */
+ invalidate_block(bi);
+ raise_in_cl_list(bi);
+ }
+ return isgood;
+}
+
+/* Check the blocks that `bi` jumps to directly.
+ *
+ * For each of the two dependency slots that has a jump offset recorded,
+ * the target block is validated with block_check_checksum().  Checking
+ * stops at the first target that fails.
+ *
+ * Returns non-zero when every checked target is still valid. */
+static int called_check_checksum(blockinfo* bi)
+{
+    int isgood=1;
+    int i;
+
+    for (i=0;i<2 && isgood;i++) {
+        if (bi->dep[i].jmp_off) {
+            isgood=block_check_checksum(bi->dep[i].target);
+        }
+    }
+    return isgood;
+}
+
+/* Entry point reached through popall_check_checksum.
+ *
+ *  - No block for the current PC: the call was accidental, so compile a
+ *    new block via execute_normal().
+ *  - A block exists but is not the one registered in the cache line:
+ *    treat it as a cache miss.
+ *  - Otherwise re-validate the block; if its checksum no longer matches,
+ *    fall back to execute_normal(). */
+static void check_checksum(void)
+{
+    blockinfo* here=get_blockinfo_addr(regs.pc_p);
+    uae_u32 line=cacheline(regs.pc_p);
+    blockinfo* primary=get_blockinfo(line);
+
+    if (!here) {
+        execute_normal();
+    }
+    else if (here!=primary) {
+        cache_miss();
+    }
+    else if (!block_check_checksum(here)) {
+        execute_normal();
+    }
+}
+
+/* Bring the current register-allocation state in line with what block
+ * `bi` expects on entry (its saved `env`).
+ *
+ * A block in BI_NEED_CHECK state is re-validated first.  For blocks that
+ * are BI_ACTIVE or BI_FINALIZING, virtual registers 0..15 marked
+ * L_UNNEEDED are forgotten.  Then everything is flushed, every native
+ * register for which the block names a virtual register (nat[i] >= 0) is
+ * loaded with readreg_specific(), and those native registers are unlocked
+ * again in a second pass. */
+static __inline__ void match_states(blockinfo* bi)
+{
+ int i;
+ smallstate* s=&(bi->env);
+
+ if (bi->status==BI_NEED_CHECK) {
+ block_check_checksum(bi);
+ }
+ if (bi->status==BI_ACTIVE ||
+ bi->status==BI_FINALIZING) { /* Deal with the *promises* the
+ block makes (about not using
+ certain vregs) */
+ for (i=0;i<16;i++) {
+ if (s->virt[i]==L_UNNEEDED) {
+ // write_log("unneeded reg %d at %p\n",i,target);
+ COMPCALL(forget_about)(i); // FIXME
+ }
+ }
+ }
+ flush(1);
+
+ /* And now deal with the *demands* the block makes */
+ for (i=0;i<N_REGS;i++) {
+ int v=s->nat[i];
+ if (v>=0) {
+ // printf("Loading reg %d into %d at %p\n",v,i,target);
+ readreg_specific(v,4,i);
+ // do_load_reg(i,v);
+ // setlock(i);
+ }
+ }
+ /* Unlock only after all demanded registers have been loaded */
+ for (i=0;i<N_REGS;i++) {
+ int v=s->nat[i];
+ if (v>=0) {
+ unlock2(i);
+ }
+ }
+}
+
+/* Emit one exit stub at the current target: undo the stack adjustment
+ * made by the entry stub, pop the preserved registers in ascending order
+ * (the reverse of the order they were pushed in) and jump to `dest`. */
+static __inline__ void emit_popall_exit(int stack_space, uintptr dest)
+{
+    int i;
+
+    raw_inc_sp(stack_space);
+    for (i=0;i<N_REGS;i++) {
+        if (need_to_preserve[i])
+            raw_pop_l_r(i);
+    }
+    raw_jmp(dest);
+}
+
+/* Allocate popallspace and generate the entry stub (pushall_call_handler)
+ * plus the exit stubs (popall_*) that connect C code and generated code.
+ *
+ * The region is writable while the stubs are emitted and is switched to
+ * read+execute afterwards.  Aborts if the region cannot be allocated. */
+static __inline__ void create_popalls(void)
+{
+    int i,r;
+
+    if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
+        write_log("FATAL: Could not allocate popallspace!\n");
+        abort();
+    }
+    vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
+
+    /* Padding needed so that STACK_OFFSET plus the pushed registers add
+       up to a multiple of STACK_ALIGN. */
+    int stack_space = STACK_OFFSET;
+    for (i=0;i<N_REGS;i++) {
+        if (need_to_preserve[i])
+            stack_space += sizeof(void *);
+    }
+    stack_space %= STACK_ALIGN;
+    if (stack_space)
+        stack_space = STACK_ALIGN - stack_space;
+
+    current_compile_p=popallspace;
+    set_target(current_compile_p);
+
+    /* We need to guarantee 16-byte stack alignment on x86 at any point
+       within the JIT generated code. We have multiple exit points
+       possible but a single entry. A "jmp" is used so that we don't
+       have to generate stack alignment in generated code that has to
+       call external functions (e.g. a generic instruction handler).
+
+       In summary, JIT generated code is not leaf so we have to deal
+       with it here to maintain correct stack alignment. */
+    align_target(align_jumps);
+    current_compile_p=get_target();
+    pushall_call_handler=get_target();
+    for (i=N_REGS;i--;) {
+        if (need_to_preserve[i])
+            raw_push_l_r(i);
+    }
+    raw_dec_sp(stack_space);
+    /* Dispatch through cache_tags, indexed by the masked PC pointer */
+    r=REG_PC_TMP;
+    raw_mov_l_rm(r,(uintptr)&regs.pc_p);
+    raw_and_l_ri(r,TAGMASK);
+    raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
+
+    /* now the exit points */
+    align_target(align_jumps);
+    popall_do_nothing=get_target();
+    emit_popall_exit(stack_space,(uintptr)do_nothing);
+
+    align_target(align_jumps);
+    popall_execute_normal=get_target();
+    emit_popall_exit(stack_space,(uintptr)execute_normal);
+
+    align_target(align_jumps);
+    popall_cache_miss=get_target();
+    emit_popall_exit(stack_space,(uintptr)cache_miss);
+
+    align_target(align_jumps);
+    popall_recompile_block=get_target();
+    emit_popall_exit(stack_space,(uintptr)recompile_block);
+
+    align_target(align_jumps);
+    popall_exec_nostats=get_target();
+    emit_popall_exit(stack_space,(uintptr)exec_nostats);
+
+    align_target(align_jumps);
+    popall_check_checksum=get_target();
+    emit_popall_exit(stack_space,(uintptr)check_checksum);
+
+    // no need to further write into popallspace
+    vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
+}
+
+/* Empty the block bookkeeping: clear both list heads (active, dormant)
+ * and every slot of hold_bi[]. */
+static __inline__ void reset_lists(void)
+{
+    int slot=0;
+
+    while (slot<MAX_HOLD_BI) {
+        hold_bi[slot]=NULL;
+        slot++;
+    }
+    active=NULL;
+    dormant=NULL;
+}
+
+/* Initialise a fresh blockinfo.
+ *
+ * Two small stubs are emitted at the current compile position:
+ *   direct_pen - stores bi->pc_p into regs.pc_p and jumps to
+ *                popall_execute_normal;
+ *   direct_pcc - stores bi->pc_p into regs.pc_p and jumps to
+ *                popall_check_checksum.
+ * The dependency slots are cleared, the environment is set to default_ss
+ * and the block is marked BI_INVALID with no saved state. */
+static void prepare_block(blockinfo* bi)
+{
+    int i;
+
+    set_target(current_compile_p);
+    align_target(align_jumps);
+    bi->direct_pen=(cpuop_func *)get_target();
+    raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
+    raw_mov_l_mr((uintptr)&regs.pc_p,0);
+    raw_jmp((uintptr)popall_execute_normal);
+
+    align_target(align_jumps);
+    bi->direct_pcc=(cpuop_func *)get_target();
+    raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
+    raw_mov_l_mr((uintptr)&regs.pc_p,0);
+    raw_jmp((uintptr)popall_check_checksum);
+    current_compile_p=get_target();
+
+    bi->deplist=NULL;
+    for (i=0;i<2;i++) {
+        bi->dep[i].prev_p=NULL;
+        bi->dep[i].next=NULL;
+    }
+    bi->env=default_ss;
+    bi->status=BI_INVALID;
+    bi->havestate=0;
+    //bi->env=empty_ss;
+}
+
+// Remove the compile handlers (flag-generating and no-flags variants) for
+// one opcode by clearing its compfunctbl and nfcompfunctbl entries.
+// OPCODE is in big endian format, use cft_map() beforehand, if needed.
+static inline void reset_compop(int opcode)
+{
+ compfunctbl[opcode] = NULL;
+ nfcompfunctbl[opcode] = NULL;
+}
+
+/* Parse exactly four hexadecimal digits (either case) starting at `p`.
+ * Returns the 16-bit value, or -1 as soon as a non-hex character is met;
+ * this includes the terminating NUL of a string shorter than four
+ * characters. */
+static int read_opcode(const char *p)
+{
+    int value = 0;
+    int pos = 0;
+
+    while (pos < 4) {
+        const int ch = p[pos];
+        int digit;
+
+        if (ch >= '0' && ch <= '9')
+            digit = ch - '0';
+        else if (ch >= 'a' && ch <= 'f')
+            digit = (ch - 'a') + 10;
+        else if (ch >= 'A' && ch <= 'F')
+            digit = (ch - 'A') + 10;
+        else
+            return -1;
+
+        value = (value << 4) | digit;
+        pos++;
+    }
+    return value;
+}
+
+/* Apply the "jitblacklist" preference: every listed opcode loses its
+ * compile handlers (see reset_compop) and so is not translated.
+ *
+ * Accepted syntax: a list of entries separated by ',' or ';', where each
+ * entry is either one 4-digit hex opcode ("4e75") or an inclusive range
+ * ("f200-f2ff").  A trailing separator is allowed.
+ *
+ * Returns true when the preference is absent, empty or fully parsed, and
+ * false on the first syntax error (entries before the error have already
+ * been applied).
+ *
+ * Fix: the separator test used to read `*p == ',' || *p++ == ';'`, which
+ * short-circuits on ',' without advancing p, so the next iteration tried
+ * to parse the comma as an opcode and failed.  Both separators are now
+ * skipped explicitly. */
+static bool merge_blacklist()
+{
+    const char *blacklist = PrefsFindString("jitblacklist");
+    if (!blacklist)
+        return true;
+
+    const char *p = blacklist;
+    for (;;) {
+        if (*p == 0)
+            return true;
+
+        int opcode1 = read_opcode(p);
+        if (opcode1 < 0)
+            return false;
+        p += 4;
+
+        int opcode2 = opcode1;
+        if (*p == '-') {
+            p++;
+            opcode2 = read_opcode(p);
+            if (opcode2 < 0)
+                return false;
+            p += 4;
+        }
+
+        /* An entry must be followed by a separator or the end of string */
+        if (*p != 0 && *p != ',' && *p != ';')
+            return false;
+
+        write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
+        for (int opcode = opcode1; opcode <= opcode2; opcode++)
+            reset_compop(cft_map(opcode));
+
+        if (*p == 0)
+            return true;
+
+        p++; /* skip ',' or ';' */
+    }
+}
+
+/* Build the JIT dispatch tables and initialise the compiler state.
+ *
+ * Steps, in order:
+ *  1. pick the interpreter "no flags" table (nfctbl) for the CPU level
+ *     derived from CPUType / FPUType;
+ *  2. reset every opcode to: no compile handler, op_illg_1 interpreter
+ *     handler, all flags used and set, cflow = fl_trap;
+ *  3. install compile handlers from the comp tables (FPU-using entries
+ *     are left NULL when avoid_fpu is set) and interpreter handlers from
+ *     nfctbl;
+ *  4. for every legal opcode at this CPU level, copy handlers and
+ *     properties from its generic handler opcode and take the flag
+ *     liveness from table68k;
+ *  5. apply the user blacklist, count compilable opcodes, create the
+ *     pop-all stubs, allocate the cache and reset lists and cache tags.
+ *
+ * Fixes relative to the previous revision:
+ *  - the no-flags comp table loop tested tbl[i].specific instead of
+ *    nftbl[i].specific while iterating over nftbl;
+ *  - the final nfctbl loop indexed nfcpufunctbl with tbl[i].opcode
+ *    instead of nfctbl[i].opcode;
+ *  - an unused local (jumpcount) was removed. */
+void build_comp(void)
+{
+    int i;
+    unsigned long opcode;
+    struct comptbl* tbl=op_smalltbl_0_comp_ff;
+    struct comptbl* nftbl=op_smalltbl_0_comp_nf;
+    int count;
+    int cpu_level = 0; // 68000 (default)
+    if (CPUType == 4)
+        cpu_level = 4; // 68040 with FPU
+    else {
+        if (FPUType)
+            cpu_level = 3; // 68020 with FPU
+        else if (CPUType >= 2)
+            cpu_level = 2; // 68020
+        else if (CPUType == 1)
+            cpu_level = 1;
+    }
+    struct cputbl *nfctbl = (
+          cpu_level == 4 ? op_smalltbl_0_nf
+        : cpu_level == 3 ? op_smalltbl_1_nf
+        : cpu_level == 2 ? op_smalltbl_2_nf
+        : cpu_level == 1 ? op_smalltbl_3_nf
+        : op_smalltbl_4_nf);
+
+    write_log ("<JIT compiler> : building compiler function tables\n");
+
+    for (opcode = 0; opcode < 65536; opcode++) {
+        reset_compop(opcode);
+        nfcpufunctbl[opcode] = op_illg_1;
+        prop[opcode].use_flags = 0x1f;
+        prop[opcode].set_flags = 0x1f;
+        prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
+    }
+
+    for (i = 0; tbl[i].opcode < 65536; i++) {
+        int cflow = table68k[tbl[i].opcode].cflow;
+        if (follow_const_jumps && (tbl[i].specific & 16))
+            cflow = fl_const_jump;
+        else
+            cflow &= ~fl_const_jump;
+        prop[cft_map(tbl[i].opcode)].cflow = cflow;
+
+        int uses_fpu = tbl[i].specific & 32;
+        if (uses_fpu && avoid_fpu)
+            compfunctbl[cft_map(tbl[i].opcode)] = NULL;
+        else
+            compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
+    }
+
+    for (i = 0; nftbl[i].opcode < 65536; i++) {
+        /* Test the entry of the table being iterated, not tbl[i] */
+        int uses_fpu = nftbl[i].specific & 32;
+        if (uses_fpu && avoid_fpu)
+            nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
+        else
+            nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
+
+        /* NOTE(review): this pairs a comp-table opcode with the
+           interpreter-table entry at the same index; the loop below
+           rewrites nfcpufunctbl from nfctbl's own opcodes.  Kept as is --
+           confirm whether this line is still needed. */
+        nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
+    }
+
+    for (i = 0; nfctbl[i].handler; i++) {
+        nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
+    }
+
+    for (opcode = 0; opcode < 65536; opcode++) {
+        compop_func *f;
+        compop_func *nff;
+        cpuop_func *nfcf;
+        int isaddx,cflow;
+
+        if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
+            continue;
+
+        if (table68k[opcode].handler != -1) {
+            /* Inherit everything from the generic handler opcode */
+            f = compfunctbl[cft_map(table68k[opcode].handler)];
+            nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
+            nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
+            cflow = prop[cft_map(table68k[opcode].handler)].cflow;
+            isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
+            prop[cft_map(opcode)].cflow = cflow;
+            prop[cft_map(opcode)].is_addx = isaddx;
+            compfunctbl[cft_map(opcode)] = f;
+            nfcompfunctbl[cft_map(opcode)] = nff;
+            Dif (nfcf == op_illg_1)
+                abort();
+            nfcpufunctbl[cft_map(opcode)] = nfcf;
+        }
+        prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
+        prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
+        /* Unconditional jumps don't evaluate condition codes, so they
+         * don't actually use any flags themselves */
+        if (prop[cft_map(opcode)].cflow & fl_const_jump)
+            prop[cft_map(opcode)].use_flags = 0;
+    }
+    for (i = 0; nfctbl[i].handler != NULL; i++) {
+        /* Entries flagged `specific` override whatever was inherited */
+        if (nfctbl[i].specific)
+            nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
+    }
+
+    /* Merge in blacklist */
+    if (!merge_blacklist())
+        write_log("<JIT compiler> : blacklist merge failure!\n");
+
+    count=0;
+    for (opcode = 0; opcode < 65536; opcode++) {
+        if (compfunctbl[cft_map(opcode)])
+            count++;
+    }
+    write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
+
+    /* Initialise state */
+    create_popalls();
+    alloc_cache();
+    reset_lists();
+
+    /* cache_tags holds pairs: even slot = handler, odd slot = blockinfo */
+    for (i=0;i<TAGSIZE;i+=2) {
+        cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
+        cache_tags[i+1].bi=NULL;
+    }
+
+#if 0
+    for (i=0;i<N_REGS;i++) {
+        empty_ss.nat[i].holds=-1;
+        empty_ss.nat[i].validsize=0;
+        empty_ss.nat[i].dirtysize=0;
+    }
+#endif
+    for (i=0;i<VREGS;i++) {
+        empty_ss.virt[i]=L_NEEDED;
+    }
+    for (i=0;i<N_REGS;i++) {
+        empty_ss.nat[i]=L_UNKNOWN;
+    }
+    default_ss=empty_ss;
+}
+
+
+/* Flush variant that does nothing; `n` is ignored. */
+static void flush_icache_none(int n)
+{
+ /* Nothing to do. */
+}
+
+/* Hard flush: discard every translated block.
+ *
+ * Each block on the active and dormant lists has its cache-tag pair reset
+ * (handler -> popall_execute_normal, bi -> NULL) and is freed.  The lists
+ * are then reset and, if a cache exists, compilation restarts at the
+ * beginning of it and SPCFLAG_JIT_EXEC_RETURN is raised.
+ * `n` is only used by the disabled log statement. */
+static void flush_icache_hard(int n)
+{
+ uae_u32 i;
+ blockinfo* bi, *dbi;
+
+ hard_flush_count++;
+#if 0
+ write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
+ n,regs.pc,regs.pc_p,current_cache_size/1024);
+ current_cache_size = 0;
+#endif
+ bi=active;
+ while(bi) {
+ cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
+ cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
+ /* Advance before freeing the node */
+ dbi=bi; bi=bi->next;
+ free_blockinfo(dbi);
+ }
+ bi=dormant;
+ while(bi) {
+ cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
+ cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
+ dbi=bi; bi=bi->next;
+ free_blockinfo(dbi);
+ }
+
+ reset_lists();
+ if (!compiled_code)
+ return;
+ current_compile_p=compiled_code;
+ SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
+}
+
+
+/* "Soft flushing" --- instead of actually throwing everything away,
+ we simply mark everything as "needs to be checked".
+
+ Every block on the active list is retargeted: blocks that were
+ BI_INVALID or BI_NEED_RECOMP go to popall_execute_normal / direct_pen
+ and become BI_INVALID; all others go to popall_check_checksum /
+ direct_pcc and become BI_NEED_CHECK.  The cache tag is only rewritten
+ when the block is the one registered in its cache line.  Finally the
+ whole active list is spliced in front of the dormant list.
+ `n` is unused.
+*/
+
+static inline void flush_icache_lazy(int n)
+{
+ uae_u32 i;
+ blockinfo* bi;
+ blockinfo* bi2;
+
+ soft_flush_count++;
+ if (!active)
+ return;
+
+ bi=active;
+ while (bi) {
+ uae_u32 cl=cacheline(bi->pc_p);
+ if (bi->status==BI_INVALID ||
+ bi->status==BI_NEED_RECOMP) {
+ if (bi==cache_tags[cl+1].bi)
+ cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
+ bi->handler_to_use=(cpuop_func *)popall_execute_normal;
+ set_dhtu(bi,bi->direct_pen);
+ bi->status=BI_INVALID;
+ }
+ else {
+ if (bi==cache_tags[cl+1].bi)
+ cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
+ bi->handler_to_use=(cpuop_func *)popall_check_checksum;
+ set_dhtu(bi,bi->direct_pcc);
+ bi->status=BI_NEED_CHECK;
+ }
+ bi2=bi;
+ bi=bi->next;
+ }
+ /* bi2 is now the last entry in the active list */
+ bi2->next=dormant;
+ if (dormant)
+ dormant->prev_p=&(bi2->next);
+
+ dormant=active;
+ active->prev_p=&dormant;
+ active=NULL;
+}
+
+/* Invalidate translations that may cover the host memory range
+ * [start_p, start_p + length).
+ *
+ * Without LAZY_FLUSH_ICACHE_RANGE this simply calls flush_icache(-1).
+ * With it, each active block whose source range is considered a candidate
+ * is retargeted exactly as in flush_icache_lazy() and moved to the
+ * dormant list; other blocks are left untouched.
+ *
+ * NOTE(review): the candidate tests compare pointer differences against
+ * lengths; whether a negative difference is rejected depends on the
+ * operand types after conversion (it looks like an unsigned wrap-around
+ * range check is intended) -- confirm on 64-bit hosts.  Over-selecting
+ * candidates only costs extra checksum checks. */
+void flush_icache_range(uae_u8 *start_p, uae_u32 length)
+{
+ if (!active)
+ return;
+
+#if LAZY_FLUSH_ICACHE_RANGE
+ blockinfo *bi = active;
+ while (bi) {
+#if USE_CHECKSUM_INFO
+ bool candidate = false;
+ for (checksum_info *csi = bi->csi; csi; csi = csi->next) {
+ if (((start_p - csi->start_p) < csi->length) ||
+ ((csi->start_p - start_p) < length)) {
+ candidate = true;
+ break;
+ }
+ }
+#else
+ // Assume system is consistent and would invalidate the right range
+ const bool candidate = (bi->pc_p - start_p) < length;
+#endif
+ /* Step to the next block first: dbi may be unlinked below */
+ blockinfo *dbi = bi;
+ bi = bi->next;
+ if (candidate) {
+ uae_u32 cl = cacheline(dbi->pc_p);
+ if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) {
+ if (dbi == cache_tags[cl+1].bi)
+ cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
+ dbi->handler_to_use = (cpuop_func *)popall_execute_normal;
+ set_dhtu(dbi, dbi->direct_pen);
+ dbi->status = BI_INVALID;
+ }
+ else {
+ if (dbi == cache_tags[cl+1].bi)
+ cache_tags[cl].handler = (cpuop_func *)popall_check_checksum;
+ dbi->handler_to_use = (cpuop_func *)popall_check_checksum;
+ set_dhtu(dbi, dbi->direct_pcc);
+ dbi->status = BI_NEED_CHECK;
+ }
+ remove_from_list(dbi);
+ add_to_dormant(dbi);
+ }
+ }
+ return;
+#endif
+ flush_icache(-1);
+}
+
+/* Unrecoverable error: terminate the process via abort(). */
+static void catastrophe(void)
+{
+ abort();
+}
+
+/* Set to 1 by compile_block() before each instruction and cleared when a
+ * compile handler is selected; a non-zero value afterwards makes
+ * compile_block() emit a call to the interpreter handler instead. */
+int failure;
+
+/* Architecture identifiers accepted by disasm_block() */
+#define TARGET_M68K 0
+#define TARGET_POWERPC 1
+#define TARGET_X86 2
+#define TARGET_X86_64 3
+/* TARGET_NATIVE is the host architecture; it is only defined for the
+ * three hosts tested below. */
+#if defined(i386) || defined(__i386__)
+#define TARGET_NATIVE TARGET_X86
+#endif
+#if defined(powerpc) || defined(__powerpc__)
+#define TARGET_NATIVE TARGET_POWERPC
+#endif
+#if defined(x86_64) || defined(__x86_64__)
+#define TARGET_NATIVE TARGET_X86_64
+#endif
+
+#ifdef ENABLE_MON
+/* Monitor read hook used by disasm_block(): treats `addr` as a host
+ * address and returns the byte stored there. */
+static uae_u32 mon_read_byte_jit(uintptr addr)
+{
+ uae_u8 *m = (uae_u8 *)addr;
+ return (uintptr)(*m);
+}
+
+/* Monitor write hook used by disasm_block(): treats `addr` as a host
+ * address and stores the low byte of `b` there. */
+static void mon_write_byte_jit(uintptr addr, uae_u32 b)
+{
+ uae_u8 *m = (uae_u8 *)addr;
+ *m = b;
+}
+#endif
+
+/* Disassemble `length` bytes of host memory starting at `start` through
+ * the built-in monitor.
+ *
+ * target selects the monitor command: TARGET_M68K -> "d68",
+ * TARGET_X86 -> "d86", TARGET_X86_64 -> "d8664", TARGET_POWERPC -> "d",
+ * anything else -> "x".
+ *
+ * Does nothing unless JITDebug is set, and compiles to an empty body
+ * unless both JIT_DEBUG and ENABLE_MON are defined.  The monitor's
+ * read/write hooks are temporarily replaced by the *_jit variants so that
+ * it works on host addresses, and are restored afterwards.
+ *
+ * Fix: the command string is now built with snprintf() and the addresses
+ * are converted to unsigned long for "%lx"; passing pointers to "%x" was
+ * a format/argument mismatch. */
+void disasm_block(int target, uint8 * start, size_t length)
+{
+    if (!JITDebug)
+        return;
+
+#if defined(JIT_DEBUG) && defined(ENABLE_MON)
+    char disasm_str[200];
+    snprintf(disasm_str, sizeof(disasm_str), "%s $%lx $%lx",
+        target == TARGET_M68K ? "d68" :
+        target == TARGET_X86 ? "d86" :
+        target == TARGET_X86_64 ? "d8664" :
+        target == TARGET_POWERPC ? "d" : "x",
+        (unsigned long)(uintptr)start,
+        (unsigned long)(uintptr)(start + length - 1));
+
+    uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
+    void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
+
+    mon_read_byte = mon_read_byte_jit;
+    mon_write_byte = mon_write_byte_jit;
+
+    char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
+    mon(4, arg);
+
+    mon_read_byte = old_mon_read_byte;
+    mon_write_byte = old_mon_write_byte;
+#endif
+}
+
+/* Disassemble a block of host code (TARGET_NATIVE).
+ * NOTE(review): TARGET_NATIVE is only defined for i386, PowerPC and
+ * x86-64 hosts above -- confirm other hosts never compile this. */
+static void disasm_native_block(uint8 *start, size_t length)
+{
+ disasm_block(TARGET_NATIVE, start, length);
+}
+
+/* Disassemble a block of 68k code (TARGET_M68K). */
+static void disasm_m68k_block(uint8 *start, size_t length)
+{
+ disasm_block(TARGET_M68K, start, length);
+}
+
+/* Fetch the 16-bit opcode word at host address `a`.  When
+ * HAVE_GET_WORD_UNSWAPPED is defined the unswapped accessor is used;
+ * the result is what compile_block() uses to index prop[] and the
+ * handler tables. */
+#ifdef HAVE_GET_WORD_UNSWAPPED
+# define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
+#else
+# define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
+#endif
+
+#if JIT_DEBUG
+/* Debug bookkeeping read by compiler_dumpstate(); compile_block() emits
+ * code that stores the block's 68k start address and its native start
+ * address here when JITDebug is set. */
+static uae_u8 *last_regs_pc_p = 0;
+static uae_u8 *last_compiled_block_addr = 0;
+
+/* Log a snapshot of the JIT state: host addresses of the key emulator
+ * variables, the 68k register state (via m68k_dumpstate) and the most
+ * recently recorded 68k / native block addresses.  Does nothing unless
+ * JITDebug is set.
+ *
+ * Fixes: the address-of expressions on `regs` had been corrupted by a
+ * character-encoding error; the native block size is now reported as 0
+ * instead of dereferencing NULL when no block info exists for the last
+ * recorded PC. */
+void compiler_dumpstate(void)
+{
+    if (!JITDebug)
+        return;
+
+    write_log("### Host addresses\n");
+    write_log("MEM_BASE : %x\n", MEMBaseDiff);
+    write_log("PC_P : %p\n", &regs.pc_p);
+    write_log("SPCFLAGS : %p\n", &regs.spcflags);
+    write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
+    write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
+    write_log("\n");
+
+    write_log("### M68k processor state\n");
+    m68k_dumpstate(0);
+    write_log("\n");
+
+    blockinfo *last_bi = get_blockinfo_addr(last_regs_pc_p);
+
+    write_log("### Block in Mac address space\n");
+    write_log("M68K block : %p\n",
+        (void *)(uintptr)get_virtual_address(last_regs_pc_p));
+    write_log("Native block : %p (%d bytes)\n",
+        (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
+        last_bi ? last_bi->direct_handler_size : 0);
+    write_log("\n");
+}
+#endif
+
+static void compile_block(cpu_history* pc_hist, int blocklen)
+{
+ if (letit && compiled_code) {
+#if PROFILE_COMPILE_TIME
+ compile_count++;
+ clock_t start_time = clock();
+#endif
+#if JIT_DEBUG
+ bool disasm_block = false;
+#endif
+
+ /* OK, here we need to 'compile' a block */
+ int i;
+ int r;
+ int was_comp=0;
+ uae_u8 liveflags[MAXRUN+1];
+#if USE_CHECKSUM_INFO
+ bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
+ uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
+ uintptr min_pcp=max_pcp;
+#else
+ uintptr max_pcp=(uintptr)pc_hist[0].location;
+ uintptr min_pcp=max_pcp;
+#endif
+ uae_u32 cl=cacheline(pc_hist[0].location);
+ void* specflags=(void*)®s.spcflags;
+ blockinfo* bi=NULL;
+ blockinfo* bi2;
+ int extra_len=0;
+
+ redo_current_block=0;
+ if (current_compile_p>=max_compile_start)
+ flush_icache_hard(7);
+
+ alloc_blockinfos();
+
+ bi=get_blockinfo_addr_new(pc_hist[0].location,0);
+ bi2=get_blockinfo(cl);
+
+ optlev=bi->optlevel;
+ if (bi->status!=BI_INVALID) {
+ Dif (bi!=bi2) {
+ /* I don't think it can happen anymore. Shouldn't, in
+ any case. So let's make sure... */
+ write_log("WOOOWOO count=%d, ol=%d %p %p\n",
+ bi->count,bi->optlevel,bi->handler_to_use,
+ cache_tags[cl].handler);
+ abort();
+ }
+
+ Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
+ write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
+ /* What the heck? We are not supposed to be here! */
+ abort();
+ }
+ }
+ if (bi->count==-1) {
+ optlev++;
+ while (!optcount[optlev])
+ optlev++;
+ bi->count=optcount[optlev]-1;
+ }
+ current_block_pc_p=(uintptr)pc_hist[0].location;
+
+ remove_deps(bi); /* We are about to create new code */
+ bi->optlevel=optlev;
+ bi->pc_p=(uae_u8*)pc_hist[0].location;
+#if USE_CHECKSUM_INFO
+ free_checksum_info_chain(bi->csi);
+ bi->csi = NULL;
+#endif
+
+ liveflags[blocklen]=0x1f; /* All flags needed afterwards */
+ i=blocklen;
+ while (i--) {
+ uae_u16* currpcp=pc_hist[i].location;
+ uae_u32 op=DO_GET_OPCODE(currpcp);
+
+#if USE_CHECKSUM_INFO
+ trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
+ if (follow_const_jumps && is_const_jump(op)) {
+ checksum_info *csi = alloc_checksum_info();
+ csi->start_p = (uae_u8 *)min_pcp;
+ csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
+ csi->next = bi->csi;
+ bi->csi = csi;
+ max_pcp = (uintptr)currpcp;
+ }
+ min_pcp = (uintptr)currpcp;
+#else
+ if ((uintptr)currpcp<min_pcp)
+ min_pcp=(uintptr)currpcp;
+ if ((uintptr)currpcp>max_pcp)
+ max_pcp=(uintptr)currpcp;
+#endif
+
+ liveflags[i]=((liveflags[i+1]&
+ (~prop[op].set_flags))|
+ prop[op].use_flags);
+ if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
+ liveflags[i]&= ~FLAG_Z;
+ }
+
+#if USE_CHECKSUM_INFO
+ checksum_info *csi = alloc_checksum_info();
+ csi->start_p = (uae_u8 *)min_pcp;
+ csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
+ csi->next = bi->csi;
+ bi->csi = csi;
+#endif
+
+ bi->needed_flags=liveflags[0];
+
+ align_target(align_loops);
+ was_comp=0;
+
+ bi->direct_handler=(cpuop_func *)get_target();
+ set_dhtu(bi,bi->direct_handler);
+ bi->status=BI_COMPILING;
+ current_block_start_target=(uintptr)get_target();
+
+ log_startblock();
+
+ if (bi->count>=0) { /* Need to generate countdown code */
+ raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location);
+ raw_sub_l_mi((uintptr)&(bi->count),1);
+ raw_jl((uintptr)popall_recompile_block);
+ }
+ if (optlev==0) { /* No need to actually translate */
+ /* Execute normally without keeping stats */
+ raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location);
+ raw_jmp((uintptr)popall_exec_nostats);
+ }
+ else {
+ reg_alloc_run=0;
+ next_pc_p=0;
+ taken_pc_p=0;
+ branch_cc=0;
+
+ comp_pc_p=(uae_u8*)pc_hist[0].location;
+ init_comp();
+ was_comp=1;
+
+#ifdef USE_CPU_EMUL_SERVICES
+ raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
+ raw_jcc_b_oponly(NATIVE_CC_GT);
+ uae_s8 *branchadd=(uae_s8*)get_target();
+ emit_byte(0);
+ raw_call((uintptr)cpu_do_check_ticks);
+ *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
+#endif
+
+#if JIT_DEBUG
+ if (JITDebug) {
+ raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
+ raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
+ }
+#endif
+
+ for (i=0;i<blocklen &&
+ get_target_noopt()<max_compile_start;i++) {
+ cpuop_func **cputbl;
+ compop_func **comptbl;
+ uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
+ needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
+ if (!needed_flags) {
+ cputbl=nfcpufunctbl;
+ comptbl=nfcompfunctbl;
+ }
+ else {
+ cputbl=cpufunctbl;
+ comptbl=compfunctbl;
+ }
+
+#if FLIGHT_RECORDER
+ {
+ mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
+ clobber_flags();
+ remove_all_offsets();
+ int arg = readreg_specific(S1,4,REG_PAR1);
+ prepare_for_call_1();
+ unlock2(arg);
+ prepare_for_call_2();
+ raw_call((uintptr)m68k_record_step);
+ }
+#endif
+
+ failure = 1; // gb-- defaults to failure state
+ if (comptbl[opcode] && optlev>1) {
+ failure=0;
+ if (!was_comp) {
+ comp_pc_p=(uae_u8*)pc_hist[i].location;
+ init_comp();
+ }
+ was_comp=1;
+
+ comptbl[opcode](opcode);
+ freescratch();
+ if (!(liveflags[i+1] & FLAG_CZNV)) {
+ /* We can forget about flags */
+ dont_care_flags();
+ }
+#if INDIVIDUAL_INST
+ flush(1);
+ nop();
+ flush(1);
+ was_comp=0;
+#endif
+ }
+
+ if (failure) {
+ if (was_comp) {
+ flush(1);
+ was_comp=0;
+ }
+ raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_push_l_r(REG_PAR1);
+#endif
+ raw_mov_l_mi((uintptr)®s.pc_p,
+ (uintptr)pc_hist[i].location);
+ raw_call((uintptr)cputbl[opcode]);
+#if PROFILE_UNTRANSLATED_INSNS
+ // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
+ raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
+#endif
+#if USE_NORMAL_CALLING_CONVENTION
+ raw_inc_sp(4);
+#endif
+
+ if (i < blocklen - 1) {
+ uae_s8* branchadd;
+
+ raw_mov_l_rm(0,(uintptr)specflags);
+ raw_test_l_rr(0,0);
+ raw_jz_b_oponly();
+ branchadd=(uae_s8 *)get_target();
+ emit_byte(0);
+ raw_jmp((uintptr)popall_do_nothing);
+ *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
+ }
+ }
+ }
+#if 1 /* This isn't completely kosher yet; It really needs to be
+ be integrated into a general inter-block-dependency scheme */
+ if (next_pc_p && taken_pc_p &&
+ was_comp && taken_pc_p==current_block_pc_p) {
+ blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
+ blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
+ uae_u8 x=bi1->needed_flags;
+
+ if (x==0xff || 1) { /* To be on the safe side */
+ uae_u16* next=(uae_u16*)next_pc_p;
+ uae_u32 op=DO_GET_OPCODE(next);
+
+ x=0x1f;
+ x&=(~prop[op].set_flags);
+ x|=prop[op].use_flags;
+ }
+
+ x|=bi2->needed_flags;
+ if (!(x & FLAG_CZNV)) {
+ /* We can forget about flags */
+ dont_care_flags();
+ extra_len+=2; /* The next instruction now is part of this
+ block */
+ }
+
+ }
+#endif
+ log_flush();
+
+ if (next_pc_p) { /* A branch was registered */
+ uintptr t1=next_pc_p;
+ uintptr t2=taken_pc_p;
+ int cc=branch_cc;
+
+ uae_u32* branchadd;
+ uae_u32* tba;
+ bigstate tmp;
+ blockinfo* tbi;
+
+ if (taken_pc_p<next_pc_p) {
+ /* backward branch. Optimize for the "taken" case ---
+ which means the raw_jcc should fall through when
+ the 68k branch is taken. */
+ t1=taken_pc_p;
+ t2=next_pc_p;
+ cc=branch_cc^1;
+ }
+
+ tmp=live; /* ouch! This is big... */
+ raw_jcc_l_oponly(cc);
+ branchadd=(uae_u32*)get_target();
+ emit_long(0);
+
+ /* predicted outcome */
+ tbi=get_blockinfo_addr_new((void*)t1,1);
+ match_states(tbi);
+ raw_cmp_l_mi((uintptr)specflags,0);
+ raw_jcc_l_oponly(4);
+ tba=(uae_u32*)get_target();
+ emit_long(get_handler(t1)-((uintptr)tba+4));
+ raw_mov_l_mi((uintptr)®s.pc_p,t1);
+ flush_reg_count();
+ raw_jmp((uintptr)popall_do_nothing);
+ create_jmpdep(bi,0,tba,t1);
+
+ align_target(align_jumps);
+ /* not-predicted outcome */
+ *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
+ live=tmp; /* Ouch again */
+ tbi=get_blockinfo_addr_new((void*)t2,1);
+ match_states(tbi);
+
+ //flush(1); /* Can only get here if was_comp==1 */
+ raw_cmp_l_mi((uintptr)specflags,0);
+ raw_jcc_l_oponly(4);
+ tba=(uae_u32*)get_target();
+ emit_long(get_handler(t2)-((uintptr)tba+4));
+ raw_mov_l_mi((uintptr)®s.pc_p,t2);
+ flush_reg_count();
+ raw_jmp((uintptr)popall_do_nothing);
+ create_jmpdep(bi,1,tba,t2);
+ }
+ else
+ {
+ if (was_comp) {
+ flush(1);
+ }
+ flush_reg_count();
+
+ /* Let's find out where next_handler is... */
+ if (was_comp && isinreg(PC_P)) {
+ r=live.state[PC_P].realreg;
+ raw_and_l_ri(r,TAGMASK);
+ int r2 = (r==0) ? 1 : 0;
+ raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
+ raw_cmp_l_mi((uintptr)specflags,0);
+ raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
+ raw_jmp_r(r2);
+ }
+ else if (was_comp && isconst(PC_P)) {
+ uae_u32 v=live.state[PC_P].val;
+ uae_u32* tba;
+ blockinfo* tbi;
+
+ tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
+ match_states(tbi);
+
+ raw_cmp_l_mi((uintptr)specflags,0);
+ raw_jcc_l_oponly(4);
+ tba=(uae_u32*)get_target();
+ emit_long(get_handler(v)-((uintptr)tba+4));
+ raw_mov_l_mi((uintptr)®s.pc_p,v);
+ raw_jmp((uintptr)popall_do_nothing);
+ create_jmpdep(bi,0,tba,v);
+ }
+ else {
+ r=REG_PC_TMP;
+ raw_mov_l_rm(r,(uintptr)®s.pc_p);
+ raw_and_l_ri(r,TAGMASK);
+ int r2 = (r==0) ? 1 : 0;
+ raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
+ raw_cmp_l_mi((uintptr)specflags,0);
+ raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
+ raw_jmp_r(r2);
+ }
+ }
+ }
+
+#if USE_MATCH
+ if (callers_need_recompile(&live,&(bi->env))) {
+ mark_callers_recompile(bi);
+ }
+
+ big_to_small_state(&live,&(bi->env));
+#endif
+
+#if USE_CHECKSUM_INFO
+ remove_from_list(bi);
+ if (trace_in_rom) {
+ // No need to checksum that block trace on cache invalidation
+ free_checksum_info_chain(bi->csi);
+ bi->csi = NULL;
+ add_to_dormant(bi);
+ }
+ else {
+ calc_checksum(bi,&(bi->c1),&(bi->c2));
+ add_to_active(bi);
+ }
+#else
+ if (next_pc_p+extra_len>=max_pcp &&
+ next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
+ max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
+ else
+ max_pcp+=LONGEST_68K_INST;
+
+ bi->len=max_pcp-min_pcp;
+ bi->min_pcp=min_pcp;
+
+ remove_from_list(bi);
+ if (isinrom(min_pcp) && isinrom(max_pcp)) {
+ add_to_dormant(bi); /* No need to checksum it on cache flush.
+ Please don't start changing ROMs in
+ flight! */
+ }
+ else {
+ calc_checksum(bi,&(bi->c1),&(bi->c2));
+ add_to_active(bi);
+ }
+#endif
+
+ current_cache_size += get_target() - (uae_u8 *)current_compile_p;
+
+#if JIT_DEBUG
+ if (JITDebug)
+ bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
+
+ if (JITDebug && disasm_block) {
+ uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
+ D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
+ uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
+ disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
+ D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
+ disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
+ getchar();
+ }
+#endif
+
+ log_dump();
+ align_target(align_jumps);
+
+ /* This is the non-direct handler */
+ bi->handler=
+ bi->handler_to_use=(cpuop_func *)get_target();
+ raw_cmp_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location);
+ raw_jnz((uintptr)popall_cache_miss);
+ comp_pc_p=(uae_u8*)pc_hist[0].location;
+
+ bi->status=BI_FINALIZING;
+ init_comp();
+ match_states(bi);
+ flush(1);
+
+ raw_jmp((uintptr)bi->direct_handler);
+
+ current_compile_p=get_target();
+ raise_in_cl_list(bi);
+
+ /* We will flush soon, anyway, so let's do it now */
+ if (current_compile_p>=max_compile_start)
+ flush_icache_hard(7);
+
+ bi->status=BI_ACTIVE;
+ if (redo_current_block)
+ block_need_recompile(bi);
+
+#if PROFILE_COMPILE_TIME
+ compile_time += (clock() - start_time);
+#endif
+ }
+
+ /* Account for compilation time */
+ cpu_do_check_ticks();
+}
+
+/* Deliberately empty: takes no arguments, performs no work, returns nothing. */
+void do_nothing(void)
+{
+ /* What did you expect this to do? */
+}
+
+/*
+ * Interpreter loop: fetch an opcode, dispatch it through cpufunctbl and call
+ * cpu_check_ticks(), repeating until end_block() is true for the opcode just
+ * executed or any special flag is set.
+ */
+void exec_nostats(void)
+{
+    uae_u32 opcode;
+
+    do {
+        opcode = GET_OPCODE;
+#if FLIGHT_RECORDER
+        m68k_record_step(m68k_getpc());
+#endif
+        cpufunctbl[opcode](opcode);
+        cpu_check_ticks();
+        /* Pending spcflags are left for the caller to deal with. */
+    } while (!(end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)));
+}
+
+/*
+ * Interpret a run of instructions while logging the location each one was
+ * fetched from, then hand that log to compile_block().  Returns at once,
+ * without executing anything, when check_for_cache_miss() is non-zero.
+ */
+void execute_normal(void)
+{
+    if (check_for_cache_miss())
+        return;
+
+    cpu_history pc_hist[MAXRUN];
+    int blocklen = 0;
+    int run_done;
+
+#if REAL_ADDRESSING || DIRECT_ADDRESSING
+    start_pc_p = regs.pc_p;
+    start_pc = get_virtual_address(regs.pc_p);
+#else
+    start_pc_p = regs.pc_oldp;
+    start_pc = regs.pc;
+#endif
+
+    /* Take note: This is the do-it-normal loop */
+    do {
+        pc_hist[blocklen].location = (uae_u16 *)regs.pc_p;
+        blocklen++;
+
+        uae_u32 opcode = GET_OPCODE;
+#if FLIGHT_RECORDER
+        m68k_record_step(m68k_getpc());
+#endif
+        cpufunctbl[opcode](opcode);
+        cpu_check_ticks();
+
+        /* Stop on a block-ending opcode, on any pending special flag, or
+         * once the history array is full. */
+        run_done = end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN;
+    } while (!run_done);
+
+    compile_block(pc_hist, blocklen);
+    /* We will deal with the spcflags in the caller */
+}
+
+typedef void (*compiled_handler)(void);
+
+/*
+ * Call the handler stored in pushall_call_handler over and over.  Each time
+ * it returns, if any special flag is set, m68k_do_specialties() is run and
+ * the loop ends when that reports non-zero.
+ */
+static void m68k_do_compile_execute(void)
+{
+    for (;;) {
+        compiled_handler enter_cache = (compiled_handler)(pushall_call_handler);
+
+        enter_cache();
+        /* Whenever we return from that, we should check spcflags */
+        if (SPCFLAGS_TEST(SPCFLAG_ALL) && m68k_do_specialties ())
+            return;
+    }
+}
+
+/* Keep running m68k_do_compile_execute() until quit_program becomes non-zero. */
+void m68k_compile_execute (void)
+{
+    while (!quit_program)
+        m68k_do_compile_execute();
+}
--- /dev/null
+/*
+ * compiler/flags_x86.h - Native flags definitions for IA-32
+ *
+ * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
+ *
+ * Adaptation for Basilisk II and improvements, copyright 2000-2005
+ * Gwenole Beauchesne
+ *
+ * Basilisk II (C) 1997-2008 Christian Bauer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef NATIVE_FLAGS_X86_H
+#define NATIVE_FLAGS_X86_H
+
+/*
+ * Native integer code conditions, listed in ascending numeric order.
+ * Each condition and its opposite differ only in bit 0, so XOR-ing a value
+ * with 1 yields the inverse condition.
+ * NOTE(review): VS/VC are 10/11 here, whereas the IA-32 condition encoding
+ * uses 0/1 for overflow/no-overflow -- confirm against the users of these
+ * two constants.
+ */
+enum {
+    NATIVE_CC_CS = 2,
+    NATIVE_CC_CC = 3,
+    NATIVE_CC_EQ = 4,
+    NATIVE_CC_NE = 5,
+    NATIVE_CC_LS = 6,
+    NATIVE_CC_HI = 7,
+    NATIVE_CC_MI = 8,
+    NATIVE_CC_PL = 9,
+    NATIVE_CC_VS = 10,
+    NATIVE_CC_VC = 11,
+    NATIVE_CC_LT = 12,
+    NATIVE_CC_GE = 13,
+    NATIVE_CC_LE = 14,
+    NATIVE_CC_GT = 15
+};
+
+#endif /* NATIVE_FLAGS_X86_H */
--- /dev/null
+/*
+ * compiler/gencomp.c - MC680x0 compilation generator
+ *
+ * Based on work Copyright 1995, 1996 Bernd Schmidt
+ * Changes for UAE-JIT Copyright 2000 Bernd Meyer
+ *
+ * Adaptation for Basilisk II and improvements, copyright 2000-2005
+ * Gwenole Beauchesne
+ *
+ * Basilisk II (C) 1997-2005 Christian Bauer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <ctype.h>
+#include "sysdeps.h"
+#include "readcpu.h"
+
+#define BOOL_TYPE "int"
+/* Statement-like marker macros: writing e.g. "failure;" in a generator
+ * routine assigns 1 to the matching global_* variable declared below. */
+#define failure global_failure=1
+#define FAILURE global_failure=1
+#define isjump global_isjump=1
+/* NOTE(review): unlike its siblings, this expansion already ends in ';', so
+ * "is_const_jump;" expands to an assignment followed by an empty statement. */
+#define is_const_jump global_iscjump=1;
+#define isaddx global_isaddx=1
+#define uses_cmov global_cmov=1
+#define mayfail global_mayfail=1
+#define uses_fpu global_fpu=1
+
+int hack_opcode;
+
+/* Flags written by the marker macros above. */
+static int global_failure;
+static int global_isjump;
+static int global_iscjump;
+static int global_isaddx;
+static int global_cmov;
+/* Set to 1 by gen_nextilong(), gen_nextiword() and gen_nextibyte(). */
+static int long_opcode;
+static int global_mayfail;
+static int global_fpu;
+
+static char endstr[1000];
+/* Pending generated text: comprintf() appends at lines[comp_index],
+ * com_flush() prints the first comp_index characters, com_discard() resets
+ * comp_index to 0. */
+static char lines[100000];
+static int comp_index=0;
+
+/* 16-entry lookup table.  The non-negative values coincide with the
+ * NATIVE_CC_* numbers of flags_x86.h; -1 marks entries with no value.
+ * NOTE(review): presumably indexed by the 68k condition-code number --
+ * confirm against the code that reads it. */
+static int cond_codes_x86[]={-1,-1,7,6,3,2,5,4,-1,-1,9,8,13,12,15,14};
+
+/*
+ * printf-style append to the pending-output buffer `lines`.
+ *
+ * The formatted text is stored at offset comp_index and comp_index is then
+ * advanced past it; nothing is printed until com_flush() runs.
+ *
+ * Aborts the generator when formatting fails or when the text (including its
+ * terminating NUL) would not fit into what is left of `lines`, instead of
+ * writing past the end of the array.
+ */
+static void comprintf(const char* format, ...)
+{
+    va_list args;
+    size_t room;
+    int written;
+
+    if (comp_index < 0 || (size_t)comp_index >= sizeof lines) {
+        fprintf(stderr, "comprintf: output buffer index out of range\n");
+        abort();
+    }
+    room = sizeof lines - (size_t)comp_index;
+
+    va_start(args,format);
+    written = vsnprintf(lines+comp_index,room,format,args);
+    va_end(args);   /* every va_start must be paired with va_end */
+
+    /* vsnprintf reports the length it wanted to write; a value >= room
+     * means the output was truncated. */
+    if (written < 0 || (size_t)written >= room) {
+        fprintf(stderr, "comprintf: output buffer overflow\n");
+        abort();
+    }
+    comp_index+=written;
+}
+
+/* Drop all pending generated text by resetting the write offset to 0; the
+ * bytes already stored in `lines` are not cleared. */
+static void com_discard(void)
+{
+ comp_index=0;
+}
+
+/* Print the first comp_index characters of `lines` to stdout, one character
+ * at a time, then mark the buffer empty. */
+static void com_flush(void)
+{
+    int pos = 0;
+
+    while (pos < comp_index) {
+        putchar(lines[pos]);
+        pos++;
+    }
+    com_discard();
+}
+
+
+static FILE *headerfile;
+static FILE *stblfile;
+
+static int using_prefetch;
+static int using_exception_3;
+static int cpu_level;
+/* Tested by genflags(): when non-zero, the variant that does not produce
+ * condition flags is generated. */
+static int noflags;
+
+/* For the current opcode, the next lower level that will have different code.
+ * Initialized to -1 for each opcode. If it remains unchanged, indicates we
+ * are done with that opcode. */
+static int next_cpu_level;
+
+/* Tables filled by read_counts().  The first three are indexed by a running
+ * slot number: opcode_map[slot] holds the opcode, opcode_next_clev[slot] is
+ * initialised to 4 and opcode_last_postfix[slot] to -1.  counts[] is indexed
+ * by opcode and must provide 65536 entries (read_counts clears that many).
+ * NOTE(review): allocation happens outside the visible code -- confirm the
+ * sizes there. */
+static int *opcode_map;
+static int *opcode_next_clev;
+static int *opcode_last_postfix;
+static unsigned long *counts;
+
+/*
+ * Fill opcode_map[] with the order in which opcodes are to be processed.
+ *
+ * Opcodes listed in the optional file "frequent.68k" come first, in file
+ * order, with their count stored in counts[].  Every remaining opcode whose
+ * table68k entry has handler == -1, is not i_ILLG and has a zero count is
+ * appended afterwards.  Each slot gets opcode_next_clev = 4 and
+ * opcode_last_postfix = -1.
+ *
+ * Aborts if the final number of slots differs from nr_cpuop_funcs.
+ */
+static void
+read_counts (void)
+{
+    FILE *file;
+    /* count starts at 0 so the fill-in loop below never reads an
+     * indeterminate value when frequent.68k is missing or has no entries. */
+    unsigned long opcode, count = 0, total;
+    char name[20];
+    int nr = 0;
+    memset (counts, 0, 65536 * sizeof *counts);
+
+    file = fopen ("frequent.68k", "r");
+    if (file)
+    {
+        /* The total is parsed only to skip the header line. */
+        fscanf (file, "Total: %lu\n", &total);
+        /* %19s keeps the mnemonic within name[20]. */
+        while (fscanf (file, "%lx: %lu %19s\n", &opcode, &count, name) == 3)
+        {
+            /* counts[] has 65536 entries; ignore anything beyond that. */
+            if (opcode >= 0x10000)
+                continue;
+            opcode_next_clev[nr] = 4;
+            opcode_last_postfix[nr] = -1;
+            opcode_map[nr++] = opcode;
+            counts[opcode] = count;
+        }
+        fclose (file);
+    }
+    if (nr == nr_cpuop_funcs)
+        return;
+    for (opcode = 0; opcode < 0x10000; opcode++)
+    {
+        if (table68k[opcode].handler == -1 && table68k[opcode].mnemo != i_ILLG
+            && counts[opcode] == 0)
+        {
+            opcode_next_clev[nr] = 4;
+            opcode_last_postfix[nr] = -1;
+            opcode_map[nr++] = opcode;
+            /* Carries the last count read from the file (0 if none). */
+            counts[opcode] = count;
+        }
+    }
+    if (nr != nr_cpuop_funcs)
+        abort ();
+}
+
+static int n_braces = 0;
+static int insn_n_cycles;
+
+/* Emit "{" into the generated code and count one more open brace. */
+static void
+start_brace (void)
+{
+    comprintf ("{");
+    n_braces += 1;
+}
+
+/* Emit "}" into the generated code and count one open brace fewer.
+ * Asserts that at least one brace is currently open. */
+static void
+close_brace (void)
+{
+    assert (n_braces > 0);
+    comprintf ("}");
+    n_braces -= 1;
+}
+
+/* Close every brace that is still open. */
+static void
+finish_braces (void)
+{
+    for (;;) {
+        if (n_braces <= 0)
+            return;
+        close_brace ();
+    }
+}
+
+/* Close braces until no more than `to` of them remain open. */
+static void
+pop_braces (int to)
+{
+    for (; n_braces > to; )
+        close_brace ();
+}
+
+/* Width in bits of an operand size: 8 for sz_byte, 16 for sz_word, 32 for
+ * sz_long.  Any other value aborts. */
+static int
+bit_size (int size)
+{
+    if (size == sz_byte)
+        return 8;
+    if (size == sz_word)
+        return 16;
+    if (size == sz_long)
+        return 32;
+
+    abort ();
+    return 0;
+}
+
+/* Text of the all-ones hexadecimal mask for an operand size: "0xff",
+ * "0xffff" or "0xffffffff".  Any other size aborts. */
+static const char *
+bit_mask (int size)
+{
+    if (size == sz_byte)
+        return "0xff";
+    if (size == sz_word)
+        return "0xffff";
+    if (size == sz_long)
+        return "0xffffffff";
+
+    abort ();
+    return 0;
+}
+
+/* Placeholder: currently returns immediately without emitting anything. */
+static __inline__ void gen_update_next_handler(void)
+{
+ return; /* Can anything clever be done here? */
+}
+
+/*
+ * Memory-access emitters.  Each one appends a single line to the generated
+ * code that calls the helper of the same name (writebyte, writeword,
+ * writelong, readbyte, readword, readlong).  The first argument is the text
+ * naming the address, the second the text naming the source (writes) or the
+ * destination (reads); "scratchie" is always passed as third argument.
+ */
+static void gen_writebyte(char* address, char* source)
+{
+ comprintf("\twritebyte(%s,%s,scratchie);\n",address,source);
+}
+
+static void gen_writeword(char* address, char* source)
+{
+ comprintf("\twriteword(%s,%s,scratchie);\n",address,source);
+}
+
+static void gen_writelong(char* address, char* source)
+{
+ comprintf("\twritelong(%s,%s,scratchie);\n",address,source);
+}
+
+static void gen_readbyte(char* address, char* dest)
+{
+ comprintf("\treadbyte(%s,%s,scratchie);\n",address,dest);
+}
+
+static void gen_readword(char* address, char* dest)
+{
+ comprintf("\treadword(%s,%s,scratchie);\n",address,dest);
+}
+
+static void gen_readlong(char* address, char* dest)
+{
+ comprintf("\treadlong(%s,%s,scratchie);\n",address,dest);
+}
+
+
+
+/*
+ * Return the expression text that fetches the next 32-bit extension value
+ * and advances m68k_pc_offset by 4 in the generated code.  Also adds 4 to
+ * insn_n_cycles and sets long_opcode.
+ */
+static const char *
+gen_nextilong (void)
+{
+    static const char fetch_expr[] = "comp_get_ilong((m68k_pc_offset+=4)-4)";
+
+    long_opcode=1;
+    insn_n_cycles += 4;
+    return fetch_expr;
+}
+
+/*
+ * Return the expression text that fetches the next 16-bit extension word
+ * and advances m68k_pc_offset by 2 in the generated code.  Also adds 2 to
+ * insn_n_cycles and sets long_opcode.
+ */
+static const char *
+gen_nextiword (void)
+{
+    static const char fetch_expr[] = "comp_get_iword((m68k_pc_offset+=2)-2)";
+
+    long_opcode=1;
+    insn_n_cycles += 2;
+    return fetch_expr;
+}
+
+/*
+ * Return the expression text that fetches the next byte-sized extension
+ * value.  The generated code still advances m68k_pc_offset by 2, not 1.
+ * Also adds 2 to insn_n_cycles and sets long_opcode.
+ */
+static const char *
+gen_nextibyte (void)
+{
+    static const char fetch_expr[] = "comp_get_ibyte((m68k_pc_offset+=2)-2)";
+
+    long_opcode=1;
+    insn_n_cycles += 2;
+    return fetch_expr;
+}
+
+/* Emit a byte swap of `opcode` that is compiled into the generated code
+ * only when HAVE_GET_WORD_UNSWAPPED is defined there. */
+static void
+swap_opcode (void)
+{
+    comprintf("#ifdef HAVE_GET_WORD_UNSWAPPED\n"
+              "\topcode = do_byteswap_16(opcode);\n"
+              "#endif\n");
+}
+
+/* Emit a line that makes the generated code call sync_m68k_pc() whenever
+ * m68k_pc_offset has grown beyond 100. */
+static void
+sync_m68k_pc (void)
+{
+ comprintf("\t if (m68k_pc_offset>100) sync_m68k_pc();\n");
+}
+
+
+/*
+ * Emit the C text that sets up one operand of the instruction being
+ * translated.  A brace level is opened first and left open for the caller.
+ *
+ * mode   addressing mode of the operand
+ * reg    text of the expression giving the register number (for immi: the
+ *        text of the immediate value itself)
+ * size   operand width; selects the read helper and the (An)+ / -(An) step
+ * name   base identifier: the effective address lives in "<name>a", a
+ *        fetched value in "<name>"
+ * getv   1: also fetch the operand value.  2: for Dreg/Areg still declare
+ *        "<name>" so it can be used as a MOVE destination, without loading
+ *        it.  Immediate modes abort unless getv is 1.
+ * movem  non-zero: the calling routine handles the Apdi and Aipi register
+ *        update itself; Dreg and Areg abort.
+ *        gb-- movem == 2 means the same thing but for a MOVE16 instruction
+ */
+static void
+genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem)
+{
+    start_brace ();
+    switch (mode)
+    {
+    case Dreg: /* Do we need to check dodgy here? */
+        if (movem)
+            abort ();
+        if (getv == 1 || getv==2) {
+            /* We generate the variable even for getv==2, so we can use
+               it as a destination for MOVE */
+            comprintf ("\tint %s=%s;\n",name,reg);
+        }
+        return;
+
+    case Areg:
+        if (movem)
+            abort ();
+        if (getv == 1 || getv==2) {
+            /* see above */
+            comprintf ("\tint %s=dodgy?scratchie++:%s+8;\n",name,reg);
+            if (getv==1) {
+                comprintf ("\tif (dodgy) \n");
+                comprintf ("\t\tmov_l_rr(%s,%s+8);\n",name, reg);
+            }
+        }
+        return;
+
+    case Aind:
+        comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
+        comprintf ("\tif (dodgy) \n");
+        comprintf ("\t\tmov_l_rr(%sa,%s+8);\n",name, reg);
+        break;
+    case Aipi:
+        comprintf ("\tint %sa=scratchie++;\n",name);
+        comprintf ("\tmov_l_rr(%sa,%s+8);\n",name, reg);
+        break;
+    case Apdi:
+        /* Reject unknown sizes before anything is emitted. */
+        if (size != sz_byte && size != sz_word && size != sz_long)
+            abort ();
+        /* Without movem the register is decremented here, inside an extra
+         * brace level; with movem only the address copy is set up. */
+        if (!movem)
+            start_brace();
+        comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
+        if (!movem) {
+            if (size == sz_byte)
+                comprintf("\tlea_l_brr(%s+8,%s+8,(uae_s32)-areg_byteinc[%s]);\n",reg,reg,reg);
+            else if (size == sz_word)
+                comprintf("\tlea_l_brr(%s+8,%s+8,-2);\n",reg,reg);
+            else
+                comprintf("\tlea_l_brr(%s+8,%s+8,-4);\n",reg,reg);
+        }
+        comprintf ("\tif (dodgy) \n");
+        comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
+        break;
+    case Ad16:
+        comprintf("\tint %sa=scratchie++;\n",name);
+        comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
+        comprintf("\tlea_l_brr(%sa,%sa,(uae_s32)(uae_s16)%s);\n",name,name,gen_nextiword());
+        break;
+    case Ad8r:
+        comprintf("\tint %sa=scratchie++;\n",name);
+        comprintf("\tcalc_disp_ea_020(%s+8,%s,%sa,scratchie);\n",
+                  reg,gen_nextiword(),name);
+        break;
+
+    case PC16:
+        comprintf("\tint %sa=scratchie++;\n",name);
+        comprintf("\tuae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
+        comprintf ("\tuae_s32 PC16off = (uae_s32)(uae_s16)%s;\n", gen_nextiword ());
+        comprintf("\tmov_l_ri(%sa,address+PC16off);\n",name);
+        break;
+
+    case PC8r:
+        comprintf("\tint pctmp=scratchie++;\n");
+        comprintf("\tint %sa=scratchie++;\n",name);
+        comprintf("\tuae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
+        start_brace();
+        comprintf("\tmov_l_ri(pctmp,address);\n");
+
+        comprintf("\tcalc_disp_ea_020(pctmp,%s,%sa,scratchie);\n",
+                  gen_nextiword(),name);
+        break;
+    case absw:
+        comprintf ("\tint %sa = scratchie++;\n",name);
+        comprintf ("\tmov_l_ri(%sa,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ());
+        break;
+    case absl:
+        comprintf ("\tint %sa = scratchie++;\n",name);
+        comprintf ("\tmov_l_ri(%sa,%s); /* absl */\n", name, gen_nextilong ());
+        break;
+    case imm:
+        if (getv != 1)
+            abort ();
+        switch (size)
+        {
+        case sz_byte:
+            comprintf ("\tint %s = scratchie++;\n",name);
+            comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s8)%s);\n", name, gen_nextibyte ());
+            break;
+        case sz_word:
+            comprintf ("\tint %s = scratchie++;\n",name);
+            comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ());
+            break;
+        case sz_long:
+            comprintf ("\tint %s = scratchie++;\n",name);
+            comprintf ("\tmov_l_ri(%s,%s);\n", name, gen_nextilong ());
+            break;
+        default:
+            abort ();
+        }
+        return;
+    case imm0:
+        if (getv != 1)
+            abort ();
+        comprintf ("\tint %s = scratchie++;\n",name);
+        comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s8)%s);\n", name, gen_nextibyte ());
+        return;
+    case imm1:
+        if (getv != 1)
+            abort ();
+        comprintf ("\tint %s = scratchie++;\n",name);
+        comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ());
+        return;
+    case imm2:
+        if (getv != 1)
+            abort ();
+        comprintf ("\tint %s = scratchie++;\n",name);
+        comprintf ("\tmov_l_ri(%s,%s);\n", name, gen_nextilong ());
+        return;
+    case immi:
+        if (getv != 1)
+            abort ();
+        comprintf ("\tint %s = scratchie++;\n",name);
+        comprintf ("\tmov_l_ri(%s,%s);\n", name, reg);
+        return;
+    default:
+        abort ();
+    }
+
+    /* We get here for all non-reg non-immediate addressing modes to
+     * actually fetch the value. */
+    if (getv == 1)
+    {
+        char astring[80];
+        sprintf(astring,"%sa",name);
+        switch (size)
+        {
+        case sz_byte:
+            insn_n_cycles += 2;
+            break;
+        case sz_word:
+            insn_n_cycles += 2;
+            break;
+        case sz_long:
+            insn_n_cycles += 4;
+            break;
+        default:
+            abort ();
+        }
+        start_brace ();
+        comprintf("\tint %s=scratchie++;\n",name);
+        switch (size)
+        {
+        case sz_byte:
+            gen_readbyte(astring,name);
+            break;
+        case sz_word:
+            gen_readword(astring,name);
+            break;
+        case sz_long:
+            gen_readlong(astring,name);
+            break;
+        default:
+            abort ();
+        }
+    }
+
+    /* (An)+ still needs its register advanced now that the access has been
+     * emitted; -(An) was already adjusted above.  With movem set the caller
+     * performs the update. */
+    if (!movem && mode == Aipi) {
+        switch (size)
+        {
+        case sz_byte:
+            comprintf("\tlea_l_brr(%s+8,%s+8,areg_byteinc[%s]);\n",reg,reg,reg);
+            break;
+        case sz_word:
+            comprintf("\tlea_l_brr(%s+8,%s+8,2);\n",reg,reg);
+            break;
+        case sz_long:
+            comprintf("\tlea_l_brr(%s+8,%s+8,4);\n",reg,reg);
+            break;
+        default:
+            abort ();
+        }
+    }
+}
+
+/*
+ * Emit the C text that stores the value named `from` into an operand.
+ *
+ * Dreg/Areg: a register move of the given width, skipped at run time of the
+ *            generated code when source and destination are the same
+ *            register; address registers are referenced as reg+8 and accept
+ *            no byte stores.
+ * memory modes: a write through the address variable "<to>a"; adds 2
+ *            (byte/word) or 4 (long) to insn_n_cycles.
+ * immediate modes, unknown modes and unsupported sizes abort.
+ */
+static void
+genastore (char *from, amodes mode, char *reg, wordsizes size, char *to)
+{
+    char target[80];
+    char width = 0;
+
+    switch (mode)
+    {
+    case Dreg:
+    case Areg:
+    {
+        /* Both register kinds share one template; only the width letter
+         * and the "+8" suffix on the register expression differ. */
+        const char *bias = (mode == Areg) ? "+8" : "";
+
+        if (size == sz_byte && mode == Dreg)
+            width = 'b';
+        else if (size == sz_word)
+            width = 'w';
+        else if (size == sz_long)
+            width = 'l';
+        else
+            abort ();
+
+        comprintf("\tif(%s%s!=%s)\n",reg,bias,from);
+        comprintf("\t\tmov_%c_rr(%s%s,%s);\n",width,reg,bias,from);
+        break;
+    }
+
+    case Apdi:
+    case absw:
+    case PC16:
+    case PC8r:
+    case Ad16:
+    case Ad8r:
+    case Aipi:
+    case Aind:
+    case absl:
+        sprintf(target,"%sa",to);
+
+        if (size == sz_byte) {
+            insn_n_cycles += 2;
+            gen_writebyte(target,from);
+        } else if (size == sz_word) {
+            insn_n_cycles += 2;
+            gen_writeword(target,from);
+        } else if (size == sz_long) {
+            insn_n_cycles += 4;
+            gen_writelong(target,from);
+        } else {
+            abort ();
+        }
+        break;
+
+    default:
+        /* imm, imm0, imm1, imm2, immi and anything unrecognised: there is
+         * nothing that could be stored to. */
+        abort ();
+    }
+}
+
+/*
+ * Emit the translation of a MOVE16 instruction.
+ *
+ * The opcode with its low three bits masked off selects the form: 0xf620 is
+ * MOVE16 (Ax)+,(Ay)+, whose destination register number comes from the
+ * extension word; all other forms obtain their addresses through
+ * genamode().  Both addresses are rounded down to a multiple of 16, the
+ * post-incremented registers are advanced by 16, and four longwords are
+ * copied from source to destination.
+ */
+static void genmov16(uae_u32 opcode, struct instr *curi)
+{
+    const uae_u32 form = opcode & 0xfff8;
+
+    comprintf("\tint src=scratchie++;\n"
+              "\tint dst=scratchie++;\n");
+
+    if (form == 0xf620) {
+        /* MOVE16 (Ax)+,(Ay)+ */
+        comprintf("\tuae_u16 dstreg=((%s)>>12)&0x07;\n", gen_nextiword());
+        comprintf("\tmov_l_rr(src,8+srcreg);\n"
+                  "\tmov_l_rr(dst,8+dstreg);\n");
+    }
+    else {
+        /* Other variants */
+        genamode (curi->smode, "srcreg", curi->size, "src", 0, 2);
+        genamode (curi->dmode, "dstreg", curi->size, "dst", 0, 2);
+        comprintf("\tmov_l_rr(src,srca);\n"
+                  "\tmov_l_rr(dst,dsta);\n");
+    }
+
+    /* Align on 16-byte boundaries */
+    comprintf("\tand_l_ri(src,~15);\n"
+              "\tand_l_ri(dst,~15);\n");
+
+    switch (form) {
+    case 0xf620:
+        /* In the generated code the "if" guards only the source update,
+         * so a register used as both operands is advanced once. */
+        comprintf("\tif (srcreg != dstreg)\n"
+                  "\tadd_l_ri(srcreg+8,16);\n"
+                  "\tadd_l_ri(dstreg+8,16);\n");
+        break;
+    case 0xf600:
+        comprintf("\tadd_l_ri(srcreg+8,16);\n");
+        break;
+    case 0xf608:
+        comprintf("\tadd_l_ri(dstreg+8,16);\n");
+        break;
+    default:
+        break;
+    }
+
+    comprintf("\tint tmp=scratchie;\n"
+              "\tscratchie+=4;\n");
+
+    comprintf("\tget_n_addr(src,src,scratchie);\n"
+              "\tget_n_addr(dst,dst,scratchie);\n"
+              "\tmov_l_rR(tmp+0,src,0);\n"
+              "\tmov_l_rR(tmp+1,src,4);\n"
+              "\tmov_l_rR(tmp+2,src,8);\n"
+              "\tmov_l_rR(tmp+3,src,12);\n"
+              "\tmov_l_Rr(dst,tmp+0,0);\n"
+              "\tforget_about(tmp+0);\n"
+              "\tmov_l_Rr(dst,tmp+1,4);\n"
+              "\tforget_about(tmp+1);\n"
+              "\tmov_l_Rr(dst,tmp+2,8);\n"
+              "\tforget_about(tmp+2);\n"
+              "\tmov_l_Rr(dst,tmp+3,12);\n");
+}
+
+/*
+ * Emit the translation of a MOVEM that loads registers from memory.
+ *
+ * The generated code walks the 16 mask bits from bit 0 upwards and, for each
+ * set bit i, loads register i from the native address at a running offset
+ * (long: byte-swapped; word: byte-swapped and sign-extended).  For the
+ * (An)+ mode the address register is finally set to the start address plus
+ * the total offset.  Sizes other than long and word abort.
+ */
+static void
+genmovemel (uae_u16 opcode)
+{
+    comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ());
+    comprintf ("\tint native=scratchie++;\n"
+               "\tint i;\n"
+               "\tsigned char offset=0;\n");
+    genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1);
+    comprintf("\tget_n_addr(srca,native,scratchie);\n");
+
+    comprintf("\tfor (i=0;i<16;i++) {\n"
+              "\t\tif ((mask>>i)&1) {\n");
+
+    if (table68k[opcode].size == sz_long) {
+        comprintf("\t\t\tmov_l_rR(i,native,offset);\n"
+                  "\t\t\tbswap_32(i);\n"
+                  "\t\t\toffset+=4;\n");
+    } else if (table68k[opcode].size == sz_word) {
+        comprintf("\t\t\tmov_w_rR(i,native,offset);\n"
+                  "\t\t\tbswap_16(i);\n"
+                  "\t\t\tsign_extend_16_rr(i,i);\n"
+                  "\t\t\toffset+=2;\n");
+    } else {
+        abort();
+    }
+
+    comprintf("\t\t}\n"
+              "\t}");
+
+    /* Post-increment: write the advanced address back to the register. */
+    if (table68k[opcode].dmode == Aipi)
+        comprintf("\t\t\tlea_l_brr(8+dstreg,srca,offset);\n");
+}
+
+
+/*
+ * Emit the translation of a MOVEM that stores registers to memory.
+ *
+ * The generated code walks the 16 mask bits from bit 0 upwards.  Normally a
+ * set bit i stores register i at a running offset that grows afterwards.
+ * For the -(An) mode the offset is decreased first and register 15-i is
+ * stored; afterwards the address register is set to the start address plus
+ * the (negative) total offset.  Values are byte-swapped through a scratch
+ * register before being written.  Sizes other than long and word abort.
+ */
+static void
+genmovemle (uae_u16 opcode)
+{
+    const int predec = (table68k[opcode].dmode == Apdi);
+
+    comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ());
+    comprintf ("\tint native=scratchie++;\n"
+               "\tint i;\n"
+               "\tint tmp=scratchie++;\n"
+               "\tsigned char offset=0;\n");
+    genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1);
+
+    comprintf("\tget_n_addr(srca,native,scratchie);\n");
+
+    /* The loop header is the same for both directions. */
+    comprintf("\tfor (i=0;i<16;i++) {\n"
+              "\t\tif ((mask>>i)&1) {\n");
+
+    if (table68k[opcode].size == sz_long) {
+        if (predec)
+            comprintf("\t\t\toffset-=4;\n"
+                      "\t\t\tmov_l_rr(tmp,15-i);\n"
+                      "\t\t\tbswap_32(tmp);\n"
+                      "\t\t\tmov_l_Rr(native,tmp,offset);\n");
+        else
+            comprintf("\t\t\tmov_l_rr(tmp,i);\n"
+                      "\t\t\tbswap_32(tmp);\n"
+                      "\t\t\tmov_l_Rr(native,tmp,offset);\n"
+                      "\t\t\toffset+=4;\n");
+    } else if (table68k[opcode].size == sz_word) {
+        if (predec)
+            comprintf("\t\t\toffset-=2;\n"
+                      "\t\t\tmov_l_rr(tmp,15-i);\n"
+                      "\t\t\tbswap_16(tmp);\n"
+                      "\t\t\tmov_w_Rr(native,tmp,offset);\n");
+        else
+            comprintf("\t\t\tmov_l_rr(tmp,i);\n"
+                      "\t\t\tbswap_16(tmp);\n"
+                      "\t\t\tmov_w_Rr(native,tmp,offset);\n"
+                      "\t\t\toffset+=2;\n");
+    } else {
+        abort();
+    }
+
+    comprintf("\t\t}\n"
+              "\t}");
+
+    /* Pre-decrement: write the lowered address back to the register. */
+    if (predec)
+        comprintf("\t\t\tlea_l_brr(8+dstreg,srca,(uae_s32)offset);\n");
+}
+
+
+/* Emit a line that makes the generated code call duplicate_carry() when its
+ * needed_flags include FLAG_X. */
+static void
+duplicate_carry (void)
+{
+ comprintf ("\tif (needed_flags&FLAG_X) duplicate_carry();\n");
+}
+
+/* Selects which kind of operation genflags() generates code for; genflags()
+ * switches on this value. */
+typedef enum
+{
+ flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp,
+ flag_addx, flag_subx, flag_zn, flag_av, flag_sv, flag_and, flag_or,
+ flag_eor, flag_mov
+}
+flagtypes;
+
+
+static void
+genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst)
+{
+ if (noflags) {
+ switch(type) {
+ case flag_cmp:
+ comprintf("\tdont_care_flags();\n");
+ comprintf("/* Weird --- CMP with noflags ;-) */\n");
+ return;
+ case flag_add:
+ case flag_sub:
+ comprintf("\tdont_care_flags();\n");
+ {
+ char* op;
+ switch(type) {
+ case flag_add: op="add"; break;
+ case flag_sub: op="sub"; break;
+ default: abort();
+ }
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("\t%s_b(%s,%s);\n",op,dst,src);
+ break;
+ case sz_word:
+ comprintf("\t%s_w(%s,%s);\n",op,dst,src);
+ break;
+ case sz_long:
+ comprintf("\t%s_l(%s,%s);\n",op,dst,src);
+ break;
+ }
+ return;
+ }
+ break;
+
+ case flag_and:
+ comprintf("\tdont_care_flags();\n");
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("if (kill_rodent(dst)) {\n");
+ comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src);
+ comprintf("\tor_l_ri(scratchie,0xffffff00);\n");
+ comprintf("\tand_l(%s,scratchie);\n",dst);
+ comprintf("\tforget_about(scratchie);\n");
+ comprintf("\t} else \n"
+ "\tand_b(%s,%s);\n",dst,src);
+ break;
+ case sz_word:
+ comprintf("if (kill_rodent(dst)) {\n");
+ comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src);
+ comprintf("\tor_l_ri(scratchie,0xffff0000);\n");
+ comprintf("\tand_l(%s,scratchie);\n",dst);
+ comprintf("\tforget_about(scratchie);\n");
+ comprintf("\t} else \n"
+ "\tand_w(%s,%s);\n",dst,src);
+ break;
+ case sz_long:
+ comprintf("\tand_l(%s,%s);\n",dst,src);
+ break;
+ }
+ return;
+
+ case flag_mov:
+ comprintf("\tdont_care_flags();\n");
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("if (kill_rodent(dst)) {\n");
+ comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src);
+ comprintf("\tand_l_ri(%s,0xffffff00);\n",dst);
+ comprintf("\tor_l(%s,scratchie);\n",dst);
+ comprintf("\tforget_about(scratchie);\n");
+ comprintf("\t} else \n"
+ "\tmov_b_rr(%s,%s);\n",dst,src);
+ break;
+ case sz_word:
+ comprintf("if (kill_rodent(dst)) {\n");
+ comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src);
+ comprintf("\tand_l_ri(%s,0xffff0000);\n",dst);
+ comprintf("\tor_l(%s,scratchie);\n",dst);
+ comprintf("\tforget_about(scratchie);\n");
+ comprintf("\t} else \n"
+ "\tmov_w_rr(%s,%s);\n",dst,src);
+ break;
+ case sz_long:
+ comprintf("\tmov_l_rr(%s,%s);\n",dst,src);
+ break;
+ }
+ return;
+
+ case flag_or:
+ case flag_eor:
+ comprintf("\tdont_care_flags();\n");
+ start_brace();
+ {
+ char* op;
+ switch(type) {
+ case flag_or: op="or"; break;
+ case flag_eor: op="xor"; break;
+ default: abort();
+ }
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("if (kill_rodent(dst)) {\n");
+ comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src);
+ comprintf("\t%s_l(%s,scratchie);\n",op,dst);
+ comprintf("\tforget_about(scratchie);\n");
+ comprintf("\t} else \n"
+ "\t%s_b(%s,%s);\n",op,dst,src);
+ break;
+ case sz_word:
+ comprintf("if (kill_rodent(dst)) {\n");
+ comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src);
+ comprintf("\t%s_l(%s,scratchie);\n",op,dst);
+ comprintf("\tforget_about(scratchie);\n");
+ comprintf("\t} else \n"
+ "\t%s_w(%s,%s);\n",op,dst,src);
+ break;
+ case sz_long:
+ comprintf("\t%s_l(%s,%s);\n",op,dst,src);
+ break;
+ }
+ close_brace();
+ return;
+ }
+
+
+ case flag_addx:
+ case flag_subx:
+ comprintf("\tdont_care_flags();\n");
+ {
+ char* op;
+ switch(type) {
+ case flag_addx: op="adc"; break;
+ case flag_subx: op="sbb"; break;
+ default: abort();
+ }
+ comprintf("\trestore_carry();\n"); /* Reload the X flag into C */
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("\t%s_b(%s,%s);\n",op,dst,src);
+ break;
+ case sz_word:
+ comprintf("\t%s_w(%s,%s);\n",op,dst,src);
+ break;
+ case sz_long:
+ comprintf("\t%s_l(%s,%s);\n",op,dst,src);
+ break;
+ }
+ return;
+ }
+ break;
+ default: return;
+ }
+ }
+
+ /* Need the flags, but possibly not all of them */
+ switch (type)
+ {
+ case flag_logical_noclobber:
+ failure;
+
+ case flag_and:
+ case flag_or:
+ case flag_eor:
+ comprintf("\tdont_care_flags();\n");
+ start_brace();
+ {
+ char* op;
+ switch(type) {
+ case flag_and: op="and"; break;
+ case flag_or: op="or"; break;
+ case flag_eor: op="xor"; break;
+ default: abort();
+ }
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_b(%s,%s);\n",op,dst,src);
+ break;
+ case sz_word:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_w(%s,%s);\n",op,dst,src);
+ break;
+ case sz_long:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_l(%s,%s);\n",op,dst,src);
+ break;
+ }
+ comprintf("\tlive_flags();\n");
+ comprintf("\tend_needflags();\n");
+ close_brace();
+ return;
+ }
+
+ case flag_mov:
+ comprintf("\tdont_care_flags();\n");
+ start_brace();
+ {
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("\tif (%s!=%s) {\n",src,dst);
+ comprintf("\tmov_b_ri(%s,0);\n"
+ "\tstart_needflags();\n",dst);
+ comprintf("\tor_b(%s,%s);\n",dst,src);
+ comprintf("\t} else {\n");
+ comprintf("\tmov_b_rr(%s,%s);\n",dst,src);
+ comprintf("\ttest_b_rr(%s,%s);\n",dst,dst);
+ comprintf("\t}\n");
+ break;
+ case sz_word:
+ comprintf("\tif (%s!=%s) {\n",src,dst);
+ comprintf("\tmov_w_ri(%s,0);\n"
+ "\tstart_needflags();\n",dst);
+ comprintf("\tor_w(%s,%s);\n",dst,src);
+ comprintf("\t} else {\n");
+ comprintf("\tmov_w_rr(%s,%s);\n",dst,src);
+ comprintf("\ttest_w_rr(%s,%s);\n",dst,dst);
+ comprintf("\t}\n");
+ break;
+ case sz_long:
+ comprintf("\tif (%s!=%s) {\n",src,dst);
+ comprintf("\tmov_l_ri(%s,0);\n"
+ "\tstart_needflags();\n",dst);
+ comprintf("\tor_l(%s,%s);\n",dst,src);
+ comprintf("\t} else {\n");
+ comprintf("\tmov_l_rr(%s,%s);\n",dst,src);
+ comprintf("\ttest_l_rr(%s,%s);\n",dst,dst);
+ comprintf("\t}\n");
+ break;
+ }
+ comprintf("\tlive_flags();\n");
+ comprintf("\tend_needflags();\n");
+ close_brace();
+ return;
+ }
+
+ case flag_logical:
+ comprintf("\tdont_care_flags();\n");
+ start_brace();
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("\tstart_needflags();\n"
+ "\ttest_b_rr(%s,%s);\n",value,value);
+ break;
+ case sz_word:
+ comprintf("\tstart_needflags();\n"
+ "\ttest_w_rr(%s,%s);\n",value,value);
+ break;
+ case sz_long:
+ comprintf("\tstart_needflags();\n"
+ "\ttest_l_rr(%s,%s);\n",value,value);
+ break;
+ }
+ comprintf("\tlive_flags();\n");
+ comprintf("\tend_needflags();\n");
+ close_brace();
+ return;
+
+
+ case flag_add:
+ case flag_sub:
+ case flag_cmp:
+ comprintf("\tdont_care_flags();\n");
+ {
+ char* op;
+ switch(type) {
+ case flag_add: op="add"; break;
+ case flag_sub: op="sub"; break;
+ case flag_cmp: op="cmp"; break;
+ default: abort();
+ }
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_b(%s,%s);\n",op,dst,src);
+ break;
+ case sz_word:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_w(%s,%s);\n",op,dst,src);
+ break;
+ case sz_long:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_l(%s,%s);\n",op,dst,src);
+ break;
+ }
+ comprintf("\tlive_flags();\n");
+ comprintf("\tend_needflags();\n");
+ if (type!=flag_cmp) {
+ duplicate_carry();
+ }
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+
+ return;
+ }
+
+ case flag_addx:
+ case flag_subx:
+ uses_cmov;
+ comprintf("\tdont_care_flags();\n");
+ {
+ char* op;
+ switch(type) {
+ case flag_addx: op="adc"; break;
+ case flag_subx: op="sbb"; break;
+ default: abort();
+ }
+ start_brace();
+ comprintf("\tint zero=scratchie++;\n"
+ "\tint one=scratchie++;\n"
+ "\tif (needed_flags&FLAG_Z) {\n"
+ "\tmov_l_ri(zero,0);\n"
+ "\tmov_l_ri(one,-1);\n"
+ "\tmake_flags_live();\n"
+ "\tcmov_l_rr(zero,one,5);\n"
+ "\t}\n");
+ comprintf("\trestore_carry();\n"); /* Reload the X flag into C */
+ switch (size)
+ {
+ case sz_byte:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_b(%s,%s);\n",op,dst,src);
+ break;
+ case sz_word:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_w(%s,%s);\n",op,dst,src);
+ break;
+ case sz_long:
+ comprintf("\tstart_needflags();\n"
+ "\t%s_l(%s,%s);\n",op,dst,src);
+ break;
+ }
+ comprintf("\tlive_flags();\n");
+ comprintf("\tif (needed_flags&FLAG_Z) {\n"
+ "\tcmov_l_rr(zero,one,5);\n"
+ "\tset_zero(zero, one);\n" /* No longer need one */
+ "\tlive_flags();\n"
+ "\t}\n");
+ comprintf("\tend_needflags();\n");
+ duplicate_carry();
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ return;
+ }
+ default:
+ failure;
+ break;
+ }
+}
+
+static void
+force_range_for_rox (const char *var, wordsizes size)
+{
+    /* Emit statements into the generated code that reduce the count held
+     * in the variable named by `var` through conditional subtractions.
+     * Each emitted line subtracts its constant once when the variable is
+     * at least that constant.  The constants are multiples of 33 (long),
+     * 17 (word) and 9 (byte), applied from largest to smallest.
+     * NOTE(review): presumably this brings a rotate-through-X count into
+     * 0..width for ROXL/ROXR -- confirm against the callers.
+     * A modulo operation would be an alternative; which one is faster
+     * has not been settled.  Any other size emits nothing. */
+    if (size == sz_long) {
+        comprintf ("\tif (%s >= 33) %s -= 33;\n", var, var);
+    } else if (size == sz_word) {
+        comprintf ("\tif (%s >= 34) %s -= 34;\n", var, var);
+        comprintf ("\tif (%s >= 17) %s -= 17;\n", var, var);
+    } else if (size == sz_byte) {
+        comprintf ("\tif (%s >= 36) %s -= 36;\n", var, var);
+        comprintf ("\tif (%s >= 18) %s -= 18;\n", var, var);
+        comprintf ("\tif (%s >= 9) %s -= 9;\n", var, var);
+    }
+}
+
+static const char *
+cmask (wordsizes size)
+{
+    /* Return, as C source text, the hexadecimal constant with only the
+     * most significant bit set for an operand of the given size:
+     * bit 7 for byte, bit 15 for word, bit 31 for long.
+     * Any other size is treated as a programming error and aborts. */
+    if (size == sz_byte)
+        return "0x80";
+    if (size == sz_word)
+        return "0x8000";
+    if (size == sz_long)
+        return "0x80000000";
+
+    abort ();
+}
+
+static int
+source_is_imm1_8 (struct instr *i)
+{
+    /* Report whether the instruction's source type field equals 3.
+     * NOTE(review): judging by the function name, stype 3 presumably
+     * marks a 1..8 immediate source encoding -- confirm against the
+     * instruction table definition.
+     * Returns 1 when it matches, 0 otherwise. */
+    if (i->stype == 3)
+        return 1;
+    return 0;
+}
+
+static int /* returns zero for success, non-zero for failure */
+gen_opcode (unsigned long int opcode)
+{
+ struct instr *curi = table68k + opcode;
+ char* ssize=NULL;
+
+ insn_n_cycles = 2;
+ global_failure=0;
+ long_opcode=0;
+ global_isjump=0;
+ global_iscjump=0;
+ global_isaddx=0;
+ global_cmov=0;
+ global_fpu=0;
+ global_mayfail=0;
+ hack_opcode=opcode;
+ endstr[0]=0;
+
+ start_brace ();
+ comprintf("\tuae_u8 scratchie=S1;\n");
+ switch (curi->plev)
+ {
+ case 0: /* not privileged */
+ break;
+ case 1: /* unprivileged only on 68000 */
+ if (cpu_level == 0)
+ break;
+ if (next_cpu_level < 0)
+ next_cpu_level = 0;
+
+ /* fall through */
+ case 2: /* priviledged */
+ failure; /* Easy ones first */
+ break;
+ case 3: /* privileged if size == word */
+ if (curi->size == sz_byte)
+ break;
+ failure;
+ break;
+ }
+ switch (curi->size) {
+ case sz_byte: ssize="b"; break;
+ case sz_word: ssize="w"; break;
+ case sz_long: ssize="l"; break;
+ default: abort();
+ }
+
+ switch (curi->mnemo)
+ {
+ case i_OR:
+ case i_AND:
+ case i_EOR:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ switch(curi->mnemo) {
+ case i_OR: genflags (flag_or, curi->size, "", "src", "dst"); break;
+ case i_AND: genflags (flag_and, curi->size, "", "src", "dst"); break;
+ case i_EOR: genflags (flag_eor, curi->size, "", "src", "dst"); break;
+ }
+ genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+
+ case i_ORSR:
+ case i_EORSR:
+ failure;
+ isjump;
+ break;
+ case i_ANDSR:
+ failure;
+ isjump;
+ break;
+ case i_SUB:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ genflags (flag_sub, curi->size, "", "src", "dst");
+ genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+ case i_SUBA:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
+ start_brace();
+ comprintf("\tint tmp=scratchie++;\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break;
+ case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break;
+ case sz_long: comprintf("\ttmp=src;\n"); break;
+ default: abort();
+ }
+ comprintf("\tsub_l(dst,tmp);\n");
+ genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
+ break;
+ case i_SUBX:
+ isaddx;
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ genflags (flag_subx, curi->size, "", "src", "dst");
+ genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+ case i_SBCD:
+ failure;
+ /* I don't think so! */
+ break;
+ case i_ADD:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ genflags (flag_add, curi->size, "", "src", "dst");
+ genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+ case i_ADDA:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
+ start_brace();
+ comprintf("\tint tmp=scratchie++;\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break;
+ case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break;
+ case sz_long: comprintf("\ttmp=src;\n"); break;
+ default: abort();
+ }
+ comprintf("\tadd_l(dst,tmp);\n");
+ genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
+ break;
+ case i_ADDX:
+ isaddx;
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ start_brace();
+ genflags (flag_addx, curi->size, "", "src", "dst");
+ genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+ case i_ABCD:
+ failure;
+ /* No BCD maths for me.... */
+ break;
+ case i_NEG:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ start_brace ();
+ comprintf("\tint dst=scratchie++;\n");
+ comprintf("\tmov_l_ri(dst,0);\n");
+ genflags (flag_sub, curi->size, "", "src", "dst");
+ genastore ("dst", curi->smode, "srcreg", curi->size, "src");
+ break;
+ case i_NEGX:
+ isaddx;
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ start_brace ();
+ comprintf("\tint dst=scratchie++;\n");
+ comprintf("\tmov_l_ri(dst,0);\n");
+ genflags (flag_subx, curi->size, "", "src", "dst");
+ genastore ("dst", curi->smode, "srcreg", curi->size, "src");
+ break;
+
+ case i_NBCD:
+ failure;
+ /* Nope! */
+ break;
+ case i_CLR:
+ genamode (curi->smode, "srcreg", curi->size, "src", 2, 0);
+ start_brace();
+ comprintf("\tint dst=scratchie++;\n");
+ comprintf("\tmov_l_ri(dst,0);\n");
+ genflags (flag_logical, curi->size, "dst", "", "");
+ genastore ("dst", curi->smode, "srcreg", curi->size, "src");
+ break;
+ case i_NOT:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ start_brace ();
+ comprintf("\tint dst=scratchie++;\n");
+ comprintf("\tmov_l_ri(dst,0xffffffff);\n");
+ genflags (flag_eor, curi->size, "", "src", "dst");
+ genastore ("dst", curi->smode, "srcreg", curi->size, "src");
+ break;
+ case i_TST:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genflags (flag_logical, curi->size, "src", "", "");
+ break;
+ case i_BCHG:
+ case i_BCLR:
+ case i_BSET:
+ case i_BTST:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ start_brace();
+ comprintf("\tint s=scratchie++;\n"
+ "\tint tmp=scratchie++;\n"
+ "\tmov_l_rr(s,src);\n");
+ if (curi->size == sz_byte)
+ comprintf("\tand_l_ri(s,7);\n");
+ else
+ comprintf("\tand_l_ri(s,31);\n");
+
+ {
+ char* op;
+ int need_write=1;
+
+ switch(curi->mnemo) {
+ case i_BCHG: op="btc"; break;
+ case i_BCLR: op="btr"; break;
+ case i_BSET: op="bts"; break;
+ case i_BTST: op="bt"; need_write=0; break;
+ default: abort();
+ }
+ comprintf("\t%s_l_rr(dst,s);\n" /* Answer now in C */
+ "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */
+ "\tmake_flags_live();\n" /* Get the flags back */
+ "\tdont_care_flags();\n",op);
+ if (!noflags) {
+ comprintf("\tstart_needflags();\n"
+ "\tset_zero(s,tmp);\n"
+ "\tlive_flags();\n"
+ "\tend_needflags();\n");
+ }
+ if (need_write)
+ genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+ }
+ break;
+
+ case i_CMPM:
+ case i_CMP:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ start_brace ();
+ genflags (flag_cmp, curi->size, "", "src", "dst");
+ break;
+ case i_CMPA:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
+ start_brace();
+ comprintf("\tint tmps=scratchie++;\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tsign_extend_8_rr(tmps,src);\n"); break;
+ case sz_word: comprintf("\tsign_extend_16_rr(tmps,src);\n"); break;
+ case sz_long: comprintf("tmps=src;\n"); break;
+ default: abort();
+ }
+ genflags (flag_cmp, sz_long, "", "tmps", "dst");
+ break;
+ /* The next two are coded a little unconventional, but they are doing
+ * weird things... */
+ case i_MVPRM:
+ isjump;
+ failure;
+ break;
+ case i_MVPMR:
+ isjump;
+ failure;
+ break;
+ case i_MOVE:
+ switch(curi->dmode) {
+ case Dreg:
+ case Areg:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
+ genflags (flag_mov, curi->size, "", "src", "dst");
+ genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+ default: /* It goes to memory, not a register */
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
+ genflags (flag_logical, curi->size, "src", "", "");
+ genastore ("src", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+ }
+ break;
+ case i_MOVEA:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
+
+ start_brace();
+ comprintf("\tint tmps=scratchie++;\n");
+ switch(curi->size) {
+ case sz_word: comprintf("\tsign_extend_16_rr(dst,src);\n"); break;
+ case sz_long: comprintf("\tmov_l_rr(dst,src);\n"); break;
+ default: abort();
+ }
+ genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
+ break;
+
+ case i_MVSR2:
+ isjump;
+ failure;
+ break;
+ case i_MV2SR:
+ isjump;
+ failure;
+ break;
+ case i_SWAP:
+ genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
+ comprintf("\tdont_care_flags();\n");
+ comprintf("\trol_l_ri(src,16);\n");
+ genflags (flag_logical, sz_long, "src", "", "");
+ genastore ("src", curi->smode, "srcreg", sz_long, "src");
+ break;
+ case i_EXG:
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ start_brace();
+ comprintf("\tint tmp=scratchie++;\n"
+ "\tmov_l_rr(tmp,src);\n");
+ genastore ("dst", curi->smode, "srcreg", curi->size, "src");
+ genastore ("tmp", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+ case i_EXT:
+ genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
+ comprintf("\tdont_care_flags();\n");
+ start_brace ();
+ switch (curi->size)
+ {
+ case sz_byte:
+ comprintf ("\tint dst = src;\n"
+ "\tsign_extend_8_rr(src,src);\n");
+ break;
+ case sz_word:
+ comprintf ("\tint dst = scratchie++;\n"
+ "\tsign_extend_8_rr(dst,src);\n");
+ break;
+ case sz_long:
+ comprintf ("\tint dst = src;\n"
+ "\tsign_extend_16_rr(src,src);\n");
+ break;
+ default:
+ abort ();
+ }
+ genflags (flag_logical,
+ curi->size == sz_word ? sz_word : sz_long, "dst", "", "");
+ genastore ("dst", curi->smode, "srcreg",
+ curi->size == sz_word ? sz_word : sz_long, "src");
+ break;
+ case i_MVMEL:
+ genmovemel (opcode);
+ break;
+ case i_MVMLE:
+ genmovemle (opcode);
+ break;
+ case i_TRAP:
+ isjump;
+ failure;
+ break;
+ case i_MVR2USP:
+ isjump;
+ failure;
+ break;
+ case i_MVUSP2R:
+ isjump;
+ failure;
+ break;
+ case i_RESET:
+ isjump;
+ failure;
+ break;
+ case i_NOP:
+ break;
+ case i_STOP:
+ isjump;
+ failure;
+ break;
+ case i_RTE:
+ isjump;
+ failure;
+ break;
+ case i_RTD:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ genamode (curi->smode, "srcreg", curi->size, "offs", 1, 0);
+ /* offs is constant */
+ comprintf("\tadd_l_ri(offs,4);\n");
+ start_brace();
+ comprintf("\tint newad=scratchie++;\n"
+ "\treadlong(15,newad,scratchie);\n"
+ "\tmov_l_mr((uintptr)&regs.pc,newad);\n"
+ "\tget_n_addr_jmp(newad,PC_P,scratchie);\n"
+ "\tmov_l_mr((uintptr)&regs.pc_oldp,PC_P);\n"
+ "\tm68k_pc_offset=0;\n"
+ "\tadd_l(15,offs);\n");
+ gen_update_next_handler();
+ isjump;
+ break;
+ case i_LINK:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0);
+ comprintf("\tsub_l_ri(15,4);\n"
+ "\twritelong_clobber(15,src,scratchie);\n"
+ "\tmov_l_rr(src,15);\n");
+ if (curi->size==sz_word)
+ comprintf("\tsign_extend_16_rr(offs,offs);\n");
+ comprintf("\tadd_l(15,offs);\n");
+ genastore ("src", curi->smode, "srcreg", sz_long, "src");
+ break;
+ case i_UNLK:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ comprintf("\tmov_l_rr(15,src);\n"
+ "\treadlong(15,src,scratchie);\n"
+ "\tadd_l_ri(15,4);\n");
+ genastore ("src", curi->smode, "srcreg", curi->size, "src");
+ break;
+ case i_RTS:
+ comprintf("\tint newad=scratchie++;\n"
+ "\treadlong(15,newad,scratchie);\n"
+ "\tmov_l_mr((uintptr)&regs.pc,newad);\n"
+ "\tget_n_addr_jmp(newad,PC_P,scratchie);\n"
+ "\tmov_l_mr((uintptr)&regs.pc_oldp,PC_P);\n"
+ "\tm68k_pc_offset=0;\n"
+ "\tlea_l_brr(15,15,4);\n");
+ gen_update_next_handler();
+ isjump;
+ break;
+ case i_TRAPV:
+ isjump;
+ failure;
+ break;
+ case i_RTR:
+ isjump;
+ failure;
+ break;
+ case i_JSR:
+ isjump;
+ genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
+ start_brace();
+ comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
+ comprintf("\tint ret=scratchie++;\n"
+ "\tmov_l_ri(ret,retadd);\n"
+ "\tsub_l_ri(15,4);\n"
+ "\twritelong_clobber(15,ret,scratchie);\n");
+ comprintf("\tmov_l_mr((uintptr)&regs.pc,srca);\n"
+ "\tget_n_addr_jmp(srca,PC_P,scratchie);\n"
+ "\tmov_l_mr((uintptr)&regs.pc_oldp,PC_P);\n"
+ "\tm68k_pc_offset=0;\n");
+ gen_update_next_handler();
+ break;
+ case i_JMP:
+ isjump;
+ genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
+ comprintf("\tmov_l_mr((uintptr)&regs.pc,srca);\n"
+ "\tget_n_addr_jmp(srca,PC_P,scratchie);\n"
+ "\tmov_l_mr((uintptr)&regs.pc_oldp,PC_P);\n"
+ "\tm68k_pc_offset=0;\n");
+ gen_update_next_handler();
+ break;
+ case i_BSR:
+ is_const_jump;
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ start_brace();
+ comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
+ comprintf("\tint ret=scratchie++;\n"
+ "\tmov_l_ri(ret,retadd);\n"
+ "\tsub_l_ri(15,4);\n"
+ "\twritelong_clobber(15,ret,scratchie);\n");
+ comprintf("\tadd_l_ri(src,m68k_pc_offset_thisinst+2);\n");
+ comprintf("\tm68k_pc_offset=0;\n");
+ comprintf("\tadd_l(PC_P,src);\n");
+
+ comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n");
+ break;
+ case i_Bcc:
+ comprintf("\tuae_u32 v,v1,v2;\n");
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ /* That source is an immediate, so we can clobber it with abandon */
+ switch(curi->size) {
+ case sz_byte: comprintf("\tsign_extend_8_rr(src,src);\n"); break;
+ case sz_word: comprintf("\tsign_extend_16_rr(src,src);\n"); break;
+ case sz_long: break;
+ }
+ comprintf("\tsub_l_ri(src,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n");
+ /* Leave the following as "add" --- it will allow it to be optimized
+ away due to src being a constant ;-) */
+ comprintf("\tadd_l_ri(src,(uintptr)comp_pc_p);\n");
+ comprintf("\tmov_l_ri(PC_P,(uintptr)comp_pc_p);\n");
+ /* Now they are both constant. Might as well fold in m68k_pc_offset */
+ comprintf("\tadd_l_ri(src,m68k_pc_offset);\n");
+ comprintf("\tadd_l_ri(PC_P,m68k_pc_offset);\n");
+ comprintf("\tm68k_pc_offset=0;\n");
+
+ if (curi->cc>=2) {
+ comprintf("\tv1=get_const(PC_P);\n"
+ "\tv2=get_const(src);\n"
+ "\tregister_branch(v1,v2,%d);\n",
+ cond_codes_x86[curi->cc]);
+ comprintf("\tmake_flags_live();\n"); /* Load the flags */
+ isjump;
+ }
+ else {
+ is_const_jump;
+ }
+
+ switch(curi->cc) {
+ case 0: /* Unconditional jump */
+ comprintf("\tmov_l_rr(PC_P,src);\n");
+ comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n");
+ break;
+ case 1: break; /* This is silly! */
+ case 8: failure; break; /* Work out details! FIXME */
+ case 9: failure; break; /* Not critical, though! */
+
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ break;
+ default: abort();
+ }
+ break;
+ case i_LEA:
+ genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
+ genastore ("srca", curi->dmode, "dstreg", curi->size, "dst");
+ break;
+ case i_PEA:
+ if (table68k[opcode].smode==Areg ||
+ table68k[opcode].smode==Aind ||
+ table68k[opcode].smode==Aipi ||
+ table68k[opcode].smode==Apdi ||
+ table68k[opcode].smode==Ad16 ||
+ table68k[opcode].smode==Ad8r)
+ comprintf("if (srcreg==7) dodgy=1;\n");
+
+ genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
+ genamode (Apdi, "7", sz_long, "dst", 2, 0);
+ genastore ("srca", Apdi, "7", sz_long, "dst");
+ break;
+ case i_DBcc:
+ isjump;
+ uses_cmov;
+ genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0);
+
+ /* That offs is an immediate, so we can clobber it with abandon */
+ switch(curi->size) {
+ case sz_word: comprintf("\tsign_extend_16_rr(offs,offs);\n"); break;
+ default: abort(); /* Seems this only comes in word flavour */
+ }
+ comprintf("\tsub_l_ri(offs,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n");
+ comprintf("\tadd_l_ri(offs,(uintptr)comp_pc_p);\n"); /* New PC,
+ once the
+ offset_68k is
+ * also added */
+ /* Let's fold in the m68k_pc_offset at this point */
+ comprintf("\tadd_l_ri(offs,m68k_pc_offset);\n");
+ comprintf("\tadd_l_ri(PC_P,m68k_pc_offset);\n");
+ comprintf("\tm68k_pc_offset=0;\n");
+
+ start_brace();
+ comprintf("\tint nsrc=scratchie++;\n");
+
+ if (curi->cc>=2) {
+ comprintf("\tmake_flags_live();\n"); /* Load the flags */
+ }
+
+ if (curi->size!=sz_word)
+ abort();
+
+
+ switch(curi->cc) {
+ case 0: /* This is an elaborate nop? */
+ break;
+ case 1:
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tsub_w_ri(src,1);\n");
+ comprintf("\t end_needflags();\n");
+ start_brace();
+ comprintf("\tuae_u32 v2,v;\n"
+ "\tuae_u32 v1=get_const(PC_P);\n");
+ comprintf("\tv2=get_const(offs);\n"
+ "\tregister_branch(v1,v2,3);\n");
+ break;
+
+ case 8: failure; break; /* Work out details! FIXME */
+ case 9: failure; break; /* Not critical, though! */
+
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ comprintf("\tmov_l_rr(nsrc,src);\n");
+ comprintf("\tlea_l_brr(scratchie,src,(uae_s32)-1);\n"
+ "\tmov_w_rr(src,scratchie);\n");
+ comprintf("\tcmov_l_rr(offs,PC_P,%d);\n",
+ cond_codes_x86[curi->cc]);
+ comprintf("\tcmov_l_rr(src,nsrc,%d);\n",
+ cond_codes_x86[curi->cc]);
+ /* OK, now for cc=true, we have src==nsrc and offs==PC_P,
+ so whether we move them around doesn't matter. However,
+ if cc=false, we have offs==jump_pc, and src==nsrc-1 */
+
+ comprintf("\t start_needflags();\n");
+ comprintf("\ttest_w_rr(nsrc,nsrc);\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("\tcmov_l_rr(PC_P,offs,5);\n");
+ break;
+ default: abort();
+ }
+ genastore ("src", curi->smode, "srcreg", curi->size, "src");
+ gen_update_next_handler();
+ break;
+
+ case i_Scc:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ genamode (curi->smode, "srcreg", curi->size, "src", 2, 0);
+ start_brace ();
+ comprintf ("\tint val = scratchie++;\n");
+
+ /* We set val to 0 if we really should use 255, and to 1 for real 0 */
+ switch(curi->cc) {
+ case 0: /* Unconditional set */
+ comprintf("\tmov_l_ri(val,0);\n");
+ break;
+ case 1:
+ /* Unconditional not-set */
+ comprintf("\tmov_l_ri(val,1);\n");
+ break;
+ case 8: failure; break; /* Work out details! FIXME */
+ case 9: failure; break; /* Not critical, though! */
+
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ comprintf("\tmake_flags_live();\n"); /* Load the flags */
+ /* All condition codes can be inverted by changing the LSB */
+ comprintf("\tsetcc(val,%d);\n",
+ cond_codes_x86[curi->cc]^1); break;
+ default: abort();
+ }
+ comprintf("\tsub_b_ri(val,1);\n");
+ genastore ("val", curi->smode, "srcreg", curi->size, "src");
+ break;
+ case i_DIVU:
+ isjump;
+ failure;
+ break;
+ case i_DIVS:
+ isjump;
+ failure;
+ break;
+ case i_MULU:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ comprintf("\tdont_care_flags();\n");
+ genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0);
+ /* To do 16x16 unsigned multiplication, we actually use
+ 32x32 signed, and zero-extend the registers first.
+ That solves the problem of MUL needing dedicated registers
+ on the x86 */
+ comprintf("\tzero_extend_16_rr(scratchie,src);\n"
+ "\tzero_extend_16_rr(dst,dst);\n"
+ "\timul_32_32(dst,scratchie);\n");
+ genflags (flag_logical, sz_long, "dst", "", "");
+ genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
+ break;
+ case i_MULS:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ comprintf("\tdont_care_flags();\n");
+ genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
+ genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0);
+ comprintf("\tsign_extend_16_rr(scratchie,src);\n"
+ "\tsign_extend_16_rr(dst,dst);\n"
+ "\timul_32_32(dst,scratchie);\n");
+ genflags (flag_logical, sz_long, "dst", "", "");
+ genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
+ break;
+ case i_CHK:
+ isjump;
+ failure;
+ break;
+
+ case i_CHK2:
+ isjump;
+ failure;
+ break;
+
+ case i_ASR:
+ mayfail;
+ if (curi->smode==Dreg) {
+ comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
+ " FAIL(1);\n"
+ " return;\n"
+ "} \n");
+ start_brace();
+ }
+ comprintf("\tdont_care_flags();\n");
+
+ genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+ if (curi->smode!=immi) {
+/* failure; /* UNTESTED: NEW: from "Ipswitch Town" release */
+ if (!noflags) {
+ uses_cmov;
+ start_brace();
+ comprintf("\tint highmask;\n"
+ "\tint width;\n"
+ "\tint cdata=scratchie++;\n"
+ "\tint sdata=scratchie++;\n"
+ "\tint tmpcnt=scratchie++;\n");
+ comprintf("\tmov_l_rr(sdata,data);\n"
+ "\tmov_l_rr(cdata,data);\n"
+ "\tmov_l_rr(tmpcnt,cnt);\n");
+ switch (curi->size) {
+ case sz_byte: comprintf("\tshra_b_ri(sdata,7);\n"); break;
+ case sz_word: comprintf("\tshra_w_ri(sdata,15);\n"); break;
+ case sz_long: comprintf("\tshra_l_ri(sdata,31);\n"); break;
+ default: abort();
+ }
+ /* sdata is now the MSB propagated to all bits for the
+ register of specified size */
+ comprintf("\tand_l_ri(tmpcnt,63);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshra_b_rr(data,tmpcnt);\n"
+ "\thighmask=0x38;\n");
+ break;
+ case sz_word: comprintf("\tshra_w_rr(data,tmpcnt);\n"
+ "\thighmask=0x30;\n");
+ break;
+ case sz_long: comprintf("\tshra_l_rr(data,tmpcnt);\n"
+ "\thighmask=0x20;\n");
+ break;
+ }
+ comprintf("\ttest_l_ri(tmpcnt,highmask);\n");
+ switch (curi->size) {
+ case sz_byte: comprintf("\tcmov_b_rr(data,sdata,NATIVE_CC_NE);\n"); break;
+ case sz_word: comprintf("\tcmov_w_rr(data,sdata,NATIVE_CC_NE);\n"); break;
+ case sz_long: comprintf("\tcmov_l_rr(data,sdata,NATIVE_CC_NE);\n"); break;
+ }
+
+ /* Result of shift is now in data. Now we need to determine
+ the carry by shifting cdata one less */
+ /* NOTE: carry bit is cleared if shift count is zero */
+ comprintf("\tmov_l_ri(scratchie,0);\n"
+ "\ttest_l_rr(tmpcnt,tmpcnt);\n"
+ "\tcmov_l_rr(sdata,scratchie,NATIVE_CC_EQ);\n"
+ "\tforget_about(scratchie);\n");
+ comprintf("\tsub_l_ri(tmpcnt,1);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshra_b_rr(cdata,tmpcnt);\n");break;
+ case sz_word: comprintf("\tshra_w_rr(cdata,tmpcnt);\n");break;
+ case sz_long: comprintf("\tshra_l_rr(cdata,tmpcnt);\n");break;
+ default: abort();
+ }
+ /* If the shift count was higher than the width, we need
+ to pick up the sign from original data (sdata) */
+ /* NOTE: for shift count of zero, the following holds
+ true and cdata contains 0 so that carry bit is cleared */
+ comprintf("\ttest_l_ri(tmpcnt,highmask);\n"
+ "\tforget_about(tmpcnt);\n"
+ "\tcmov_l_rr(cdata,sdata,NATIVE_CC_NE);\n");
+
+ /* And create the flags (preserve X flag if shift count is zero) */
+ comprintf("\ttest_l_ri(cnt,63);\n"
+ "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n");
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ else {
+ uses_cmov;
+ start_brace();
+ comprintf("\tint highmask;\n"
+ "\tint width;\n"
+ "\tint highshift=scratchie++;\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshra_b_rr(data,cnt);\n"
+ "\thighmask=0x38;\n"
+ "\twidth=8;\n");
+ break;
+ case sz_word: comprintf("\tshra_w_rr(data,cnt);\n"
+ "\thighmask=0x30;\n"
+ "\twidth=16;\n");
+ break;
+ case sz_long: comprintf("\tshra_l_rr(data,cnt);\n"
+ "\thighmask=0x20;\n"
+ "\twidth=32;\n");
+ break;
+ default: abort();
+ }
+ comprintf("test_l_ri(cnt,highmask);\n"
+ "mov_l_ri(highshift,0);\n"
+ "mov_l_ri(scratchie,width/2);\n"
+ "cmov_l_rr(highshift,scratchie,5);\n");
+ /* The x86 masks out bits, so we now make sure that things
+ really get shifted as much as planned */
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break;
+ case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break;
+ case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break;
+ default: abort();
+ }
+ /* And again */
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break;
+ case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break;
+ case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break;
+ default: abort();
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ }
+ else {
+ start_brace();
+ comprintf("\tint tmp=scratchie++;\n"
+ "\tint bp;\n"
+ "\tmov_l_rr(tmp,data);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshra_b_ri(data,srcreg);\n"
+ "\tbp=srcreg-1;\n"); break;
+ case sz_word: comprintf("\tshra_w_ri(data,srcreg);\n"
+ "\tbp=srcreg-1;\n"); break;
+ case sz_long: comprintf("\tshra_l_ri(data,srcreg);\n"
+ "\tbp=srcreg-1;\n"); break;
+ default: abort();
+ }
+
+ if (!noflags) {
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("\t duplicate_carry();\n");
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ break;
+
+ case i_ASL:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ mayfail;
+ if (curi->smode==Dreg) {
+ comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
+ " FAIL(1);\n"
+ " return;\n"
+ "} \n");
+ start_brace();
+ }
+ comprintf("\tdont_care_flags();\n");
+ /* Except for the handling of the V flag, this is identical to
+ LSL. The handling of V is, uhm, unpleasant, so if it's needed,
+ let the normal emulation handle it. Shoulders of giants kinda
+ thing ;-) */
+ comprintf("if (needed_flags & FLAG_V) {\n"
+ " FAIL(1);\n"
+ " return;\n"
+ "} \n");
+
+ genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+ if (curi->smode!=immi) {
+ if (!noflags) {
+ uses_cmov;
+ start_brace();
+ comprintf("\tint highmask;\n"
+ "\tint cdata=scratchie++;\n"
+ "\tint tmpcnt=scratchie++;\n");
+ comprintf("\tmov_l_rr(tmpcnt,cnt);\n"
+ "\tand_l_ri(tmpcnt,63);\n"
+ "\tmov_l_ri(cdata,0);\n"
+ "\tcmov_l_rr(cdata,data,5);\n");
+ /* cdata is now either data (for shift count!=0) or
+ 0 (for shift count==0) */
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n"
+ "\thighmask=0x38;\n");
+ break;
+ case sz_word: comprintf("\tshll_w_rr(data,cnt);\n"
+ "\thighmask=0x30;\n");
+ break;
+ case sz_long: comprintf("\tshll_l_rr(data,cnt);\n"
+ "\thighmask=0x20;\n");
+ break;
+ default: abort();
+ }
+ comprintf("test_l_ri(cnt,highmask);\n"
+ "mov_l_ri(scratchie,0);\n"
+ "cmov_l_rr(scratchie,data,4);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
+ case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
+ case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break;
+ default: abort();
+ }
+ /* Result of shift is now in data. Now we need to determine
+ the carry by shifting cdata one less */
+ comprintf("\tsub_l_ri(tmpcnt,1);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshll_b_rr(cdata,tmpcnt);\n");break;
+ case sz_word: comprintf("\tshll_w_rr(cdata,tmpcnt);\n");break;
+ case sz_long: comprintf("\tshll_l_rr(cdata,tmpcnt);\n");break;
+ default: abort();
+ }
+ comprintf("test_l_ri(tmpcnt,highmask);\n"
+ "mov_l_ri(scratchie,0);\n"
+ "cmov_l_rr(cdata,scratchie,5);\n");
+ /* And create the flags */
+ comprintf("\tstart_needflags();\n");
+
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n");
+ comprintf("\t bt_l_ri(cdata,7);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n");
+ comprintf("\t bt_l_ri(cdata,15);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n");
+ comprintf("\t bt_l_ri(cdata,31);\n"); break;
+ }
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("\t duplicate_carry();\n");
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ else {
+ uses_cmov;
+ start_brace();
+ comprintf("\tint highmask;\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n"
+ "\thighmask=0x38;\n");
+ break;
+ case sz_word: comprintf("\tshll_w_rr(data,cnt);\n"
+ "\thighmask=0x30;\n");
+ break;
+ case sz_long: comprintf("\tshll_l_rr(data,cnt);\n"
+ "\thighmask=0x20;\n");
+ break;
+ default: abort();
+ }
+ comprintf("test_l_ri(cnt,highmask);\n"
+ "mov_l_ri(scratchie,0);\n"
+ "cmov_l_rr(scratchie,data,4);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
+ case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
+ case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break;
+ default: abort();
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ }
+ else {
+ start_brace();
+ comprintf("\tint tmp=scratchie++;\n"
+ "\tint bp;\n"
+ "\tmov_l_rr(tmp,data);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n"
+ "\tbp=8-srcreg;\n"); break;
+ case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n"
+ "\tbp=16-srcreg;\n"); break;
+ case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n"
+ "\tbp=32-srcreg;\n"); break;
+ default: abort();
+ }
+
+ if (!noflags) {
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("\t duplicate_carry();\n");
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ break;
+
+ case i_LSR:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ mayfail;
+ if (curi->smode==Dreg) {
+ comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
+ " FAIL(1);\n"
+ " return;\n"
+ "} \n");
+ start_brace();
+ }
+ comprintf("\tdont_care_flags();\n");
+
+ genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+ if (curi->smode!=immi) {
+ if (!noflags) {
+ uses_cmov;
+ start_brace();
+ comprintf("\tint highmask;\n"
+ "\tint cdata=scratchie++;\n"
+ "\tint tmpcnt=scratchie++;\n");
+ comprintf("\tmov_l_rr(tmpcnt,cnt);\n"
+ "\tand_l_ri(tmpcnt,63);\n"
+ "\tmov_l_ri(cdata,0);\n"
+ "\tcmov_l_rr(cdata,data,NATIVE_CC_NE);\n");
+ /* cdata is now either data (for shift count!=0) or
+ 0 (for shift count==0) */
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshrl_b_rr(data,tmpcnt);\n"
+ "\thighmask=0x38;\n");
+ break;
+ case sz_word: comprintf("\tshrl_w_rr(data,tmpcnt);\n"
+ "\thighmask=0x30;\n");
+ break;
+ case sz_long: comprintf("\tshrl_l_rr(data,tmpcnt);\n"
+ "\thighmask=0x20;\n");
+ break;
+ default: abort();
+ }
+ comprintf("\ttest_l_ri(tmpcnt,highmask);\n"
+ "\rmov_l_ri(scratchie,0);\n");
+ if (curi->size == sz_long)
+ comprintf("\tcmov_l_rr(data,scratchie,NATIVE_CC_NE);\n");
+ else {
+ comprintf("\tcmov_l_rr(scratchie,data,NATIVE_CC_EQ);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
+ case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
+ default: abort();
+ }
+ }
+ /* Result of shift is now in data. Now we need to determine
+ the carry by shifting cdata one less */
+ comprintf("\tsub_l_ri(tmpcnt,1);\n");
+ comprintf("\tshrl_l_rr(cdata,tmpcnt);\n");
+ comprintf("\ttest_l_ri(tmpcnt,highmask);\n");
+ comprintf("\tforget_about(tmpcnt);\n");
+ if (curi->size != sz_long) /* scratchie is still live for LSR.L */
+ comprintf("\tmov_l_ri(scratchie,0);\n");
+ comprintf("\tcmov_l_rr(cdata,scratchie,NATIVE_CC_NE);\n");
+ comprintf("\tforget_about(scratchie);\n");
+ /* And create the flags (preserve X flag if shift count is zero) */
+ comprintf("\ttest_l_ri(cnt,63);\n"
+ "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n");
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ else {
+ uses_cmov;
+ start_brace();
+ comprintf("\tint highmask;\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshrl_b_rr(data,cnt);\n"
+ "\thighmask=0x38;\n");
+ break;
+ case sz_word: comprintf("\tshrl_w_rr(data,cnt);\n"
+ "\thighmask=0x30;\n");
+ break;
+ case sz_long: comprintf("\tshrl_l_rr(data,cnt);\n"
+ "\thighmask=0x20;\n");
+ break;
+ default: abort();
+ }
+ comprintf("test_l_ri(cnt,highmask);\n"
+ "mov_l_ri(scratchie,0);\n"
+ "cmov_l_rr(scratchie,data,4);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
+ case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
+ case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break;
+ default: abort();
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ }
+ else {
+ start_brace();
+ comprintf("\tint tmp=scratchie++;\n"
+ "\tint bp;\n"
+ "\tmov_l_rr(tmp,data);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshrl_b_ri(data,srcreg);\n"
+ "\tbp=srcreg-1;\n"); break;
+ case sz_word: comprintf("\tshrl_w_ri(data,srcreg);\n"
+ "\tbp=srcreg-1;\n"); break;
+ case sz_long: comprintf("\tshrl_l_ri(data,srcreg);\n"
+ "\tbp=srcreg-1;\n"); break;
+ default: abort();
+ }
+
+ if (!noflags) {
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("\t duplicate_carry();\n");
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ break;
+
+ case i_LSL:
+ mayfail;
+ if (curi->smode==Dreg) {
+ comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
+ " FAIL(1);\n"
+ " return;\n"
+ "} \n");
+ start_brace();
+ }
+ comprintf("\tdont_care_flags();\n");
+
+ genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+ if (curi->smode!=immi) {
+/* failure; /* UNTESTED: NEW: from "Ipswitch Town" release */
+ if (!noflags) {
+ uses_cmov;
+ start_brace();
+ comprintf("\tint highmask;\n"
+ "\tint cdata=scratchie++;\n"
+ "\tint tmpcnt=scratchie++;\n");
+ comprintf("\tmov_l_rr(tmpcnt,cnt);\n"
+ "\tand_l_ri(tmpcnt,63);\n"
+ "\tmov_l_ri(cdata,0);\n"
+ "\tcmov_l_rr(cdata,data,NATIVE_CC_NE);\n");
+ /* cdata is now either data (for shift count!=0) or
+ 0 (for shift count==0) */
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshll_b_rr(data,tmpcnt);\n"
+ "\thighmask=0x38;\n");
+ break;
+ case sz_word: comprintf("\tshll_w_rr(data,tmpcnt);\n"
+ "\thighmask=0x30;\n");
+ break;
+ case sz_long: comprintf("\tshll_l_rr(data,tmpcnt);\n"
+ "\thighmask=0x20;\n");
+ break;
+ default: abort();
+ }
+ comprintf("\ttest_l_ri(tmpcnt,highmask);\n"
+ "\tmov_l_ri(scratchie,0);\n");
+ if (curi->size == sz_long)
+ comprintf("\tcmov_l_rr(data,scratchie,NATIVE_CC_NE);\n");
+ else {
+ comprintf("\tcmov_l_rr(scratchie,data,NATIVE_CC_EQ);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
+ case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
+ default: abort();
+ }
+ }
+ /* Result of shift is now in data. Now we need to determine
+ the carry by shifting cdata one less */
+ comprintf("\tsub_l_ri(tmpcnt,1);\n");
+ comprintf("\tshll_l_rr(cdata,tmpcnt);\n");
+ comprintf("\ttest_l_ri(tmpcnt,highmask);\n");
+ comprintf("\tforget_about(tmpcnt);\n");
+ if (curi->size != sz_long) /* scratchie is still live for LSL.L */
+ comprintf("\tmov_l_ri(scratchie,0);\n");
+ comprintf("\tcmov_l_rr(cdata,scratchie,NATIVE_CC_NE);\n");
+ comprintf("\tforget_about(scratchie);\n");
+ /* And create the flags (preserve X flag if shift count is zero) */
+ switch (curi->size) {
+ case sz_byte: comprintf("\tshrl_l_ri(cdata,7);\n"); break;
+ case sz_word: comprintf("\tshrl_l_ri(cdata,15);\n"); break;
+ case sz_long: comprintf("\tshrl_l_ri(cdata,31);\n"); break;
+ }
+ comprintf("\ttest_l_ri(cnt,63);\n"
+ "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n");
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ comprintf("\t bt_l_ri(cdata,0);\n");
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ else {
+ uses_cmov;
+ start_brace();
+ comprintf("\tint highmask;\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n"
+ "\thighmask=0x38;\n");
+ break;
+ case sz_word: comprintf("\tshll_w_rr(data,cnt);\n"
+ "\thighmask=0x30;\n");
+ break;
+ case sz_long: comprintf("\tshll_l_rr(data,cnt);\n"
+ "\thighmask=0x20;\n");
+ break;
+ default: abort();
+ }
+ comprintf("test_l_ri(cnt,highmask);\n"
+ "mov_l_ri(scratchie,0);\n"
+ "cmov_l_rr(scratchie,data,4);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
+ case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
+ case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break;
+ default: abort();
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ }
+ else {
+ start_brace();
+ comprintf("\tint tmp=scratchie++;\n"
+ "\tint bp;\n"
+ "\tmov_l_rr(tmp,data);\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n"
+ "\tbp=8-srcreg;\n"); break;
+ case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n"
+ "\tbp=16-srcreg;\n"); break;
+ case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n"
+ "\tbp=32-srcreg;\n"); break;
+ default: abort();
+ }
+
+ if (!noflags) {
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ comprintf("\t duplicate_carry();\n");
+ comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ }
+ break;
+
+ case i_ROL:
+ mayfail;
+ if (curi->smode==Dreg) {
+ comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
+ " FAIL(1);\n"
+ " return;\n"
+ "} \n");
+ start_brace();
+ }
+ comprintf("\tdont_care_flags();\n");
+ genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+ start_brace ();
+
+ switch(curi->size) {
+ case sz_long: comprintf("\t rol_l_rr(data,cnt);\n"); break;
+ case sz_word: comprintf("\t rol_w_rr(data,cnt);\n"); break;
+ case sz_byte: comprintf("\t rol_b_rr(data,cnt);\n"); break;
+ }
+
+ if (!noflags) {
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ comprintf("\t bt_l_ri(data,0x00);\n"); /* Set C */
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ break;
+
+ case i_ROR:
+ mayfail;
+ if (curi->smode==Dreg) {
+ comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
+ " FAIL(1);\n"
+ " return;\n"
+ "} \n");
+ start_brace();
+ }
+ comprintf("\tdont_care_flags();\n");
+ genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
+ genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+ start_brace ();
+
+ switch(curi->size) {
+ case sz_long: comprintf("\t ror_l_rr(data,cnt);\n"); break;
+ case sz_word: comprintf("\t ror_w_rr(data,cnt);\n"); break;
+ case sz_byte: comprintf("\t ror_b_rr(data,cnt);\n"); break;
+ }
+
+ if (!noflags) {
+ comprintf("\tstart_needflags();\n");
+ comprintf("\tif (needed_flags & FLAG_ZNV)\n");
+ switch(curi->size) {
+ case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
+ case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
+ case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
+ }
+ switch(curi->size) {
+ case sz_byte: comprintf("\t bt_l_ri(data,0x07);\n"); break;
+ case sz_word: comprintf("\t bt_l_ri(data,0x0f);\n"); break;
+ case sz_long: comprintf("\t bt_l_ri(data,0x1f);\n"); break;
+ }
+ comprintf("\t live_flags();\n");
+ comprintf("\t end_needflags();\n");
+ }
+ genastore ("data", curi->dmode, "dstreg", curi->size, "data");
+ break;
+
+ case i_ROXL:
+ failure;
+ break;
+ case i_ROXR:
+ failure;
+ break;
+ case i_ASRW:
+ failure;
+ break;
+ case i_ASLW:
+ failure;
+ break;
+ case i_LSRW:
+ failure;
+ break;
+ case i_LSLW:
+ failure;
+ break;
+ case i_ROLW:
+ failure;
+ break;
+ case i_RORW:
+ failure;
+ break;
+ case i_ROXLW:
+ failure;
+ break;
+ case i_ROXRW:
+ failure;
+ break;
+ case i_MOVEC2:
+ isjump;
+ failure;
+ break;
+ case i_MOVE2C:
+ isjump;
+ failure;
+ break;
+ case i_CAS:
+ failure;
+ break;
+ case i_CAS2:
+ failure;
+ break;
+ case i_MOVES: /* ignore DFC and SFC because we have no MMU */
+ isjump;
+ failure;
+ break;
+ case i_BKPT: /* only needed for hardware emulators */
+ isjump;
+ failure;
+ break;
+ case i_CALLM: /* not present in 68030 */
+ isjump;
+ failure;
+ break;
+ case i_RTM: /* not present in 68030 */
+ isjump;
+ failure;
+ break;
+ case i_TRAPcc:
+ isjump;
+ failure;
+ break;
+ case i_DIVL:
+ isjump;
+ failure;
+ break;
+ case i_MULL:
+/* failure; /* NEW: from "Ipswitch Town" release */
+ if (!noflags) {
+ failure;
+ break;
+ }
+ comprintf("\tuae_u16 extra=%s;\n",gen_nextiword());
+ comprintf("\tint r2=(extra>>12)&7;\n"
+ "\tint tmp=scratchie++;\n");
+
+ genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+ /* The two operands are in dst and r2 */
+ comprintf("\tif (extra&0x0400) {\n" /* Need full 64 bit result */
+ "\tint r3=(extra&7);\n"
+ "\tmov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */
+ comprintf("\tif (extra&0x0800) { \n" /* signed */
+ "\t\timul_64_32(r2,r3);\n"
+ "\t} else { \n"
+ "\t\tmul_64_32(r2,r3);\n"
+ "\t} \n");
+ /* The result is in r2/tmp, with r2 holding the lower 32 bits */
+ comprintf("\t} else {\n"); /* Only want 32 bit result */
+ /* operands in dst and r2, result goes into r2 */
+ /* shouldn't matter whether it's signed or unsigned?!? */
+ comprintf("\timul_32_32(r2,dst);\n"
+ "\t}\n");
+ break;
+
+ case i_BFTST:
+ case i_BFEXTU:
+ case i_BFCHG:
+ case i_BFEXTS:
+ case i_BFCLR:
+ case i_BFFFO:
+ case i_BFSET:
+ case i_BFINS:
+ failure;
+ break;
+ case i_PACK:
+ failure;
+ break;
+ case i_UNPK:
+ failure;
+ break;
+ case i_TAS:
+ failure;
+ break;
+ case i_FPP:
+ uses_fpu;
+#ifdef USE_JIT_FPU
+ mayfail;
+ comprintf("\tuae_u16 extra=%s;\n",gen_nextiword());
+ swap_opcode();
+ comprintf("\tcomp_fpp_opp(opcode,extra);\n");
+#else
+ failure;
+#endif
+ break;
+ case i_FBcc:
+ uses_fpu;
+#ifdef USE_JIT_FPU
+ isjump;
+ uses_cmov;
+ mayfail;
+ swap_opcode();
+ comprintf("\tcomp_fbcc_opp(opcode);\n");
+#else
+ isjump;
+ failure;
+#endif
+ break;
+ case i_FDBcc:
+ uses_fpu;
+ isjump;
+ failure;
+ break;
+ case i_FScc:
+ uses_fpu;
+#ifdef USE_JIT_FPU
+ mayfail;
+ uses_cmov;
+ comprintf("\tuae_u16 extra=%s;\n",gen_nextiword());
+ swap_opcode();
+ comprintf("\tcomp_fscc_opp(opcode,extra);\n");
+#else
+ failure;
+#endif
+ break;
+ case i_FTRAPcc:
+ uses_fpu;
+ isjump;
+ failure;
+ break;
+ case i_FSAVE:
+ uses_fpu;
+ failure;
+ break;
+ case i_FRESTORE:
+ uses_fpu;
+ failure;
+ break;
+
+ case i_CINVL:
+ case i_CINVP:
+ case i_CINVA:
+ isjump; /* Not really, but it's probably a good idea to stop
+ translating at this point */
+ failure;
+ comprintf ("\tflush_icache();\n"); /* Differentiate a bit more? */
+ break;
+ case i_CPUSHL:
+ case i_CPUSHP:
+ case i_CPUSHA:
+ isjump; /* Not really, but it's probably a good idea to stop
+ translating at this point */
+ failure;
+ break;
+ case i_MOVE16:
+ genmov16(opcode, curi);
+ break;
+
+ case i_EMULOP_RETURN:
+ isjump;
+ failure;
+ break;
+
+ case i_EMULOP:
+ failure;
+ break;
+
+ case i_MMUOP:
+ isjump;
+ failure;
+ break;
+ default:
+ abort ();
+ break;
+ }
+ comprintf("%s",endstr);
+ finish_braces ();
+ sync_m68k_pc ();
+ if (global_mayfail)
+ comprintf("\tif (failure) m68k_pc_offset=m68k_pc_offset_thisinst;\n");
+ return global_failure;
+}
+
+/* Write the fixed list of #include directives that every generated
+ * source file starts with to the stream F. */
+static void
+generate_includes (FILE * f)
+{
+    static const char *const include_lines[] = {
+        "#include \"sysdeps.h\"\n",
+        "#include \"m68k.h\"\n",
+        "#include \"memory.h\"\n",
+        "#include \"readcpu.h\"\n",
+        "#include \"newcpu.h\"\n",
+        "#include \"comptbl.h\"\n"
+    };
+    unsigned int idx;
+
+    for (idx = 0; idx < sizeof include_lines / sizeof include_lines[0]; idx++)
+        fprintf (f, "%s", include_lines[idx]);
+}
+
+static int postfix; /* Number embedded in the generated op_<opcode>_<postfix>_comp_* and op_smalltbl_<postfix>_comp_* names; assigned in generate_func (). */
+
+/*
+ * Emit the compiler handler for the opcode stored at opcode_map[rp].
+ *
+ *   rp      - index into opcode_map, opcode_next_clev and
+ *             opcode_last_postfix.
+ *   noflags - non-zero selects the "_nf" handler name / table row,
+ *             zero selects "_ff".
+ *
+ * Nothing is emitted for illegal opcodes, for opcodes whose clev is above
+ * the current cpu_level, or for opcodes whose table68k handler field is
+ * not -1.  Otherwise the function writes the srcreg/dstreg extraction
+ * code (once for byte-swapped opcode words, once for normal ones), lets
+ * gen_opcode () produce the body and adds one row to stblfile.  When
+ * gen_opcode () reports failure the row gets a NULL handler and
+ * com_discard () is called instead of com_flush ().
+ */
+static void
+generate_one_opcode (int rp, int noflags)
+{
+    uae_u16 smsk, dmsk;
+    const long int opcode = opcode_map[rp];
+    const char *opcode_str;
+    int aborted = 0;
+    int have_srcreg = 0;
+    int have_dstreg = 0;
+    int flags = 0;
+
+    if (table68k[opcode].mnemo == i_ILLG
+        || table68k[opcode].clev > cpu_level)
+        return;
+
+    if (table68k[opcode].handler != -1)
+        return;
+
+    /* Mask covering the source field, chosen by the source type code. */
+    switch (table68k[opcode].stype)
+    {
+    case 0:
+    case 3:
+    case 4:
+        smsk = 7;
+        break;
+    case 1:
+    case 6:
+        smsk = 255;
+        break;
+    case 2:
+        smsk = 15;
+        break;
+    case 5:
+        smsk = 63;
+        break;
+    case 7:
+        smsk = 3;
+        break;
+    default:
+        abort ();
+    }
+    dmsk = 7;
+
+    next_cpu_level = -1;
+    if (table68k[opcode].suse
+        && table68k[opcode].smode != imm && table68k[opcode].smode != imm0
+        && table68k[opcode].smode != imm1 && table68k[opcode].smode != imm2
+        && table68k[opcode].smode != absw && table68k[opcode].smode != absl
+        && table68k[opcode].smode != PC8r && table68k[opcode].smode != PC16)
+    {
+        have_srcreg = 1;
+        if (table68k[opcode].spos == -1)
+        {
+            /* The source register is fixed for this opcode. */
+            if (((int) table68k[opcode].sreg) >= 128)
+                comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].sreg);
+            else
+                comprintf ("\tuae_s32 srcreg = %d;\n", (int) table68k[opcode].sreg);
+        }
+        else
+        {
+            /* 100 bytes is ample: the longest format below expands to well
+               under that with three small integers substituted. */
+            char source[100];
+            int pos = table68k[opcode].spos;
+
+            comprintf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n");
+
+            /* With the two opcode bytes swapped, bit b of the real opcode
+               sits at bit b ^ 8.  A field that crosses the byte boundary
+               has to be reassembled from both halves and is then masked
+               with the *source* mask (the original code used dmsk here,
+               which truncates any source field wider than three bits). */
+            if (pos < 8 && (smsk >> (8 - pos)) != 0)
+                sprintf (source, "(((opcode >> %d) | (opcode << %d)) & %d)",
+                         pos ^ 8, 8 - pos, smsk);
+            else if (pos != 8)
+                sprintf (source, "((opcode >> %d) & %d)", pos ^ 8, smsk);
+            else
+                sprintf (source, "(opcode & %d)", smsk);
+
+            if (table68k[opcode].stype == 3)
+                comprintf ("\tuae_u32 srcreg = imm8_table[%s];\n", source);
+            else if (table68k[opcode].stype == 1)
+                comprintf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source);
+            else
+                comprintf ("\tuae_u32 srcreg = %s;\n", source);
+
+            comprintf ("#else\n");
+
+            if (pos)
+                sprintf (source, "((opcode >> %d) & %d)", pos, smsk);
+            else
+                sprintf (source, "(opcode & %d)", smsk);
+
+            if (table68k[opcode].stype == 3)
+                comprintf ("\tuae_s32 srcreg = imm8_table[%s];\n", source);
+            else if (table68k[opcode].stype == 1)
+                comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%s;\n", source);
+            else
+                comprintf ("\tuae_s32 srcreg = %s;\n", source);
+
+            comprintf ("#endif\n");
+        }
+    }
+    if (table68k[opcode].duse
+        /* Yes, the dmode can be imm, in case of LINK or DBcc */
+        && table68k[opcode].dmode != imm && table68k[opcode].dmode != imm0
+        && table68k[opcode].dmode != imm1 && table68k[opcode].dmode != imm2
+        && table68k[opcode].dmode != absw && table68k[opcode].dmode != absl)
+    {
+        have_dstreg = 1;
+        if (table68k[opcode].dpos == -1)
+        {
+            /* The destination register is fixed for this opcode. */
+            if (((int) table68k[opcode].dreg) >= 128)
+                comprintf ("\tuae_s32 dstreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].dreg);
+            else
+                comprintf ("\tuae_s32 dstreg = %d;\n", (int) table68k[opcode].dreg);
+        }
+        else
+        {
+            int pos = table68k[opcode].dpos;
+
+            comprintf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n");
+
+            if (pos < 8 && (dmsk >> (8 - pos)) != 0)
+                comprintf ("\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n",
+                           pos ^ 8, 8 - pos, dmsk);
+            else if (pos != 8)
+                comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n",
+                           pos ^ 8, dmsk);
+            else
+                comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk);
+
+            comprintf ("#else\n");
+
+            if (pos)
+                comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n",
+                           pos, dmsk);
+            else
+                comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk);
+
+            comprintf ("#endif\n");
+        }
+    }
+
+    /* "dodgy" is emitted as a run-time test only when both operands use an
+       address-register addressing mode; it is true when they name the same
+       register number. */
+    if (have_srcreg && have_dstreg &&
+        (table68k[opcode].dmode==Areg ||
+         table68k[opcode].dmode==Aind ||
+         table68k[opcode].dmode==Aipi ||
+         table68k[opcode].dmode==Apdi ||
+         table68k[opcode].dmode==Ad16 ||
+         table68k[opcode].dmode==Ad8r) &&
+        (table68k[opcode].smode==Areg ||
+         table68k[opcode].smode==Aind ||
+         table68k[opcode].smode==Aipi ||
+         table68k[opcode].smode==Apdi ||
+         table68k[opcode].smode==Ad16 ||
+         table68k[opcode].smode==Ad8r)
+        ) {
+        comprintf("\tuae_u32 dodgy=(srcreg==(uae_s32)dstreg);\n");
+    }
+    else {
+        comprintf("\tuae_u32 dodgy=0;\n");
+    }
+    comprintf("\tuae_u32 m68k_pc_offset_thisinst=m68k_pc_offset;\n");
+    comprintf("\tm68k_pc_offset+=2;\n");
+
+    opcode_str = get_instruction_string (opcode);
+
+    aborted=gen_opcode (opcode);
+
+    /* Collect the per-opcode property bits that go into the table row. */
+    if (global_isjump) flags|=1;
+    if (long_opcode) flags|=2;
+    if (global_cmov) flags|=4;
+    if (global_isaddx) flags|=8;
+    if (global_iscjump) flags|=16;
+    if (global_fpu) flags|=32;
+
+    comprintf ("}\n");
+
+    if (aborted) {
+        fprintf (stblfile, "{ NULL, 0x%08x, %ld }, /* %s */\n", flags, opcode, opcode_str);
+        com_discard();
+    }
+    else {
+        if (noflags) {
+            fprintf (stblfile, "{ op_%lx_%d_comp_nf, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, opcode_str);
+            fprintf (headerfile, "extern compop_func op_%lx_%d_comp_nf;\n", opcode, postfix);
+            printf ("void REGPARAM2 op_%lx_%d_comp_nf(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str);
+        }
+        else {
+            fprintf (stblfile, "{ op_%lx_%d_comp_ff, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, opcode_str);
+            fprintf (headerfile, "extern compop_func op_%lx_%d_comp_ff;\n", opcode, postfix);
+            printf ("void REGPARAM2 op_%lx_%d_comp_ff(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str);
+        }
+        com_flush();
+    }
+
+    opcode_next_clev[rp] = next_cpu_level;
+    opcode_last_postfix[rp] = postfix;
+}
+
+/*
+ * Emit one complete set of compiler handlers to stdout and the matching
+ * op_smalltbl_<postfix>_comp_nf (noflags != 0) or _comp_ff (noflags == 0)
+ * table to stblfile.
+ *
+ * The handlers are split into eight roughly equal slices, each wrapped in
+ * "#ifdef PART_n", so the generated file can be compiled piecewise; when
+ * no PART_n is defined the emitted preamble enables all of them.
+ */
+static void
+generate_func (int noflags)
+{
+    int i, j, rp;
+
+    using_prefetch = 0;
+    using_exception_3 = 0;
+    for (i = 0; i < 1; i++) /* We only do one level! */
+    {
+        cpu_level = 4 - i;
+        postfix = i;
+
+        if (noflags)
+            fprintf (stblfile, "struct comptbl op_smalltbl_%d_comp_nf[] = {\n", postfix);
+        else
+            fprintf (stblfile, "struct comptbl op_smalltbl_%d_comp_ff[] = {\n", postfix);
+
+        /* sam: this is for people with low memory (eg. me :)) */
+        /* (A stray '!' in front of this printf call has been dropped; it
+           only negated a return value that was never used.) */
+        printf ("\n"
+                "#if !defined(PART_1) && !defined(PART_2) && "
+                "!defined(PART_3) && !defined(PART_4) && "
+                "!defined(PART_5) && !defined(PART_6) && "
+                "!defined(PART_7) && !defined(PART_8)"
+                "\n"
+                "#define PART_1 1\n"
+                "#define PART_2 1\n"
+                "#define PART_3 1\n"
+                "#define PART_4 1\n"
+                "#define PART_5 1\n"
+                "#define PART_6 1\n"
+                "#define PART_7 1\n"
+                "#define PART_8 1\n"
+                "#endif\n\n");
+
+        rp = 0;
+        for (j = 1; j <= 8; ++j)
+        {
+            /* Slice j ends at the j/8 mark of the opcode list. */
+            int k = (j * nr_cpuop_funcs) / 8;
+            printf ("#ifdef PART_%d\n", j);
+            for (; rp < k; rp++)
+                generate_one_opcode (rp,noflags);
+            printf ("#endif\n\n");
+        }
+
+        /* Terminating row of the table. */
+        fprintf (stblfile, "{ 0, 0,65536 }};\n");
+    }
+
+}
+
+/* Release the four work tables and clear the pointers so that a repeated
+ * call (or a call after a partial allocation) is harmless. */
+static void
+free_opcode_tables (void)
+{
+    free (opcode_map);
+    opcode_map = NULL;
+    free (opcode_last_postfix);
+    opcode_last_postfix = NULL;
+    free (opcode_next_clev);
+    opcode_next_clev = NULL;
+    free (counts);
+    counts = NULL;
+}
+
+/* Allocate the four work tables.  Returns 0 on success; on failure
+ * everything is released again and -1 is returned. */
+static int
+alloc_opcode_tables (void)
+{
+    opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs);
+    opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs);
+    opcode_next_clev = (int *) malloc (sizeof (int) * nr_cpuop_funcs);
+    counts = (unsigned long *) malloc (65536 * sizeof (unsigned long));
+
+    if (opcode_map == NULL || opcode_last_postfix == NULL
+        || opcode_next_clev == NULL || counts == NULL)
+    {
+        free_opcode_tables ();
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Generator entry point.  Writes comptbl.h, compstbl.cpp and (through a
+ * redirected stdout) compemu.cpp into the current directory: first the
+ * flag-generating handlers, then the "no flags" ones.
+ *
+ * Returns 0 on success and 1 when a work table cannot be allocated or an
+ * output file cannot be opened; the command line is not used.
+ */
+int
+main (int argc, char **argv)
+{
+    (void) argc;
+    (void) argv;
+
+    read_table68k ();
+    do_merges ();
+
+    if (alloc_opcode_tables () < 0)
+    {
+        fprintf (stderr, "out of memory allocating opcode tables\n");
+        return 1;
+    }
+    read_counts ();
+
+    /* It would be a lot nicer to put all in one file (we'd also get rid of
+     * cputbl.h that way), but cpuopti can't cope. That could be fixed, but
+     * I don't dare to touch the 68k version. */
+
+    headerfile = fopen ("comptbl.h", "wb");
+    if (headerfile == NULL)
+    {
+        perror ("comptbl.h");
+        free_opcode_tables ();
+        return 1;
+    }
+    stblfile = fopen ("compstbl.cpp", "wb");
+    if (stblfile == NULL)
+    {
+        perror ("compstbl.cpp");
+        fclose (headerfile);
+        free_opcode_tables ();
+        return 1;
+    }
+    if (freopen ("compemu.cpp", "wb", stdout) == NULL)
+    {
+        perror ("compemu.cpp");
+        fclose (stblfile);
+        fclose (headerfile);
+        free_opcode_tables ();
+        return 1;
+    }
+
+    generate_includes (stdout);
+    generate_includes (stblfile);
+
+    printf("#include \"compiler/compemu.h\"\n");
+
+    /* First pass: handlers that compute flags. */
+    noflags=0;
+    generate_func (noflags);
+
+    free_opcode_tables ();
+
+    /* Second pass starts from freshly allocated tables and re-read counts,
+       exactly as the first one did. */
+    if (alloc_opcode_tables () < 0)
+    {
+        fprintf (stderr, "out of memory allocating opcode tables\n");
+        fclose (stblfile);
+        fclose (headerfile);
+        return 1;
+    }
+    read_counts ();
+    noflags=1;
+    generate_func (noflags);
+
+    free_opcode_tables ();
+
+    free (table68k);
+    fclose (stblfile);
+    fclose (headerfile);
+    fflush (stdout);
+    return 0;
+}
--- /dev/null
+/******************** -*- mode: C; tab-width: 8 -*- ********************
+ *
+ * Dumb and Brute Force Run-time assembler verifier for IA-32 and AMD64
+ *
+ ***********************************************************************/
+
+
+/***********************************************************************
+ *
+ * Copyright 2004-2008 Gwenole Beauchesne
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ ***********************************************************************/
+
+/*
+ * STATUS: 26M variations covering unary register based operations,
+ * reg/reg operations, imm/reg operations.
+ *
+ * TODO:
+ * - Rewrite to use internal BFD/opcodes format instead of string compares
+ * - Add reg/mem, imm/mem variations
+ */
+
+#define _BSD_SOURCE 1
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "sysdeps.h"
+
+static int verbose = 2; /* verbosity setting; the code that reads it lies outside this excerpt */
+
+#define TEST_INST_ALU 1 /* TEST_INST_<class> (ALU, FPU, MMX, SSE): master switch for each group of sub-switches below. NOTE(review): presumably they gate the test loops further down -- confirm there. */
+#define TEST_INST_FPU 1
+#define TEST_INST_MMX 1
+#define TEST_INST_SSE 1
+#if TEST_INST_ALU /* ALU sub-switches exist only while the ALU group is enabled */
+#define TEST_INST_ALU_REG 1
+#define TEST_INST_ALU_REG_REG 1
+#define TEST_INST_ALU_CNT_REG 1
+#define TEST_INST_ALU_IMM_REG 1
+#define TEST_INST_ALU_MEM_REG 1
+#endif
+#if TEST_INST_FPU /* FPU sub-switches, same scheme */
+#define TEST_INST_FPU_UNARY 1
+#define TEST_INST_FPU_REG 1
+#define TEST_INST_FPU_MEM 1
+#endif
+#if TEST_INST_MMX /* MMX sub-switches, same scheme */
+#define TEST_INST_MMX_REG_REG 1
+#define TEST_INST_MMX_IMM_REG 1
+#define TEST_INST_MMX_MEM_REG 1
+#endif
+#if TEST_INST_SSE /* SSE sub-switches, same scheme */
+#define TEST_INST_SSE_REG 1
+#define TEST_INST_SSE_REG_REG 1
+#define TEST_INST_SSE_MEM_REG 1
+#endif
+
+#undef abort /* Wrap abort(): print the calling file and line to stderr first. "(abort)()" in the body is parenthesised so that it names the real function instead of expanding this macro again. */
+#define abort() do { \
+ fprintf(stderr, "ABORT: %s, line %d\n", __FILE__, __LINE__); \
+ (abort)(); \
+} while (0)
+
+#define X86_TARGET_64BIT 1 /* These five X86_* settings are defined ahead of the codegen_x86.h include below -- presumably they configure it (confirm in that header). X86_TARGET_64BIT also picks the register limits here and the disassembler machine type in disass_x86. */
+#define X86_FLAT_REGISTERS 0
+#define X86_OPTIMIZE_ALU 1
+#define X86_OPTIMIZE_ROTSHI 1
+#define X86_RIP_RELATIVE_ADDR 0
+#include "compiler/codegen_x86.h"
+
+#if X86_TARGET_64BIT /* register-file sizes: 16 ALU/SSE registers for the 64-bit target, 8 otherwise */
+#define X86_MAX_ALU_REGS 16
+#define X86_MAX_SSE_REGS 16
+#else
+#define X86_MAX_ALU_REGS 8
+#define X86_MAX_SSE_REGS 8
+#endif
+#define X86_MAX_FPU_REGS 8
+#define X86_MAX_MMX_REGS 8
+
+#define VALID_REG(r, b, n) (((unsigned)((r) - X86_##b)) < (n)) /* true when r lies in [X86_<b>, X86_<b> + n); the unsigned subtraction folds the lower-bound test into the single compare */
+#if X86_TARGET_64BIT
+#define VALID_REG8(r) (VALID_REG(r, AL, 16) || VALID_REG(r, AH, 4)) /* 16 registers counted from AL, or the 4 counted from AH */
+#define VALID_REG64(r) VALID_REG(r, RAX, X86_MAX_ALU_REGS)
+#else
+#define VALID_REG8(r) (VALID_REG(r, AL, 4) || VALID_REG(r, AH, 4)) /* only 4 registers counted from AL, or the 4 counted from AH */
+#define VALID_REG64(r) (0) /* no 64-bit registers on the 32-bit target */
+#endif
+#define VALID_REG16(r) VALID_REG(r, AX, X86_MAX_ALU_REGS)
+#define VALID_REG32(r) VALID_REG(r, EAX, X86_MAX_ALU_REGS)
+
+#define x86_emit_byte(B) emit_byte(B) /* the x86_emit_... and x86_get_target names are mapped onto this file's own emit helpers */
+#define x86_emit_word(W) emit_word(W)
+#define x86_emit_long(L) emit_long(L)
+#define x86_emit_quad(Q) emit_quad(Q)
+#define x86_get_target() get_target()
+#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) /* forwards the message together with the call site to jit_fail, which aborts */
+
+/* Report a code-generation failure on stderr -- the message plus the
+ * function, file and line passed in by the caller -- and then abort. */
+static void jit_fail(const char *msg, const char *file, int line, const char *function)
+{
+    fprintf(
+        stderr,
+        "JIT failure in function %s from file %s at line %d: %s\n",
+        function,
+        file,
+        line,
+        msg);
+
+    abort();
+}
+
+/* Emit cursor: the address at which the next emitted byte is stored. */
+static uint8 *target;
+
+/* Store the single byte x at the emit cursor and step the cursor past it. */
+static inline void emit_byte(uint8 x)
+{
+    target[0] = x;
+    target += 1;
+}
+
+/* Store the 16-bit value x at the emit cursor in host byte order and
+ * advance the cursor by its size.  The cursor moves in single bytes, so it
+ * need not be aligned for uint16; memcpy replaces the former store through
+ * a cast pointer, which was a misaligned, aliasing-violating access. */
+static inline void emit_word(uint16 x)
+{
+    memcpy(target, &x, sizeof(x));
+    target += sizeof(x);
+}
+
+/* Store the 32-bit value x at the emit cursor in host byte order and
+ * advance the cursor by its size.  The cursor moves in single bytes, so it
+ * need not be aligned for uint32; memcpy replaces the former store through
+ * a cast pointer, which was a misaligned, aliasing-violating access. */
+static inline void emit_long(uint32 x)
+{
+    memcpy(target, &x, sizeof(x));
+    target += sizeof(x);
+}
+
+/* Store the 64-bit value x at the emit cursor in host byte order and
+ * advance the cursor by its size.  The cursor moves in single bytes, so it
+ * need not be aligned for uint64; memcpy replaces the former store through
+ * a cast pointer, which was a misaligned, aliasing-violating access. */
+static inline void emit_quad(uint64 x)
+{
+    memcpy(target, &x, sizeof(x));
+    target += sizeof(x);
+}
+
+static inline void set_target(uint8 *t) /* Point the emit cursor at t; subsequent emit calls store from there. */
+{
+ target = t;
+}
+
+static inline uint8 *get_target(void) /* Return the current emit cursor, i.e. one past the last byte emitted so far. */
+{
+ return target;
+}
+
+/* Fetch the byte stored at host address addr, widened to uint32. */
+static uint32 mon_read_byte(uintptr addr)
+{
+    return (uint32)(((uint8 *)addr)[0]);
+}
+
+/* Callbacks with C linkage, declared together with the disassembler
+ * header they accompany. */
+extern "C" {
+#include "disass/dis-asm.h"
+
+/* Copy length bytes starting at address from into to, one byte at a time
+ * through mon_read_byte.  Always returns 0; info is not used. */
+int buffer_read_memory(bfd_vma from, bfd_byte *to, unsigned int length, struct disassemble_info *info)
+{
+    for (; length != 0; --length) {
+        *to = mon_read_byte(from);
+        ++to;
+        ++from;
+    }
+    return 0;
+}
+
+/* Print "Unknown error <status>" through the stream callback stored in
+ * info; memaddr is not used. */
+void perror_memory(int status, bfd_vma memaddr, struct disassemble_info *info)
+{
+    (*info->fprintf_func)(info->stream, "Unknown error %d\n", status);
+}
+
+/* Print addr as '$' followed by hex digits: eight digits when it fits in
+ * 32 bits, otherwise the upper and lower halves as eight digits each. */
+void generic_print_address(bfd_vma addr, struct disassemble_info *info)
+{
+    if (addr < UVAL64(0x100000000)) {
+        info->fprintf_func(info->stream, "$%08x", (uint32)addr);
+        return;
+    }
+    info->fprintf_func(info->stream, "$%08x%08x", (uint32)(addr >> 32), (uint32)addr);
+}
+
+/* Always answers 0 for any address. */
+int generic_symbol_at_address(bfd_vma addr, struct disassemble_info *info)
+{
+    return 0;
+}
+}
+
+struct SFILE { /* String sink passed to the disassembler in place of a FILE (see the cast in disass_x86). */
+ char *buffer; /* start of the output buffer (disass_x86 sets it to its buf argument) */
+ char *current; /* write position: mon_sprintf formats here and advances it */
+};
+
+/* printf-style writer for an SFILE: format the arguments at f->current,
+ * move f->current to the end of what was written, and return the number
+ * of characters added (measured with strlen).  The destination size is
+ * not known here, so the caller's buffer must be large enough. */
+static int mon_sprintf(SFILE *f, const char *format, ...)
+{
+    va_list ap;
+    va_start(ap, format);
+    vsprintf(f->current, format, ap);
+    va_end(ap);
+
+    const int written = strlen(f->current);
+    f->current += written;
+    return written;
+}
+
+/* Disassemble the instruction at host address adr, writing its text into
+ * buf through an SFILE and mon_sprintf.  The machine type follows
+ * X86_TARGET_64BIT and the "suffix" disassembler option is set.  Returns
+ * the result of print_insn_i386 (presumably the instruction length --
+ * confirm against the disassembler). */
+static int disass_x86(char *buf, uintptr adr)
+{
+    SFILE out;
+    out.buffer = buf;
+    out.current = buf;
+
+    disassemble_info dinfo;
+    INIT_DISASSEMBLE_INFO(dinfo, (FILE *)&out, (fprintf_ftype)mon_sprintf);
+    if (X86_TARGET_64BIT)
+        dinfo.mach = bfd_mach_x86_64;
+    else
+        dinfo.mach = bfd_mach_i386_i386;
+    dinfo.disassembler_options = "suffix";
+
+    return print_insn_i386(adr, &dinfo);
+}
+
+enum { /* Field selectors understood by operand_t::fill(). */
+ op_disp, /* selects operand_t::disp */
+ op_reg, /* selects operand_t::reg */
+ op_base, /* selects operand_t::base */
+ op_index, /* selects operand_t::index */
+ op_scale, /* selects operand_t::scale */
+ op_imm, /* selects operand_t::imm */
+};
+/* One instruction operand.  A register field holding -1 means "absent";
+ * insn_t::pretty_print treats reg != -1 as a register operand and anything
+ * else as a memory operand built from disp, base, index and scale. */
+struct operand_t {
+    int32 disp;
+    int8 reg;
+    int8 base;
+    int8 index;
+    int8 scale;
+    int64 imm;
+
+    /* Reset to the empty state: zero displacement and immediate, no
+     * registers, scale 1. */
+    void clear() {
+        imm = 0;
+        disp = 0;
+        index = -1;
+        base = -1;
+        reg = -1;
+        scale = 1;
+    }
+
+    /* Store value into the field selected by optype (one of the op_*
+     * enumerators); any other optype aborts. */
+    void fill(int optype, int value) {
+        switch (optype) {
+        case op_disp:
+            disp = value;
+            break;
+        case op_reg:
+            reg = value;
+            break;
+        case op_base:
+            base = value;
+            break;
+        case op_index:
+            index = value;
+            break;
+        case op_scale:
+            scale = value;
+            break;
+        case op_imm:
+            imm = value;
+            break;
+        default:
+            abort();
+        }
+    }
+};
+
+/* Number of slots in main()'s per-batch tables (insns[], modes[]); also
+   scales the code buffer. */
+#define MAX_INSNS 1024
+/* Bytes reserved per instruction in the code buffer; also the number of
+   bytes show_instruction() dumps. */
+#define MAX_INSN_LENGTH 16
+/* Capacity of insn_t::operands. */
+#define MAX_INSN_OPERANDS 3
+
+/*
+ * A parsed instruction: mnemonic text plus up to MAX_INSN_OPERANDS operands.
+ */
+struct insn_t {
+	char name[16];
+	int n_operands;
+	operand_t operands[MAX_INSN_OPERANDS];
+
+	/* Zero the name, set the operand count to 0 and clear every operand. */
+	void clear() {
+		n_operands = 0;
+		memset(name, 0, sizeof(name));
+		for (operand_t *op = operands; op != operands + MAX_INSN_OPERANDS; ++op)
+			op->clear();
+	}
+
+	/* Dump the instruction to stdout: one header line, then one line per
+	   operand (register form, or disp(base,index,scale) memory form). */
+	void pretty_print() {
+		printf("%s, %d operands\n", name, n_operands);
+		for (int k = 0; k < n_operands; k++) {
+			const operand_t &op = operands[k];
+			if (op.reg != -1) {
+				printf(" reg r%d\n", op.reg);
+				continue;
+			}
+
+			const bool has_base = op.base != -1;
+			const bool has_index = op.index != -1;
+			printf(" mem 0x%08x(", op.disp);
+			if (has_base)
+				printf("r%d", op.base);
+			printf(",");
+			if (has_index)
+				printf("r%d", op.index);
+			printf(",");
+			if (has_base || has_index)
+				printf("%d", op.scale);
+			printf(")\n");
+		}
+	}
+};
+
+/*
+ * Return a pointer to the first whitespace character in p, or to the
+ * terminating NUL when there is none.
+ */
+static inline char *find_blanks(char *p)
+{
+	/* <ctype.h> functions require an unsigned char value (or EOF);
+	   passing a negative plain char is undefined behaviour. */
+	while (*p && !isspace((unsigned char)*p))
+		++p;
+	return p;
+}
+
+/*
+ * Return a pointer to the first non-whitespace character in p, or to the
+ * terminating NUL when only whitespace remains.
+ */
+static inline char *skip_blanks(char *p)
+{
+	/* <ctype.h> functions require an unsigned char value (or EOF);
+	   passing a negative plain char is undefined behaviour. */
+	while (*p && isspace((unsigned char)*p))
+		++p;
+	return p;
+}
+
+/*
+ * Parse a register name at `buf` (the leading '%' must already have been
+ * consumed). Letters are matched in either case.
+ *
+ * On success the X86_* register id is stored in `op` through
+ * op->fill(optype, reg) and the number of characters consumed is returned.
+ * When no register is recognised, X86_NOREG is returned and `op` is left
+ * untouched; callers in this file treat any result <= 0 as failure.
+ *
+ * Every ctype call casts to unsigned char, because passing a negative
+ * plain char to isdigit() is undefined behaviour.
+ */
+static int parse_reg(operand_t *op, int optype, char *buf)
+{
+	int reg = X86_NOREG;
+	int len = 0;
+	char *p = buf;
+	switch (p[0]) {
+	case 'a': case 'A':
+		len = 2;
+		switch (p[1]) {
+		case 'l': case 'L': reg = X86_AL; break;
+		case 'h': case 'H': reg = X86_AH; break;
+		case 'x': case 'X': reg = X86_AX; break;
+		}
+		break;
+	case 'b': case 'B':
+		len = 2;
+		switch (p[1]) {
+		case 'l': case 'L': reg = X86_BL; break;
+		case 'h': case 'H': reg = X86_BH; break;
+		case 'x': case 'X': reg = X86_BX; break;
+		case 'p': case 'P':
+			switch (p[2]) {
+#if X86_TARGET_64BIT
+			case 'l': case 'L': reg = X86_BPL, ++len; break;
+#endif
+			default: reg = X86_BP; break;
+			}
+			break;
+		}
+		break;
+	case 'c': case 'C':
+		len = 2;
+		switch (p[1]) {
+		case 'l': case 'L': reg = X86_CL; break;
+		case 'h': case 'H': reg = X86_CH; break;
+		case 'x': case 'X': reg = X86_CX; break;
+		}
+		break;
+	case 'd': case 'D':
+		len = 2;
+		switch (p[1]) {
+		case 'l': case 'L': reg = X86_DL; break;
+		case 'h': case 'H': reg = X86_DH; break;
+		case 'x': case 'X': reg = X86_DX; break;
+		case 'i': case 'I':
+			switch (p[2]) {
+#if X86_TARGET_64BIT
+			case 'l': case 'L': reg = X86_DIL; ++len; break;
+#endif
+			default: reg = X86_DI; break;
+			}
+			break;
+		}
+		break;
+	case 's': case 'S':
+		/* The third character decides between spl/sil, st(N) and the
+		   two-letter names st/sp/si. */
+		len = 2;
+		switch (p[2]) {
+#if X86_TARGET_64BIT
+		case 'l': case 'L':
+			++len;
+			switch (p[1]) {
+			case 'p': case 'P': reg = X86_SPL; break;
+			case 'i': case 'I': reg = X86_SIL; break;
+			}
+			break;
+#endif
+		case '(':
+			if ((p[1] == 't' || p[1] == 'T') && isdigit((unsigned char)p[3]) && p[4] == ')')
+				len += 3, reg = X86_ST0 + (p[3] - '0');
+			break;
+		default:
+			switch (p[1]) {
+			case 't': case 'T': reg = X86_ST0; break;
+			case 'p': case 'P': reg = X86_SP; break;
+			case 'i': case 'I': reg = X86_SI; break;
+			}
+			break;
+		}
+		break;
+	case 'e': case 'E':
+		len = 3;
+		switch (p[2]) {
+		case 'x': case 'X':
+			switch (p[1]) {
+			case 'a': case 'A': reg = X86_EAX; break;
+			case 'b': case 'B': reg = X86_EBX; break;
+			case 'c': case 'C': reg = X86_ECX; break;
+			case 'd': case 'D': reg = X86_EDX; break;
+			}
+			break;
+		case 'i': case 'I':
+			switch (p[1]) {
+			case 's': case 'S': reg = X86_ESI; break;
+			case 'd': case 'D': reg = X86_EDI; break;
+			}
+			break;
+		case 'p': case 'P':
+			switch (p[1]) {
+			case 'b': case 'B': reg = X86_EBP; break;
+			case 's': case 'S': reg = X86_ESP; break;
+			}
+			break;
+		}
+		break;
+#if X86_TARGET_64BIT
+	case 'r': case 'R':
+		len = 3;
+		switch (p[2]) {
+		case 'x': case 'X':
+			switch (p[1]) {
+			case 'a': case 'A': reg = X86_RAX; break;
+			case 'b': case 'B': reg = X86_RBX; break;
+			case 'c': case 'C': reg = X86_RCX; break;
+			case 'd': case 'D': reg = X86_RDX; break;
+			}
+			break;
+		case 'i': case 'I':
+			switch (p[1]) {
+			case 's': case 'S': reg = X86_RSI; break;
+			case 'd': case 'D': reg = X86_RDI; break;
+			}
+			break;
+		case 'p': case 'P':
+			switch (p[1]) {
+			case 'b': case 'B': reg = X86_RBP; break;
+			case 's': case 'S': reg = X86_RSP; break;
+			}
+			break;
+		case 'b': case 'B':
+			switch (p[1]) {
+			case '8': reg = X86_R8B; break;
+			case '9': reg = X86_R9B; break;
+			}
+			break;
+		case 'w': case 'W':
+			switch (p[1]) {
+			case '8': reg = X86_R8W; break;
+			case '9': reg = X86_R9W; break;
+			}
+			break;
+		case 'd': case 'D':
+			switch (p[1]) {
+			case '8': reg = X86_R8D; break;
+			case '9': reg = X86_R9D; break;
+			}
+			break;
+		case '0': case '1': case '2': case '3': case '4': case '5':
+			/* r10..r15, optionally followed by a b/w/d size suffix. */
+			if (p[1] == '1') {
+				const int r = p[2] - '0';
+				switch (p[3]) {
+				case 'b': case 'B': reg = X86_R10B + r, ++len; break;
+				case 'w': case 'W': reg = X86_R10W + r, ++len; break;
+				case 'd': case 'D': reg = X86_R10D + r, ++len; break;
+				default: reg = X86_R10 + r; break;
+				}
+			}
+			break;
+		default:
+			/* Plain r8 / r9 without a size suffix. */
+			switch (p[1]) {
+			case '8': reg = X86_R8, len = 2; break;
+			case '9': reg = X86_R9, len = 2; break;
+			}
+			break;
+		}
+		break;
+#endif
+	case 'm': case 'M':
+		if ((p[1] == 'm' || p[1] == 'M') && isdigit((unsigned char)p[2]))
+			reg = X86_MM0 + (p[2] - '0'), len = 3;
+		break;
+	case 'x': case 'X':
+		if ((p[1] == 'm' || p[1] == 'M') && (p[2] == 'm' || p[2] == 'M')) {
+#if X86_TARGET_64BIT
+			if (p[3] == '1' && isdigit((unsigned char)p[4]))
+				reg = X86_XMM10 + (p[4] - '0'), len = 5;
+			else
+#endif
+			if (isdigit((unsigned char)p[3]))
+				reg = X86_XMM0 + (p[3] - '0'), len = 4;
+		}
+		break;
+	}
+
+	if (len > 0 && reg != X86_NOREG) {
+		op->fill(optype, reg);
+		return len;
+	}
+
+	return X86_NOREG;
+}
+
+/*
+ * Convert the number at `nptr` to an unsigned integer.
+ *
+ * nptr   - text to convert
+ * endptr - receives a pointer to the first unconverted character
+ * base   - numeric base as understood by strtoul(); 0 auto-detects
+ *          decimal / 0x hex / leading-0 octal. The base is now passed on to
+ *          the conversion routine; it used to be ignored in favour of 0.
+ *
+ * Aborts the process when the conversion sets errno (e.g. ERANGE).
+ * NOTE(review): on a 64-bit target whose unsigned long is narrower than
+ * 64 bits, the strtoull() result is still narrowed by the return type.
+ */
+static unsigned long parse_imm(char *nptr, char **endptr, int base = 0)
+{
+	errno = 0;
+#if X86_TARGET_64BIT
+	if (sizeof(unsigned long) != 8) {
+		unsigned long long val = strtoull(nptr, endptr, base);
+		if (errno == 0)
+			return val;
+		abort();
+	}
+#endif
+	unsigned long val = strtoul(nptr, endptr, base);
+	if (errno == 0)
+		return val;
+	abort();
+	return 0;
+}
+
+/*
+ * Parse an AT&T-style memory operand "disp(%base,%index,scale)" at `buf`
+ * into `op`. Each part is optional: a displacement is parsed only when the
+ * text starts with "0x"; the parenthesised part may carry a base, an index
+ * with scale, or a bare scale.
+ *
+ * Returns the number of characters consumed (0 when nothing matched), or a
+ * negative code: -3 bad base register, -4 bad index register, -5 missing
+ * ',' after the index, -6 missing ')'.
+ * Aborts when the scale factor is absent or cannot be converted.
+ */
+static int parse_mem(operand_t *op, char *buf)
+{
+	char *p = buf;
+
+	if (strncmp(buf, "0x", 2) == 0)
+		op->disp = parse_imm(buf, &p, 16);
+
+	if (*p == '(') {
+		p++;
+
+		if (*p == '%') {
+			p++;
+
+			int n = parse_reg(op, op_base, p);
+			if (n <= 0)
+				return -3;
+			p += n;
+		}
+
+		if (*p == ',') {
+			p++;
+
+			bool want_scale = false;
+			if (*p == '%') {
+				int n = parse_reg(op, op_index, ++p);
+				if (n <= 0)
+					return -4;
+				p += n;
+
+				if (*p != ',')
+					return -5;
+				p++;
+
+				/* An index register is always followed by a scale. */
+				want_scale = true;
+			}
+			else if (isdigit((unsigned char)*p))
+				want_scale = true;
+
+			if (want_scale) {
+				/* Detect failure through the end pointer and a freshly
+				   cleared errno: strtol() is not required to set EINVAL
+				   when no digits are present, and a stale errno could
+				   otherwise go either way. */
+				char *end;
+				errno = 0;
+				long val = strtol(p, &end, 10);
+				if (end == p || errno != 0)
+					abort();
+				p = end;
+				op->scale = val;
+			}
+		}
+
+		if (*p != ')')
+			return -6;
+		p++;
+	}
+
+	return p - buf;
+}
+
+/*
+ * Parse one line of disassembler output (`buf`) into `ii`.
+ *
+ * A leading "rex64..." word and a leading "rep..." word are skipped (and
+ * reported on stderr when verbose > 1). The next word becomes ii->name;
+ * the remainder is split on ',' into register (%...), memory (0x.../(...))
+ * and immediate ($...) operands. '*' and blanks are ignored.
+ *
+ * ii->n_operands is left at 0 when nothing follows the mnemonic, otherwise
+ * it is set to the number of commas seen plus one.
+ * Aborts on any text it cannot parse or on more than MAX_INSN_OPERANDS
+ * operands.
+ */
+static void parse_insn(insn_t *ii, char *buf)
+{
+	char *p = buf;
+	ii->clear();
+
+#if 0
+	printf("BUF: %s\n", buf);
+#endif
+
+	if (strncmp(p, "rex64", 5) == 0) {
+		char *q = find_blanks(p);
+		/* Print the prefix straight from the source text; no bounded
+		   scratch copy is needed. */
+		if (verbose > 1)
+			fprintf(stderr, "Instruction '%s', skip REX prefix '%.*s'\n", buf, (int)(q - p), p);
+		p = skip_blanks(q);
+	}
+
+	if (strncmp(p, "rep", 3) == 0) {
+		char *q = find_blanks(p);
+		if (verbose > 1)
+			fprintf(stderr, "Instruction '%s', skip REP prefix '%.*s'\n", buf, (int)(q - p), p);
+		p = skip_blanks(q);
+	}
+
+	/* Copy the mnemonic. Stop at NUL as well as at whitespace, and never
+	   write past name[]; clear() zeroed it, so the copy stays terminated.
+	   Characters beyond the capacity are consumed but dropped. */
+	size_t name_len = 0;
+	while (*p && !isspace((unsigned char)*p)) {
+		if (name_len < sizeof(ii->name) - 1)
+			ii->name[name_len++] = *p;
+		p++;
+	}
+
+	while (*p && isspace((unsigned char)*p))
+		p++;
+	if (*p == '\0')
+		return;
+
+	int n_operands = 0;
+	int optype = op_reg;
+	bool done = false;
+	while (!done) {
+		int n;
+		switch (*p) {
+		case '%':
+			n = parse_reg(&ii->operands[n_operands], optype, ++p);
+			if (n <= 0) {
+				fprintf(stderr, "parse_reg(%s) error %d\n", p, n);
+				abort();
+			}
+			p += n;
+			break;
+		case '0': case '(':
+			n = parse_mem(&ii->operands[n_operands], p);
+			if (n <= 0) {
+				fprintf(stderr, "parse_mem(%s) error %d\n", p, n);
+				abort();
+			}
+			p += n;
+			break;
+		case '$':
+			p++;
+			ii->operands[n_operands].imm = parse_imm(p, &p, 0);
+			break;
+		case '*':
+			p++;
+			break;
+		case ',':
+			/* Refuse to index past operands[]. */
+			if (++n_operands >= MAX_INSN_OPERANDS) {
+				fprintf(stderr, "too many operands> %s\n", buf);
+				abort();
+			}
+			p++;
+			break;
+		case ' ': case '\t':
+			p++;
+			break;
+		case '\0':
+			done = true;
+			break;
+		default:
+			fprintf(stderr, "parse error> %s\n", p);
+			abort();
+		}
+	}
+	ii->n_operands = n_operands + 1;
+}
+
+/* Counters for the test group currently running; main() zeroes them before
+   each group. */
+static unsigned long n_tests, n_failures;
+/* Totals over all groups; main() adds the per-group counters to them after
+   each group. */
+static unsigned long n_all_tests, n_all_failures;
+
+/*
+ * Verify that `ii` is the instruction `name` (case-insensitive) and carries
+ * no operands. Prints a diagnostic to stderr and returns false on the first
+ * mismatch; returns true otherwise.
+ */
+static bool check_unary(insn_t *ii, const char *name)
+{
+	const bool name_matches = strcasecmp(ii->name, name) == 0;
+	if (!name_matches) {
+		fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
+		return false;
+	}
+
+	const int count = ii->n_operands;
+	if (count == 0)
+		return true;
+
+	fprintf(stderr, "ERROR: instruction expected 0 operand, got %d\n", count);
+	return false;
+}
+
+/*
+ * Verify that `ii` is the instruction `name` (case-insensitive) with exactly
+ * one operand whose register is `r`. Prints a diagnostic to stderr and
+ * returns false on the first mismatch; returns true otherwise.
+ */
+static bool check_reg(insn_t *ii, const char *name, int r)
+{
+	const bool name_matches = strcasecmp(ii->name, name) == 0;
+	if (!name_matches) {
+		fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
+		return false;
+	}
+
+	const int count = ii->n_operands;
+	if (count != 1) {
+		fprintf(stderr, "ERROR: instruction expected 1 operand, got %d\n", count);
+		return false;
+	}
+
+	const int got = ii->operands[0].reg;
+	if (got == r)
+		return true;
+
+	/* -1 marks an operand slot that holds no register. */
+	fprintf(stderr, "ERROR: instruction expected r%d as source, got ", r);
+	if (got != -1)
+		fprintf(stderr, "r%d\n", got);
+	else
+		fprintf(stderr, "nothing\n");
+	return false;
+}
+
+/*
+ * Verify that `ii` is the instruction `name` (case-insensitive) with exactly
+ * two operands: register `s` first (source) and register `d` second
+ * (destination). Prints a diagnostic to stderr and returns false on the
+ * first mismatch; returns true otherwise.
+ */
+static bool check_reg_reg(insn_t *ii, const char *name, int s, int d)
+{
+	const bool name_matches = strcasecmp(ii->name, name) == 0;
+	if (!name_matches) {
+		fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
+		return false;
+	}
+
+	const int count = ii->n_operands;
+	if (count != 2) {
+		fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", count);
+		return false;
+	}
+
+	const int got_src = ii->operands[0].reg;
+	const int got_dst = ii->operands[1].reg;
+
+	/* The source is checked before the destination; -1 marks an operand
+	   slot that holds no register. */
+	if (got_src != s) {
+		fprintf(stderr, "ERROR: instruction expected r%d as source, got ", s);
+		if (got_src != -1)
+			fprintf(stderr, "r%d\n", got_src);
+		else
+			fprintf(stderr, "nothing\n");
+		return false;
+	}
+
+	if (got_dst != d) {
+		fprintf(stderr, "ERROR: instruction expected r%d as destination, got ", d);
+		if (got_dst != -1)
+			fprintf(stderr, "r%d\n", got_dst);
+		else
+			fprintf(stderr, "nothing\n");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Verify that `ii` is the instruction `name` (case-insensitive) with two
+ * operands: immediate `v` first and register `d` second.
+ *
+ * mode gives the immediate width in bytes used to truncate the expected
+ * value before comparing (1 -> low 8 bits, 2 -> low 16 bits, anything else
+ * compares all 32 bits). With the default of -1 the width is taken from the
+ * last letter of `name` (b/w/l/q).
+ *
+ * Prints a diagnostic to stderr and returns false on the first mismatch;
+ * returns true otherwise.
+ */
+static bool check_imm_reg(insn_t *ii, const char *name, uint32 v, int d, int mode = -1)
+{
+	if (strcasecmp(ii->name, name) != 0) {
+		fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
+		return false;
+	}
+
+	if (ii->n_operands != 2) {
+		fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands);
+		return false;
+	}
+
+	uint32 imm = ii->operands[0].imm;
+	int dstreg = ii->operands[1].reg;
+
+	if (mode == -1) {
+		char suffix = name[strlen(name) - 1];
+		switch (suffix) {
+		case 'b': mode = 1; break;
+		case 'w': mode = 2; break;
+		case 'l': mode = 4; break;
+		case 'q': mode = 8; break;
+		}
+	}
+	switch (mode) {
+	case 1: v &= 0xff; break;
+	case 2: v &= 0xffff; break;
+	}
+
+	if (imm != v) {
+		/* Always print the parsed value: 0xffffffff is a legitimate
+		   immediate, and a cleared operand holds 0 rather than -1, so
+		   there is no "nothing" value to special-case here. */
+		fprintf(stderr, "ERROR: instruction expected 0x%08x as immediate, got 0x%08x\n", v, imm);
+		return false;
+	}
+
+	if (dstreg != d) {
+		fprintf(stderr, "ERROR: instruction expected r%d as destination, got ", d);
+		if (dstreg == -1)
+			fprintf(stderr, "nothing\n");
+		else
+			fprintf(stderr, "r%d\n", dstreg);	/* "r" prefix, as in check_reg_reg() */
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Compare the memory operand at position Mpos of `ii` against the expected
+ * displacement D, base B, index I and scale S, in that order. Prints a
+ * diagnostic to stderr and returns false on the first field that differs;
+ * returns true when all four match.
+ */
+static bool do_check_mem(insn_t *ii, uint32 D, int B, int I, int S, int Mpos)
+{
+	const operand_t &mem = ii->operands[Mpos];
+
+	const uint32 disp = mem.disp;
+	if (disp != D) {
+		fprintf(stderr, "ERROR: instruction expected 0x%08x as displacement, got 0x%08x\n", D, disp);
+		return false;
+	}
+
+	const int base = mem.base;
+	if (base != B) {
+		fprintf(stderr, "ERROR: instruction expected r%d as base, got r%d\n", B, base);
+		return false;
+	}
+
+	const int index = mem.index;
+	if (index != I) {
+		fprintf(stderr, "ERROR: instruction expected r%d as index, got r%d\n", I, index);
+		return false;
+	}
+
+	const int scale = mem.scale;
+	if (scale != S) {
+		fprintf(stderr, "ERROR: instruction expected %d as scale factor, got %d\n", S, scale);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Verify that `ii` is the instruction `name` (case-insensitive) with exactly
+ * one operand, a memory reference matching D(B,I,S). Prints a diagnostic to
+ * stderr and returns false on the first mismatch.
+ */
+static bool check_mem(insn_t *ii, const char *name, uint32 D, int B, int I, int S)
+{
+	const bool name_matches = strcasecmp(ii->name, name) == 0;
+	if (!name_matches) {
+		fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
+		return false;
+	}
+
+	const int count = ii->n_operands;
+	if (count != 1) {
+		fprintf(stderr, "ERROR: instruction expected 1 operand, got %d\n", count);
+		return false;
+	}
+
+	const int mem_pos = 0;
+	return do_check_mem(ii, D, B, I, S, mem_pos);
+}
+
+/*
+ * Verify that `ii` is the instruction `name` (case-insensitive) with two
+ * operands: a memory reference matching D(B,I,S) and register R.
+ * Rpos is the operand position of the register (default 1); the memory
+ * operand is taken from position Rpos ^ 1.
+ * Prints a diagnostic to stderr and returns false on the first mismatch.
+ */
+static bool check_mem_reg(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R, int Rpos = 1)
+{
+	const bool name_matches = strcasecmp(ii->name, name) == 0;
+	if (!name_matches) {
+		fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
+		return false;
+	}
+
+	const int count = ii->n_operands;
+	if (count != 2) {
+		fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", count);
+		return false;
+	}
+
+	const int mem_pos = Rpos ^ 1;
+	if (!do_check_mem(ii, D, B, I, S, mem_pos))
+		return false;
+
+	const int got = ii->operands[Rpos].reg;
+	if (got == R)
+		return true;
+
+	fprintf(stderr, "ERROR: instruction expected r%d as reg operand, got r%d\n", R, got);
+	return false;
+}
+
+/*
+ * Same check as check_mem_reg(), but with the register expected at operand
+ * position 0 (so the memory operand is looked up at position 1).
+ */
+static inline bool check_reg_mem(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R)
+{
+ return check_mem_reg(ii, name, D, B, I, S, R, 0);
+}
+
+/*
+ * When verbose > 1, print MAX_INSN_LENGTH bytes starting at `bytes` as hex
+ * to stderr, followed by "| " and the disassembly text in `buffer`.
+ * Does nothing at lower verbosity.
+ */
+static void show_instruction(const char *buffer, const uint8 *bytes)
+{
+	if (verbose <= 1)
+		return;
+
+	for (int k = 0; k < MAX_INSN_LENGTH; k++)
+		fprintf(stderr, "%02x ", bytes[k]);
+	fprintf(stderr, "| ");
+	fprintf(stderr, "%s\n", buffer);
+}
+
+/*
+ * Progress indicator: every N_STEPS tests, print a rotating cursor and the
+ * running test count on one line of stdout (terminated with '\r' so the
+ * next update overwrites it) and flush.
+ * The count is an unsigned long and is printed with %lu; the previous %d
+ * did not match the argument type.
+ */
+static void show_status(unsigned long n_tests)
+{
+#if 1
+	const unsigned long N_STEPS = 100000;
+	static const char cursors[] = { '-', '\\', '|', '/' };
+	if ((n_tests % N_STEPS) == 0) {
+		printf(" %c (%lu)\r", cursors[(n_tests/N_STEPS)%sizeof(cursors)], n_tests);
+		fflush(stdout);
+	}
+#else
+	const unsigned long N_STEPS = 1000000;
+	if ((n_tests % N_STEPS) == 0)
+		printf(" ... %lu\n", n_tests);
+#endif
+}
+
+int main(void)
+{
+ static char buffer[1024];
+ static uint8 block[MAX_INSNS * MAX_INSN_LENGTH];
+ static char *insns[MAX_INSNS];
+ static int modes[MAX_INSNS];
+ n_all_tests = n_all_failures = 0;
+
+#if TEST_INST_ALU_REG
+ printf("Testing reg forms\n");
+ n_tests = n_failures = 0;
+ for (int r = 0; r < X86_MAX_ALU_REGS; r++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ GENOP##r(r); \
+} while (0)
+#define GEN64(INSN, GENOP) do { \
+ if (X86_TARGET_64BIT) \
+ GEN(INSN, GENOP); \
+} while (0)
+#define GENA(INSN, GENOP) do { \
+ if (VALID_REG8(r)) \
+ GEN(INSN "b", GENOP##B); \
+ GEN(INSN "w", GENOP##W); \
+ GEN(INSN "l", GENOP##L); \
+ GEN64(INSN "q", GENOP##Q); \
+} while (0)
+ GENA("not", NOT);
+ GENA("neg", NEG);
+ GENA("mul", MUL);
+ GENA("imul", IMUL);
+ GENA("div", DIV);
+ GENA("idiv", IDIV);
+ GENA("dec", DEC);
+ GENA("inc", INC);
+ if (X86_TARGET_64BIT) {
+ GEN("callq", CALLs);
+ GEN("jmpq", JMPs);
+ GEN("pushq", PUSHQ);
+ GEN("popq", POPQ);
+ }
+ else {
+ GEN("calll", CALLs);
+ GEN("jmpl", JMPs);
+ GEN("pushl", PUSHL);
+ GEN("popl", POPL);
+ }
+ GEN("bswap", BSWAPL); // FIXME: disass bug? no suffix
+ GEN64("bswap", BSWAPQ); // FIXME: disass bug? no suffix
+ if (VALID_REG8(r)) {
+ GEN("seto", SETO);
+ GEN("setno", SETNO);
+ GEN("setb", SETB);
+ GEN("setae", SETAE);
+ GEN("sete", SETE);
+ GEN("setne", SETNE);
+ GEN("setbe", SETBE);
+ GEN("seta", SETA);
+ GEN("sets", SETS);
+ GEN("setns", SETNS);
+ GEN("setp", SETP);
+ GEN("setnp", SETNP);
+ GEN("setl", SETL);
+ GEN("setge", SETGE);
+ GEN("setle", SETLE);
+ GEN("setg", SETG);
+ }
+#undef GENA
+#undef GEN64
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_reg(&ii, insns[i], r)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_ALU_REG_REG
+ printf("Testing reg,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int s = 0; s < X86_MAX_ALU_REGS; s++) {
+ for (int d = 0; d < X86_MAX_ALU_REGS; d++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ GENOP##rr(s, d); \
+} while (0)
+#define GEN64(INSN, GENOP) do { \
+ if (X86_TARGET_64BIT) \
+ GEN(INSN, GENOP); \
+} while (0)
+#define GEN1(INSN, GENOP, OP) do { \
+ insns[i++] = INSN; \
+ GENOP##rr(OP, s, d); \
+} while (0)
+#define GENA(INSN, GENOP) do { \
+ if (VALID_REG8(s) && VALID_REG8(d)) \
+ GEN(INSN "b", GENOP##B); \
+ GEN(INSN "w", GENOP##W); \
+ GEN(INSN "l", GENOP##L); \
+ GEN64(INSN "q", GENOP##Q); \
+} while (0)
+ GENA("adc", ADC);
+ GENA("add", ADD);
+ GENA("and", AND);
+ GENA("cmp", CMP);
+ GENA("or", OR);
+ GENA("sbb", SBB);
+ GENA("sub", SUB);
+ GENA("xor", XOR);
+ GENA("mov", MOV);
+ GEN("btw", BTW);
+ GEN("btl", BTL);
+ GEN64("btq", BTQ);
+ GEN("btcw", BTCW);
+ GEN("btcl", BTCL);
+ GEN64("btcq", BTCQ);
+ GEN("btrw", BTRW);
+ GEN("btrl", BTRL);
+ GEN64("btrq", BTRQ);
+ GEN("btsw", BTSW);
+ GEN("btsl", BTSL);
+ GEN64("btsq", BTSQ);
+ GEN("imulw", IMULW);
+ GEN("imull", IMULL);
+ GEN64("imulq", IMULQ);
+ GEN1("cmove", CMOVW, X86_CC_Z);
+ GEN1("cmove", CMOVL, X86_CC_Z);
+ if (X86_TARGET_64BIT)
+ GEN1("cmove", CMOVQ, X86_CC_Z);
+ GENA("test", TEST);
+ GENA("cmpxchg", CMPXCHG);
+ GENA("xadd", XADD);
+ GENA("xchg", XCHG);
+ GEN("bsfw", BSFW);
+ GEN("bsfl", BSFL);
+ GEN64("bsfq", BSFQ);
+ GEN("bsrw", BSRW);
+ GEN("bsrl", BSRL);
+ GEN64("bsrq", BSRQ);
+ if (VALID_REG8(s)) {
+ GEN("movsbw", MOVSBW);
+ GEN("movsbl", MOVSBL);
+ GEN64("movsbq", MOVSBQ);
+ GEN("movzbw", MOVZBW);
+ GEN("movzbl", MOVZBL);
+ GEN64("movzbq", MOVZBQ);
+ }
+ GEN("movswl", MOVSWL);
+ GEN64("movswq", MOVSWQ);
+ GEN("movzwl", MOVZWL);
+ GEN64("movzwq", MOVZWQ);
+ GEN64("movslq", MOVSLQ);
+#undef GENA
+#undef GEN1
+#undef GEN64
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_reg_reg(&ii, insns[i], s, d)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_ALU_CNT_REG
+ printf("Testing cl,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int d = 0; d < X86_MAX_ALU_REGS; d++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ GENOP##rr(X86_CL, d); \
+} while (0)
+#define GEN64(INSN, GENOP) do { \
+ if (X86_TARGET_64BIT) \
+ GEN(INSN, GENOP); \
+} while (0)
+#define GENA(INSN, GENOP) do { \
+ if (VALID_REG8(d)) \
+ GEN(INSN "b", GENOP##B); \
+ GEN(INSN "w", GENOP##W); \
+ GEN(INSN "l", GENOP##L); \
+ GEN64(INSN "q", GENOP##Q); \
+} while (0)
+ GENA("rol", ROL);
+ GENA("ror", ROR);
+ GENA("rcl", RCL);
+ GENA("rcr", RCR);
+ GENA("shl", SHL);
+ GENA("shr", SHR);
+ GENA("sar", SAR);
+#undef GENA
+#undef GEN64
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_reg_reg(&ii, insns[i], X86_CL, d)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+ static const uint32 imm_table[] = {
+ 0x00000000, 0x00000001, 0x00000002, 0x00000004,
+ 0x00000008, 0x00000010, 0x00000020, 0x00000040,
+ 0x00000080, 0x000000fe, 0x000000ff, 0x00000100,
+ 0x00000101, 0x00000102, 0xfffffffe, 0xffffffff,
+ 0x00000000, 0x10000000, 0x20000000, 0x30000000,
+ 0x40000000, 0x50000000, 0x60000000, 0x70000000,
+ 0x80000000, 0x90000000, 0xa0000000, 0xb0000000,
+ 0xc0000000, 0xd0000000, 0xe0000000, 0xf0000000,
+ 0xfffffffd, 0xfffffffe, 0xffffffff, 0x00000001,
+ 0x00000002, 0x00000003, 0x11111111, 0x22222222,
+ 0x33333333, 0x44444444, 0x55555555, 0x66666666,
+ 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa,
+ 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee,
+ };
+ const int n_imm_tab_count = sizeof(imm_table)/sizeof(imm_table[0]);
+
+#if TEST_INST_ALU_IMM_REG
+ printf("Testing imm,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int j = 0; j < n_imm_tab_count; j++) {
+ const uint32 value = imm_table[j];
+ for (int d = 0; d < X86_MAX_ALU_REGS; d++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i] = INSN; \
+ modes[i] = -1; \
+ i++; GENOP##ir(value, d); \
+ } while (0)
+#define GEN64(INSN, GENOP) do { \
+ if (X86_TARGET_64BIT) \
+ GEN(INSN, GENOP); \
+ } while (0)
+#define GENM(INSN, GENOP, MODE) do { \
+ insns[i] = INSN; \
+ modes[i] = MODE; \
+ i++; GENOP##ir(value, d); \
+ } while (0)
+#define GENM64(INSN, GENOP, MODE) do { \
+ if (X86_TARGET_64BIT) \
+ GENM(INSN, GENOP, MODE); \
+ } while (0)
+#define GENA(INSN, GENOP) do { \
+ if (VALID_REG8(d)) \
+ GEN(INSN "b", GENOP##B); \
+ GEN(INSN "w", GENOP##W); \
+ GEN(INSN "l", GENOP##L); \
+ GEN64(INSN "q", GENOP##Q); \
+ } while (0)
+#define GENAM(INSN, GENOP, MODE) do { \
+ if (VALID_REG8(d)) \
+ GENM(INSN "b", GENOP##B, MODE); \
+ GENM(INSN "w", GENOP##W, MODE); \
+ GENM(INSN "l", GENOP##L, MODE); \
+ GENM64(INSN "q", GENOP##Q, MODE); \
+ } while (0)
+ GENA("adc", ADC);
+ GENA("add", ADD);
+ GENA("and", AND);
+ GENA("cmp", CMP);
+ GENA("or", OR);
+ GENA("sbb", SBB);
+ GENA("sub", SUB);
+ GENA("xor", XOR);
+ GENA("mov", MOV);
+ GENM("btw", BTW, 1);
+ GENM("btl", BTL, 1);
+ GENM64("btq", BTQ, 1);
+ GENM("btcw", BTCW, 1);
+ GENM("btcl", BTCL, 1);
+ GENM64("btcq", BTCQ, 1);
+ GENM("btrw", BTRW, 1);
+ GENM("btrl", BTRL, 1);
+ GENM64("btrq", BTRQ, 1);
+ GENM("btsw", BTSW, 1);
+ GENM("btsl", BTSL, 1);
+ GENM64("btsq", BTSQ, 1);
+ if (value != 1) {
+ GENAM("rol", ROL, 1);
+ GENAM("ror", ROR, 1);
+ GENAM("rcl", RCL, 1);
+ GENAM("rcr", RCR, 1);
+ GENAM("shl", SHL, 1);
+ GENAM("shr", SHR, 1);
+ GENAM("sar", SAR, 1);
+ }
+ GENA("test", TEST);
+#undef GENAM
+#undef GENA
+#undef GENM64
+#undef GENM
+#undef GEN64
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+ static const uint32 off_table[] = {
+ 0x00000000,
+ 0x00000001,
+ 0x00000040,
+ 0x00000080,
+ 0x000000ff,
+ 0x00000100,
+ 0xfffffffe,
+ 0xffffffff,
+ };
+ const int off_table_count = sizeof(off_table) / sizeof(off_table[0]);
+
+#if TEST_INST_ALU_MEM_REG
+ printf("Testing mem,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int d = 0; d < off_table_count; d++) {
+ const uint32 D = off_table[d];
+ for (int B = -1; B < X86_MAX_ALU_REGS; B++) {
+ for (int I = -1; I < X86_MAX_ALU_REGS; I++) {
+ if (I == X86_RSP)
+ continue;
+ for (int S = 1; S < 16; S *= 2) {
+ if (I == -1 && S > 1)
+ continue;
+ for (int r = 0; r < X86_MAX_ALU_REGS; r++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ GENOP##mr(D, B, I, S, r); \
+ } while (0)
+#define GEN64(INSN, GENOP) do { \
+ if (X86_TARGET_64BIT) \
+ GEN(INSN, GENOP); \
+ } while (0)
+#define GENA(INSN, GENOP) do { \
+ if (VALID_REG8(r)) \
+ GEN(INSN "b", GENOP##B); \
+ GEN(INSN "w", GENOP##W); \
+ GEN(INSN "l", GENOP##L); \
+ GEN64(INSN "q", GENOP##Q); \
+ } while (0)
+ GENA("adc", ADC);
+ GENA("add", ADD);
+ GENA("and", AND);
+ GENA("cmp", CMP);
+ GENA("or", OR);
+ GENA("sbb", SBB);
+ GENA("sub", SUB);
+ GENA("xor", XOR);
+ GENA("mov", MOV);
+ GEN("imulw", IMULW);
+ GEN("imull", IMULL);
+ GEN64("imulq", IMULQ);
+ GEN("bsfw", BSFW);
+ GEN("bsfl", BSFL);
+ GEN64("bsfq", BSFQ);
+ GEN("bsrw", BSRW);
+ GEN("bsrl", BSRL);
+ GEN64("bsrq", BSRQ);
+ GEN("movsbw", MOVSBW);
+ GEN("movsbl", MOVSBL);
+ GEN64("movsbq", MOVSBQ);
+ GEN("movzbw", MOVZBW);
+ GEN("movzbl", MOVZBL);
+ GEN64("movzbq", MOVZBQ);
+ GEN("movswl", MOVSWL);
+ GEN64("movswq", MOVSWQ);
+ GEN("movzwl", MOVZWL);
+ GEN64("movzwq", MOVZWQ);
+ GEN64("movslq", MOVSLQ);
+#undef GENA
+#undef GEN64
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_mem_reg(&ii, insns[i], D, B, I, S, r)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ show_status(n_tests);
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ }
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_FPU_UNARY
+ printf("Testing FPU unary forms\n");
+ n_tests = n_failures = 0;
+ {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ GENOP(); \
+} while (0)
+ GEN("f2xm1", F2XM1);
+ GEN("fabs", FABS);
+ GEN("fchs", FCHS);
+ GEN("fcompp", FCOMPP);
+ GEN("fcos", FCOS);
+ GEN("fdecstp", FDECSTP);
+ GEN("fincstp", FINCSTP);
+ GEN("fld1", FLD1);
+ GEN("fldl2t", FLDL2T);
+ GEN("fldl2e", FLDL2E);
+ GEN("fldpi", FLDPI);
+ GEN("fldlg2", FLDLG2);
+ GEN("fldln2", FLDLN2);
+ GEN("fldz", FLDZ);
+ GEN("fnop", FNOP);
+ GEN("fpatan", FPATAN);
+ GEN("fprem", FPREM);
+ GEN("fprem1", FPREM1);
+ GEN("fptan", FPTAN);
+ GEN("frndint", FRNDINT);
+ GEN("fscale", FSCALE);
+ GEN("fsin", FSIN);
+ GEN("fsincos", FSINCOS);
+ GEN("fsqrt", FSQRT);
+ GEN("ftst", FTST);
+ GEN("fucompp", FUCOMPP);
+ GEN("fxam", FXAM);
+ GEN("fxtract", FXTRACT);
+ GEN("fyl2x", FYL2X);
+ GEN("fyl2xp1", FYL2XP1);
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_unary(&ii, insns[i])) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_FPU_REG
+ printf("Testing FPU reg forms\n");
+ n_tests = n_failures = 0;
+ for (int r = 0; r < X86_MAX_FPU_REGS; r++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GENr(INSN, GENOP) do { \
+ insns[i] = INSN; \
+ modes[i] = 0; \
+ i++, GENOP##r(r); \
+} while (0)
+#define GENr0(INSN, GENOP) do { \
+ insns[i] = INSN; \
+ modes[i] = 1; \
+ i++, GENOP##r0(r); \
+} while (0)
+#define GEN0r(INSN, GENOP) do { \
+ insns[i] = INSN; \
+ modes[i] = 2; \
+ i++, GENOP##0r(r); \
+} while (0)
+ GENr("fcom", FCOM);
+ GENr("fcomp", FCOMP);
+ GENr("ffree", FFREE);
+ GENr("fxch", FXCH);
+ GENr("fst", FST);
+ GENr("fstp", FSTP);
+ GENr("fucom", FUCOM);
+ GENr("fucomp", FUCOMP);
+ GENr0("fadd", FADD);
+ GENr0("fcmovb", FCMOVB);
+ GENr0("fcmove", FCMOVE);
+ GENr0("fcmovbe", FCMOVBE);
+ GENr0("fcmovu", FCMOVU);
+ GENr0("fcmovnb", FCMOVNB);
+ GENr0("fcmovne", FCMOVNE);
+ GENr0("fcmovnbe", FCMOVNBE);
+ GENr0("fcmovnu", FCMOVNU);
+ GENr0("fcomi", FCOMI);
+ GENr0("fcomip", FCOMIP);
+ GENr0("fucomi", FUCOMI);
+ GENr0("fucomip", FUCOMIP);
+ GENr0("fdiv", FDIV);
+ GENr0("fdivr", FDIVR);
+ GENr0("fmul", FMUL);
+ GENr0("fsub", FSUB);
+ GENr0("fsubr", FSUBR);
+#undef GEN0r
+#undef GENr0
+#undef GENr
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ switch (modes[i]) {
+ case 0:
+ if (!check_reg(&ii, insns[i], r)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+ break;
+ case 1:
+ if (!check_reg_reg(&ii, insns[i], r, 0)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+ break;
+ case 2:
+ if (!check_reg_reg(&ii, insns[i], 0, r)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+ break;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_FPU_MEM
+ printf("Testing FPU mem forms\n");
+ n_tests = n_failures = 0;
+ for (int d = 0; d < off_table_count; d++) {
+ const uint32 D = off_table[d];
+ for (int B = -1; B < X86_MAX_ALU_REGS; B++) {
+ for (int I = -1; I < X86_MAX_ALU_REGS; I++) {
+ if (I == X86_RSP)
+ continue;
+ for (int S = 1; S < 16; S *= 2) {
+ if (I == -1 && S > 1)
+ continue;
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ GENOP##m(D, B, I, S); \
+} while (0)
+ GEN("fadds", FADDS);
+ GEN("faddl", FADDD);
+ GEN("fiadd", FIADDW);
+ GEN("fiaddl", FIADDL);
+ GEN("fbld", FBLD);
+ GEN("fbstp", FBSTP);
+ GEN("fcoms", FCOMS);
+ GEN("fcoml", FCOMD);
+ GEN("fcomps", FCOMPS);
+ GEN("fcompl", FCOMPD);
+ GEN("fdivs", FDIVS);
+ GEN("fdivl", FDIVD);
+ GEN("fidiv", FIDIVW);
+ GEN("fidivl", FIDIVL);
+ GEN("fdivrs", FDIVRS);
+ GEN("fdivrl", FDIVRD);
+ GEN("fidivr", FIDIVRW);
+ GEN("fidivrl", FIDIVRL);
+ GEN("ficom", FICOMW);
+ GEN("ficoml", FICOML);
+ GEN("ficomp", FICOMPW);
+ GEN("ficompl", FICOMPL);
+ GEN("fild", FILDW);
+ GEN("fildl", FILDL);
+ GEN("fildll", FILDQ);
+ GEN("fist", FISTW);
+ GEN("fistl", FISTL);
+ GEN("fistp", FISTPW);
+ GEN("fistpl", FISTPL);
+ GEN("fistpll", FISTPQ);
+ GEN("fisttp", FISTTPW);
+ GEN("fisttpl", FISTTPL);
+ GEN("fisttpll", FISTTPQ);
+ GEN("flds", FLDS);
+ GEN("fldl", FLDD);
+ GEN("fldt", FLDT);
+ GEN("fmuls", FMULS);
+ GEN("fmull", FMULD);
+ GEN("fimul", FIMULW);
+ GEN("fimull", FIMULL);
+ GEN("fsts", FSTS);
+ GEN("fstl", FSTD);
+ GEN("fstps", FSTPS);
+ GEN("fstpl", FSTPD);
+ GEN("fstpt", FSTPT);
+ GEN("fsubs", FSUBS);
+ GEN("fsubl", FSUBD);
+ GEN("fisub", FISUBW);
+ GEN("fisubl", FISUBL);
+ GEN("fsubrs", FSUBRS);
+ GEN("fsubrl", FSUBRD);
+ GEN("fisubr", FISUBRW);
+ GEN("fisubrl", FISUBRL);
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_mem(&ii, insns[i], D, B, I, S)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ show_status(n_tests);
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_MMX_REG_REG
+ printf("Testing MMX reg,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int s = 0; s < X86_MAX_MMX_REGS; s++) {
+ for (int d = 0; d < X86_MAX_MMX_REGS; d++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ MMX_##GENOP##rr(s, d); \
+} while (0)
+#define GEN64(INSN, GENOP) do { \
+ if (X86_TARGET_64BIT) \
+ GEN(INSN, GENOP); \
+} while (0)
+ GEN("movq", MOVQ);
+ GEN("packsswb", PACKSSWB);
+ GEN("packssdw", PACKSSDW);
+ GEN("packuswb", PACKUSWB);
+ GEN("paddb", PADDB);
+ GEN("paddw", PADDW);
+ GEN("paddd", PADDD);
+ GEN("paddq", PADDQ);
+ GEN("paddsb", PADDSB);
+ GEN("paddsw", PADDSW);
+ GEN("paddusb", PADDUSB);
+ GEN("paddusw", PADDUSW);
+ GEN("pand", PAND);
+ GEN("pandn", PANDN);
+ GEN("pavgb", PAVGB);
+ GEN("pavgw", PAVGW);
+ GEN("pcmpeqb", PCMPEQB);
+ GEN("pcmpeqw", PCMPEQW);
+ GEN("pcmpeqd", PCMPEQD);
+ GEN("pcmpgtb", PCMPGTB);
+ GEN("pcmpgtw", PCMPGTW);
+ GEN("pcmpgtd", PCMPGTD);
+ GEN("pmaddwd", PMADDWD);
+ GEN("pmaxsw", PMAXSW);
+ GEN("pmaxub", PMAXUB);
+ GEN("pminsw", PMINSW);
+ GEN("pminub", PMINUB);
+ GEN("pmulhuw", PMULHUW);
+ GEN("pmulhw", PMULHW);
+ GEN("pmullw", PMULLW);
+ GEN("pmuludq", PMULUDQ);
+ GEN("por", POR);
+ GEN("psadbw", PSADBW);
+ GEN("psllw", PSLLW);
+ GEN("pslld", PSLLD);
+ GEN("psllq", PSLLQ);
+ GEN("psraw", PSRAW);
+ GEN("psrad", PSRAD);
+ GEN("psrlw", PSRLW);
+ GEN("psrld", PSRLD);
+ GEN("psrlq", PSRLQ);
+ GEN("psubb", PSUBB);
+ GEN("psubw", PSUBW);
+ GEN("psubd", PSUBD);
+ GEN("psubq", PSUBQ);
+ GEN("psubsb", PSUBSB);
+ GEN("psubsw", PSUBSW);
+ GEN("psubusb", PSUBUSB);
+ GEN("psubusw", PSUBUSW);
+ GEN("punpckhbw", PUNPCKHBW);
+ GEN("punpckhwd", PUNPCKHWD);
+ GEN("punpckhdq", PUNPCKHDQ);
+ GEN("punpcklbw", PUNPCKLBW);
+ GEN("punpcklwd", PUNPCKLWD);
+ GEN("punpckldq", PUNPCKLDQ);
+ GEN("pxor", PXOR);
+ GEN("pabsb", PABSB);
+ GEN("pabsw", PABSW);
+ GEN("pabsd", PABSD);
+ GEN("phaddw", PHADDW);
+ GEN("phaddd", PHADDD);
+ GEN("phaddsw", PHADDSW);
+ GEN("phsubw", PHSUBW);
+ GEN("phsubd", PHSUBD);
+ GEN("phsubsw", PHSUBSW);
+ GEN("pmaddubsw", PMADDUBSW);
+ GEN("pmulhrsw", PMULHRSW);
+ GEN("pshufb", PSHUFB);
+ GEN("psignb", PSIGNB);
+ GEN("psignw", PSIGNW);
+ GEN("psignd", PSIGND);
+#undef GEN64
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_reg_reg(&ii, insns[i], s, d)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+ static const uint8 imm8_table[] = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x06, 0x07, 0x08, 0x09,
+ 0x0e, 0x0f, 0x10, 0x11,
+ 0x1e, 0x1f, 0x20, 0x21,
+ 0xfc, 0xfd, 0xfe, 0xff,
+ };
+ const int n_imm8_tab_count = sizeof(imm8_table)/sizeof(imm8_table[0]);
+
+#if TEST_INST_MMX_IMM_REG
+ printf("Testing imm,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int j = 0; j < n_imm8_tab_count; j++) {
+ const uint8 value = imm8_table[j];
+ for (int d = 0; d < X86_MAX_MMX_REGS; d++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i] = INSN; \
+ modes[i] = 1; \
+ i++; MMX_##GENOP##ir(value, d); \
+} while (0)
+ GEN("psllw", PSLLW);
+ GEN("pslld", PSLLD);
+ GEN("psllq", PSLLQ);
+ GEN("psraw", PSRAW);
+ GEN("psrad", PSRAD);
+ GEN("psrlw", PSRLW);
+ GEN("psrld", PSRLD);
+ GEN("psrlq", PSRLQ);
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_MMX_MEM_REG
+ printf("Testing MMX mem,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int d = 0; d < off_table_count; d++) {
+ const uint32 D = off_table[d];
+ for (int B = -1; B < X86_MAX_ALU_REGS; B++) {
+ for (int I = -1; I < X86_MAX_ALU_REGS; I++) {
+ if (I == X86_RSP)
+ continue;
+ for (int S = 1; S < 16; S *= 2) {
+ if (I == -1 && S > 1)
+ continue;
+ for (int r = 0; r < X86_MAX_MMX_REGS; r++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define _GENrm(INSN, GENOP) do { \
+ insns[i] = INSN; \
+ modes[i] = 0; \
+ i++; MMX_##GENOP##rm(r, D, B, I, S); \
+} while (0)
+#define _GENmr(INSN, GENOP) do { \
+ insns[i] = INSN; \
+ modes[i] = 1; \
+ i++; MMX_##GENOP##mr(D, B, I, S, r); \
+} while (0)
+#define GEN(INSN, GENOP) do { \
+ _GENmr(INSN, GENOP); \
+} while (0)
+ _GENmr("movd", MOVD);
+ _GENrm("movd", MOVD);
+ _GENmr("movq", MOVQ);
+ _GENrm("movq", MOVQ);
+ GEN("packsswb", PACKSSWB);
+ GEN("packssdw", PACKSSDW);
+ GEN("packuswb", PACKUSWB);
+ GEN("paddb", PADDB);
+ GEN("paddw", PADDW);
+ GEN("paddd", PADDD);
+ GEN("paddq", PADDQ);
+ GEN("paddsb", PADDSB);
+ GEN("paddsw", PADDSW);
+ GEN("paddusb", PADDUSB);
+ GEN("paddusw", PADDUSW);
+ GEN("pand", PAND);
+ GEN("pandn", PANDN);
+ GEN("pavgb", PAVGB);
+ GEN("pavgw", PAVGW);
+ GEN("pcmpeqb", PCMPEQB);
+ GEN("pcmpeqw", PCMPEQW);
+ GEN("pcmpeqd", PCMPEQD);
+ GEN("pcmpgtb", PCMPGTB);
+ GEN("pcmpgtw", PCMPGTW);
+ GEN("pcmpgtd", PCMPGTD);
+ GEN("pmaddwd", PMADDWD);
+ GEN("pmaxsw", PMAXSW);
+ GEN("pmaxub", PMAXUB);
+ GEN("pminsw", PMINSW);
+ GEN("pminub", PMINUB);
+ GEN("pmulhuw", PMULHUW);
+ GEN("pmulhw", PMULHW);
+ GEN("pmullw", PMULLW);
+ GEN("pmuludq", PMULUDQ);
+ GEN("por", POR);
+ GEN("psadbw", PSADBW);
+ GEN("psllw", PSLLW);
+ GEN("pslld", PSLLD);
+ GEN("psllq", PSLLQ);
+ GEN("psraw", PSRAW);
+ GEN("psrad", PSRAD);
+ GEN("psrlw", PSRLW);
+ GEN("psrld", PSRLD);
+ GEN("psrlq", PSRLQ);
+ GEN("psubb", PSUBB);
+ GEN("psubw", PSUBW);
+ GEN("psubd", PSUBD);
+ GEN("psubq", PSUBQ);
+ GEN("psubsb", PSUBSB);
+ GEN("psubsw", PSUBSW);
+ GEN("psubusb", PSUBUSB);
+ GEN("psubusw", PSUBUSW);
+ GEN("punpckhbw", PUNPCKHBW);
+ GEN("punpckhwd", PUNPCKHWD);
+ GEN("punpckhdq", PUNPCKHDQ);
+ GEN("punpcklbw", PUNPCKLBW);
+ GEN("punpcklwd", PUNPCKLWD);
+ GEN("punpckldq", PUNPCKLDQ);
+ GEN("pxor", PXOR);
+ GEN("pabsb", PABSB);
+ GEN("pabsw", PABSW);
+ GEN("pabsd", PABSD);
+ GEN("phaddw", PHADDW);
+ GEN("phaddd", PHADDD);
+ GEN("phaddsw", PHADDSW);
+ GEN("phsubw", PHSUBW);
+ GEN("phsubd", PHSUBD);
+ GEN("phsubsw", PHSUBSW);
+ GEN("pmaddubsw", PMADDUBSW);
+ GEN("pmulhrsw", PMULHRSW);
+ GEN("pshufb", PSHUFB);
+ GEN("psignb", PSIGNB);
+ GEN("psignw", PSIGNW);
+ GEN("psignd", PSIGND);
+#undef GEN
+#undef _GENmr
+#undef _GENrm
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_mem_reg(&ii, insns[i], D, B, I, S, r, modes[i])) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ show_status(n_tests);
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ }
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_SSE_REG_REG
+ printf("Testing SSE reg,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int s = 0; s < X86_MAX_SSE_REGS; s++) {
+ for (int d = 0; d < X86_MAX_SSE_REGS; d++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ GENOP##rr(s, d); \
+} while (0)
+#define GEN64(INSN, GENOP) do { \
+ if (X86_TARGET_64BIT) \
+ GEN(INSN, GENOP); \
+} while (0)
+#define GEN1(INSN, GENOP) do { \
+ GEN(INSN "s", GENOP##S); \
+ GEN(INSN "d", GENOP##D); \
+} while (0)
+#define GENA(INSN, GENOP) do { \
+ GEN1(INSN "s", GENOP##S); \
+ GEN1(INSN "p", GENOP##P); \
+} while (0)
+#define GENI(INSN, GENOP, IMM) do { \
+ insns[i++] = INSN; \
+ GENOP##rr(IMM, s, d); \
+} while (0)
+#define GENI1(INSN, GENOP, IMM) do { \
+ GENI(INSN "s", GENOP##S, IMM); \
+ GENI(INSN "d", GENOP##D, IMM); \
+} while (0)
+#define GENIA(INSN, GENOP, IMM) do { \
+ GENI1(INSN "s", GENOP##S, IMM); \
+ GENI1(INSN "p", GENOP##P, IMM); \
+} while (0)
+ GEN1("andp", ANDP);
+ GEN1("andnp", ANDNP);
+ GEN1("orp", ORP);
+ GEN1("xorp", XORP);
+ GENA("add", ADD);
+ GENA("sub", SUB);
+ GENA("mul", MUL);
+ GENA("div", DIV);
+ GEN1("comis", COMIS);
+ GEN1("ucomis", UCOMIS);
+ GENA("min", MIN);
+ GENA("max", MAX);
+ GEN("rcpss", RCPSS);
+ GEN("rcpps", RCPPS);
+ GEN("rsqrtss", RSQRTSS);
+ GEN("rsqrtps", RSQRTPS);
+ GENA("sqrt", SQRT);
+ GENIA("cmpeq", CMP, X86_SSE_CC_EQ);
+ GENIA("cmplt", CMP, X86_SSE_CC_LT);
+ GENIA("cmple", CMP, X86_SSE_CC_LE);
+ GENIA("cmpunord", CMP, X86_SSE_CC_U);
+ GENIA("cmpneq", CMP, X86_SSE_CC_NEQ);
+ GENIA("cmpnlt", CMP, X86_SSE_CC_NLT);
+ GENIA("cmpnle", CMP, X86_SSE_CC_NLE);
+ GENIA("cmpord", CMP, X86_SSE_CC_O);
+ GEN1("movap", MOVAP);
+ GEN("movdqa", MOVDQA);
+ GEN("movdqu", MOVDQU);
+ GEN("movd", MOVDXD);
+ GEN64("movd", MOVQXD); // FIXME: disass bug? "movq" expected
+ GEN("movd", MOVDXS);
+ GEN64("movd", MOVQXS); // FIXME: disass bug? "movq" expected
+ GEN("cvtdq2pd", CVTDQ2PD);
+ GEN("cvtdq2ps", CVTDQ2PS);
+ GEN("cvtpd2dq", CVTPD2DQ);
+ GEN("cvtpd2ps", CVTPD2PS);
+ GEN("cvtps2dq", CVTPS2DQ);
+ GEN("cvtps2pd", CVTPS2PD);
+ GEN("cvtsd2si", CVTSD2SIL);
+ GEN64("cvtsd2siq", CVTSD2SIQ);
+ GEN("cvtsd2ss", CVTSD2SS);
+ GEN("cvtsi2sd", CVTSI2SDL);
+ GEN64("cvtsi2sdq", CVTSI2SDQ);
+ GEN("cvtsi2ss", CVTSI2SSL);
+ GEN64("cvtsi2ssq", CVTSI2SSQ);
+ GEN("cvtss2sd", CVTSS2SD);
+ GEN("cvtss2si", CVTSS2SIL);
+ GEN64("cvtss2siq", CVTSS2SIQ);
+ GEN("cvttpd2dq", CVTTPD2DQ);
+ GEN("cvttps2dq", CVTTPS2DQ);
+ GEN("cvttsd2si", CVTTSD2SIL);
+ GEN64("cvttsd2siq", CVTTSD2SIQ);
+ GEN("cvttss2si", CVTTSS2SIL);
+ GEN64("cvttss2siq", CVTTSS2SIQ);
+ if (s < 8) {
+ // MMX source register
+ GEN("cvtpi2pd", CVTPI2PD);
+ GEN("cvtpi2ps", CVTPI2PS);
+ }
+ if (d < 8) {
+ // MMX dest register
+ GEN("cvtpd2pi", CVTPD2PI);
+ GEN("cvtps2pi", CVTPS2PI);
+ GEN("cvttpd2pi", CVTTPD2PI);
+ GEN("cvttps2pi", CVTTPS2PI);
+ }
+#undef GENIA
+#undef GENI1
+#undef GENI
+#undef GENA
+#undef GEN1
+#undef GEN64
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_reg_reg(&ii, insns[i], s, d)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+#if TEST_INST_SSE_MEM_REG
+ printf("Testing SSE mem,reg forms\n");
+ n_tests = n_failures = 0;
+ for (int d = 0; d < off_table_count; d++) {
+ const uint32 D = off_table[d];
+ for (int B = -1; B < X86_MAX_ALU_REGS; B++) {
+ for (int I = -1; I < X86_MAX_ALU_REGS; I++) {
+ if (I == X86_RSP)
+ continue;
+ for (int S = 1; S < 16; S *= 2) {
+ if (I == -1 && S > 1)
+ continue;
+ for (int r = 0; r < X86_MAX_SSE_REGS; r++) {
+ set_target(block);
+ uint8 *b = get_target();
+ int i = 0;
+#define GEN(INSN, GENOP) do { \
+ insns[i++] = INSN; \
+ GENOP##mr(D, B, I, S, r); \
+} while (0)
+#define GEN64(INSN, GENOP) do { \
+ if (X86_TARGET_64BIT) \
+ GEN(INSN, GENOP); \
+} while (0)
+#define GEN1(INSN, GENOP) do { \
+ GEN(INSN "s", GENOP##S); \
+ GEN(INSN "d", GENOP##D); \
+} while (0)
+#define GENA(INSN, GENOP) do { \
+ GEN1(INSN "s", GENOP##S); \
+ GEN1(INSN "p", GENOP##P); \
+} while (0)
+#define GENI(INSN, GENOP, IMM) do { \
+ insns[i++] = INSN; \
+ GENOP##mr(IMM, D, B, I, S, r); \
+} while (0)
+#define GENI1(INSN, GENOP, IMM) do { \
+ GENI(INSN "s", GENOP##S, IMM); \
+ GENI(INSN "d", GENOP##D, IMM); \
+} while (0)
+#define GENIA(INSN, GENOP, IMM) do { \
+ GENI1(INSN "s", GENOP##S, IMM); \
+ GENI1(INSN "p", GENOP##P, IMM); \
+} while (0)
+ GEN1("andp", ANDP);
+ GEN1("andnp", ANDNP);
+ GEN1("orp", ORP);
+ GEN1("xorp", XORP);
+ GENA("add", ADD);
+ GENA("sub", SUB);
+ GENA("mul", MUL);
+ GENA("div", DIV);
+ GEN1("comis", COMIS);
+ GEN1("ucomis", UCOMIS);
+ GENA("min", MIN);
+ GENA("max", MAX);
+ GEN("rcpss", RCPSS);
+ GEN("rcpps", RCPPS);
+ GEN("rsqrtss", RSQRTSS);
+ GEN("rsqrtps", RSQRTPS);
+ GENA("sqrt", SQRT);
+ GENIA("cmpeq", CMP, X86_SSE_CC_EQ);
+ GENIA("cmplt", CMP, X86_SSE_CC_LT);
+ GENIA("cmple", CMP, X86_SSE_CC_LE);
+ GENIA("cmpunord", CMP, X86_SSE_CC_U);
+ GENIA("cmpneq", CMP, X86_SSE_CC_NEQ);
+ GENIA("cmpnlt", CMP, X86_SSE_CC_NLT);
+ GENIA("cmpnle", CMP, X86_SSE_CC_NLE);
+ GENIA("cmpord", CMP, X86_SSE_CC_O);
+ GEN1("movap", MOVAP);
+ GEN("movdqa", MOVDQA);
+ GEN("movdqu", MOVDQU);
+#if 0
+ // FIXME: extraneous REX bits generated
+ GEN("movd", MOVDXD);
+ GEN64("movd", MOVQXD); // FIXME: disass bug? "movq" expected
+#endif
+ GEN("cvtdq2pd", CVTDQ2PD);
+ GEN("cvtdq2ps", CVTDQ2PS);
+ GEN("cvtpd2dq", CVTPD2DQ);
+ GEN("cvtpd2ps", CVTPD2PS);
+ GEN("cvtps2dq", CVTPS2DQ);
+ GEN("cvtps2pd", CVTPS2PD);
+ GEN("cvtsd2si", CVTSD2SIL);
+ GEN64("cvtsd2siq", CVTSD2SIQ);
+ GEN("cvtsd2ss", CVTSD2SS);
+ GEN("cvtsi2sd", CVTSI2SDL);
+ GEN64("cvtsi2sdq", CVTSI2SDQ);
+ GEN("cvtsi2ss", CVTSI2SSL);
+ GEN64("cvtsi2ssq", CVTSI2SSQ);
+ GEN("cvtss2sd", CVTSS2SD);
+ GEN("cvtss2si", CVTSS2SIL);
+ GEN64("cvtss2siq", CVTSS2SIQ);
+ GEN("cvttpd2dq", CVTTPD2DQ);
+ GEN("cvttps2dq", CVTTPS2DQ);
+ GEN("cvttsd2si", CVTTSD2SIL);
+ GEN64("cvttsd2siq", CVTTSD2SIQ);
+ GEN("cvttss2si", CVTTSS2SIL);
+ GEN64("cvttss2siq", CVTTSS2SIQ);
+ if (r < 8) {
+ // MMX dest register
+ GEN("cvtpd2pi", CVTPD2PI);
+ GEN("cvtps2pi", CVTPS2PI);
+ GEN("cvttpd2pi", CVTTPD2PI);
+ GEN("cvttps2pi", CVTTPS2PI);
+ }
+#undef GENIA
+#undef GENI1
+#undef GENI
+#undef GENA
+#undef GEN1
+#undef GEN64
+#undef GEN
+ int last_insn = i;
+ uint8 *e = get_target();
+
+ uint8 *p = b;
+ i = 0;
+ while (p < e) {
+ int n = disass_x86(buffer, (uintptr)p);
+ insn_t ii;
+ parse_insn(&ii, buffer);
+
+ if (!check_mem_reg(&ii, insns[i], D, B, I, S, r)) {
+ show_instruction(buffer, p);
+ n_failures++;
+ }
+
+ p += n;
+ i += 1;
+ n_tests++;
+ show_status(n_tests);
+ }
+ if (i != last_insn)
+ abort();
+ }
+ }
+ }
+ }
+ }
+ printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
+ n_all_tests += n_tests;
+ n_all_failures += n_failures;
+#endif
+
+ printf("\n");
+ printf("All %ld tests run, %ld failures\n", n_all_tests, n_all_failures);
+}
// set the source format
memset (&wfxSrc, 0, sizeof (wfxSrc));
wfxSrc.Format.wFormatTag = WAVE_FORMAT_PCM;
- wfxSrc.Format.nChannels = get_audio_nativechannels () ? get_audio_nativechannels () : 2;
+ wfxSrc.Format.nChannels = get_audio_nativechannels (currprefs.sound_stereo) ? get_audio_nativechannels (currprefs.sound_stereo) : 2;
wfxSrc.Format.nSamplesPerSec = workprefs.sound_freq ? workprefs.sound_freq : 44100;
wfxSrc.Format.nBlockAlign = wfxSrc.Format.nChannels * 16 / 8;
wfxSrc.Format.nAvgBytesPerSec = wfxSrc.Format.nBlockAlign * wfxSrc.Format.nSamplesPerSec;
int AVIOutput_GetAudioCodec (TCHAR *name, int len)
{
+ AVIOutput_Initialize ();
if (AVIOutput_AudioAllocated ())
return AVIOutput_GetAudioCodecName (pwfxDst, name, len);
if (!AVIOutput_AllocateAudio ())
int AVIOutput_ChooseAudioCodec (HWND hwnd, TCHAR *s, int len)
{
+ AVIOutput_Initialize ();
AVIOutput_End();
if (!AVIOutput_AllocateAudio ())
return 0;
avioutput_height = WIN32GFX_GetHeight ();
avioutput_bits = WIN32GFX_GetDepth (0);
+ AVIOutput_Initialize ();
AVIOutput_ReleaseVideo ();
if (!avioutput_width || !avioutput_height || !avioutput_bits) {
avioutput_width = workprefs.gfx_size.width;
int AVIOutput_GetVideoCodec (TCHAR *name, int len)
{
+ AVIOutput_Initialize ();
+
if (AVIOutput_VideoAllocated ())
return AVIOutput_GetVideoCodecName (pcompvars, name, len);
if (!AVIOutput_AllocateVideo ())
int AVIOutput_ChooseVideoCodec (HWND hwnd, TCHAR *s, int len)
{
+ AVIOutput_Initialize ();
+
AVIOutput_End ();
if (!AVIOutput_AllocateVideo ())
return 0;
uae_u16 tw;
uae_u32 tl;
int bits = 16;
- int channels = get_audio_nativechannels ();
+ int channels = get_audio_nativechannels (currprefs.sound_stereo);
fseek (wavfile, 0, SEEK_SET);
fwrite ("RIFF", 1, 4, wavfile);
TCHAR *ext1, *ext2;
struct avientry *ae = NULL;
+ AVIOutput_Initialize ();
+
avientryindex = -1;
if (avioutput_enabled) {
if (!avioutput_requested)
void AVIOutput_Initialize (void)
{
+ if (avioutput_init)
+ return;
+
InitializeCriticalSection (&AVIOutput_CriticalSection);
cs_allocated = 1;
if (!pcompvars)
return;
pcompvars->cbSize = sizeof (COMPVARS);
-
- if (!avioutput_init) {
- AVIFileInit ();
- avioutput_init = 1;
- }
+ AVIFileInit ();
+ avioutput_init = 1;
}
static void close_device (int unitnum);
static int open_device (int unitnum);
-static void mcierr (TCHAR *str, DWORD err)
+static int mcierr (TCHAR *str, DWORD err)
{
TCHAR es[1000];
if (err == MMSYSERR_NOERROR)
- return;
+ return MMSYSERR_NOERROR;
if (mciGetErrorString (err, es, sizeof es))
write_log (L"MCIErr: %s: %d = '%s'\n", str, err, es);
+ return err;
}
static int win32_error (int unitnum, const TCHAR *format,...)
mciOpen.lpstrElementName = elname;
mciOpen.lpstrAlias = alname;
flags = MCI_OPEN_ELEMENT | MCI_OPEN_SHAREABLE | MCI_OPEN_ALIAS | MCI_OPEN_TYPE | MCI_OPEN_TYPE_ID | MCI_WAIT;
- err = mciSendCommand (0, MCI_OPEN, flags, (DWORD)(LPVOID)&mciOpen);
+ err = mciSendCommand (0, MCI_OPEN, flags, (DWORD_PTR)(LPVOID)&mciOpen);
ciw->mciid = mciOpen.wDeviceID;
if (err != MMSYSERR_NOERROR) {
if (closed)
{
struct dev_info_ioctl *ciw = &ciw32[unitnum];
- open_mci(unitnum);
+ open_mci (unitnum);
if (ciw->mciid > 0) {
memset (buf, 0, sizeof buf);
memset (&mciStatusParms, 0, sizeof mciStatusParms);
mciStatusParms.dwItem = MCI_STATUS_MODE;
- err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD)(LPVOID)&mciStatusParms);
+ err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD_PTR)&mciStatusParms);
if (err != MMSYSERR_NOERROR)
return 0;
mode = mciStatusParms.dwReturn;
mciStatusParms.dwItem = MCI_STATUS_CURRENT_TRACK;
- err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD)(LPVOID)&mciStatusParms);
+ err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD_PTR)&mciStatusParms);
if (err != MMSYSERR_NOERROR)
return 0;
trk = mciStatusParms.dwReturn - 1;
if (trk < 0)
trk = 0;
mciStatusParms.dwItem = MCI_STATUS_POSITION;
- err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD)(LPVOID)&mciStatusParms);
+ err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD_PTR)&mciStatusParms);
if (err != MMSYSERR_NOERROR)
return 0;
pos = (((mciStatusParms.dwReturn >> 16) & 0xff) << 0) | (((mciStatusParms.dwReturn >> 8) & 0xff) << 8) | (((mciStatusParms.dwReturn >> 0) & 0xff) << 16);
DWORD err;
MCI_STATUS_PARMS mciStatusParms;
+ memset (&mciStatusParms, 0, sizeof mciStatusParms);
mciStatusParms.dwItem = MCI_STATUS_MEDIA_PRESENT;
- err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD)(LPVOID)&mciStatusParms);
+ err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD_PTR)&mciStatusParms);
if (err != MMSYSERR_NOERROR)
return 0;
if (mciStatusParms.dwReturn)
return 0;
} else {
+
struct device_info di;
memset (&di, 0, sizeof di);
return fetch_geometry (unitnum, &di);
+
}
}
#include "sysconfig.h"
#include "sysdeps.h"
-#if defined (OPENGL) && defined (GFXFILTER)
+#if defined (D3D) && defined (GFXFILTER)
#include "options.h"
#include "xwin.h"
#include "win32.h"
#include "win32gfx.h"
#include "gfxfilter.h"
+#include "statusline.h"
#include <d3d9.h>
#include <d3dx9.h>
static int psEnabled, psActive, psPreProcess;
static int tformat;
-static int d3d_enabled, d3d_ex, scanlines_ok;
+static int d3d_enabled, d3d_ex;
static LPDIRECT3D9 d3d;
static LPDIRECT3D9EX d3dex;
static D3DPRESENT_PARAMETERS dpp;
static LPDIRECT3DDEVICE9 d3ddev;
static LPDIRECT3DDEVICE9EX d3ddevex;
static D3DSURFACE_DESC dsdbb;
-static LPDIRECT3DTEXTURE9 texture, sltexture;
+static LPDIRECT3DTEXTURE9 texture, sltexture, ledtexture;
static LPDIRECT3DTEXTURE9 lpWorkTexture1, lpWorkTexture2;
static LPDIRECT3DVOLUMETEXTURE9 lpHq2xLookupTexture;
static IDirect3DVertexBuffer9 *vertexBuffer;
+static ID3DXSprite *sprite;
static HWND d3dhwnd;
static D3DXMATRIX m_matProj;
static D3DXMATRIX m_matPreView;
static D3DXMATRIX m_matPreWorld;
+static int ledwidth, ledheight;
static int twidth, theight, max_texture_w, max_texture_h;
static int tin_w, tin_h, window_h, window_w;
static int t_depth;
return t;
}
-
static int createtexture (int w, int h)
{
HRESULT hr;
return 1;
}
+/*
+ * Redraw the status line (drive LEDs etc.) into the LED overlay texture.
+ *
+ * Locks ledtexture, lets draw_status_line_single() render each of the
+ * TD_TOTAL_HEIGHT rows in 32-bit format using per-component lookup
+ * tables, then unlocks the texture again.  Does nothing when the LED texture
+ * was never created, and logs and returns if the lock fails.
+ */
+static void updateleds (void)
+{
+    D3DLOCKED_RECT locked;
+    HRESULT hr;
+    /* Explicit unsigned 32-bit element type: implicit int is not valid C99,
+     * and the alpha table needs bit 31.
+     * NOTE(review): assumes draw_status_line_single() takes uae_u32 tables - confirm. */
+    static uae_u32 rc[256], gc[256], bc[256], a[256];
+    static int done;
+    int i, y;
+
+    /* createledtexture() may have failed; never lock a NULL texture. */
+    if (!ledtexture)
+        return;
+
+    if (!done) {
+        /* Build the component tables once: table[v] is v shifted into its channel. */
+        for (i = 0; i < 256; i++) {
+            /* Shift as unsigned: (int)i << 24 overflows for i >= 128. */
+            rc[i] = (uae_u32)i << 16;
+            gc[i] = (uae_u32)i << 8;
+            bc[i] = (uae_u32)i << 0;
+            a[i] = (uae_u32)i << 24;
+        }
+        done = 1;
+    }
+    hr = IDirect3DTexture9_LockRect (ledtexture, 0, &locked, NULL, D3DLOCK_DISCARD);
+    if (FAILED (hr)) {
+        write_log (L"LED IDirect3DTexture9_LockRect failed: %s\n", D3D_ErrorString (hr));
+        return;
+    }
+    for (y = 0; y < TD_TOTAL_HEIGHT; y++) {
+        /* Rows are Pitch bytes apart, which may exceed ledwidth * 4. */
+        uae_u8 *buf = (uae_u8*)locked.pBits + y * locked.Pitch;
+        draw_status_line_single (buf, 32 / 8, y, ledwidth, rc, gc, bc, a);
+    }
+    IDirect3DTexture9_UnlockRect (ledtexture, 0);
+}
+
+/*
+ * Allocate the A8R8G8B8 overlay texture used for the on-screen LEDs.
+ * The requested size is window_w x TD_TOTAL_HEIGHT; createtext() receives
+ * pointers to ledwidth/ledheight and may therefore adjust them.
+ * Returns 1 when the texture exists afterwards, 0 otherwise.
+ */
+static int createledtexture (void)
+{
+    ledheight = TD_TOTAL_HEIGHT;
+    ledwidth = window_w;
+    ledtexture = createtext (&ledwidth, &ledheight, D3DFMT_A8R8G8B8);
+    return ledtexture ? 1 : 0;
+}
+
static int createsltexture (void)
{
UINT ww = required_sl_texture_w;
UINT hh = required_sl_texture_h;
- sltexture = createtext (&ww, &hh, D3DFMT_A4R4G4B4);
+ sltexture = createtext (&ww, &hh, t_depth < 32 ? D3DFMT_A4R4G4B4 : D3DFMT_A8R8G8B8);
if (!sltexture)
return 0;
required_sl_texture_w = ww;
required_sl_texture_h = hh;
write_log (L"D3D: SL %d*%d texture allocated\n", ww, hh);
-
- scanlines_ok = 1;
return 1;
}
hr = IDirect3DDevice9_SetSamplerState (d3ddev, 0, D3DSAMP_MINFILTER, v);
hr = IDirect3DDevice9_SetSamplerState (d3ddev, 0, D3DSAMP_MAGFILTER, v);
hr = IDirect3DDevice9_SetSamplerState (d3ddev, 0, D3DSAMP_MIPFILTER, D3DTEXF_NONE);
+ hr = IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_ALPHABLENDENABLE, FALSE);
}
static void setupscenecoordssl (void)
int l1, l2;
int x, y, yy;
uae_u8 *sld, *p;
+ int bpp;
- if (!scanlines_ok)
+ if (!sltexture)
return;
if (osl1 == currprefs.gfx_filter_scanlines && osl3 == currprefs.gfx_filter_scanlinelevel && osl2 == currprefs.gfx_filter_scanlineratio && !force)
return;
+ bpp = t_depth < 32 ? 2 : 4;
osl1 = currprefs.gfx_filter_scanlines;
osl3 = currprefs.gfx_filter_scanlinelevel;
osl2 = currprefs.gfx_filter_scanlineratio;
}
sld = (uae_u8*)locked.pBits;
for (y = 0; y < required_sl_texture_h; y++)
- memset (sld + y * locked.Pitch, 0, required_sl_texture_w * 2);
+ memset (sld + y * locked.Pitch, 0, required_sl_texture_w * bpp);
for (y = 1; y < required_sl_texture_h; y += l1 + l2) {
for (yy = 0; yy < l2 && y + yy < required_sl_texture_h; yy++) {
for (x = 0; x < required_sl_texture_w; x++) {
- /* 16-bit, A4R4G4B4 */
uae_u8 sll = sl42;
- p = &sld[(y + yy) * locked.Pitch + (x * 2)];
- p[1] = (sl4 << 4) | (sll << 0);
- p[0] = (sll << 4) | (sll << 0);
+ p = &sld[(y + yy) * locked.Pitch + (x * bpp)];
+ if (bpp < 4) {
+ /* 16-bit, A4R4G4B4 */
+ p[1] = (sl4 << 4) | (sll << 0);
+ p[0] = (sll << 4) | (sll << 0);
+ } else {
+ /* 32-bit, A8R8G8B8 */
+ uae_u8 sll4 = sl42 | (sl42 << 4);
+ uae_u8 sll2 = sll | (sll << 4);
+ p[0] = sll4;
+ p[1] = sll2;
+ p[2] = sll2;
+ p[3] = sll2;
+ }
}
}
}
IDirect3DTexture9_UnlockRect (sltexture, 0);
- if (scanlines_ok) {
- /* enable alpha blending for scanlines */
- IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_ALPHABLENDENABLE, TRUE);
- IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_SRCBLEND, D3DBLEND_SRCALPHA);
- IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA);
- } else {
- IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_ALPHABLENDENABLE, FALSE);
- }
}
IDirect3DTexture9_Release (texture);
texture = NULL;
}
+ if (sprite) {
+ sprite->lpVtbl->Release (sprite);
+ sprite = NULL;
+ }
+ if (ledtexture) {
+ IDirect3DTexture9_Release (ledtexture);
+ ledtexture = NULL;
+ }
if (sltexture) {
IDirect3DTexture9_Release (sltexture);
sltexture = NULL;
return 0;
if (currprefs.gfx_filter_scanlines > 0)
createsltexture ();
+ createledtexture ();
vbsize = sizeof (struct TLVERTEX) * 4;
if (psPreProcess)
psPreProcess = 0;
psActive = 0;
resetcount = 0;
+ changed_prefs.leds_on_screen = currprefs.leds_on_screen = currprefs.leds_on_screen & ~STATUSLINE_TARGET;
}
const TCHAR *D3D_init (HWND ahwnd, int w_w, int w_h, int t_w, int t_h, int depth)
D3D_free ();
D3D_canshaders ();
d3d_enabled = 0;
- scanlines_ok = 0;
if (currprefs.gfx_filter != UAE_FILTER_DIRECT3D) {
_tcscpy (errmsg, L"D3D: not enabled");
return errmsg;
return errmsg;
}
+ changed_prefs.leds_on_screen = currprefs.leds_on_screen = currprefs.leds_on_screen | STATUSLINE_TARGET;
+
+ hr = D3DXCreateSprite (d3ddev, &sprite);
+ if (FAILED (hr)) {
+ write_log (L"LED D3DXSprite filaed: %s\n", D3D_ErrorString (hr));
+ }
+
createscanlines (1);
d3d_enabled = 1;
return 0;
hr = IDirect3DDevice9_SetTexture (d3ddev, 0, (IDirect3DBaseTexture9*)texture);
hr = IDirect3DDevice9_DrawPrimitive (d3ddev, D3DPT_TRIANGLESTRIP, 0, 2);
- if (scanlines_ok) {
- setupscenecoordssl ();
- settransformsl ();
- hr = IDirect3DDevice9_SetTexture (d3ddev, 0, (IDirect3DBaseTexture9*)sltexture);
- hr = IDirect3DDevice9_DrawPrimitive (d3ddev, D3DPT_TRIANGLESTRIP, 0, 2);
- }
+ }
+ if (sprite && (sltexture || ledtexture)) {
+ D3DXVECTOR3 v;
+ sprite->lpVtbl->Begin (sprite, D3DXSPRITE_ALPHABLEND);
+ if (sltexture) {
+ v.x = v.y = v.z = 0;
+ sprite->lpVtbl->Draw (sprite, sltexture, NULL, NULL, &v, 0xffffffff);
+ }
+ if (ledtexture) {
+ v.x = 0;
+ v.y = window_h - TD_TOTAL_HEIGHT;
+ v.z = 0;
+ sprite->lpVtbl->Draw (sprite, ledtexture, NULL, NULL, &v, 0xffffffff);
+ }
+ sprite->lpVtbl->End (sprite);
}
hr = IDirect3DDevice9_EndScene (d3ddev);
HRESULT hr;
RECT r;
+ if (currprefs.leds_on_screen & STATUSLINE_CHIPSET)
+ updateleds ();
+
hr = IDirect3DTexture9_UnlockRect (texture, 0);
r.left = 0; r.right = window_w;
r.top = 0; r.bottom = window_h;
int locksurface (LPDIRECTDRAWSURFACE7 surf, LPDDSURFACEDESC2 desc)
{
+ static int cnt = 50;
HRESULT ddrval;
desc->dwSize = sizeof (*desc);
while (FAILED (ddrval = IDirectDrawSurface7_Lock (surf, NULL, desc, DDLOCK_SURFACEMEMORYPTR | DDLOCK_WAIT, NULL))) {
if (FAILED (ddrval))
return 0;
} else if (ddrval != DDERR_SURFACEBUSY) {
- write_log (L"locksurface: %s\n", DXError (ddrval));
+ if (cnt > 0) {
+ cnt--;
+ write_log (L"locksurface %d: %s\n", cnt, DXError (ddrval));
+ }
return 0;
}
}
#define WIN32_LEAN_AND_MEAN
-#define _WIN32_WINNT 0x500
+#define _WIN32_WINNT 0x600
#include "sysconfig.h"
#include "sysdeps.h"
cnt++;
}
+/*
+ * Query the disk signature and partition style of the disk behind handle h.
+ *
+ * IOCTL_DISK_GET_DRIVE_LAYOUT_EX is tried first; *sign then receives the
+ * Mbr.Signature field and *pstyle the reported PartitionStyle.  If that
+ * fails, the older IOCTL_DISK_GET_DRIVE_LAYOUT is tried and the style is
+ * reported as PARTITION_STYLE_MBR.
+ *
+ * Returns 1 when either query succeeded, 0 when both failed (in which case
+ * *sign and *pstyle are left untouched).
+ */
+static int getsignfromhandle (HANDLE h, DWORD *sign, DWORD *pstyle)
+{
+    /* room for the layout header plus 32 extra partition entries */
+    DWORD bufsize = sizeof (DRIVE_LAYOUT_INFORMATION_EX) + sizeof (PARTITION_INFORMATION_EX) * 32;
+    DRIVE_LAYOUT_INFORMATION_EX *layout = xmalloc (bufsize);
+    DWORD returned;
+    int found = 1;
+
+    if (DeviceIoControl (h, IOCTL_DISK_GET_DRIVE_LAYOUT_EX, NULL, 0, layout, bufsize, &returned, NULL)) {
+        *sign = layout->Mbr.Signature;
+        *pstyle = layout->PartitionStyle;
+    } else if (DeviceIoControl (h, IOCTL_DISK_GET_DRIVE_LAYOUT, NULL, 0, layout, bufsize, &returned, NULL)) {
+        /* the same buffer is reinterpreted as the legacy (non-EX) layout */
+        DRIVE_LAYOUT_INFORMATION *legacy = (DRIVE_LAYOUT_INFORMATION*)layout;
+        *sign = legacy->Signature;
+        *pstyle = PARTITION_STYLE_MBR;
+    } else {
+        found = 0;
+    }
+    xfree (layout);
+    return found;
+}
+
+/*
+ * Check whether the physical disk behind handle hd carries a volume that
+ * Windows currently has mounted.
+ *
+ * Returns:
+ *    0  layout could not be read, the disk is PARTITION_STYLE_RAW, volume
+ *       enumeration could not be started, or no mounted volume matched
+ *    1  the disk is GPT, or a mounted non-NTFS volume lives on a disk with
+ *       the same MBR signature
+ *   -1  a mounted NTFS volume lives on a disk with the same MBR signature
+ *
+ * Matching is done by enumerating all volumes, asking each mounted one for
+ * its disk extents, and comparing the signature of every backing
+ * PhysicalDrive with the signature of hd.
+ */
+static int ismounted (HANDLE hd)
+{
+    HANDLE h;
+    TCHAR volname[MAX_DPATH];
+    int mounted;
+    DWORD sign, pstyle;
+
+    if (!getsignfromhandle (hd, &sign, &pstyle))
+        return 0;
+    if (pstyle == PARTITION_STYLE_GPT)
+        return 1;
+    if (pstyle == PARTITION_STYLE_RAW)
+        return 0;
+    mounted = 0;
+    h = FindFirstVolume (volname, sizeof volname / sizeof (TCHAR));
+    /* FindFirstVolume reports failure with INVALID_HANDLE_VALUE, not NULL;
+     * volname is not valid in that case, so do not enter the loop. */
+    if (h == NULL || h == INVALID_HANDLE_VALUE)
+        return 0;
+    while (!mounted) {
+        HANDLE d;
+        size_t len = _tcslen (volname);
+
+        /* strip the trailing backslash from the volume name before opening it */
+        if (len > 0 && volname[len - 1] == '\\')
+            volname[len - 1] = 0;
+        d = CreateFile (volname, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE,
+            NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+        if (d != INVALID_HANDLE_VALUE) {
+            DWORD isntfs, outsize, written;
+            isntfs = 0;
+            if (DeviceIoControl (d, FSCTL_IS_VOLUME_MOUNTED, NULL, 0, NULL, 0, &written, NULL)) {
+                VOLUME_DISK_EXTENTS *vde;
+                NTFS_VOLUME_DATA_BUFFER ntfs;
+                if (DeviceIoControl (d, FSCTL_GET_NTFS_VOLUME_DATA, NULL, 0, &ntfs, sizeof ntfs, &written, NULL)) {
+                    isntfs = 1;
+                }
+                outsize = sizeof (VOLUME_DISK_EXTENTS) + sizeof (DISK_EXTENT) * 32;
+                vde = xmalloc (outsize);
+                if (DeviceIoControl (d, IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS, NULL, 0, vde, outsize, &written, NULL)) {
+                    DWORD i; /* unsigned, like NumberOfDiskExtents */
+                    for (i = 0; i < vde->NumberOfDiskExtents; i++) {
+                        TCHAR pdrv[MAX_DPATH];
+                        HANDLE ph;
+                        _stprintf (pdrv, L"\\\\.\\PhysicalDrive%d", vde->Extents[i].DiskNumber);
+                        /* access mask 0: enough for the layout ioctls */
+                        ph = CreateFile (pdrv, 0, FILE_SHARE_READ | FILE_SHARE_WRITE,
+                            NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+                        if (ph != INVALID_HANDLE_VALUE) {
+                            DWORD sign2;
+                            if (getsignfromhandle (ph, &sign2, &pstyle)) {
+                                if (sign == sign2 && pstyle == PARTITION_STYLE_MBR)
+                                    mounted = isntfs ? -1 : 1;
+                            }
+                            CloseHandle (ph);
+                        }
+                    }
+                }
+                /* release the extent buffer; it used to leak once per mounted volume */
+                xfree (vde);
+            }
+            CloseHandle (d);
+        } else {
+            write_log (L"'%s': %d\n", volname, GetLastError ());
+        }
+        if (!FindNextVolume (h, volname, sizeof volname / sizeof (TCHAR)))
+            break;
+    }
+    FindVolumeClose (h);
+    return mounted;
+}
+
#define CA "Commodore\0Amiga\0"
-static int safetycheck (HANDLE *h, uae_u64 offset, uae_u8 *buf, int blocksize)
+static int safetycheck (HANDLE *h, const TCHAR *name, uae_u64 offset, uae_u8 *buf, int blocksize)
{
int i, j, blocks = 63, empty = 1;
DWORD outlen, high;
write_log (L"hd accepted (adide rdb detected at block %d)\n", j);
return -3;
}
- if (!memcmp (buf, "RDSK", 4)) {
+ if (!memcmp (buf, "RDSK", 4) || !memcmp (buf, "DRKS", 4)) {
if (do_rdbdump)
rdbdump (h, offset, buf, blocksize);
write_log (L"hd accepted (rdb detected at block %d)\n", j);
offset += blocksize;
}
if (!empty) {
- write_log (L"hd ignored, not empty and no RDB detected\n");
+ int mounted;
+ if (regexiststree (NULL, L"DangerousDrives")) {
+ UAEREG *fkey = regcreatetree (NULL, L"DangerousDrives");
+ int match = 0;
+ if (fkey) {
+ int idx = 0;
+ DWORD size, size2;
+ TCHAR tmp2[MAX_DPATH], tmp[MAX_DPATH];
+ for (;;) {
+ size = sizeof (tmp) / sizeof (TCHAR);
+ size2 = sizeof (tmp2) / sizeof (TCHAR);
+ if (!regenumstr (fkey, idx, tmp, &size, tmp2, &size2))
+ break;
+ if (!_tcscmp (tmp, name))
+ match = 1;
+ idx++;
+ }
+ regclosetree (fkey);
+ }
+ if (match) {
+ write_log (L"hd accepted, enabled in registry!\n");
+ return -7;
+ }
+ }
+ mounted = ismounted (h);
+ if (!mounted) {
+ write_log (L"hd accepted, not empty and not mounted in Windows\n");
+ return -8;
+ }
+ if (mounted < 0) {
+ write_log (L"hd ignored, NTFS partitions\n");
+ return 0;
+ }
+ if (harddrive_dangerous == 0x1234dead)
+ return -6;
+ write_log (L"hd ignored, not empty and no RDB detected or Windows mounted\n");
return 0;
}
write_log (L"hd accepted (empty)\n");
static TCHAR *hdz[] = { L"hdz", L"zip", L"rar", L"7z", NULL };
+#if 0
+static void getserial (HANDLE h) /* experimental probe of disk/volume identification ioctls on handle h; the enclosing #if 0 keeps it out of the build */
+{
+ DWORD outsize, written;
+ DISK_GEOMETRY_EX *out;
+ VOLUME_DISK_EXTENTS *vde;
+
+ DWORD serial, mcl, fsflags;
+ if (GetVolumeInformationByHandleW (h, NULL, 0, &serial, &mcl, &fsflags, NULL, 0)) { /* result is fetched but not used */
+ }
+
+ outsize = sizeof (DISK_GEOMETRY_EX) + 10 * (sizeof (DISK_DETECTION_INFO) + sizeof (DISK_PARTITION_INFO)); /* header plus extra room for the detection/partition info */
+ out = xmalloc (outsize);
+ if (DeviceIoControl (h, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX, NULL, 0, out, outsize, &written, NULL)) {
+ DISK_DETECTION_INFO *ddi = DiskGeometryGetDetect (out);
+ DISK_PARTITION_INFO *dpi = DiskGeometryGetPartition (out);
+ write_log (L""); /* logs an empty string; ddi and dpi are not used after this */
+ }
+ xfree (out);
+
+
+ outsize = sizeof (VOLUME_DISK_EXTENTS) + sizeof (DISK_EXTENT) * 10; /* header plus 10 extra extents */
+ vde = xmalloc (outsize);
+ if (DeviceIoControl (h, IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS, NULL, 0, vde, outsize, &written, NULL)) {
+ if (vde->NumberOfDiskExtents > 0)
+ write_log(L"%d\n", vde->Extents[0].DiskNumber); /* logs the disk number of the first extent only */
+ }
+ xfree (vde);
+}
+#endif
+
int hdf_open_target (struct hardfiledata *hfd, const TCHAR *pname)
{
HANDLE h = INVALID_HANDLE_VALUE;
hfd->physsize = hfd->virtsize = udi->size;
hfd->blocksize = udi->bytespersector;
if (hfd->offset == 0 && !hfd->drive_empty) {
- int sf = safetycheck (hfd->handle, 0, hfd->cache, hfd->blocksize);
+ int sf = safetycheck (hfd->handle, udi->device_path, 0, hfd->cache, hfd->blocksize);
if (sf > 0)
goto end;
+ if (sf == 0 && !hfd->readonly && harddrive_dangerous != 0x1234dead) {
+ write_log (L"'%s' forced read-only, safetycheck enabled\n", udi->device_path);
+ hfd->dangerous = 1;
+ // clear GENERIC_WRITE
+ CloseHandle (h);
+ h = CreateFile (udi->device_path,
+ GENERIC_READ,
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL, OPEN_EXISTING, flags, NULL);
+ hfd->handle = h;
+ if (h == INVALID_HANDLE_VALUE)
+ goto end;
+ if (!DeviceIoControl(h, FSCTL_ALLOW_EXTENDED_DASD_IO, NULL, 0, NULL, 0, &r, NULL))
+ write_log (L"WARNING: '%s' FSCTL_ALLOW_EXTENDED_DASD_IO returned %d\n", name, GetLastError ());
+ }
+
+#if 0
if (sf == 0 && hfd->warned >= 0) {
if (harddrive_dangerous != 0x1234dead) {
if (!hfd->warned)
}
} else {
hfd->warned = -1;
+#endif
}
hfd->handle_valid = HDF_HANDLE_WIN32;
hfd->emptyname = my_strdup (name);
hfd->cache = 0;
hfd->cache_valid = 0;
hfd->drive_empty = 0;
+ hfd->dangerous = 0;
}
int hdf_dup_target (struct hardfiledata *dhfd, const struct hardfiledata *shfd)
return 0;
if (shfd->handle_valid == HDF_HANDLE_WIN32) {
HANDLE duphandle;
- if (!DuplicateHandle (GetCurrentProcess(), shfd->handle, GetCurrentProcess() , &duphandle, 0, FALSE, DUPLICATE_SAME_ACCESS))
+ if (!DuplicateHandle (GetCurrentProcess (), shfd->handle, GetCurrentProcess () , &duphandle, 0, FALSE, DUPLICATE_SAME_ACCESS))
return 0;
dhfd->handle = duphandle;
dhfd->handle_valid = HDF_HANDLE_WIN32;
static int hdf_write_2 (struct hardfiledata *hfd, void *buffer, uae_u64 offset, int len)
{
DWORD outlen = 0;
+
if (hfd->readonly)
return 0;
+ if (hfd->dangerous)
+ return 0;
hfd->cache_valid = 0;
hdf_seek (hfd, offset);
poscheck (hfd, len);
while (len > 0) {
int maxlen = len > CACHE_SIZE ? CACHE_SIZE : len;
int ret = hdf_write_2(hfd, p, offset, maxlen);
+ if (ret < 0)
+ return ret;
got += ret;
if (ret != maxlen)
return got;
{
_tcscpy (udi->vendor_id, L"UAE");
_tcscpy (udi->product_id, L"DISK");
- _tcscpy (udi->product_rev, L"1.0");
+ _tcscpy (udi->product_rev, L"1.1");
_stprintf (udi->device_name, L"%s", udi->device_path);
udi->removablemedia = 1;
}
write_log (L"opening device '%s'\n", udi->device_path);
hDevice = CreateFile(
udi->device_path, // device interface name
- GENERIC_READ | GENERIC_WRITE, // dwDesiredAccess
+ GENERIC_READ, // dwDesiredAccess
FILE_SHARE_READ | FILE_SHARE_WRITE, // dwShareMode
NULL, // lpSecurityAttributes
OPEN_EXISTING, // dwCreationDistribution
continue;
}
nonzeropart++;
- if (pi->PartitionType != 0x76) {
- write_log (L"type not 0x76\n");
+ if (pi->PartitionType != 0x76 && pi->PartitionType != 0x30) {
+ write_log (L"type not 0x76 or 0x30\n");
continue;
}
memmove (udi, udi2, sizeof (*udi));
udi->offset = pi->StartingOffset.QuadPart;
udi->size = pi->PartitionLength.QuadPart;
write_log (L"used\n");
- if (safetycheck (hDevice, udi->offset, buffer, dg.BytesPerSector) <= 0) {
+ if (safetycheck (hDevice, udi->device_path, udi->offset, buffer, dg.BytesPerSector) <= 0) {
_stprintf (udi->device_name, L"HD_P#%d_%s", pi->PartitionNumber, orgname);
udi++;
(*index2)++;
write_log (L"no MBR partition table detected, checking for RDB\n");
}
- udi->dangerous = safetycheck (hDevice, 0, buffer, dg.BytesPerSector);
+ udi->dangerous = safetycheck (hDevice, udi->device_path, 0, buffer, dg.BytesPerSector);
if (udi->dangerous > 0)
goto end;
amipartfound:
return ret;
}
-
#endif
+
+
static int num_drives;
static int hdf_init2 (int force)
#ifdef WINDDK
buffer = VirtualAlloc (NULL, 65536, MEM_COMMIT, PAGE_READWRITE);
if (buffer) {
+ errormode = SetErrorMode (SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
memset (uae_drives, 0, sizeof (uae_drives));
num_drives = 0;
hIntDevInfo = SetupDiGetClassDevs (&GUID_DEVINTERFACE_DISK, NULL, NULL, DIGCF_PRESENT | DIGCF_INTERFACEDEVICE);
index++;
num_drives = index2;
}
- SetupDiDestroyDeviceInfoList(hIntDevInfo);
+ SetupDiDestroyDeviceInfoList (hIntDevInfo);
}
- errormode = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
- dwDriveMask = GetLogicalDrives();
+ dwDriveMask = GetLogicalDrives ();
for(drive = 'A'; drive <= 'Z'; drive++) {
if((dwDriveMask & 1) && (drive >= 'C' || usefloppydrives)) {
TCHAR tmp1[20], tmp2[20];
DWORD drivetype;
_stprintf (tmp1, L"%c:\\", drive);
- drivetype = GetDriveType(tmp1);
+ drivetype = GetDriveType (tmp1);
if (drivetype != DRIVE_REMOTE) {
_stprintf (tmp2, L"\\\\.\\%c:", drive);
GetDevicePropertyFromName (tmp2, index, &index2, buffer, 1);
}
dwDriveMask >>= 1;
}
- SetErrorMode(errormode);
+ SetErrorMode (errormode);
#if 0
hIntDevInfo = SetupDiGetClassDevs (&GUID_DEVCLASS_MTD, NULL, NULL, DIGCF_PRESENT);
if (hIntDevInfo != INVALID_HANDLE_VALUE) {
VirtualFree (buffer, 0, MEM_RELEASE);
}
num_drives = index2;
- write_log (L"Drive scan result: %d Amiga formatted drives detected\n", num_drives);
+ write_log (L"Drive scan result: %d drives detected\n", num_drives);
#endif
return num_drives;
}
return num_drives;
}
-TCHAR *hdf_getnameharddrive (int index, int flags, int *sectorsize)
+TCHAR *hdf_getnameharddrive (int index, int flags, int *sectorsize, int *dangerousdrive)
{
static TCHAR name[512];
TCHAR tmp[32];
TCHAR *dang = L"?";
TCHAR *rw = L"RW";
+ if (dangerousdrive)
+ *dangerousdrive = 0;
switch (uae_drives[index].dangerous)
{
+ case -6:
+ dang = L"[MBR]";
+ break;
+ case -7:
+ dang = L"[!]";
+ break;
+ case -8:
+ dang = L"[UNK]";
+ break;
case -9:
- dang = L"Empty";
+ dang = L"[EMPTY]";
break;
case -3:
- dang = L"CPRM";
+ dang = L"(CPRM)";
break;
case -2:
- dang = L"SRAM";
+ dang = L"(SRAM)";
break;
case -1:
- dang = L"RDB";
+ dang = L"(RDB)";
break;
case 0:
- dang = L"NON-EMPTY";
+ dang = L"[OS]";
+ if (dangerousdrive)
+ *dangerousdrive |= 1;
break;
}
- if (nomedia)
- dang = L"NO MEDIA";
- if (uae_drives[index].readonly)
+ if (nomedia) {
+ dang = L"[NO MEDIA]";
+ if (dangerousdrive)
+ *dangerousdrive &= ~1;
+ }
+ if (uae_drives[index].readonly) {
rw = L"RO";
+ if (dangerousdrive && !nomedia)
+ *dangerousdrive |= 2;
+ }
if (sectorsize)
*sectorsize = uae_drives[index].bytespersector;
static int progressdialogreturn;
static int progressdialogactive;
-static INT_PTR CALLBACK ProgressDialogProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam)
+static INT_PTR CALLBACK ProgressDialogProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam)
{
switch(msg)
{
extern HINSTANCE hInst;
#define COPY_CACHE_SIZE 1024*1024
-int harddrive_to_hdf(HWND hDlg, struct uae_prefs *p, int idx)
+int harddrive_to_hdf (HWND hDlg, struct uae_prefs *p, int idx)
{
HANDLE h = INVALID_HANDLE_VALUE, hdst = INVALID_HANDLE_VALUE;
void *cache = NULL;
if (hdst == INVALID_HANDLE_VALUE)
goto err;
li.QuadPart = size;
- ret = SetFilePointer(hdst, li.LowPart, &li.HighPart, FILE_BEGIN);
- if (ret == INVALID_FILE_SIZE && GetLastError() != NO_ERROR)
+ ret = SetFilePointer (hdst, li.LowPart, &li.HighPart, FILE_BEGIN);
+ if (ret == INVALID_FILE_SIZE && GetLastError () != NO_ERROR)
goto err;
- if (!SetEndOfFile(hdst))
+ if (!SetEndOfFile (hdst))
goto err;
li.QuadPart = 0;
- SetFilePointer(hdst, 0, &li.HighPart, FILE_BEGIN);
+ SetFilePointer (hdst, 0, &li.HighPart, FILE_BEGIN);
li.QuadPart = 0;
- SetFilePointer(h, 0, &li.HighPart, FILE_BEGIN);
+ SetFilePointer (h, 0, &li.HighPart, FILE_BEGIN);
progressdialogreturn = -1;
progressdialogactive = 1;
hwnd = CreateDialog (hUIDLL ? hUIDLL : hInst, MAKEINTRESOURCE (IDD_PROGRESSBAR), hDlg, ProgressDialogProc);
if (hwnd == NULL)
goto err;
- hwndprogress = GetDlgItem(hwnd, IDC_PROGRESSBAR);
- hwndprogresstxt = GetDlgItem(hwnd, IDC_PROGRESSBAR_TEXT);
+ hwndprogress = GetDlgItem (hwnd, IDC_PROGRESSBAR);
+ hwndprogresstxt = GetDlgItem (hwnd, IDC_PROGRESSBAR_TEXT);
ShowWindow (hwnd, SW_SHOW);
pct = 0;
cnt = 1000;
if (progressdialogreturn >= 0)
break;
if (cnt > 0) {
- SendMessage(hwndprogress, PBM_SETPOS, (WPARAM)pct, 0);
+ SendMessage (hwndprogress, PBM_SETPOS, (WPARAM)pct, 0);
_stprintf (tmp, L"%dM / %dM (%d%%)", (int)(written >> 20), (int)(size >> 20), pct);
- SendMessage(hwndprogresstxt, WM_SETTEXT, 0, (LPARAM)tmp);
+ SendMessage (hwndprogresstxt, WM_SETTEXT, 0, (LPARAM)tmp);
while (PeekMessage (&msg, 0, 0, 0, PM_REMOVE)) {
TranslateMessage (&msg);
DispatchMessage (&msg);
got = gotdst = 0;
li.QuadPart = sizecnt;
if (SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN) == INVALID_SET_FILE_POINTER) {
- DWORD err = GetLastError();
+ DWORD err = GetLastError ();
if (err != NO_ERROR) {
progressdialogreturn = 3;
break;
get = COPY_CACHE_SIZE;
if (sizecnt + get > size)
get = size - sizecnt;
- if (!ReadFile(h, cache, get, &got, NULL)) {
+ if (!ReadFile (h, cache, get, &got, NULL)) {
progressdialogreturn = 4;
break;
}
if (got > 0) {
if (written + got > size)
got = size - written;
- if (!WriteFile(hdst, cache, got, &gotdst, NULL)) {
+ if (!WriteFile (hdst, cache, got, &gotdst, NULL)) {
progressdialogreturn = 5;
break;
}
ok:
if (h != INVALID_HANDLE_VALUE)
- CloseHandle(h);
+ CloseHandle (h);
if (cache)
- VirtualFree(cache, 0, MEM_RELEASE);
+ VirtualFree (cache, 0, MEM_RELEASE);
if (hdst != INVALID_HANDLE_VALUE)
- CloseHandle(hdst);
+ CloseHandle (hdst);
return retcode;
}
#if defined(NATMEM_OFFSET)
#define BARRIER 32
+#define MAXZ3MEM 0x7F000000
static struct shmid_ds shmids[MAX_SHMID];
static int memwatchok = 0;
}
size64 = total64;
if (maxmem < 0)
- size64 = 0x7f000000;
+ size64 = MAXZ3MEM;
else if (maxmem > 0)
size64 = maxmem * 1024 * 1024;
if (os_64bit) {
- if (size64 > 0x7f000000)
- size64 = 0x7f000000;
+ if (size64 > MAXZ3MEM)
+ size64 = MAXZ3MEM;
} else {
- if (size64 > 0x7f000000)
- size64 = 0x7f000000;
+ if (size64 > MAXZ3MEM)
+ size64 = MAXZ3MEM;
}
if (size64 < 8 * 1024 * 1024)
size64 = 8 * 1024 * 1024;
int cnt, actual;
DWORD evtmask;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
sd->threadactive = 1;
uae_sem_post (&sd->sync_sem);
startwce(sd, &evtmask);
NULL);
if (hCom == INVALID_HANDLE_VALUE) {
write_log (L"SERIAL: failed to open '%s' err=%d\n", sername, GetLastError());
- closeser();
+ closeser ();
return 0;
}
//extern int warned_JIT_0xF10000;
//warned_JIT_0xF10000 = 0;
if (flashscreen > 0) {
- DX_Fill (0, 0, -1, 30, 0x000000);
- DX_Invalidate (0, 0, -1, 30);
+ //DX_Fill (0, 0, -1, 30, 0x000000); can't do anymore
+ //DX_Invalidate (0, 0, -1, 30);
flashscreen--;
if (flashscreen == 0)
picasso_refresh ();
#endif
}
-static int enumserialports_2(void)
+static int enumserialports_2 (int cnt)
{
// Create a device information set that will be the container for
// the device interfaces.
SP_DEVICE_INTERFACE_DATA ifcData;
DWORD dwDetDataSize = sizeof (SP_DEVICE_INTERFACE_DETAIL_DATA) + 256;
DWORD ii;
- int cnt = 0;
hDevInfo = SetupDiGetClassDevs (&GUID_CLASS_COMPORT, NULL, NULL, DIGCF_PRESENT | DIGCF_DEVICEINTERFACE);
if(hDevInfo == INVALID_HANDLE_VALUE)
return cnt;
}
-int enumserialports(void)
+int enumserialports (void)
{
int cnt, i, j;
TCHAR name[256];
TCHAR devname[1000];
write_log (L"Serial port enumeration..\n");
- cnt = enumserialports_2 ();
+
+ comports[0].dev = my_strdup (L"ENET:H");
+ comports[0].cfgname = my_strdup (comports[0].dev);
+ comports[0].name = my_strdup (L"NET (host)");
+ comports[1].dev = my_strdup (L"ENET:L");
+ comports[1].cfgname = my_strdup (comports[1].dev);
+ comports[1].name = my_strdup (L"NET (client)");
+
+ cnt = enumserialports_2 (2);
for (i = 0; i < 10; i++) {
_stprintf(name, L"COM%d", i);
if (!QueryDosDevice (name, devname, sizeof devname))
cnt++;
}
}
- if (isIPC (COMPIPENAME)) {
- comports[j].dev = xmalloc (100);
- _stprintf (comports[cnt].dev, L"\\\\.\\pipe\\%s", COMPIPENAME);
- comports[j].cfgname = my_strdup (COMPIPENAME);
- comports[j].name = my_strdup (COMPIPENAME);
- }
write_log (L"Serial port enumeration end\n");
return cnt;
}
-void sernametodev(TCHAR *sername)
+void sernametodev (TCHAR *sername)
{
int i;
sername[0] = 0;
}
-void serdevtoname(TCHAR *sername)
+void serdevtoname (TCHAR *sername)
{
int i;
for (i = 0; i < MAX_SERIAL_PORTS && comports[i].name; i++) {
#define P96DX 0
#define WINCURSOR 1
+static int multithreaded = 0;
+
#include "sysconfig.h"
#include "sysdeps.h"
static int interrupt_enabled;
int p96vblank;
+static uae_sem_t sem;
+static int thread_alive;
+
static uaecptr uaegfx_resname,
uaegfx_resid,
uaegfx_init,
return currprefs.gfx_pfullscreen && currprefs.gfx_pvsync;
}
+/*
+ * Request a flush of the RTG pixels.
+ *
+ * Without the `multithreaded` flag the flush is performed right here via
+ * flushpixels (). With it set, `sem` is posted instead and the flush is
+ * left to whoever waits on that semaphore.
+ */
+static void flushpixels_do (void)
+{
+	if (!multithreaded) {
+		flushpixels ();
+		return;
+	}
+	uae_sem_post (&sem);
+}
+
void picasso_handle_vsync (void)
{
static int vsynccnt;
return;
framecnt++;
- mouseupdate ();
+ if (!multithreaded)
+ mouseupdate ();
if (thisisvsync) {
- if (doskip () && p96skipmode == 0) {
- ;
+ if (multithreaded) {
+ uae_sem_post (&sem);
} else {
- flushpixels ();
+ if (doskip () && p96skipmode == 0) {
+ ;
+ } else {
+ flushpixels_do ();
+ }
+ gfx_unlock_picasso ();
}
- gfx_unlock_picasso ();
}
}
width = picasso96_state.Width;
height = picasso96_state.Height;
}
- flushpixels ();
+ flushpixels_do ();
} else {
write_log (L"ERROR - picasso_refresh() can't refresh!\n");
}
yy = 0;
for (y = dst_height - TD_TOTAL_HEIGHT; y < dst_height; y++) {
uae_u8 *buf = dst + y * pitch;
- draw_status_line_single (buf, picasso_vidinfo.pixbytes, yy, picasso96_state.Width, p96rc, p96gc, p96bc);
+ draw_status_line_single (buf, picasso_vidinfo.pixbytes, yy, picasso96_state.Width, p96rc, p96gc, p96bc, NULL);
yy++;
}
}
CallLib (ctx, get_long (4), -168); /* AddIntServer */
}
+/*
+ * Thread entry point: waits on `sem` and, for every wake-up, updates the
+ * mouse and flushes pixels - provided picasso_on is set and dx_islost ()
+ * reports false (dx_islost () is only consulted when picasso_on is set).
+ *
+ * thread_alive is set to 1 on entry and to -1 just before returning; the
+ * loop ends once thread_alive reads as 0, which is checked both before
+ * waiting and immediately after each wake-up.
+ *
+ * data  unused.
+ * Returns NULL.
+ */
+static void *picasso_copy (void *data)
+{
+	for (thread_alive = 1; thread_alive; ) {
+		uae_sem_wait (&sem);
+		if (!thread_alive)
+			break;
+		if (picasso_on && !dx_islost ()) {
+			mouseupdate ();
+			flushpixels ();
+		}
+	}
+	thread_alive = -1;
+	return NULL;
+}
+
+
static uaecptr uaegfx_card_install (TrapContext *ctx, uae_u32 extrasize)
{
uae_u32 functable, datatable, a2;
uaecptr findcardfunc, initcardfunc;
uaecptr exec = get_long (4);
- uaegfx_resid = ds (L"UAE Graphics Card 3.2");
+ uaegfx_resid = ds (L"UAE Graphics Card 3.3");
uaegfx_vblankname = ds (L"UAE Graphics Card VBLANK");
- uaegfx_vblankname = ds (L"UAE Graphics Card PORTS");
+ uaegfx_portsname = ds (L"UAE Graphics Card PORTS");
/* Open */
openfunc = here ();
if (currprefs.win32_rtgvblankrate >= -1)
initvblankirq (ctx, uaegfx_base);
+ if (multithreaded && thread_alive == 0) {
+ uae_sem_init (&sem, FALSE, FALSE);
+ uae_start_thread (L"rtg_copy", picasso_copy, NULL, NULL);
+ }
+
write_log (L"uaegfx.card %d.%d init @%08X\n", UAEGFX_VERSION, UAEGFX_REVISION, uaegfx_base);
return uaegfx_base;
}
extern struct picasso96_state_struct picasso96_state;
extern uae_u16 picasso96_pixel_format;
-extern int DX_InvertRect (int X, int Y, int Width, int Height);
extern void DX_SetPalette (int start, int count);
-extern void DX_Invalidate (int, int, int, int);
-extern int DX_Flip (void);
extern void picasso_enablescreen (int on);
extern void picasso_refresh (void);
extern void picasso_handle_vsync (void);
thp->arg = arg;
hThread = (HANDLE)_beginthreadex (NULL, 0, thread_init, thp, 0, &foo);
if (hThread) {
- SetThreadPriority (hThread, THREAD_PRIORITY_ABOVE_NORMAL);
if (name)
write_log (L"Thread '%s' started (%d)\n", name, hThread);
} else {
DWORD_PTR cpu_affinity = 1, cpu_paffinity = 1;
-void uae_set_thread_priority (int pri)
+/*
+ * Nudge a thread's priority by `pri` steps relative to its current value.
+ *
+ * tid  thread to adjust; NULL selects the calling thread.
+ * pri  signed delta added to the thread's current priority.
+ *
+ * The resulting priority is clamped to the range -1..1 before it is
+ * applied. The return value of SetThreadPriority () is not checked, as
+ * in the previous version.
+ */
+void uae_set_thread_priority (uae_thread_id *tid, int pri)
+{
+	/* NULL is not a valid thread handle for Get/SetThreadPriority;
+	 * the calling thread is addressed via the GetCurrentThread ()
+	 * pseudo-handle. The same handle is used for query and update so
+	 * the delta is applied to the priority of the thread being changed. */
+	HANDLE th = tid ? *tid : GetCurrentThread ();
+	int pri2 = GetThreadPriority (th);
+
+	/* A failed query returns THREAD_PRIORITY_ERROR_RETURN (0x7fffffff);
+	 * adding to it would overflow, so start from normal priority. */
+	if (pri2 == THREAD_PRIORITY_ERROR_RETURN)
+		pri2 = THREAD_PRIORITY_NORMAL;
+
+	pri2 += pri;
+	if (pri2 > 1)
+		pri2 = 1;
+	if (pri2 < -1)
+		pri2 = -1;
+	SetThreadPriority (th, pri2);
+}
#define IDS_FRONTEND 19
#define IDS_CHIPSET2 20
#define IDS_GAMEPORTS 21
+#define IDS_RTG 22
#define IDS_EXTTEXT 100
#define IDS_EXTACTUAL 101
#define IDS_SOUND 102
#define IDI_FILE 349
#define IDS_AUTOSCALE_RESIZE 350
#define IDS_PRINTER_ASCII 351
+#define IDD_DIALOG2 351
+#define IDD_RTG 351
#define IDS_PRINTER_EPSON 352
#define IDS_PRINTER_POSTSCRIPT_DETECTION 353
#define IDS_PRINTER_POSTSCRIPT_EMULATION 354
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NO_MFC 1
#define _APS_3D_CONTROLS 1
-#define _APS_NEXT_RESOURCE_VALUE 350
+#define _APS_NEXT_RESOURCE_VALUE 352
#define _APS_NEXT_COMMAND_VALUE 40045
#define _APS_NEXT_CONTROL_VALUE 1790
#define _APS_NEXT_SYMED_VALUE 101
RTEXT "Fullscreen:",IDC_STATIC,17,19,40,15,SS_CENTERIMAGE\r
END\r
\r
-IDD_MEMORY DIALOGEX 0, 0, 300, 239\r
+IDD_MEMORY DIALOGEX 0, 0, 300, 158\r
STYLE DS_LOCALEDIT | DS_SETFONT | DS_3DLOOK | DS_CONTROL | WS_CHILD\r
EXSTYLE WS_EX_CONTEXTHELP\r
FONT 8, "MS Sans Serif", 0, 0, 0x1\r
RTEXT "Z3 Fast:",IDC_Z3TEXT,139,51,30,10,SS_CENTERIMAGE\r
CONTROL "Slider1",IDC_Z3FASTMEM,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,179,47,60,20\r
EDITTEXT IDC_Z3FASTRAM,243,50,34,12,ES_CENTER | ES_READONLY\r
- RTEXT "Memory: [] Graphics card memory. Required for RTG (Picasso96) emulation.",IDC_GFXCARDTEXT,25,98,53,10,SS_NOTIFY | SS_CENTERIMAGE\r
- CONTROL "Slider1",IDC_P96MEM,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,88,94,60,20\r
- EDITTEXT IDC_P96RAM,152,97,34,12,ES_CENTER | ES_READONLY\r
- GROUPBOX "A3000/A4000 Advanced Memory Settings",-1,13,179,275,57\r
- RTEXT "Motherboard Fast RAM:",-1,39,194,129,10,SS_CENTERIMAGE\r
- CONTROL "",IDC_MBMEM1,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,180,190,59,20\r
- EDITTEXT IDC_MBRAM1,243,193,34,12,ES_CENTER | ES_READONLY\r
- RTEXT "Prosessor Slot Fast RAM:",-1,39,217,129,10,SS_CENTERIMAGE\r
- CONTROL "",IDC_MBMEM2,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,180,213,59,20\r
- EDITTEXT IDC_MBRAM2,243,216,34,12,ES_CENTER | ES_READONLY\r
- GROUPBOX "RTG Graphics Card Settings",-1,14,81,275,95\r
- CONTROL "Scale if smaller than display size setting",IDC_RTG_SCALE,\r
- "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,128,162,10\r
- CONTROL "Match host and RTG color depth if possible",IDC_RTG_MATCH_DEPTH,\r
- "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,116,163,10\r
- COMBOBOX IDC_RTG_8BIT,211,91,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
- COMBOBOX IDC_RTG_16BIT,211,106,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
- COMBOBOX IDC_RTG_24BIT,211,121,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
- COMBOBOX IDC_RTG_32BIT,211,136,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
- CONTROL "Always scale in windowed mode",IDC_RTG_SCALE_ALLOW,\r
- "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,140,162,10\r
- COMBOBOX IDC_RTG_SCALE_ASPECTRATIO,211,155,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
- RTEXT "Aspect ratio:",-1,153,156,52,10,SS_CENTERIMAGE\r
- RTEXT "Refresh rate:",-1,22,157,51,10,SS_CENTERIMAGE\r
- COMBOBOX IDC_RTG_VBLANKRATE,77,155,68,150,CBS_DROPDOWN | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
+ GROUPBOX "A3000/A4000 Advanced Memory Settings",-1,14,84,274,65\r
+ RTEXT "Motherboard Fast RAM:",-1,40,100,129,10,SS_CENTERIMAGE\r
+ CONTROL "",IDC_MBMEM1,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,181,96,59,20\r
+ EDITTEXT IDC_MBRAM1,244,99,34,12,ES_CENTER | ES_READONLY\r
+ RTEXT "Prosessor Slot Fast RAM:",-1,40,123,129,10,SS_CENTERIMAGE\r
+ CONTROL "",IDC_MBMEM2,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,181,119,59,20\r
+ EDITTEXT IDC_MBRAM2,244,122,34,12,ES_CENTER | ES_READONLY\r
END\r
\r
IDD_CPU DIALOGEX 0, 0, 300, 226\r
GROUPBOX "",IDC_PANEL_FRAME,112,4,303,247,NOT WS_VISIBLE\r
CONTROL "",IDC_PANELTREE,"SysTreeView32",TVS_HASLINES | TVS_SHOWSELALWAYS | TVS_NOSCROLL | WS_BORDER | WS_HSCROLL | WS_TABSTOP,5,5,101,248,WS_EX_CLIENTEDGE\r
GROUPBOX "",IDC_PANEL_FRAME_OUTER,110,2,307,251\r
- PUSHBUTTON "Reset",IDC_RESETAMIGA,6,259,47,14\r
- PUSHBUTTON "Quit",IDC_QUITEMU,57,259,47,14\r
+ PUSHBUTTON "Reset",IDC_RESETAMIGA,5,259,47,14\r
+ PUSHBUTTON "Quit",IDC_QUITEMU,55,259,47,14\r
DEFPUSHBUTTON "OK",IDOK,260,259,50,14\r
PUSHBUTTON "Cancel",IDCANCEL,313,259,50,14\r
PUSHBUTTON "Help",IDHELP,366,259,50,14,WS_DISABLED\r
- PUSHBUTTON "Restart",IDC_RESTARTEMU,109,259,47,14,NOT WS_VISIBLE\r
+ PUSHBUTTON "Restart",IDC_RESTARTEMU,106,259,47,14,NOT WS_VISIBLE\r
END\r
\r
IDD_PATHS DIALOGEX 0, 0, 300, 243\r
CTEXT "Enter address",IDC_DBG_ADDRINPUTTXT,20,1,100,10,SS_CENTERIMAGE | WS_TABSTOP\r
END\r
\r
+IDD_RTG DIALOGEX 0, 0, 300, 135\r
+STYLE DS_LOCALEDIT | DS_SETFONT | DS_3DLOOK | DS_CONTROL | WS_CHILD\r
+FONT 8, "MS Sans Serif", 0, 0, 0x1\r
+BEGIN\r
+ RTEXT "Memory: [] Graphics card memory. Required for RTG (Picasso96) emulation.",IDC_GFXCARDTEXT,25,44,53,10,SS_NOTIFY | SS_CENTERIMAGE\r
+ CONTROL "",IDC_P96MEM,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,88,40,60,20\r
+ EDITTEXT IDC_P96RAM,152,43,34,12,ES_CENTER | ES_READONLY\r
+ GROUPBOX "RTG Graphics Card Settings",-1,14,12,275,113\r
+ CONTROL "Scale if smaller than display size setting",IDC_RTG_SCALE,\r
+ "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,74,162,10\r
+ CONTROL "Match host and RTG color depth if possible",IDC_RTG_MATCH_DEPTH,\r
+ "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,62,163,10\r
+ COMBOBOX IDC_RTG_8BIT,211,29,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
+ COMBOBOX IDC_RTG_16BIT,211,44,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
+ COMBOBOX IDC_RTG_24BIT,211,59,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
+ COMBOBOX IDC_RTG_32BIT,211,74,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
+ CONTROL "Always scale in windowed mode",IDC_RTG_SCALE_ALLOW,\r
+ "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,86,162,10\r
+ COMBOBOX IDC_RTG_SCALE_ASPECTRATIO,211,101,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
+ RTEXT "Aspect ratio:",-1,153,102,52,10,SS_CENTERIMAGE\r
+ RTEXT "Refresh rate:",-1,22,103,51,10,SS_CENTERIMAGE\r
+ COMBOBOX IDC_RTG_VBLANKRATE,77,101,68,150,CBS_DROPDOWN | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
+END\r
+\r
\r
/////////////////////////////////////////////////////////////////////////////\r
//\r
//\r
\r
VS_VERSION_INFO VERSIONINFO\r
- FILEVERSION 1,6,1,0\r
- PRODUCTVERSION 1,6,1,0\r
+ FILEVERSION 1,6,2,0\r
+ PRODUCTVERSION 1,6,2,0\r
FILEFLAGSMASK 0x3fL\r
#ifdef _DEBUG\r
FILEFLAGS 0x1L\r
BLOCK "040904b0"\r
BEGIN\r
VALUE "FileDescription", "WinUAE"\r
- VALUE "FileVersion", "1.6.1"\r
+ VALUE "FileVersion", "1.6.2"\r
VALUE "InternalName", "WinUAE"\r
VALUE "LegalCopyright", "© 1996-2009 under the GNU Public License (GPL)"\r
VALUE "OriginalFilename", "WinUAE.exe"\r
VALUE "ProductName", "WinUAE"\r
- VALUE "ProductVersion", "1.6.1"\r
+ VALUE "ProductVersion", "1.6.2"\r
END\r
END\r
BLOCK "VarFileInfo"\r
IDS_FRONTEND "Frontend"\r
IDS_CHIPSET2 "Adv. Chipset"\r
IDS_GAMEPORTS "Game ports"\r
+ IDS_RTG "Display Card"\r
END\r
\r
STRINGTABLE \r
BEGIN\r
IDS_ROM_AVAILABLE "available"\r
IDS_ROM_UNAVAILABLE "unavailable"\r
- IDS_HARDDRIVESAFETYWARNING1 \r
- "Warning: The drive safety check is active. Selected drive is not empty and non-RDB partitioned."\r
IDS_NUMSG_KS68EC020 "The selected system ROM requires a 68020 with 24-bit addressing or higher CPU."\r
IDS_ROMSCANNOROMS "No supported system ROMs detected."\r
IDS_NUMSG_KICKREP "You need to have a floppy disk (image file) in DF0: to use the system ROM replacement."\r
IDS_HDCLONE_FAIL "Hard drive image file creation failed.\nError code %d:%d."\r
IDS_NUMSG_KS68030 "The selected system ROM requires a 68030 CPU."\r
IDS_NUMSG_EXPROMNEED "One of the following expansion boot ROMs is required:\n\n%s\n\nCheck the System ROM path in the Paths panel and click Rescan ROMs."\r
- IDS_HARDDRIVESAFETYWARNING2 \r
- "Warning: The drive safety check has been disabled, and non-empty and non-RDB partitioned hard disk(s) were detected."\r
IDS_SB_FAVORITENAME "Enter name..."\r
IDS_SB_CUSTOMEVENT "Enter custom event string.."\r
END\r
#undef APSTUDIO_READONLY_SYMBOLS
/////////////////////////////////////////////////////////////////////////////
-// English (U.S.) resources
+// English (Neutral) resources
#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
#ifdef _WIN32
-LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+LANGUAGE LANG_ENGLISH, SUBLANG_NEUTRAL
#pragma code_page(1252)
#endif //_WIN32
+/////////////////////////////////////////////////////////////////////////////
+//
+// Icon
+//
+
+// Icon with lowest ID value placed first to ensure application icon
+// remains consistent on all systems.
+IDI_APPICON ICON "winuae.ico"
+IDI_FLOPPY ICON "35floppy.ico"
+IDI_ABOUT ICON "amigainfo.ico"
+IDI_HARDDISK ICON "Drive.ico"
+IDI_CPU ICON "cpu.ico"
+IDI_GAMEPORTS ICON "joystick.ico"
+IDI_IOPORTS ICON "joystick.ico"
+IDI_INPUT ICON "joystick.ico"
+IDI_MISC1 ICON "misc.ico"
+IDI_MISC2 ICON "misc.ico"
+IDI_MOVE_UP ICON "move_up.ico"
+IDI_MOVE_DOWN ICON "move_dow.ico"
+IDI_AVIOUTPUT ICON "avioutput.ico"
+IDI_DISK ICON "Drive.ico"
+IDI_FOLDER ICON "folder.ico"
+IDI_SOUND ICON "sound.ico"
+IDI_DISPLAY ICON "screen.ico"
+IDI_ROOT ICON "root.ico"
+IDI_MEMORY ICON "chip.ico"
+IDI_QUICKSTART ICON "quickstart.ico"
+IDI_PATHS ICON "paths.ico"
+IDI_DISKIMAGE ICON "diskimage.ico"
+IDI_PORTS ICON "port.ico"
+IDI_CONFIGFILE ICON "configfile.ico"
+IDI_FILE ICON "file.ico"
+
/////////////////////////////////////////////////////////////////////////////
//
// Accelerator
RTEXT "Resolution:",IDC_STATIC,27,152,59,15,SS_CENTERIMAGE
COMBOBOX IDC_LORES,100,152,102,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP
CONTROL "Remove interlace artifacts",IDC_FLICKERFIXER,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,19,139,99,10
- RTEXT "Windowed:",IDC_STATIC,18,46,38,15,SS_CENTERIMAGE
- RTEXT "Fullscreen:",IDC_STATIC,17,19,38,15,SS_CENTERIMAGE
+ RTEXT "Windowed:",IDC_STATIC,17,46,40,15,SS_CENTERIMAGE
+ RTEXT "Fullscreen:",IDC_STATIC,17,19,40,15,SS_CENTERIMAGE
END
IDD_MEMORY DIALOGEX 0, 0, 300, 239
END
-/////////////////////////////////////////////////////////////////////////////
-//
-// Icon
-//
-
-// Icon with lowest ID value placed first to ensure application icon
-// remains consistent on all systems.
-IDI_APPICON ICON "winuae.ico"
-IDI_FLOPPY ICON "35floppy.ico"
-IDI_ABOUT ICON "amigainfo.ico"
-IDI_HARDDISK ICON "Drive.ico"
-IDI_CPU ICON "cpu.ico"
-IDI_GAMEPORTS ICON "joystick.ico"
-IDI_IOPORTS ICON "joystick.ico"
-IDI_INPUT ICON "joystick.ico"
-IDI_MISC1 ICON "misc.ico"
-IDI_MISC2 ICON "misc.ico"
-IDI_MOVE_UP ICON "move_up.ico"
-IDI_MOVE_DOWN ICON "move_dow.ico"
-IDI_AVIOUTPUT ICON "avioutput.ico"
-IDI_DISK ICON "Drive.ico"
-IDI_CONFIGFILE ICON "file.ico"
-IDI_FOLDER ICON "folder.ico"
-IDI_SOUND ICON "sound.ico"
-IDI_DISPLAY ICON "screen.ico"
-IDI_ROOT ICON "root.ico"
-IDI_MEMORY ICON "chip.ico"
-IDI_QUICKSTART ICON "quickstart.ico"
-IDI_PATHS ICON "paths.ico"
-
/////////////////////////////////////////////////////////////////////////////
//
// Version
//
VS_VERSION_INFO VERSIONINFO
- FILEVERSION 1,6,0,0
- PRODUCTVERSION 1,6,0,0
+ FILEVERSION 1,6,1,0
+ PRODUCTVERSION 1,6,1,0
FILEFLAGSMASK 0x3fL
#ifdef _DEBUG
FILEFLAGS 0x1L
BLOCK "040904b0"
BEGIN
VALUE "FileDescription", "WinUAE"
- VALUE "FileVersion", "1.6.0"
+ VALUE "FileVersion", "1.6.1"
VALUE "InternalName", "WinUAE"
VALUE "LegalCopyright", "© 1996-2009 under the GNU Public License (GPL)"
VALUE "OriginalFilename", "WinUAE.exe"
VALUE "ProductName", "WinUAE"
- VALUE "ProductVersion", "1.6.0"
+ VALUE "ProductVersion", "1.6.1"
END
END
BLOCK "VarFileInfo"
END
END
+
/////////////////////////////////////////////////////////////////////////////
//
// DESIGNINFO
IDS_SOUND_51 "5.1 Channels"
IDS_AUTOMATIC "Automatic"
IDS_ALL "All"
+ IDS_INPUTTOGGLE "Toggle"
END
STRINGTABLE
IDS_RES_SUPERHIRES "SuperHires"
END
-#endif // English (U.S.) resources
+#endif // English (Neutral) resources
/////////////////////////////////////////////////////////////////////////////
}
}
trimws (tmp2);
- return inputdevice_joyport_config (&changed_prefs, tmp2, num, type);
+ return inputdevice_joyport_config (&changed_prefs, tmp2, num, 0, type);
}
static int port_insert (int num, const TCHAR *name)
if (num < 0 || num >= MAX_JPORTS)
return FALSE;
if (_tcslen (name) == 0) {
- inputdevice_joyport_config (&changed_prefs, L"none", num, 0);
+ inputdevice_joyport_config (&changed_prefs, L"none", num, 0, 0);
return TRUE;
}
if (_tcslen (name) >= sizeof (tmp1) / sizeof (TCHAR) - 1)
}
if (full > 1)
m |= RP_SCREENMODE_FULLWINDOW;
+ if (p->gfx_filter_scanlines || p->gfx_linedbl == 2)
+ m |= RP_SCREENMODE_SCANLINES;
sm->dwScreenMode = m;
if (log_rp)
p->gfx_xcenter_size = sm->lClipWidth;
p->gfx_ycenter_size = sm->lClipHeight;
+ if (sm->dwScreenMode & RP_SCREENMODE_SCANLINES) {
+ if (p->gfx_linedbl > 0) {
+ p->gfx_linedbl = 2;
+ p->gfx_filter_scanlines = 0;
+ }
+ }
+
updatewinfsmode (p);
hwndset = 0;
}
feat = RP_FEATURE_POWERLED | RP_FEATURE_SCREEN1X | RP_FEATURE_FULLSCREEN;
feat |= RP_FEATURE_PAUSE | RP_FEATURE_TURBO | RP_FEATURE_VOLUME | RP_FEATURE_SCREENCAPTURE;
- feat |= RP_FEATURE_STATE;
+ feat |= RP_FEATURE_STATE | RP_FEATURE_SCANLINES;
if (!WIN32GFX_IsPicassoScreen ())
feat |= RP_FEATURE_SCREEN2X | RP_FEATURE_SCREEN4X;
RPSendMessagex (RPIPCGM_FEATURES, feat, 0, NULL, 0, &guestinfo, NULL);
bi.bmiHeader.biClrImportant = 0;
// Reserve memory for bitmap bits
- if (!(lpvBits = malloc (bi.bmiHeader.biSizeImage)))
+ if (!(lpvBits = xmalloc (bi.bmiHeader.biSizeImage)))
goto oops; // out of memory
// Have GetDIBits convert offscreen_bitmap to a DIB (device-independent bitmap):
PaStream *pastream;
HANDLE paevent;
int opacounter;
+ int pablocking;
};
#define ADJUST_SIZE 30
lines += 1.0;
if (have_sound) {
- scaled_sample_evtime_orig = 227.0 * (lines + maxvpos) * freq * CYCLE_UNIT / (float)sdp->obtainedfreq;
+ scaled_sample_evtime_orig = maxhpos * (lines + maxvpos) * freq * CYCLE_UNIT / (float)sdp->obtainedfreq;
scaled_sample_evtime = scaled_sample_evtime_orig;
}
}
static void finish_sound_buffer_pa (struct sound_data *sd, uae_u16 *sndbuffer)
{
struct sound_dp *s = sd->data;
- while (s->opacounter == s->pacounter && s->pastream && !sd->paused)
- WaitForSingleObject (s->paevent, 10);
- ResetEvent (s->paevent);
- s->opacounter = s->pacounter;
- memcpy (s->pasoundbuffer[s->patoggle], sndbuffer, sd->sndbufsize);
+ if (s->pablocking) {
+ if (s->paframesperbuffer != sd->sndbufsize / (sd->channels * 2)) {
+ write_log (L"sound buffer size mistmatch %d <> %d\n", s->paframesperbuffer, sd->sndbufsize / (sd->channels * 2));
+ } else {
+ Pa_WriteStream (s->pastream, sndbuffer, s->paframesperbuffer);
+ }
+ } else {
+ while (s->opacounter == s->pacounter && s->pastream && !sd->paused)
+ WaitForSingleObject (s->paevent, 10);
+ s->opacounter = s->pacounter;
+ memcpy (s->pasoundbuffer[s->patoggle], sndbuffer, sd->sndbufsize);
+ }
}
static int _cdecl portAudioCallback (const void *inputBuffer, void *outputBuffer,
struct sound_dp *s = sd->data;
if (framesPerBuffer != sd->sndbufsize / (sd->channels * 2)) {
- write_log (L"%d <> %d\n", framesPerBuffer, sd->sndbufsize / (sd->channels * 2));
+ write_log (L"sound buffer size mistmatch %d <> %d\n", framesPerBuffer, sd->sndbufsize / (sd->channels * 2));
} else {
memcpy (outputBuffer, s->pasoundbuffer[s->patoggle], sd->sndbufsize);
}
PaError err;
TCHAR *name;
TCHAR *errtxt;
+ int defaultrate = 0;
size = sd->sndbufsize;
s->paframesperbuffer = size;
- sd->sndbufsize = size * ch * 2;
sd->devicetype = SOUND_DEVICE_PA;
memset (&p, 0, sizeof p);
di = Pa_GetDeviceInfo (dev);
- p.channelCount = ch;
- p.device = dev;
- p.hostApiSpecificStreamInfo = NULL;
- p.sampleFormat = paInt16;
- p.suggestedLatency = di->defaultLowOutputLatency;
- p.hostApiSpecificStreamInfo = NULL;
for (;;) {
int err2;
+ p.channelCount = ch;
+ p.device = dev;
+ p.hostApiSpecificStreamInfo = NULL;
+ p.sampleFormat = paInt16;
+ p.suggestedLatency = di->defaultLowOutputLatency;
+ p.hostApiSpecificStreamInfo = NULL;
+
err = Pa_IsFormatSupported (NULL, &p, freq);
if (err == paFormatIsSupported)
break;
errtxt = au (Pa_GetErrorText (err));
- write_log (L"PASOUND: sound format not supported, ch=%d, rate=%d. %s\n", freq, ch, errtxt);
+ /* arguments follow the order of the labels in the format string: channel count first, then sample rate */
+ write_log (L"PASOUND: sound format not supported, ch=%d, rate=%d. %s\n", ch, freq, errtxt);
xfree (errtxt);
- if (freq < 48000) {
- freq = 48000;
- err = Pa_IsFormatSupported (NULL, &p, freq);
- if (err == paFormatIsSupported) {
- sd->freq = freq;
- break;
+ if (err == paInvalidChannelCount) {
+ if (ch > 2) {
+ ch = sd->channels = 2;
+ continue;
}
+ goto end;
+ }
+ if (freq < 44000 && err == paInvalidSampleRate) {
+ freq = 44000;
+ sd->freq = freq;
+ continue;
+ }
+ if (freq < 48000 && err == paInvalidSampleRate) {
+ freq = 48000;
+ sd->freq = freq;
+ continue;
}
- if (freq != di->defaultSampleRate) {
+ if (freq != di->defaultSampleRate && err == paInvalidSampleRate && !defaultrate) {
freq = di->defaultSampleRate;
- err = Pa_IsFormatSupported (NULL, &p, freq);
- if (err == paFormatIsSupported) {
- sd->freq = freq;
- break;
- }
+ sd->freq = freq;
+ defaultrate = 1;
+ continue;
}
- if (err2 != err) {
+ goto end;
+ }
+ sd->sndbufsize = size * ch * 2;
+// s->pablocking = 1;
+// err = Pa_OpenStream (&s->pastream, NULL, &p, freq, s->paframesperbuffer, paNoFlag, NULL, NULL);
+// if (err != paNoError) {
+ s->pablocking = 0;
+ err = Pa_OpenStream (&s->pastream, NULL, &p, freq, s->paframesperbuffer, paNoFlag, portAudioCallback, sd);
+ if (err != paNoError) {
errtxt = au (Pa_GetErrorText (err));
- write_log (L"PASOUND: sound format not supported, ch=%d, rate=%d. %s\n", freq, ch, errtxt);
+ write_log (L"PASOUND: Pa_OpenStream() error %d (%s)\n", err, errtxt);
xfree (errtxt);
+ goto end;
}
- goto end;
- }
- err = Pa_OpenStream (&s->pastream, NULL, &p, freq, s->paframesperbuffer, paNoFlag, portAudioCallback, sd);
- if (err != paNoError) {
- errtxt = au (Pa_GetErrorText (err));
- write_log (L"PASOUND: Pa_OpenStream() error %d (%s)\n", err, errtxt);
- xfree (errtxt);
- goto end;
- }
+// }
s->paevent = CreateEvent (NULL, FALSE, FALSE, NULL);
for (i = 0; i < 2; i++)
s->pasoundbuffer[i] = xcalloc (sd->sndbufsize, 1);
if (s->max_sndbufsize * 2 > s->dsoundbuf)
s->max_sndbufsize = s->dsoundbuf / 2;
+ sd->samplesize = sd->channels * 2;
recalc_offsets (sd);
int maxfreq = DSCaps.dwMaxSecondarySampleRate;
if (minfreq > freq && freq < 22050) {
freq = minfreq;
- changed_prefs.sound_freq = currprefs.sound_freq = freq;
+ sd->freq = freq;
write_log (L"DSSOUND: minimum supported frequency: %d\n", minfreq);
}
if (maxfreq < freq && freq > 44100) {
freq = maxfreq;
- changed_prefs.sound_freq = currprefs.sound_freq = freq;
+ sd->freq = freq;
write_log (L"DSSOUND: maximum supported frequency: %d\n", maxfreq);
}
}
sd->sndbufsize = bufsize;
sd->freq = freq;
sd->channels = channels;
- sd->samplesize = channels * 2;
sd->paused = 1;
if (sound_devices[index].type == SOUND_DEVICE_AL)
ret = open_audio_al (sd, index);
ret = open_audio_ds (sd, index);
else if (sound_devices[index].type == SOUND_DEVICE_PA)
ret = open_audio_pa (sd, index);
+ sd->samplesize = sd->channels * 2;
return ret;
}
void close_sound_device (struct sound_data *sd)
static int open_sound (void)
{
- int ret = 0, num;
+ int ret = 0, num, ch;
int size = currprefs.sound_maxbsiz;
if (!currprefs.produce_sound)
num = enumerate_sound_devices ();
if (currprefs.win32_soundcard >= num)
currprefs.win32_soundcard = changed_prefs.win32_soundcard = 0;
- ret = open_sound_device (sdp, currprefs.win32_soundcard, size, currprefs.sound_freq, get_audio_nativechannels ());
+ ch = get_audio_nativechannels (currprefs.sound_stereo);
+ ret = open_sound_device (sdp, currprefs.win32_soundcard, size, currprefs.sound_freq, ch);
if (!ret)
return 0;
+ currprefs.sound_freq = changed_prefs.sound_freq = sdp->freq;
+ if (ch != sdp->channels)
+ currprefs.sound_stereo = changed_prefs.sound_stereo = get_audio_stereomode (sdp->channels);
set_volume (currprefs.sound_volume, sdp->mute);
- if (get_audio_amigachannels () == 4)
+ if (get_audio_amigachannels (currprefs.sound_stereo) == 4)
sample_handler = sample16ss_handler;
else
- sample_handler = get_audio_ismono () ? sample16_handler : sample16s_handler;
+ sample_handler = get_audio_ismono (currprefs.sound_stereo) ? sample16_handler : sample16s_handler;
sdp->obtainedfreq = currprefs.sound_freq;
if (currprefs.turbo_emulation)
return;
if (currprefs.sound_stereo_swap_paula) {
- if (get_audio_nativechannels () == 2 || get_audio_nativechannels () == 4)
+ if (get_audio_nativechannels (currprefs.sound_stereo) == 2 || get_audio_nativechannels (currprefs.sound_stereo) == 4)
channelswap ((uae_s16*)paula_sndbuffer, sdp->sndbufsize / 2);
- else if (get_audio_nativechannels () == 6)
+ else if (get_audio_nativechannels (currprefs.sound_stereo) == 6)
channelswap6 ((uae_s16*)paula_sndbuffer, sdp->sndbufsize / 2);
}
#ifdef DRIVESOUND
TCHAR tmp[MAX_DPATH], *s1, *s2;
num = Pa_GetDeviceCount ();
+ if (num < 0) {
+ TCHAR *errtxt = au (Pa_GetErrorText (num));
+ write_log (L"PA: Pa_GetDeviceCount() failed: %08x (%s)\n", num, errtxt);
+ xfree (errtxt);
+ return;
+ }
for (j = 0; j < num; j++) {
const PaDeviceInfo *di;
const PaHostApiInfo *hai;
int blocking_sound_device (struct sound_data *sd);
-STATIC_INLINE void check_sound_buffers (void)
+STATIC_INLINE void check_sound_buffers (int outputsample, int doublesample)
{
+ static uae_u16 *paula_sndbufpt_prev;
+ uae_u16 *start;
+ int len;
+
+ start = paula_sndbufpt;
if (currprefs.sound_stereo == SND_4CH_CLONEDSTEREO) {
((uae_u16*)paula_sndbufpt)[0] = ((uae_u16*)paula_sndbufpt)[-2];
((uae_u16*)paula_sndbufpt)[1] = ((uae_u16*)paula_sndbufpt)[-1];
p[1] = sum >> 3;
paula_sndbufpt = (uae_u16 *)(((uae_u8 *)paula_sndbufpt) + 4 * 2);
}
+ if (outputsample == 0) {
+ paula_sndbufpt_prev = start;
+ return;
+ }
+ len = paula_sndbufpt - start;
+ if (outputsample < 0) {
+ int i;
+ uae_s16 *p1 = (uae_s16*)paula_sndbufpt_prev;
+ uae_s16 *p2 = (uae_s16*)start;
+ for (i = 0; i < len; i++)
+ p1[i] = (p1[i] + p2[i]) / 2;
+ paula_sndbufpt -= len;
+ }
if ((uae_u8*)paula_sndbufpt - (uae_u8*)paula_sndbuffer >= paula_sndbufsize) {
finish_sound_buffer ();
paula_sndbufpt = paula_sndbuffer;
}
+ if (doublesample) {
+ /* Emit the sample that was just produced a second time. len counts uae_u16 units, hence len * 2 bytes. */
+ memcpy (paula_sndbufpt, start, len * 2);
+ /* Step past the copy; without this the next sample overwrites the duplicate and the flush check below can never trigger. */
+ paula_sndbufpt += len;
+ if ((uae_u8*)paula_sndbufpt - (uae_u8*)paula_sndbuffer >= paula_sndbufsize) {
+ finish_sound_buffer ();
+ paula_sndbufpt = paula_sndbuffer;
+ }
+ }
}
STATIC_INLINE void clear_sound_buffers (void)
#define CD32 /* CD32 emulation */
#define CDTV /* CDTV emulation */
#define D3D /* D3D display filter support */
-#define OPENGL /* OpenGL display filter support */
+//#define OPENGL /* OpenGL display filter support */
#define PARALLEL_PORT /* parallel port emulation */
#define PARALLEL_DIRECT /* direct parallel port emulation */
#define SERIAL_PORT /* serial port emulation */
extern int uae_start_thread (TCHAR *name, void *(*f)(void *), void *arg, uae_thread_id *thread);
extern int uae_start_thread_fast (void *(*f)(void *), void *arg, uae_thread_id *thread);
extern void uae_end_thread (uae_thread_id *thread);
-extern void uae_set_thread_priority (int);
+extern void uae_set_thread_priority (uae_thread_id *, int);
#include "commpipe.h"
#include <shlwapi.h>
#include <dbghelp.h>
#include <float.h>
+#include <WtsApi32.h>
#include "resource"
void setmouseactive (int active)
{
+ //write_log (L"setmouseactive %d->%d\n", mouseactive, active);
if (active == 0)
releasecapture ();
if (mouseactive == active && active >= 0)
if (rp_isactive () && isfullscreen () == 0)
donotfocus = 0;
#endif
+ if (isfullscreen () > 0)
+ donotfocus = 0;
if (donotfocus) {
focus = 0;
mouseactive = 0;
{
case WM_SETFOCUS:
- winuae_active (hWnd, minimized);
+ winuae_active (hWnd, minimized);
minimized = 0;
dx_check ();
return 0;
case WM_KEYDOWN:
if (dinput_wmkey ((uae_u32)lParam))
- gui_display (-1);
+ inputdevice_add_inputcode (AKS_ENTERGUI, 1);
return 0;
case WM_LBUTTONUP:
return 0;
case WM_LBUTTONDOWN:
case WM_LBUTTONDBLCLK:
- if (!mouseactive && isfullscreen() <= 0 && !gui_active && (!mousehack_alive () || currprefs.input_tablet != TABLET_MOUSEHACK)) {
- setmouseactive (message == WM_LBUTTONDBLCLK ? 2 : 1);
+ if (!mouseactive && !gui_active && (!mousehack_alive () || currprefs.input_tablet != TABLET_MOUSEHACK || isfullscreen () > 0)) {
+ setmouseactive ((message == WM_LBUTTONDBLCLK || isfullscreen() > 0) ? 2 : 1);
} else if (dinput_winmouse () >= 0 && isfocus ()) {
setmousebuttonstate (dinput_winmouse (), 0, 1);
}
mx -= mouseposx;
my -= mouseposy;
- //write_log(L"%d %d %d %d\n", mx, my, mouseposx, mouseposy);
+ //write_log (L"%d %d %d %d %d %d %d\n", wm, mouseactive, focus, mx, my, mouseposx, mouseposy);
if (recapture && isfullscreen () <= 0) {
setmouseactive (1);
return 0;
int myy = (amigawin_rect.bottom - amigawin_rect.top) / 2;
mx = mx - mxx;
my = my - myy;
+ //write_log (L"%d:%dx%d\n", dinput_winmouse(), mx, my);
setmousestate (dinput_winmouse (), 0, mx, 0);
setmousestate (dinput_winmouse (), 1, my, 0);
}
}
} else if (num == 4) {
if (nm->code == NM_CLICK)
- gui_display (-1);
+ inputdevice_add_inputcode (AKS_ENTERGUI, 1);
else
uae_reset (0);
}
if ((HWND) wParam == hwndNextViewer)
hwndNextViewer = (HWND) lParam;
else if (hwndNextViewer != NULL)
- SendMessage(hwndNextViewer, message, wParam, lParam);
+ SendMessage (hwndNextViewer, message, wParam, lParam);
return 0;
case WM_DRAWCLIPBOARD:
clipboard_changed (hWnd);
- SendMessage(hwndNextViewer, message, wParam, lParam);
+ SendMessage (hwndNextViewer, message, wParam, lParam);
return 0;
+ case WM_WTSSESSION_CHANGE:
+ {
+ /* remembers mouseactive across a lock/disconnect so it is only restored if it was set before */
+ static int wasactive;
+ switch (wParam)
+ {
+ case WTS_CONSOLE_CONNECT:
+ case WTS_SESSION_UNLOCK:
+ if (wasactive)
+ winuae_active (hWnd, 0);
+ wasactive = 0;
+ break;
+ case WTS_CONSOLE_DISCONNECT:
+ case WTS_SESSION_LOCK:
+ wasactive = mouseactive;
+ winuae_inactive (hWnd, 0);
+ break;
+ }
+ }
+ /* handled: must not fall through into WT_PROXIMITY, which would interpret lParam as tablet proximity data */
+ return 0;
+
+
case WT_PROXIMITY:
{
send_tablet_proximity (LOWORD (lParam) ? 1 : 0);
case WM_NOTIFY:
case WM_ENABLE:
case WT_PACKET:
+ case WM_WTSSESSION_CHANGE:
return AmigaWindowProc (hWnd, message, wParam, lParam);
case WM_DISPLAYCHANGE:
return DefWindowProc (hWnd, message, wParam, lParam);
}
+/*
+ * Remove and dispatch every message PeekMessage currently reports.
+ * Returns 1 if at least one message was processed, 0 if there was none.
+ */
+int handle_msgpump (void)
+{
+	MSG pending;
+	int dispatched;
+
+	for (dispatched = 0; PeekMessage (&pending, 0, 0, 0, PM_REMOVE); dispatched = 1) {
+		TranslateMessage (&pending);
+		DispatchMessage (&pending);
+	}
+	return dispatched;
+}
+
void handle_events (void)
{
MSG msg;
TCHAR **argv = NULL, **argv2 = NULL, **argv3;
int argc, i;
+#if 0
#ifdef _DEBUG
{
int tmp = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG);
_CrtSetDbgFlag(tmp);
}
#endif
-
+#endif
if (!osdetect ())
return 0;
if (!dxdetect ())
else
default_freq = 60;
}
-#ifdef AVIOUTPUT
- AVIOutput_Initialize ();
-#endif
WIN32_HandleRegistryStuff ();
WIN32_InitLang ();
WIN32_InitHtmlHelp ();
const SHChangeNotifyEntry *pshcne);
typedef BOOL (CALLBACK *SHCHANGENOTIFYDEREGISTER)(ULONG ulID);
-void addnotifications (HWND hwnd, int remove)
+void addnotifications (HWND hwnd, int remove, int isgui)
{
static ULONG ret;
static HDEVNOTIFY hdn;
+ static int wtson;
LPITEMIDLIST ppidl;
SHCHANGENOTIFYREGISTER pSHChangeNotifyRegister;
SHCHANGENOTIFYDEREGISTER pSHChangeNotifyDeregister;
if (hdn)
UnregisterDeviceNotification (hdn);
hdn = 0;
+ if (os_winxp && wtson && !isgui)
+ WTSUnRegisterSessionNotification (hwnd);
+ wtson = 0;
} else {
DEV_BROADCAST_DEVICEINTERFACE NotificationFilter = { 0 };
if(pSHChangeNotifyRegister && SHGetSpecialFolderLocation (hwnd, CSIDL_DESKTOP, &ppidl) == NOERROR) {
NotificationFilter.dbcc_devicetype = DBT_DEVTYP_DEVICEINTERFACE;
NotificationFilter.dbcc_classguid = GUID_DEVINTERFACE_HID;
hdn = RegisterDeviceNotification (hwnd, &NotificationFilter, DEVICE_NOTIFY_WINDOW_HANDLE);
+ if (os_winxp && !isgui)
+ wtson = WTSRegisterSessionNotification (hwnd, NOTIFY_FOR_THIS_SESSION);
}
}
#define GETBDM(x) (((x) - ((x / 10000) * 10000)) / 100)
#define GETBDD(x) ((x) % 100)
-#define WINUAEPUBLICBETA 0
+#define WINUAEPUBLICBETA 1
-#define WINUAEBETA L""
-#define WINUAEDATE MAKEBD(2009, 6, 18)
+#define WINUAEBETA L"Beta 0"
+#define WINUAEDATE MAKEBD(2009, 7, 9)
#define WINUAEEXTRA L""
#define WINUAEREV L""
extern void send_tablet (int x, int y, int z, int pres, uae_u32 buttons, int flags, int ax, int ay, int az, int rx, int ry, int rz, RECT *r);
extern void send_tablet_proximity (int);
-void addnotifications (HWND hwnd, int remove);
+void addnotifications (HWND hwnd, int remove, int isgui);
int win32_hardfile_media_change (const TCHAR *drvname, int inserted);
extern int CheckRM (TCHAR *DriveName);
void systray (HWND hwnd, int remove);
DWORD dwHold;
BOOL result = FALSE;
- _stprintf(filename, L"%s.", DriveName);
- dwHold = GetFileAttributes(filename);
+ _stprintf (filename, L"%s.", DriveName);
+ dwHold = GetFileAttributes (filename);
if(dwHold != 0xFFFFFFFF)
result = TRUE;
return result;
return result;
}
-static void filesys_addexternals(void)
+/*
+ * Query IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS on handle h and return the
+ * DiskNumber of the first extent. Returns -1 when the ioctl fails or
+ * reports no extents. The result buffer has room for 32 additional
+ * DISK_EXTENT entries.
+ */
+static int getidfromhandle (HANDLE h)
+{
+	const DWORD bufsize = sizeof (VOLUME_DISK_EXTENTS) + sizeof (DISK_EXTENT) * 32;
+	VOLUME_DISK_EXTENTS *extents = xmalloc (bufsize);
+	DWORD returned;
+	int disknum = -1;
+	BOOL ok = DeviceIoControl (h, IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS, NULL, 0, extents, bufsize, &returned, NULL);
+
+	if (ok && extents->NumberOfDiskExtents > 0)
+		disknum = extents->Extents[0].DiskNumber;
+	xfree (extents);
+	return disknum;
+}
+
+/*
+ * Check whether the volume mounted at drive letter `drive` lives on the
+ * same disk as a hardfile that is already open as a real drive
+ * (HFD_FLAGS_REALDRIVE with a valid handle).
+ *
+ * Returns 1 when such a hardfile is found, 0 otherwise. 0 is also
+ * returned when the volume cannot be opened or its disk number cannot be
+ * determined.
+ */
+static int hfdcheck (TCHAR drive)
+{
+	HANDLE h;
+	TCHAR tmp[16];
+	int disknum, i;
+
+	_stprintf (tmp, L"\\\\.\\%c:", drive);
+	h = CreateFile (tmp, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+	if (h == INVALID_HANDLE_VALUE)
+		return 0;
+	disknum = getidfromhandle (h);
+	CloseHandle (h);
+	/*
+	 * getidfromhandle() reports failure as -1. Comparing that against
+	 * another failed lookup would "match" and make an unrelated drive
+	 * look like a configured harddrive, so an unknown disk never matches.
+	 */
+	if (disknum < 0)
+		return 0;
+	for (i = 0; i < MAX_FILESYSTEM_UNITS; i++) {
+		struct hardfiledata *hfd = get_hardfile_data (i);
+		if (!hfd || !(hfd->flags & HFD_FLAGS_REALDRIVE) || !hfd->handle_valid)
+			continue;
+		if (getidfromhandle (hfd->handle) == disknum)
+			return 1;
+	}
+	return 0;
+}
+
+static void filesys_addexternals (void)
{
int drive, drivetype;
UINT errormode;
TCHAR volumename[MAX_DPATH] = L"";
- TCHAR volumepath[6];
+ TCHAR volumepath[16];
DWORD dwDriveMask;
int drvnum = 0;
- errormode = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
- dwDriveMask = GetLogicalDrives();
+ errormode = SetErrorMode (SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
+ dwDriveMask = GetLogicalDrives ();
dwDriveMask >>= 2; // Skip A and B drives...
for(drive = 'C'; drive <= 'Z'; ++drive) {
- _stprintf(volumepath, L"%c:\\", drive);
+ _stprintf (volumepath, L"%c:\\", drive);
/* Is this drive-letter valid (it used to check for media in drive) */
if(dwDriveMask & 1) {
- TCHAR devname[100];
+ TCHAR devname[MAX_DPATH];
BOOL inserted = CheckRM (volumepath); /* Is there a disk inserted? */
int nok = FALSE;
int rw = 1;
drivetype = GetDriveType (volumepath);
+ if (inserted && drivetype != DRIVE_NO_ROOT_DIR && drivetype != DRIVE_UNKNOWN) {
+ if (hfdcheck (drive)) {
+ write_log (L"Drive %c:\\ ignored, was configured as a harddrive\n", drive);
+ continue;
+ }
+ }
devname[0] = 0;
for (;;) {
if (drivetype == DRIVE_CDROM && currprefs.win32_automount_cddrives) {
} /* if drivemask */
dwDriveMask >>= 1;
}
- SetErrorMode(errormode);
+ SetErrorMode (errormode);
}
{ UAE_FILTER_DIRECT3D, 0, 1, L"Direct3D", L"direct3d", 1, 0, 0, 0, 0 },
+#ifdef OPENGL
{ UAE_FILTER_OPENGL, 0, 1, L"OpenGL (unsupported)", L"opengl", 1, 0, 0, 0, 0 },
+#endif
{ UAE_FILTER_SCALE2X, 0, 2, L"Scale2X", L"scale2x", 0, 0, UAE_FILTER_MODE_16_16 | UAE_FILTER_MODE_32_32, 0, 0, 0 },
DDSURFACEDESC2 desc;
RECT sr, dr;
int y;
+ int lx, ly, sx;
if (!(currprefs.leds_on_screen & STATUSLINE_CHIPSET) || !tempsurf)
return;
- SetRect (&sr, 0, 0, dst_width, TD_TOTAL_HEIGHT);
- SetRect (&dr, 0, dst_height - TD_TOTAL_HEIGHT, dst_width, dst_height);
+ lx = dst_width;
+ ly = dst_height;
+ sx = lx;
+ if (sx > dst_width)
+ sx = dst_width;
+ SetRect (&sr, 0, 0, sx, TD_TOTAL_HEIGHT);
+ SetRect (&dr, lx - sx, ly - TD_TOTAL_HEIGHT, lx, ly);
DirectDraw_BlitRect (tempsurf, &sr, NULL, &dr);
if (locksurface (tempsurf, &desc)) {
- int yy = 0;
- for (y = dst_height - TD_TOTAL_HEIGHT; y < dst_height; y++) {
- uae_u8 *buf = (uae_u8*)desc.lpSurface + yy * desc.lPitch;
- draw_status_line_single (buf, dst_depth / 8, yy, dst_width, rc, gc, bc);
- yy++;
+ for (y = 0; y < TD_TOTAL_HEIGHT; y++) {
+ uae_u8 *buf = (uae_u8*)desc.lpSurface + y * desc.lPitch;
+ draw_status_line_single (buf, dst_depth / 8, y, sx, rc, gc, bc, NULL);
}
unlocksurface (tempsurf);
DirectDraw_BlitRect (NULL, &dr, tempsurf, &sr);
int readactive, writeactive;
DWORD actual;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 2);
sd->threadactive = 1;
uae_sem_post (&sd->sync_sem);
readactive = 0;
struct pcap_pkthdr *header;
const u_char *pkt_data;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
sd->threadactiver = 1;
uae_sem_post (&sd->sync_semr);
while (sd->threadactiver == 1) {
{
struct uaenetdatawin32 *sd = arg;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
sd->threadactivew = 1;
uae_sem_post (&sd->sync_semw);
while (sd->threadactivew == 1) {
yy = 0;
for (sy = dst_height - dxdata.statusheight; sy < dst_height; sy++) {
uae_u8 *buf = (uae_u8*)desc.lpSurface + yy * desc.lPitch;
- draw_status_line_single (buf, currentmode->current_depth / 8, yy, dst_width, p96rc, p96gc, p96bc);
+ draw_status_line_single (buf, currentmode->current_depth / 8, yy, dst_width, p96rc, p96gc, p96bc, NULL);
yy++;
}
unlocksurface (dxdata.statussurface);
hStatusWnd = 0;
}
if (hAmigaWnd) {
- addnotifications (hAmigaWnd, TRUE);
+ addnotifications (hAmigaWnd, TRUE, FALSE);
#ifdef OPENGL
OGL_free ();
#endif
}
flags &= ~DM_DDRAW;
}
+#if defined (OPENGL)
if (usedfilter->type == UAE_FILTER_OPENGL) {
flags |= DM_OPENGL;
flags &= ~DM_DDRAW;
}
+#endif
}
}
#endif
int ret, i;
inputdevice_unacquire ();
- reset_sound();
+ reset_sound ();
in_sizemove = 0;
updatewinfsmode (&currprefs);
+#ifdef D3D
D3D_free ();
+#endif
+#ifdef OPENGL
OGL_free ();
+#endif
if (!DirectDraw_Start (displayGUID))
return 0;
write_log (L"DirectDraw GUID=%s\n", outGUID (displayGUID));
if (changed_prefs.gfx_afullscreen == 1) {
if (currprefs.gfx_filter == UAE_FILTER_DIRECT3D && changed_prefs.gfx_filter != UAE_FILTER_DIRECT3D)
display_change_requested = 1;
+#ifdef OPENGL
if (currprefs.gfx_filter == UAE_FILTER_OPENGL && changed_prefs.gfx_filter != UAE_FILTER_OPENGL)
display_change_requested = 1;
- if (changed_prefs.gfx_filter == UAE_FILTER_DIRECT3D && currprefs.gfx_filter != UAE_FILTER_DIRECT3D)
- display_change_requested = 1;
if (changed_prefs.gfx_filter == UAE_FILTER_OPENGL && currprefs.gfx_filter != UAE_FILTER_OPENGL)
display_change_requested = 1;
+#endif
+ if (changed_prefs.gfx_filter == UAE_FILTER_DIRECT3D && currprefs.gfx_filter != UAE_FILTER_DIRECT3D)
+ display_change_requested = 1;
}
if (display_change_requested) {
update_gfxparams ();
clearscreen ();
if (currprefs.gfx_afullscreen != currprefs.gfx_pfullscreen ||
- (currprefs.gfx_afullscreen == 1 && (currprefs.gfx_filter == UAE_FILTER_DIRECT3D || currprefs.gfx_filter == UAE_FILTER_OPENGL))) {
+ (currprefs.gfx_afullscreen == 1 && (currprefs.gfx_filter == UAE_FILTER_DIRECT3D
+#ifdef OPENGL
+ || currprefs.gfx_filter == UAE_FILTER_OPENGL
+#endif
+ ))) {
mode = 1;
} else {
mode = modeswitchneeded (&wc);
GetWindowRect (hAmigaWnd, &amigawin_rect);
if (dxfs || d3dfs)
SetCursorPos (x + w / 2, y + h / 2);
- addnotifications (hAmigaWnd, FALSE);
+ addnotifications (hAmigaWnd, FALSE, FALSE);
if (hMainWnd != hAmigaWnd) {
if (!currprefs.headless)
ShowWindow (hMainWnd, firstwindow ? SW_SHOWDEFAULT : SW_SHOWNORMAL);
int WIN32GFX_SetPalette (void);
void WIN32GFX_WindowMove (void);
void WIN32GFX_WindowSize (void);;
+void DX_Invalidate (int x, int y, int width, int height);
RGBFTYPE WIN32GFX_FigurePixelFormats (RGBFTYPE colortype);
int WIN32GFX_AdjustScreenmode (struct MultiDisplay *md, uae_u32 *pwidth, uae_u32 *pheight, uae_u32 *ppixbits);
static int LOADSAVE_ID = -1, MEMORY_ID = -1, KICKSTART_ID = -1, CPU_ID = -1,
DISPLAY_ID = -1, HW3D_ID = -1, CHIPSET_ID = -1, CHIPSET2_ID = -1, SOUND_ID = -1, FLOPPY_ID = -1, DISK_ID = -1,
HARDDISK_ID = -1, IOPORTS_ID = -1, GAMEPORTS_ID = -1, INPUT_ID = -1, MISC1_ID = -1, MISC2_ID = -1, AVIOUTPUT_ID = -1,
- PATHS_ID = -1, QUICKSTART_ID = -1, ABOUT_ID = -1, FRONTEND_ID = -1;
+ PATHS_ID = -1, QUICKSTART_ID = -1, ABOUT_ID = -1, RTG_ID = -1, FRONTEND_ID = -1;
static HWND pages[MAX_C_PAGES];
#define MAX_IMAGETOOLTIPS 10
static HWND guiDlg, panelDlg, ToolTipHWND;
if (dynamic) {
result = vhd_create (init_path, hfsize);
} else {
- SetCursor (LoadCursor(NULL, IDC_WAIT));
+ SetCursor (LoadCursor (NULL, IDC_WAIT));
if ((hf = CreateFile (init_path, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) != INVALID_HANDLE_VALUE) {
if (sparse) {
DWORD ret;
*size2p = s2;
}
-static INT_PTR CALLBACK MemoryDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam)
+static INT_PTR CALLBACK RTGDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam)
{
int v;
TCHAR tmp[100];
switch (msg)
{
case WM_INITDIALOG:
- pages[MEMORY_ID] = hDlg;
- currentpage = MEMORY_ID;
- WIN32GUI_LoadUIString(IDS_ALL, tmp, sizeof tmp / sizeof (TCHAR));
+ pages[RTG_ID] = hDlg;
+ currentpage = RTG_ID;
+ WIN32GUI_LoadUIString(IDS_ALL, tmp, sizeof tmp / sizeof (TCHAR));
SendDlgItemMessage (hDlg, IDC_RTG_8BIT, CB_RESETCONTENT, 0, 0);
SendDlgItemMessage (hDlg, IDC_RTG_8BIT, CB_ADDSTRING, 0, (LPARAM)L"(8bit)");
SendDlgItemMessage (hDlg, IDC_RTG_8BIT, CB_ADDSTRING, 0, (LPARAM)L"8-bit (*)");
SendDlgItemMessage (hDlg, IDC_RTG_32BIT, CB_ADDSTRING, 0, (LPARAM)L"A8B8G8R8");
SendDlgItemMessage (hDlg, IDC_RTG_32BIT, CB_ADDSTRING, 0, (LPARAM)L"R8G8B8A8");
SendDlgItemMessage (hDlg, IDC_RTG_32BIT, CB_ADDSTRING, 0, (LPARAM)L"B8G8R8A8 (*)");
- SendDlgItemMessage (hDlg, IDC_CHIPMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_CHIP_MEM, MAX_CHIP_MEM));
- SendDlgItemMessage (hDlg, IDC_FASTMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_FAST_MEM, MAX_FAST_MEM));
- SendDlgItemMessage (hDlg, IDC_SLOWMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_SLOW_MEM, MAX_SLOW_MEM));
- SendDlgItemMessage (hDlg, IDC_Z3FASTMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_Z3_MEM, MAX_Z3_MEM));
SendDlgItemMessage (hDlg, IDC_P96MEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_P96_MEM, MAX_P96_MEM));
- SendDlgItemMessage (hDlg, IDC_MBMEM1, TBM_SETRANGE, TRUE, MAKELONG (MIN_MB_MEM, MAX_MB_MEM));
- SendDlgItemMessage (hDlg, IDC_MBMEM2, TBM_SETRANGE, TRUE, MAKELONG (MIN_MB_MEM, MAX_MB_MEM));
SendDlgItemMessage (hDlg, IDC_RTG_SCALE_ASPECTRATIO, CB_RESETCONTENT, 0, 0);
WIN32GUI_LoadUIString (IDS_DISABLED, tmp, sizeof tmp / sizeof (TCHAR));
SendDlgItemMessage (hDlg, IDC_RTG_SCALE_ASPECTRATIO, CB_ADDSTRING, 0, (LPARAM)tmp);
break;
case WM_HSCROLL:
- workprefs.chipmem_size = memsizes[msi_chip[SendMessage (GetDlgItem (hDlg, IDC_CHIPMEM), TBM_GETPOS, 0, 0)]];
- workprefs.bogomem_size = memsizes[msi_bogo[SendMessage (GetDlgItem (hDlg, IDC_SLOWMEM), TBM_GETPOS, 0, 0)]];
- workprefs.fastmem_size = memsizes[msi_fast[SendMessage (GetDlgItem (hDlg, IDC_FASTMEM), TBM_GETPOS, 0, 0)]];
- workprefs.z3fastmem_size = memsizes[msi_z3fast[SendMessage (GetDlgItem (hDlg, IDC_Z3FASTMEM), TBM_GETPOS, 0, 0)]];
- updatez3 (&workprefs.z3fastmem_size, &workprefs.z3fastmem2_size);
workprefs.gfxmem_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_P96MEM), TBM_GETPOS, 0, 0)]];
- workprefs.mbresmem_low_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_MBMEM1), TBM_GETPOS, 0, 0)]];
- workprefs.mbresmem_high_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_MBMEM2), TBM_GETPOS, 0, 0)]];
fix_values_memorydlg ();
values_to_memorydlg (hDlg);
enable_for_memorydlg (hDlg);
return FALSE;
}
+
+/*
+ * Dialog procedure for the memory page (MEMORY_ID).
+ *
+ * WM_INITDIALOG registers the page and sets the slider ranges, then
+ * continues into the WM_USER refresh. WM_HSCROLL copies the slider
+ * positions into workprefs and refreshes the dialog. Always returns FALSE.
+ */
+static INT_PTR CALLBACK MemoryDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam)
+{
+	/* non-zero while the dialog is being refreshed from WM_USER */
+	static int recursive = 0;
+
+	switch (msg)
+	{
+	case WM_INITDIALOG:
+		pages[MEMORY_ID] = hDlg;
+		currentpage = MEMORY_ID;
+		SendDlgItemMessage (hDlg, IDC_CHIPMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_CHIP_MEM, MAX_CHIP_MEM));
+		SendDlgItemMessage (hDlg, IDC_FASTMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_FAST_MEM, MAX_FAST_MEM));
+		SendDlgItemMessage (hDlg, IDC_SLOWMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_SLOW_MEM, MAX_SLOW_MEM));
+		SendDlgItemMessage (hDlg, IDC_Z3FASTMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_Z3_MEM, MAX_Z3_MEM));
+		SendDlgItemMessage (hDlg, IDC_MBMEM1, TBM_SETRANGE, TRUE, MAKELONG (MIN_MB_MEM, MAX_MB_MEM));
+		SendDlgItemMessage (hDlg, IDC_MBMEM2, TBM_SETRANGE, TRUE, MAKELONG (MIN_MB_MEM, MAX_MB_MEM));
+		/* fall through: initialisation continues with the same refresh WM_USER performs */
+
+	case WM_USER:
+		recursive++;
+		fix_values_memorydlg ();
+		values_to_memorydlg (hDlg);
+		enable_for_memorydlg (hDlg);
+		recursive--;
+		break;
+
+	case WM_HSCROLL:
+		workprefs.chipmem_size = memsizes[msi_chip[SendMessage (GetDlgItem (hDlg, IDC_CHIPMEM), TBM_GETPOS, 0, 0)]];
+		workprefs.bogomem_size = memsizes[msi_bogo[SendMessage (GetDlgItem (hDlg, IDC_SLOWMEM), TBM_GETPOS, 0, 0)]];
+		workprefs.fastmem_size = memsizes[msi_fast[SendMessage (GetDlgItem (hDlg, IDC_FASTMEM), TBM_GETPOS, 0, 0)]];
+		workprefs.z3fastmem_size = memsizes[msi_z3fast[SendMessage (GetDlgItem (hDlg, IDC_Z3FASTMEM), TBM_GETPOS, 0, 0)]];
+		updatez3 (&workprefs.z3fastmem_size, &workprefs.z3fastmem2_size);
+		/* NOTE(review): the MBMEM sliders are mapped through msi_gfx, as in the pre-split code - confirm this is the intended table */
+		workprefs.mbresmem_low_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_MBMEM1), TBM_GETPOS, 0, 0)]];
+		workprefs.mbresmem_high_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_MBMEM2), TBM_GETPOS, 0, 0)]];
+		fix_values_memorydlg ();
+		values_to_memorydlg (hDlg);
+		enable_for_memorydlg (hDlg);
+		break;
+
+	case WM_COMMAND:
+		if (recursive > 0)
+			break;
+		recursive++;
+		/* no command-driven controls on this page yet */
+		recursive--;
+		break;
+	}
+	return FALSE;
+}
+
static void addromfiles (UAEREG *fkey, HWND hDlg, DWORD d, TCHAR *path, int type)
{
int idx;
hdf->controller = HD_CONTROLLER_PCMCIA_SRAM;
break;
}
- if (!memcmp (tmp, "RDSK\0\0\0", 7) || (tmp[0] == 0x53 && tmp[1] == 0x10 && tmp[2] == 0x9b && tmp[3] == 0x13 && tmp[4] == 0 && tmp[5] == 0)) {
+ if (!memcmp (tmp, "RDSK\0\0\0", 7) || !memcmp (tmp, "DRKS\0\0", 6) || (tmp[0] == 0x53 && tmp[1] == 0x10 && tmp[2] == 0x9b && tmp[3] == 0x13 && tmp[4] == 0 && tmp[5] == 0)) {
// RDSK or ADIDE "encoded" RDSK
hdf->sectors = 0;
hdf->surfaces = 0;
ew (hDlg, IDC_HDF_CONTROLLER, FALSE);
index = -1;
for (i = 0; i < hdf_getnumharddrives (); i++) {
- SendDlgItemMessage (hDlg, IDC_HARDDRIVE, CB_ADDSTRING, 0, (LPARAM)hdf_getnameharddrive (i, 1, NULL));
- if (!_tcscmp (current_hfdlg.filename, hdf_getnameharddrive (i, 0, NULL)))
+ SendDlgItemMessage (hDlg, IDC_HARDDRIVE, CB_ADDSTRING, 0, (LPARAM)hdf_getnameharddrive (i, 1, NULL, NULL));
+ if (!_tcscmp (current_hfdlg.filename, hdf_getnameharddrive (i, 0, NULL, NULL)))
index = i;
}
if (index >= 0) {
if (oposn != posn && posn != CB_ERR) {
oposn = posn;
if (posn >= 0) {
+ int dang = 1;
+ hdf_getnameharddrive (posn, 1, NULL, &dang);
ew (hDlg, IDC_HARDDRIVE_IMAGE, TRUE);
ew (hDlg, IDOK, TRUE);
- ew (hDlg, IDC_HDF_RW, TRUE);
+ ew (hDlg, IDC_HDF_RW, !dang);
+ if (dang)
+ current_hfdlg.rw = FALSE;
ew (hDlg, IDC_HDF_CONTROLLER, TRUE);
hardfile_testrdb (hDlg, ¤t_hfdlg);
SendDlgItemMessage (hDlg, IDC_HDF_CONTROLLER, CB_SETCURSEL, current_hfdlg.controller, 0);
+ CheckDlgButton(hDlg, IDC_HDF_RW, current_hfdlg.rw);
}
}
if (HIWORD (wParam) == BN_CLICKED) {
}
}
if (posn != CB_ERR)
- _tcscpy (current_hfdlg.filename, hdf_getnameharddrive ((int)posn, 0, ¤t_hfdlg.blocksize));
+ _tcscpy (current_hfdlg.filename, hdf_getnameharddrive ((int)posn, 0, ¤t_hfdlg.blocksize, NULL));
current_hfdlg.rw = IsDlgButtonChecked (hDlg, IDC_HDF_RW);
posn = SendDlgItemMessage (hDlg, IDC_HDF_CONTROLLER, CB_GETCURSEL, 0, 0);
if (posn != CB_ERR)
while (uaefilters[i].name) {
switch (uaefilters[i].type)
{
+#if 0
#ifndef D3D
case UAE_FILTER_DIRECT3D:
nofilter = 1;
case UAE_FILTER_OPENGL:
nofilter = 1;
break;
+#endif
#endif
default:
nofilter = 0;
CN (MEMORY_ID);
CN (FLOPPY_ID);
CN (HARDDISK_ID);
+ CN (RTG_ID);
p = CreateFolderNode (TVhDlg, IDS_TREEVIEW_HOST, root, LOADSAVE_ID, CONFIG_TYPE_HOST);
CN (DISPLAY_ID);
doit = 1;
} else if (pBHdr && pBHdr->dbch_devicetype == DBT_DEVTYP_DEVICEINTERFACE) {
DEV_BROADCAST_DEVICEINTERFACE *dbd = (DEV_BROADCAST_DEVICEINTERFACE*)lParam;
- write_log (L"%s: %s\n", wParam == DBT_DEVICEREMOVECOMPLETE ? "Removed" : "Inserted",
+ write_log (L"%s: %s\n", wParam == DBT_DEVICEREMOVECOMPLETE ? L"Removed" : L"Inserted",
dbd->dbcc_name);
if (wParam == DBT_DEVICEREMOVECOMPLETE)
doit = 1;
PostQuitMessage (0);
return TRUE;
case WM_CLOSE:
- addnotifications (hDlg, 1);
+ addnotifications (hDlg, TRUE, TRUE);
DestroyWindow(hDlg);
if (dialogreturn < 0) {
dialogreturn = 0;
centerWindow (hDlg);
createTreeView (hDlg, currentpage);
updatePanel (hDlg, currentpage);
- addnotifications (hDlg, 0);
+ addnotifications (hDlg, FALSE, TRUE);
return TRUE;
case WM_DROPFILES:
if (dragdrop (hDlg, (HDROP)wParam, (gui_active || full_property_sheet) ? &workprefs : &changed_prefs, currentpage))
panelresource = getresource (IDD_PANEL);
LOADSAVE_ID = init_page (IDD_LOADSAVE, IDI_FILE, IDS_LOADSAVE, LoadSaveDlgProc, NULL, L"gui/configurations.htm");
MEMORY_ID = init_page (IDD_MEMORY, IDI_MEMORY, IDS_MEMORY, MemoryDlgProc, NULL, L"gui/ram.htm");
+ RTG_ID = init_page (IDD_RTG, IDI_DISPLAY, IDS_RTG, RTGDlgProc, NULL, L"gui/rtg.htm");
KICKSTART_ID = init_page (IDD_KICKSTART, IDI_MEMORY, IDS_KICKSTART, KickstartDlgProc, NULL, L"gui/rom.htm");
CPU_ID = init_page (IDD_CPU, IDI_CPU, IDS_CPU, CPUDlgProc, NULL, L"gui/cpu.htm");
DISPLAY_ID = init_page (IDD_DISPLAY, IDI_DISPLAY, IDS_DISPLAY, DisplayDlgProc, NULL, L"gui/display.htm");
Detect64BitPortabilityProblems="false"
DebugInformationFormat="4"
CallingConvention="2"
- CompileAs="1"
+ CompileAs="0"
DisableSpecificWarnings="4996"
EnablePREfast="false"
/>
<Tool
Name="VCLinkerTool"
AdditionalOptions="/MACHINE:I386"
- AdditionalDependencies="opengl32.lib glu32.lib ws2_32.lib ddraw.lib dxguid.lib winmm.lib comctl32.lib version.lib vfw32.lib msacm32.lib dsound.lib dinput8.lib d3d9.lib d3dx9.lib winio.lib setupapi.lib wininet.lib capsimg.lib dxerr9.lib shlwapi.lib zlibstat.lib libpng.lib lglcd.lib wpcap.lib packet.lib openal32.lib wintab32.lib portaudio_x86.lib freetype.lib"
+ AdditionalDependencies="opengl32.lib ws2_32.lib ddraw.lib dxguid.lib winmm.lib comctl32.lib version.lib msacm32.lib dsound.lib dinput8.lib d3d9.lib d3dx9.lib winio.lib setupapi.lib wininet.lib dxerr9.lib shlwapi.lib zlibstat.lib libpng.lib lglcd.lib wpcap.lib packet.lib openal32.lib wintab32.lib portaudio_x86.lib freetype.lib vfw32.lib wtsapi32.lib"
ShowProgress="0"
OutputFile="d:\amiga\winuae.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateManifest="false"
- DelayLoadDLLs="wpcap.dll;packet.dll;d3dx9_41.dll;openal32.dll;wintab32.dll;portaudio_x86.dll;freetype6.dll"
+ DelayLoadDLLs="wpcap.dll;packet.dll;d3dx9_41.dll;openal32.dll;wintab32.dll;portaudio_x86.dll;freetype6.dll;ws2_32.dll;msacm32.dll;wtsapi32.dll"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Debug/winuae.pdb"
SubSystem="2"
StackReserveSize="2621440"
StackCommitSize="2621440"
- LargeAddressAware="0"
+ LargeAddressAware="2"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
/>
WarningLevel="3"
SuppressStartupBanner="true"
CallingConvention="2"
- CompileAs="1"
+ CompileAs="0"
DisableSpecificWarnings="4996"
ForcedIncludeFiles=""
/>
/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="opengl32.lib glu32.lib ws2_32.lib ddraw.lib dxguid.lib winmm.lib comctl32.lib version.lib vfw32.lib msacm32.lib dsound.lib dinput8.lib d3d9.lib d3dx9.lib winio.lib setupapi.lib wininet.lib dxerr9.lib shlwapi.lib zlibstat.lib libpng.lib lglcd.lib wpcap.lib packet.lib openal32.lib wintab32.lib portaudio_x86.lib freetype.lib"
+ AdditionalDependencies="opengl32.lib ws2_32.lib ddraw.lib dxguid.lib winmm.lib comctl32.lib version.lib msacm32.lib dsound.lib dinput8.lib d3d9.lib d3dx9.lib winio.lib setupapi.lib wininet.lib dxerr9.lib shlwapi.lib zlibstat.lib libpng.lib lglcd.lib wpcap.lib packet.lib openal32.lib wintab32.lib portaudio_x86.lib freetype.lib vfw32.lib wtsapi32.lib"
OutputFile="d:\amiga\winuae.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
AdditionalLibraryDirectories=""
GenerateManifest="true"
AdditionalManifestDependencies=""
- DelayLoadDLLs="wpcap.dll;packet.dll;d3dx9_41.dll;openal32.dll;wintab32.dll;portaudio_x86.dll;freetype6.dll"
+ DelayLoadDLLs="wpcap.dll;packet.dll;d3dx9_41.dll;openal32.dll;wintab32.dll;portaudio_x86.dll;freetype6.dll;ws2_32.dll;msacm32.dll;wtsapi32.dll"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\Release/winuae.pdb"
SubSystem="2"
StackReserveSize="2621440"
StackCommitSize="2621440"
+ LargeAddressAware="2"
OptimizeReferences="2"
EnableCOMDATFolding="2"
OptimizeForWindows98="0"
/>
<Tool
Name="VCLinkerTool"
- AdditionalDependencies="opengl32.lib glu32.lib ws2_32.lib ddraw.lib dxguid.lib winmm.lib comctl32.lib version.lib vfw32.lib msacm32.lib dsound.lib dinput8.lib d3d9.lib d3dx9.lib winio.lib setupapi.lib wininet.lib dxerr9.lib shlwapi.lib zlibstat.lib libpng.lib lglcd.lib wpcap.lib packet.lib openal32.lib wintab32.lib portaudio_x86.lib freetype.lib"
+ AdditionalDependencies="opengl32.lib ws2_32.lib ddraw.lib dxguid.lib winmm.lib comctl32.lib version.lib msacm32.lib dsound.lib dinput8.lib d3d9.lib d3dx9.lib winio.lib setupapi.lib wininet.lib dxerr9.lib shlwapi.lib zlibstat.lib libpng.lib lglcd.lib wpcap.lib packet.lib openal32.lib wintab32.lib portaudio_x86.lib freetype.lib vfw32.lib wtsapi32.lib"
OutputFile="d:\amiga\winuae.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
AdditionalLibraryDirectories=""
GenerateManifest="true"
AdditionalManifestDependencies=""
- DelayLoadDLLs="wpcap.dll;packet.dll;d3dx9_41.dll;openal32.dll;wintab32.dll;portaudio_x86.dll;freetype6.dll"
+ DelayLoadDLLs="wpcap.dll;packet.dll;d3dx9_41.dll;openal32.dll;wintab32.dll;portaudio_x86.dll;freetype6.dll;opengl32.dll;ws2_32.dll;msacm32.dll;wtsapi32.dll"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\FullRelease/winuae.pdb"
SubSystem="2"
RelativePath="..\mman.c"
>
</File>
- <File
- RelativePath="..\opengl.c"
- >
- </File>
<File
RelativePath="..\parser.c"
>
>
</File>
<File
- RelativePath="..\resources\configfile.ico"
+ RelativePath=".\configfile.ico"
>
</File>
<File
- RelativePath=".\configfile.ico"
+ RelativePath="..\resources\configfile.ico"
>
</File>
<File
>
</File>
<File
- RelativePath="..\resources\file.ico"
+ RelativePath=".\file.ico"
>
</File>
<File
- RelativePath=".\file.ico"
+ RelativePath="..\resources\file.ico"
>
</File>
<File
>
</File>
<File
- RelativePath=".\port.ico"
+ RelativePath="..\resources\port.ico"
>
</File>
<File
- RelativePath="..\resources\port.ico"
+ RelativePath=".\port.ico"
>
</File>
<File
+
+Beta 1:
+
+- command line filename will be detected as a disk image if the file
+ doesn't have a known extension but is small enough and the first 3
+ bytes contain "DOS" (previously detected as hardfiles..)
+- check for windows messages more than once per frame, may reduce
+ latency (in worst case it may increase power requirements..)
+- marked DLLs that are rarely used/only in specific configurations
+ as delay loaded, may make startup slightly faster
+- programmed display modes had bad sound (161b4 sound update was not
+ fully complete.. most stupid 1.6.1 bug so far)
+- "Add PC drives at startup" does not anymore mount drives that are
+ also configured as real harddrives
+- inserting/ejecting USB input devices on the fly replaced all "<none>"
+ and all keyboard layouts with keyboard layout A (old bug)
+- portaudio fallback code added, unsupported samplerate = try 44000,
+ 48000 and default reported before disabling sound. Same with number
+ of channels, unsupported channels = fall back to stereo
+- real harddrive safetycheck modified, now all drives can be mounted as
+ long as drive does not have any Windows mounted partitions. Any drive
+ with one or more Windows mounted partitions is only available in
+ read-only mode. Command line parameter is now only needed if you want to
+ add drives that have Windows mounted FAT partitions. Drives with NTFS
+ partition(s) are never (there is a complex way..) allowed in
+ read-write mode.
+- SCSI (RDB hdf emulation) write commands return proper write protected
+ sense status if hdf is read-only
+- CD32 CD controller emulation improved, CDXL animations should run more
+ smoothly now, previously emulation couldn't load more than few sectors
+ before cd driver decided to read some previously loaded sectors again
+- added seek delays to CD32 emulation (CD32 drive has really slow seeks)
+- CD32 Lotus Trilogy's Lotus 3 finally loads (stupid loader overwriting
+ already loaded data if CD DMA sequence is not identical to real
+ hardware)
+- CD32 CD audio moved to separate thread, no more emulator pausing when
+ CD audio starts or track changes (possibly introduces other issues..)
+- distortion in sound was possible with short square wave samples (160)
+- final 16-bit sound samples always had lowest bit (or two lowest bits)
+ zero, now lowest bit(s) are duplicated from the real least significant
+ bit(s)
+- A590/A2091 SCSI ROM 7.0 was not accepted (debugging code..)
+- added new RTG configuration panel, more space for future options, also
+ old RTG setting panel was not really in correct place anymore
+- RDB filesystem loader still had fse_PatchFlags hack instead of
+ handling it 100% correctly (no functional change, at least with any
+ popular filesystem)
+- D3D 2D sprites (ID3DXSprite) used in D3D filter onscreen leds, correct
+ positioning, no scaling and free transparency. Scanline texture also
+ converted to 2D sprite, now works correctly with D3D filters (future
+ plan: convert all DirectDraw code to D3D 2D sprites)
+- accept also partition type 0x30 (another Amithlon like RDB drive
+ inside real PC partition)
+- OpenGL filter removed, totally obsolete now. Gone forever unless
+ someone updates it. (includes correct positioning and scaling)
+
+1.6.1
+
Beta 5: (1.6.1 RC)
- JIT indirect/direct state is remembered when JIT is switched off and
consoleopen = 0;
}
-static void writeconsole (const TCHAR *buffer)
+static void writeconsole_2 (const TCHAR *buffer)
{
DWORD temp;
}
}
+/* Write a string to the console via writeconsole_2(), passing it on in
+ * pieces of at most 256 characters. Strings of 256 characters or less are
+ * forwarded unchanged in a single call.
+ * NOTE(review): presumably the underlying console write cannot handle long
+ * strings in one call - confirm against writeconsole_2(). */
+static void writeconsole (const TCHAR *buffer)
+{
+ if (_tcslen (buffer) > 256) {
+ /* buffer is const, so chunk a private copy; p2 keeps the start of the
+ * copy so it can be released after p has been advanced */
+ TCHAR *p = my_strdup (buffer);
+ TCHAR *p2 = p;
+ while (_tcslen (p) > 256) {
+ /* temporarily terminate the string after 256 characters, output
+ * that piece, then restore the overwritten character */
+ TCHAR tmp = p[256];
+ p[256] = 0;
+ writeconsole_2 (p);
+ p[256] = tmp;
+ p += 256;
+ }
+ /* remaining tail (1..256 characters) */
+ writeconsole_2 (p);
+ xfree (p2);
+ } else {
+ writeconsole_2 (buffer);
+ }
+}
+
static void flushconsole (void)
{
if (consoleopen > 0) {
{
struct devstruct *dev = (struct devstruct*)devs;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
dev->thread_running = 1;
uae_sem_post (&dev->sync_sem);
for (;;) {
*
*/
+#define OPEN_LOG 0
+
#include "sysconfig.h"
#include "sysdeps.h"
static uae_u8 *restore_log (uae_u8 *src)
{
- //write_log (src);
+ /* src points at a NUL-terminated string; it is only echoed to the log
+ * when OPEN_LOG is enabled. Either way the string is skipped and the
+ * address following its terminator is returned. */
+#if OPEN_LOG > 0
+ TCHAR *s = utf8u (src);
+ write_log (L"%s\n", s);
+ xfree (s);
+#endif
src += strlen (src) + 1;
return src;
}
{
struct devstruct *dev = (struct devstruct*)devs;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
dev->thread_running = 1;
uae_sem_post (&dev->sync_sem);
for (;;) {
/* Defined traps */
static struct Trap traps[MAX_TRAPS];
-static unsigned int trap_count;
+static unsigned int trap_count = 1;
static const int trace_traps = 0;
{
ExtendedTrapContext *context = (ExtendedTrapContext *) arg;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
/* Wait until main thread is ready to switch to the
* this trap context. */
{
struct devstruct *dev = (struct devstruct*)devs;
- uae_set_thread_priority (2);
+ uae_set_thread_priority (NULL, 1);
dev->thread_running = 1;
uae_sem_post (&dev->sync_sem);
for (;;) {
return ZFILE_DISKIMAGE;
if (!memcmp (buf, "RDSK", 4))
return ZFILE_HDFRDB;
- if (!memcmp (buf, "DOS", 3))
- return ZFILE_HDF;
+ if (!memcmp (buf, "DOS", 3)) {
+ if (z->size < 4 * 1024 * 1024)
+ return ZFILE_DISKIMAGE;
+ else
+ return ZFILE_HDF;
+ }
if (ext != NULL) {
if (strcasecmp (ext, L"hdf") == 0)
return ZFILE_HDF;
}
l->zfdmask = mask;
} else {
+ struct _stat64 st;
l = zfile_create (NULL);
l->mode = my_strdup (mode);
l->name = my_strdup (name);
zfile_fclose (l);
return 0;
}
+ if (stat (l->name, &st) != -1)
+ l->size = st.st_size;
l->f = f;
}
return l;