From: Toni Wilen Date: Thu, 9 Jul 2009 15:08:40 +0000 (+0300) Subject: imported winuaesrc1620b0.zip X-Git-Tag: 2100~73 X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=9e246f609b499c31acb7e905bbe6cd2825c08b22;p=francis%2Fwinuae.git imported winuaesrc1620b0.zip --- diff --git a/a2091.c b/a2091.c index 7011fb30..246a8117 100644 --- a/a2091.c +++ b/a2091.c @@ -7,7 +7,7 @@ * */ -#define A2091_DEBUG 0 +#define A2091_DEBUG 1 #define A3000_DEBUG 0 #define WD33C93_DEBUG 0 @@ -142,6 +142,7 @@ static uae_u8 *rom; static int rombankswitcher, rombank; static int rom_size, rom_mask; +static int old_dmac = 0; static uae_u32 dmac_istr, dmac_cntr; static uae_u32 dmac_dawr; static uae_u32 dmac_acr; @@ -316,6 +317,17 @@ static void wd_cmd_sel_xfer (void) set_status (wd_phase, 1); } +static void dmacheck (void) +{ + dmac_acr++; + if (old_dmac && (dmac_cntr & CNTR_TCEN)) { + if (dmac_wtc == 0) + dmac_istr |= ISTR_E_INT; + else + dmac_wtc--; + } +} + static void do_dma (void) { if (currprefs.cs_cdtvscsi) @@ -329,7 +341,7 @@ static void do_dma (void) put_byte (dmac_acr, v); if (wd_dataoffset < sizeof wd_data) wd_data[wd_dataoffset++] = v; - dmac_acr++; + dmacheck (); if (status) break; } @@ -340,7 +352,7 @@ static void do_dma (void) if (wd_dataoffset < sizeof wd_data) wd_data[wd_dataoffset++] = v; status = scsi_send_data (SCSIID, v); - dmac_acr++; + dmacheck (); if (status) break; } @@ -662,6 +674,22 @@ static uae_u32 dmac_bget2 (uaecptr addr) case 0x43: v = dmac_cntr; break; + case 0x80: + if (old_dmac) + v = (dmac_wtc >> 24) & 0xff; + break; + case 0x81: + if (old_dmac) + v = (dmac_wtc >> 16) & 0xff; + break; + case 0x82: + if (old_dmac) + v = (dmac_wtc >> 8) & 0xff; + break; + case 0x83: + if (old_dmac) + v = (dmac_wtc >> 0) & 0xff; + break; case 0x91: v = wdscsi_getauxstatus (); break; @@ -696,8 +724,9 @@ static uae_u32 dmac_bget2 (uaecptr addr) break; case 0xe8: case 0xe9: - /* FLUSH */ - dmac_istr |= ISTR_FE_FLG; + /* FLUSH (new only) */ + if (!old_dmac && 
dmac_dma) + dmac_istr |= ISTR_FE_FLG; break; } #if A2091_DEBUG > 0 @@ -749,8 +778,11 @@ static void dmac_bput2 (uaecptr addr, uae_u32 b) dmac_acr |= b << 8; break; case 0x87: - dmac_acr &= 0xffffff01; - dmac_acr |= (b & ~ 1) << 0; + dmac_acr &= 0xffffff00; + dmac_acr |= b << 0; + dmac_acr &= ~1; + if (old_dmac) + dmac_acr &= ~3; break; case 0x8e: dmac_dawr &= 0x00ff; @@ -1287,7 +1319,7 @@ void a2091_init (void) memset (dmacmemory, 0xff, 100); ew (0x00, 0xc0 | 0x01 | 0x10); /* A590/A2091 hardware id */ - ew (0x04, 0x03); + ew (0x04, old_dmac ? 0x02 : 0x03); /* commodore's manufacturer id */ ew (0x10, 0x02); ew (0x14, 0x02); @@ -1305,7 +1337,6 @@ void a2091_init (void) roms[2] = 53; roms[3] = 56; roms[4] = -1; - roms[0] = 53; rombankswitcher = 0; rombank = 0; diff --git a/akiko.c b/akiko.c index 41539dd5..8b86dfa6 100644 --- a/akiko.c +++ b/akiko.c @@ -372,7 +372,7 @@ static uae_u32 cdrom_status1, cdrom_status2; static uae_u8 cdrom_status3; static uae_u32 cdrom_address1, cdrom_address2; static uae_u32 cdrom_longmask; -static uae_u32 cdrom_readmask_r, cdrom_readmask_w; +static uae_u32 cdrom_readmask; static uae_u8 cdrom_command_offset_complete; /* 0x19 */ static uae_u8 cdrom_command_offset_todo; /* 0x1d */ static uae_u8 cdrom_result_complete; /* 0x1a */ @@ -387,13 +387,15 @@ static int cdrom_toc_counter; static uae_u32 cdrom_toc_crc; static uae_u8 cdrom_toc_buffer[MAX_TOC_ENTRIES * 13]; static uae_u8 cdrom_toc_cd_buffer[4 + MAX_TOC_ENTRIES * 11]; +static uae_u8 qcode_buf[12]; +static int qcode_valid; static int cdrom_disk, cdrom_paused, cdrom_playing; static int cdrom_command_active; static int cdrom_command_length; static int cdrom_checksum_error; static int cdrom_data_offset, cdrom_speed, cdrom_sector_counter; -static int cdrom_current_sector; +static int cdrom_current_sector, cdrom_seek_delay; static int cdrom_data_end, cdrom_leadout; static int cdrom_audiotimeout; static int cdrom_led; @@ -407,6 +409,11 @@ static uae_u8 *sector_buffer_info_1, 
*sector_buffer_info_2; static int unitnum = -1; static int cdromok = 0; static int cd_hunt; +static volatile int mediachanged, mediacheckcounter; +static volatile int frame2counter; + +static smp_comm_pipe requests; +static volatile int akiko_thread_running; static void checkint (void) { @@ -461,16 +468,33 @@ static int lsn2msf (int sectors) return msf; } -static void cdaudiostop (void) +static void cdaudiostop_do (void) { - cdrom_playing = 0; - cdrom_paused = 0; + qcode_valid = 0; if (unitnum < 0) return; sys_command_cd_pause (DF_IOCTL, unitnum, 0); sys_command_cd_stop (DF_IOCTL, unitnum); sys_command_cd_pause (DF_IOCTL, unitnum, 1); +} + +static void cdaudiostop (void) +{ + cdrom_playing = 0; + cdrom_paused = 0; cdrom_audiotimeout = 0; + write_comm_pipe_u32 (&requests, 0x104, 1); +} + +static void cdaudioplay_do (void) +{ + uae_u32 startmsf = read_comm_pipe_u32_blocking (&requests); + uae_u32 endmsf = read_comm_pipe_u32_blocking (&requests); + uae_u32 scan = read_comm_pipe_u32_blocking (&requests); + qcode_valid = 0; + if (unitnum < 0) + return; + sys_command_cd_play (DF_IOCTL, unitnum, startmsf, endmsf, scan); } static uae_u32 last_play_end; @@ -500,7 +524,11 @@ static int cd_play_audio (uae_u32 startmsf, uae_u32 endmsf, int scan) #endif last_play_end = endmsf; cdrom_audiotimeout = 0; - return sys_command_cd_play (DF_IOCTL, unitnum, startmsf, endmsf, scan); + write_comm_pipe_u32 (&requests, 0x110, 0); + write_comm_pipe_u32 (&requests, startmsf, 0); + write_comm_pipe_u32 (&requests, endmsf, 0); + write_comm_pipe_u32 (&requests, scan, 1); + return 1; } @@ -513,9 +541,9 @@ static int cd_qcode (uae_u8 *d) if (d) memset (d, 0, 11); last_play_pos = 0; - buf = sys_command_cd_qcode (DF_IOCTL, unitnum); - if (!buf) + if (!qcode_valid) return 0; + buf = qcode_buf; as = buf[1]; if (as != 0x11 && as != 0x12 && as != 0x13 && as != 0x15) /* audio status ok? 
*/ return 0; @@ -664,22 +692,22 @@ static int sys_cddev_open (void) sys_command_close (DF_IOCTL, unitnum); return 1; } - if (!sys_command_ismedia(DF_IOCTL, unitnum, 0)) + if (!sys_command_ismedia (DF_IOCTL, unitnum, 0)) cd_hunt = 1; write_log (L"using drive %s (unit %d, media %d)\n", di2->label, unitnum, di2->media_inserted); /* make sure CD audio is not playing */ - cdaudiostop (); + cdaudiostop_do (); return 0; } /* close device */ static void sys_cddev_close (void) { - cdaudiostop (); + cdaudiostop_do (); sys_command_close (DF_IOCTL, unitnum); } -static int command_lengths[] = { 1,2,1,1,12,2,1,1,4,1,-1,-1,-1,-1,-1 }; +static int command_lengths[] = { 1,2,1,1,12,2,1,1,4,1,-1,-1,-1,-1,-1,-1 }; static void cdrom_return_data (int len) { @@ -705,6 +733,7 @@ static void cdrom_return_data (int len) write_log (L"%02X\n", checksum); #endif cdrom_result_complete += len + 1; + cdrom_result_complete &= 0xff; set_status (CDSTATUS_DATA_AVAILABLE); } @@ -780,7 +809,7 @@ static int cdrom_command_pause (void) if (cdrom_paused) return 2; cdrom_audiotimeout = 0; - sys_command_cd_pause (DF_IOCTL, unitnum,1); + write_comm_pipe_u32 (&requests, 0x102, 1); cdrom_paused = 1; return 2; } @@ -797,7 +826,7 @@ static int cdrom_command_unpause (void) if (!cdrom_playing) return 2; cdrom_paused = 0; - sys_command_cd_pause (DF_IOCTL, unitnum,0); + write_comm_pipe_u32 (&requests, 0x103, 1); return 2; } @@ -820,6 +849,15 @@ static int cdrom_command_multi (void) if (cdrom_command_buffer[7] == 0x80) { /* data read */ int cdrom_data_offset_end = msf2lsn (endpos); cdrom_data_offset = msf2lsn (seekpos); + cdrom_seek_delay = abs (cdrom_current_sector - cdrom_data_offset); + if (cdrom_seek_delay < 100) { + cdrom_seek_delay = 1; + } else { + cdrom_seek_delay /= 1000; + cdrom_seek_delay += 10; + if (cdrom_seek_delay > 100) + cdrom_seek_delay = 100; + } #if AKIKO_DEBUG_IO_CMD write_log (L"READ DATA %06X (%d) - %06X (%d) SPD=%dx PC=%08X\n", seekpos, cdrom_data_offset, endpos, cdrom_data_offset_end, 
cdrom_speed, M68K_GETPC); @@ -954,48 +992,64 @@ extern void encode_l2 (uae_u8 *p, int address); /* DMA transfer one CD sector */ static void cdrom_run_read (void) { - int i, j, sector; + int i, sector, inc; int read = 0; - uae_u8 buf[2352]; int sec; + static int seccnt; if (!(cdrom_longmask & 0x04000000)) return; - if (!cdrom_readmask_w) + if (!cdrom_readmask) { + cdrom_longmask &= ~0x08000000; + return; + } + if (!(cdrom_longmask & 0x08000000)) return; if (cdrom_data_offset < 0) return; - j = cdrom_sector_counter & 15; - if (unitnum >= 0 && (cdrom_readmask_w & (1 << j))) { - sector = cdrom_current_sector = cdrom_data_offset + cdrom_sector_counter; + if (unitnum < 0) + return; + + inc = 1; + // always use highest available slot or Lotus 3 (Lotus Trilogy) fails to load + for (seccnt = 15; seccnt >= 0; seccnt--) { + if (cdrom_readmask & (1 << seccnt)) + break; + } + if (cdrom_readmask & (1 << seccnt)) { + sector = cdrom_current_sector = cdrom_data_offset + cdrom_sector_counter; sec = sector - sector_buffer_sector_1; if (sector_buffer_sector_1 >= 0 && sec >= 0 && sec < SECTOR_BUFFER_SIZE) { if (sector_buffer_info_1[sec] != 0xff && sector_buffer_info_1[sec] != 0) { + uae_u8 buf[2352]; + memcpy (buf + 16, sector_buffer_1 + sec * 2048, 2048); encode_l2 (buf, sector + 150); buf[0] = 0; buf[1] = 0; buf[2] = 0; - buf[3] = cdrom_sector_counter; + buf[3] = cdrom_sector_counter & 31; for (i = 0; i < 2352; i++) - put_byte (cdrom_address1 + j * 4096 + i, buf[i]); - cdrom_readmask_r |= 1 << j; + put_byte (cdrom_address1 + seccnt * 4096 + i, buf[i]); + for (i = 0; i < 73 * 2; i++) + put_byte (cdrom_address1 + seccnt * 4096 + 0xc00 + i, 0); + cdrom_readmask &= ~(1 << seccnt); + set_status (CDSTATUS_DATASECTOR); + } else { + inc = 0; } if (sector_buffer_info_1[sec] != 0xff) sector_buffer_info_1[sec]--; - } else { - return; - } #if AKIKO_DEBUG_IO_CMD - write_log (L"read sector=%d, scnt=%d -> %d. 
%08X\n", - cdrom_data_offset, cdrom_sector_counter, sector, cdrom_address1 + j * 4096); + write_log (L"read sector=%d, scnt=%d -> %d. %08X\n", + cdrom_data_offset, cdrom_sector_counter, sector, cdrom_address1 + seccnt * 4096); #endif - cdrom_readmask_w &= ~(1 << j); + } else { + inc = 0; + } } - cdrom_sector_counter++; - if (cdrom_readmask_w == 0) - set_status (CDSTATUS_DATASECTOR); - + if (inc) + cdrom_sector_counter++; } static uae_sem_t akiko_sem; @@ -1003,8 +1057,6 @@ static int lastmediastate = 0; static void akiko_handler (void) { - static int mediacheckcnt; - if (unitnum < 0) return; if (cdrom_result_complete > cdrom_result_last_pos && cdrom_result_complete - cdrom_result_last_pos < 100) { @@ -1013,22 +1065,15 @@ static void akiko_handler (void) } if (cdrom_result_last_pos < cdrom_result_complete) return; - if (mediacheckcnt > 0) - mediacheckcnt--; - if (mediacheckcnt == 0) { - int media = sys_command_ismedia (DF_IOCTL, unitnum, 0); - mediacheckcnt = 312 * 50 * 2; - if (media != lastmediastate) { - write_log (L"media changed = %d\n", media); - lastmediastate = cdrom_disk = media; - cdrom_return_data (cdrom_command_media_status ()); - if (!media) - cd_hunt = 1; - cdrom_toc (); - /* do not remove! first try may fail */ - cdrom_toc (); - return; - } + if (mediachanged) { + mediachanged = 0; + cdrom_return_data (cdrom_command_media_status ()); + if (!lastmediastate) + cd_hunt = 1; + cdrom_toc (); + /* do not remove! 
first try may fail */ + cdrom_toc (); + return; } if (cdrom_toc_counter >= 0 && !cdrom_command_active && cdrom_dosomething) { cdrom_return_data (cdrom_return_toc_entry ()); @@ -1049,22 +1094,21 @@ static void akiko_internal (void) } } -static void do_hunt(void) +static void do_hunt (void) { int i; for (i = 0; i < MAX_TOTAL_DEVICES; i++) { - if (sys_command_ismedia(DF_IOCTL, i, 1) > 0) + if (sys_command_ismedia (DF_IOCTL, i, 1) > 0) break; } if (i == MAX_TOTAL_DEVICES) return; if (unitnum >= 0) { int ou = unitnum; - cdaudiostop(); unitnum = -1; - sys_command_close(DF_IOCTL, ou); + sys_command_close (DF_IOCTL, ou); } - if (sys_command_open(DF_IOCTL, i) > 0) { + if (sys_command_open (DF_IOCTL, i) > 0) { unitnum = i; cd_hunt = 0; write_log (L"CD32: autodetected unit %d\n", unitnum); @@ -1081,7 +1125,7 @@ void AKIKO_hsync_handler (void) if (cd_hunt) { static int huntcnt; if (huntcnt <= 0) { - do_hunt(); + do_hunt (); huntcnt = 312 * 50 * 2; } huntcnt--; @@ -1091,13 +1135,22 @@ void AKIKO_hsync_handler (void) if (framecounter <= 0) { if (cdrom_led) gui_cd_led (0, 1); - cdrom_run_read (); - framecounter = 1000000 / (74 * 75 * cdrom_speed); + if (cdrom_seek_delay <= 0) { + cdrom_run_read (); + } else { + cdrom_seek_delay--; + } + framecounter = 1000000 / (59 * 75 * cdrom_speed); set_status (CDSTATUS_FRAME); cdrom_status3++; } + + if (frame2counter > 0) + frame2counter--; + if (mediacheckcounter > 0) + mediacheckcounter--; + if (cdrom_playing) { - static int frame2counter; if (cdrom_audiotimeout > 0) { cdrom_audiotimeout--; if (cdrom_audiotimeout == 0) { @@ -1107,13 +1160,51 @@ void AKIKO_hsync_handler (void) cdrom_return_data (2); } } - frame2counter--; + } + akiko_internal (); + akiko_handler (); +} + +/* cdrom data buffering thread */ +static void *akiko_thread (void *null) +{ + int i; + uae_u8 *tmp1; + uae_u8 *tmp2; + int tmp3; + uae_u8 *p; + int offset; + int sector; + + while (akiko_thread_running || comm_pipe_has_data (&requests)) { + + if (comm_pipe_has_data 
(&requests)) { + uae_u32 b = read_comm_pipe_u32_blocking (&requests); + switch (b) + { + case 0x0102: // pause + sys_command_cd_pause (DF_IOCTL, unitnum, 1); + break; + case 0x0103: // unpause + sys_command_cd_pause (DF_IOCTL, unitnum, 0); + break; + case 0x0104: // stop + cdaudiostop_do (); + break; + case 0x0110: // do_play! + cdaudioplay_do (); + break; + } + } + if (frame2counter <= 0) { uae_u8 *s; - frame2counter = 312 * 50 * 2; + frame2counter = 312 * 50 / 2; s = sys_command_cd_qcode (DF_IOCTL, unitnum); if (s) { uae_u8 as = s[1]; + memcpy (qcode_buf, s, sizeof qcode_buf); + qcode_valid = 1; if (as == AUDIO_STATUS_IN_PROGRESS) { int lsn = msf2lsn ((s[5 + 4] << 16) | (s[6 + 4] << 8) | (s[7 + 4] << 0)); //write_log("%d %d (%d %d)\n", lsn, msf2lsn (last_play_end) - lsn, cdrom_leadout, msf2lsn (last_play_end)); @@ -1125,25 +1216,18 @@ void AKIKO_hsync_handler (void) } } } - } - akiko_internal (); - akiko_handler (); -} - -static volatile int akiko_thread_running; -/* cdrom data buffering thread */ -static void *akiko_thread (void *null) -{ - int i; - uae_u8 *tmp1; - uae_u8 *tmp2; - int tmp3; - uae_u8 *p; - int offset; - int sector; + if (mediacheckcounter <= 0) { + int media = sys_command_ismedia (DF_IOCTL, unitnum, 1); + mediacheckcounter = 312 * 50 * 2; + if (media != lastmediastate) { + write_log (L"media changed = %d\n", media); + lastmediastate = cdrom_disk = media; + mediachanged = 1; + cdaudiostop_do (); + } + } - while(akiko_thread_running) { uae_sem_wait (&akiko_sem); sector = cdrom_current_sector; for (i = 0; i < SECTOR_BUFFER_SIZE; i++) { @@ -1282,7 +1366,7 @@ static uae_u32 akiko_bget2 (uaecptr addr, int msg) break; case 0x20: case 0x21: - v = akiko_get_long (cdrom_readmask_r, addr - 0x20 + 2); + v = akiko_get_long (cdrom_readmask, addr - 0x20 + 2); break; case 0x24: case 0x25: @@ -1341,24 +1425,10 @@ static uae_u32 REGPARAM2 akiko_lget (uaecptr addr) v |= akiko_bget2 (addr + 1, 0) << 16; v |= akiko_bget2 (addr + 0, 0) << 24; if (addr < 0x30 && (addr 
!= 4 && addr != 8) && AKIKO_DEBUG_IO) - write_log (L"akiko_lget %08: %08X %08X\n", M68K_GETPC, addr, v); + write_log (L"akiko_lget %08X: %08X %08X\n", M68K_GETPC, addr, v); return v; } -static void write_readmask(uae_u16 v) -{ - int i, cnt; - - cnt = 0; - for (i = 0; i < 16; i++) { - if (v & (1 << i)) - cnt++; - } - cdrom_readmask_w |= v; - cdrom_readmask_r = 0; -} - - static void akiko_bput2 (uaecptr addr, uae_u32 v, int msg) { uae_u32 tmp; @@ -1411,6 +1481,7 @@ static void akiko_bput2 (uaecptr addr, uae_u32 v, int msg) case 0x12: case 0x13: akiko_put_long (&cdrom_address1, addr - 0x10, v); + cdrom_address1 &= ~0xffff; break; case 0x14: case 0x15: @@ -1439,10 +1510,12 @@ static void akiko_bput2 (uaecptr addr, uae_u32 v, int msg) cdrom_result_last_pos = v; break; case 0x20: - write_readmask(v <<8); - break; case 0x21: - write_readmask(v); + tmp = cdrom_readmask; + akiko_put_long (&cdrom_readmask, addr - 0x20 + 2, v); + cdrom_readmask |= tmp; + cdrom_readmask &= 0xffff; + cdrom_status1 &= ~CDSTATUS_DATASECTOR; break; case 0x24: case 0x25: @@ -1452,6 +1525,8 @@ static void akiko_bput2 (uaecptr addr, uae_u32 v, int msg) akiko_put_long (&cdrom_longmask, addr - 0x24, v); if ((cdrom_longmask & 0x04000000) && !(tmp & 0x04000000)) cdrom_sector_counter = 0; + if (!(cdrom_longmask & 0x08000000) && (tmp & 0x08000000)) + cdrom_readmask = 0; break; } else if (addr < 0x30) { break; @@ -1524,7 +1599,7 @@ static void akiko_cdrom_free (void) void akiko_reset (void) { - cdaudiostop (); + cdaudiostop_do (); nvram_read (); state = I2C_WAIT; bitcounter = -1; @@ -1538,6 +1613,7 @@ void akiko_reset (void) lastmediastate = 0; if (akiko_thread_running > 0) { + cdaudiostop (); akiko_thread_running = 0; while(akiko_thread_running == 0) Sleep (10); @@ -1546,30 +1622,6 @@ void akiko_reset (void) akiko_cdrom_free (); } -static uae_u8 patchdata1[]={0x0c,0x82,0x00,0x00,0x03,0xe8,0x64,0x00,0x00,0x46}; -static uae_u8 patchdata2[]={0x0c,0x82,0x00,0x00,0x03,0xe8,0x4e,0x71,0x4e,0x71}; - -static void 
patchrom (void) -{ - int i; - uae_u8 *p = (uae_u8*)extendedkickmemory; - - if (!p) - return; - for (i = 0; i < 524288 - sizeof (patchdata1); i++) { - if (!memcmp (p + i, patchdata1, sizeof(patchdata1))) { - p[i + 6] = 0x4e; - p[i + 7] = 0x71; - p[i + 8] = 0x4e; - p[i + 9] = 0x71; - write_log (L"extended rom delay loop patched at 0x%08x\n", i + 6 + 0xe00000); - return; - } - if (!memcmp (p + i, patchdata2, sizeof(patchdata2))) - return; - } - write_log (L"couldn't patch extended rom\n"); -} void akiko_free (void) { @@ -1593,7 +1645,6 @@ int akiko_init (void) sector_buffer_info_2 = xmalloc (SECTOR_BUFFER_SIZE); sector_buffer_sector_1 = -1; sector_buffer_sector_2 = -1; - patchrom (); } } uae_sem_init (&akiko_sem, 0, 1); @@ -1603,6 +1654,7 @@ int akiko_init (void) } if (cdromok && !akiko_thread_running) { akiko_thread_running = 1; + init_comm_pipe (&requests, 100, 1); uae_start_thread (L"akiko", akiko_thread, 0, NULL); } return 1; @@ -1634,7 +1686,7 @@ uae_u8 *save_akiko(int *len) save_u8 (cdrom_command_offset_todo); save_u8 (0); save_u8 (cdrom_result_last_pos); - save_u16 ((uae_u16)cdrom_readmask_w); + save_u16 ((uae_u16)cdrom_readmask); save_u16 (0); save_u32 (cdrom_longmask); save_u32 (0); @@ -1666,7 +1718,7 @@ uae_u8 *save_akiko(int *len) return dstbak; } -uae_u8 *restore_akiko(uae_u8 *src) +uae_u8 *restore_akiko (uae_u8 *src) { uae_u32 v; int i; @@ -1692,7 +1744,7 @@ uae_u8 *restore_akiko(uae_u8 *src) cdrom_command_offset_todo = restore_u8 (); restore_u8 (); cdrom_result_last_pos = restore_u8 (); - cdrom_readmask_w = restore_u16 (); + cdrom_readmask = restore_u16 (); restore_u16 (); cdrom_longmask = restore_u32 (); restore_u32 (); @@ -1728,17 +1780,20 @@ uae_u8 *restore_akiko(uae_u8 *src) return src; } -void restore_akiko_finish(void) +void restore_akiko_finish (void) { if (!currprefs.cs_cd32cd) return; akiko_c2p_do (); - sys_command_cd_pause (DF_IOCTL, unitnum, 0); - sys_command_cd_stop (DF_IOCTL, unitnum); - sys_command_cd_pause (DF_IOCTL, unitnum, 1); + 
write_comm_pipe_u32 (&requests, 0x102, 1); // pause + write_comm_pipe_u32 (&requests, 0x104, 1); // stop + write_comm_pipe_u32 (&requests, 0x103, 1); // unpause if (cdrom_playing) { - sys_command_cd_pause (DF_IOCTL, unitnum, 0); - sys_command_cd_play (DF_IOCTL, unitnum, last_play_pos, last_play_end, 0); + write_comm_pipe_u32 (&requests, 0x103, 1); // unpause + write_comm_pipe_u32 (&requests, 0x110, 0); // play + write_comm_pipe_u32 (&requests, last_play_pos, 0); + write_comm_pipe_u32 (&requests, last_play_end, 0); + write_comm_pipe_u32 (&requests, 0, 1); } } @@ -1747,11 +1802,11 @@ void restore_akiko_finish(void) void akiko_entergui (void) { if (cdrom_playing) - sys_command_cd_pause (DF_IOCTL, unitnum, 1); + write_comm_pipe_u32 (&requests, 0x102, 1); } void akiko_exitgui (void) { if (cdrom_playing) - sys_command_cd_pause (DF_IOCTL, unitnum, 0); + write_comm_pipe_u32 (&requests, 0x103, 1); } diff --git a/audio.c b/audio.c index 8662d4a4..b1b338f8 100644 --- a/audio.c +++ b/audio.c @@ -280,7 +280,19 @@ static float next_sample_evtime; typedef uae_s8 sample8_t; #define DO_CHANNEL_1(v, c) do { (v) *= audio_channel[c].vol; } while (0) #define SBASEVAL16(logn) ((logn) == 1 ? 
SOUND16_BASE_VAL >> 1 : SOUND16_BASE_VAL) -#define FINISH_DATA(data, b, logn) do { if (14 - (b) + (logn) > 0) (data) >>= 14 - (b) + (logn); else (data) <<= (b) - 14 - (logn); } while (0); + +STATIC_INLINE int FINISH_DATA (int data, int bits, int logn) +{ + if (14 - bits + logn > 0) { + data >>= 14 - bits + logn; + } else { + int shift = bits - 14 - logn; + int right = data & ((1 << shift) - 1); + data <<= shift; + data |= right; + } + return data; +} static uae_u32 right_word_saved[SOUND_MAX_DELAY_BUFFER]; static uae_u32 left_word_saved[SOUND_MAX_DELAY_BUFFER]; @@ -541,9 +553,9 @@ static void sample16i_sinc_handler (void) samplexx_sinc_handler (datas); data1 = datas[0] + datas[3] + datas[1] + datas[2]; - FINISH_DATA (data1, 16, 2); + data1 = FINISH_DATA (data1, 16, 2); PUT_SOUND_WORD_MONO (data1); - check_sound_buffers (); + check_sound_buffers (outputsample, doublesample); } void sample16_handler (void) @@ -552,6 +564,8 @@ void sample16_handler (void) uae_u32 data1 = audio_channel[1].current_sample; uae_u32 data2 = audio_channel[2].current_sample; uae_u32 data3 = audio_channel[3].current_sample; + uae_u32 data; + DO_CHANNEL_1 (data0, 0); DO_CHANNEL_1 (data1, 1); DO_CHANNEL_1 (data2, 2); @@ -563,16 +577,10 @@ void sample16_handler (void) data0 += data1; data0 += data2; data0 += data3; - if (outputsample) { - uae_u32 data = SBASEVAL16(2) + data0; - FINISH_DATA (data, 16, 2); - PUT_SOUND_WORD_MONO (data); - check_sound_buffers (); - if (doublesample) { - PUT_SOUND_WORD_MONO (data); - check_sound_buffers (); - } - } + data = SBASEVAL16(2) + data0; + data = FINISH_DATA (data, 16, 2); + PUT_SOUND_WORD_MONO (data); + check_sound_buffers (outputsample, doublesample); } /* This interpolator examines sample points when Paula switches the output @@ -583,15 +591,9 @@ static void sample16i_anti_handler (void) samplexx_anti_handler (datas); data1 = datas[0] + datas[3] + datas[1] + datas[2]; - if (outputsample) { - FINISH_DATA (data1, 16, 2); - PUT_SOUND_WORD_MONO (data1); - 
check_sound_buffers (); - if (doublesample) { - PUT_SOUND_WORD_MONO (data1); - check_sound_buffers (); - } - } + data1 = FINISH_DATA (data1, 16, 2); + PUT_SOUND_WORD_MONO (data1); + check_sound_buffers (outputsample, doublesample); } static void sample16i_rh_handler (void) @@ -606,6 +608,8 @@ static void sample16i_rh_handler (void) uae_u32 data1p = audio_channel[1].last_sample; uae_u32 data2p = audio_channel[2].last_sample; uae_u32 data3p = audio_channel[3].last_sample; + uae_u32 data; + DO_CHANNEL_1 (data0, 0); DO_CHANNEL_1 (data1, 1); DO_CHANNEL_1 (data2, 2); @@ -637,16 +641,10 @@ static void sample16i_rh_handler (void) delta = audio_channel[3].per; ratio = ((audio_channel[3].evtime % delta) << 8) / delta; data0 += (data3 * (256 - ratio) + data3p * ratio) >> 8; - if (outputsample) { - uae_u32 data = SBASEVAL16(2) + data0; - FINISH_DATA (data, 16, 2); - PUT_SOUND_WORD_MONO (data); - check_sound_buffers (); - if (doublesample) { - PUT_SOUND_WORD_MONO (data); - check_sound_buffers (); - } - } + data = SBASEVAL16(2) + data0; + data = FINISH_DATA (data, 16, 2); + PUT_SOUND_WORD_MONO (data); + check_sound_buffers (outputsample, doublesample); } static void sample16i_crux_handler (void) @@ -659,6 +657,8 @@ static void sample16i_crux_handler (void) uae_u32 data1p = audio_channel[1].last_sample; uae_u32 data2p = audio_channel[2].last_sample; uae_u32 data3p = audio_channel[3].last_sample; + uae_u32 data; + DO_CHANNEL_1 (data0, 0); DO_CHANNEL_1 (data1, 1); DO_CHANNEL_1 (data2, 2); @@ -712,16 +712,10 @@ static void sample16i_crux_handler (void) data1 += data2; data0 += data3; data0 += data1; - if (outputsample) { - uae_u32 data = SBASEVAL16(2) + data0; - FINISH_DATA (data, 16, 2); - PUT_SOUND_WORD_MONO (data); - check_sound_buffers (); - if (doublesample) { - PUT_SOUND_WORD_MONO (data); - check_sound_buffers (); - } - } + data = SBASEVAL16(2) + data0; + data = FINISH_DATA (data, 16, 2); + PUT_SOUND_WORD_MONO (data); + check_sound_buffers (outputsample, doublesample); } 
#ifdef HAVE_STEREO_SUPPORT @@ -749,24 +743,17 @@ void sample16ss_handler (void) data2 &= audio_channel[2].adk_mask; data3 &= audio_channel[3].adk_mask; - if (outputsample) { - put_sound_word_left (data0 << 2); - put_sound_word_right (data1 << 2); - if (currprefs.sound_stereo == SND_6CH) - make6ch (data0, data1, data2, data3); - put_sound_word_left2 (data3 << 2); - put_sound_word_right2 (data2 << 2); - check_sound_buffers (); - if (doublesample) { - put_sound_word_left (data0 << 2); - put_sound_word_right (data1 << 2); - if (currprefs.sound_stereo == SND_6CH) - make6ch (data0, data1, data2, data3); - put_sound_word_left2 (data3 << 2); - put_sound_word_right2 (data2 << 2); - check_sound_buffers (); - } - } + data0 = FINISH_DATA (data0, 16, 0); + data1 = FINISH_DATA (data1, 16, 0); + data2 = FINISH_DATA (data2, 16, 0); + data3 = FINISH_DATA (data3, 16, 0); + put_sound_word_left (data0); + put_sound_word_right (data1); + if (currprefs.sound_stereo == SND_6CH) + make6ch (data0, data1, data2, data3); + put_sound_word_left2 (data3); + put_sound_word_right2 (data2); + check_sound_buffers (outputsample, doublesample); } /* This interpolator examines sample points when Paula switches the output @@ -774,27 +761,21 @@ void sample16ss_handler (void) void sample16ss_anti_handler (void) { + int data0, data1, data2, data3; int datas[4]; samplexx_anti_handler (datas); - if (outputsample) { - put_sound_word_left (datas[0] << 2); - put_sound_word_right (datas[1] << 2); - if (currprefs.sound_stereo == SND_6CH) - make6ch (datas[0], datas[1], datas[2], datas[3]); - put_sound_word_left2 (datas[3] << 2); - put_sound_word_right2 (datas[2] << 2); - check_sound_buffers (); - if (doublesample) { - put_sound_word_left (datas[0] << 2); - put_sound_word_right (datas[1] << 2); - if (currprefs.sound_stereo == SND_6CH) - make6ch (datas[0], datas[1], datas[2], datas[3]); - put_sound_word_left2 (datas[3] << 2); - put_sound_word_right2 (datas[2] << 2); - check_sound_buffers (); - } - } + data0 = 
FINISH_DATA (datas[0], 16, 0); + data1 = FINISH_DATA (datas[1], 16, 0); + data2 = FINISH_DATA (datas[2], 16, 0); + data3 = FINISH_DATA (datas[3], 16, 0); + put_sound_word_left (data0); + put_sound_word_right (data1); + if (currprefs.sound_stereo == SND_6CH) + make6ch (data0, data1, data2, data3); + put_sound_word_left2 (data3); + put_sound_word_right2 (data2); + check_sound_buffers (outputsample, doublesample); } static void sample16si_anti_handler (void) @@ -802,45 +783,32 @@ static void sample16si_anti_handler (void) int datas[4], data1, data2; samplexx_anti_handler (datas); - if (outputsample) { - data1 = datas[0] + datas[3]; - data2 = datas[1] + datas[2]; - FINISH_DATA (data1, 16, 1); - FINISH_DATA (data2, 16, 1); - put_sound_word_left (data1); - put_sound_word_right (data2); - check_sound_buffers (); - if (doublesample) { - put_sound_word_left (data1); - put_sound_word_right (data2); - check_sound_buffers (); - } - } + data1 = datas[0] + datas[3]; + data2 = datas[1] + datas[2]; + data1 = FINISH_DATA (data1, 16, 1); + data2 = FINISH_DATA (data2, 16, 1); + put_sound_word_left (data1); + put_sound_word_right (data2); + check_sound_buffers (outputsample, doublesample); } void sample16ss_sinc_handler (void) { + int data0, data1, data2, data3; int datas[4]; samplexx_sinc_handler (datas); - if (outputsample) { - put_sound_word_left (datas[0] << 2); - put_sound_word_right (datas[1] << 2); - if (currprefs.sound_stereo == SND_6CH) - make6ch (datas[0], datas[1], datas[2], datas[3]); - put_sound_word_left2 (datas[3] << 2); - put_sound_word_right2 (datas[2] << 2); - check_sound_buffers (); - if (doublesample) { - put_sound_word_left (datas[0] << 2); - put_sound_word_right (datas[1] << 2); - if (currprefs.sound_stereo == SND_6CH) - make6ch (datas[0], datas[1], datas[2], datas[3]); - put_sound_word_left2 (datas[3] << 2); - put_sound_word_right2 (datas[2] << 2); - check_sound_buffers (); - } - } + data0 = FINISH_DATA (datas[0], 16, 0); + data1 = FINISH_DATA (datas[1], 16, 0); 
+ data2 = FINISH_DATA (datas[2], 16, 0); + data3 = FINISH_DATA (datas[3], 16, 0); + put_sound_word_left (data0); + put_sound_word_right (data1); + if (currprefs.sound_stereo == SND_6CH) + make6ch (data0, data1, data2, data3); + put_sound_word_left2 (data3); + put_sound_word_right2 (data2); + check_sound_buffers (outputsample, doublesample); } static void sample16si_sinc_handler (void) @@ -848,20 +816,13 @@ static void sample16si_sinc_handler (void) int datas[4], data1, data2; samplexx_sinc_handler (datas); - if (outputsample) { - data1 = datas[0] + datas[3]; - data2 = datas[1] + datas[2]; - FINISH_DATA (data1, 16, 1); - FINISH_DATA (data2, 16, 1); - put_sound_word_left (data1); - put_sound_word_right (data2); - check_sound_buffers (); - if (doublesample) { - put_sound_word_left (data1); - put_sound_word_right (data2); - check_sound_buffers (); - } - } + data1 = datas[0] + datas[3]; + data2 = datas[1] + datas[2]; + data1 = FINISH_DATA (data1, 16, 1); + data2 = FINISH_DATA (data2, 16, 1); + put_sound_word_left (data1); + put_sound_word_right (data2); + check_sound_buffers (outputsample, doublesample); } void sample16s_handler (void) @@ -882,20 +843,13 @@ void sample16s_handler (void) data0 += data3; data1 += data2; - if (outputsample) { - data2 = SBASEVAL16(1) + data0; - FINISH_DATA (data2, 16, 1); - data3 = SBASEVAL16(1) + data1; - FINISH_DATA (data3, 16, 1); - put_sound_word_left (data2); - put_sound_word_right (data3); - check_sound_buffers (); - if (doublesample) { - put_sound_word_left (data2); - put_sound_word_right (data3); - check_sound_buffers (); - } - } + data2 = SBASEVAL16(1) + data0; + data2 = FINISH_DATA (data2, 16, 1); + data3 = SBASEVAL16(1) + data1; + data3 = FINISH_DATA (data3, 16, 1); + put_sound_word_left (data2); + put_sound_word_right (data3); + check_sound_buffers (outputsample, doublesample); } static void sample16si_crux_handler (void) @@ -961,20 +915,13 @@ static void sample16si_crux_handler (void) } data1 += data2; data0 += data3; - if 
(outputsample) { - data2 = SBASEVAL16(1) + data0; - FINISH_DATA (data2, 16, 1); - data3 = SBASEVAL16(1) + data1; - FINISH_DATA (data3, 16, 1); - put_sound_word_left (data2); - put_sound_word_right (data3); - check_sound_buffers (); - if (doublesample) { - put_sound_word_left (data2); - put_sound_word_right (data3); - check_sound_buffers (); - } - } + data2 = SBASEVAL16(1) + data0; + data2 = FINISH_DATA (data2, 16, 1); + data3 = SBASEVAL16(1) + data1; + data3 = FINISH_DATA (data3, 16, 1); + put_sound_word_left (data2); + put_sound_word_right (data3); + check_sound_buffers (outputsample, doublesample); } static void sample16si_rh_handler (void) @@ -1021,20 +968,13 @@ static void sample16si_rh_handler (void) delta = audio_channel[3].per; ratio = ((audio_channel[3].evtime % delta) << 8) / delta; data0 += (data3 * (256 - ratio) + data3p * ratio) >> 8; - if (outputsample) { - data2 = SBASEVAL16(1) + data0; - FINISH_DATA (data2, 16, 1); - data3 = SBASEVAL16(1) + data1; - FINISH_DATA (data3, 16, 1); - put_sound_word_left (data2); - put_sound_word_right (data3); - check_sound_buffers (); - if (doublesample) { - put_sound_word_left (data2); - put_sound_word_right (data3); - check_sound_buffers (); - } - } + data2 = SBASEVAL16(1) + data0; + data2 = FINISH_DATA (data2, 16, 1); + data3 = SBASEVAL16(1) + data1; + data3 = FINISH_DATA (data3, 16, 1); + put_sound_word_left (data2); + put_sound_word_right (data3); + check_sound_buffers (outputsample, doublesample); } #else @@ -1537,6 +1477,7 @@ void set_audio (void) void update_audio (void) { unsigned long int n_cycles = 0; + static int samplecounter; if (!isaudio ()) goto end; @@ -1587,19 +1528,25 @@ void update_audio (void) if (rounded == best_evtime) { /* Before the following addition, next_sample_evtime is in range [-0.5, 0.5) */ next_sample_evtime += scaled_sample_evtime; - if (extrasamples > 0) { - outputsample = 1; - doublesample = 1; - extrasamples--; - } else if (extrasamples < 0) { - outputsample = 0; - doublesample = 0; - 
extrasamples++; - } else { - outputsample = 1; - doublesample = 0; + doublesample = 0; + if (--samplecounter <= 0) { + samplecounter = currprefs.sound_freq / 100; + if (extrasamples > 0) { + outputsample = 1; + doublesample = 1; + extrasamples--; + } else if (extrasamples < 0) { + outputsample = 0; + doublesample = 0; + extrasamples++; + } } (*sample_handler) (); + if (outputsample == 0) + outputsample = -1; + else if (outputsample < 0) + outputsample = 1; + } } diff --git a/autoconf.c b/autoconf.c index 37578f88..61ddb35a 100644 --- a/autoconf.c +++ b/autoconf.c @@ -238,9 +238,8 @@ void rtarea_init (void) deftrap (NULL); /* Generic emulator trap */ - EXPANSION_nullfunc = here (); - calltrap (deftrap (nullfunc)); - dw (RTS); + dw (0); + dw (0); a = here(); /* Dummy trap - removing this breaks the filesys emulation. */ diff --git a/blitter.c b/blitter.c index 7bd9e139..99b11c4d 100644 --- a/blitter.c +++ b/blitter.c @@ -1225,7 +1225,7 @@ void restore_blitter_finish (void) { if (bltstate == BLT_init) { write_log (L"blitter was started but DMA was inactive during save\n"); - do_blitter (0); + //do_blitter (0); } } diff --git a/catweasel.c b/catweasel.c index 841c9b70..320bc5c7 100644 --- a/catweasel.c +++ b/catweasel.c @@ -639,8 +639,8 @@ int catweasel_init(void) } //catweasel_init_controller(&cwc); - _stprintf (tmp, L"CW: Catweasel MK%d @%p (%s) enabled. %s.", - cwc.type, (uae_u8*)cwc.iobase, name, cwc.direct_access ? L"DIRECTIO": L"API"); + _stprintf (tmp, L"CW: Catweasel MK%d @%08x (%s) enabled. %s.", + cwc.type, (int)cwc.iobase, name, cwc.direct_access ? 
L"DIRECTIO": L"API"); if (cwc.direct_access) { if (cwc.can_sid) { TCHAR *p = tmp + _tcslen (tmp); diff --git a/cd32_fmv.c b/cd32_fmv.c index 6714ba00..ceaba01e 100644 --- a/cd32_fmv.c +++ b/cd32_fmv.c @@ -20,6 +20,34 @@ //#define FMV_DEBUG +#define FMV_BASE 0x40000 +#define AUDIO_BASE 0x50000 +#define VIDEO_BASE 0x70000 +#define VIDEO_RAM 0x80000 + +// L64111 registers (from datasheet) +#define A_DATA 0 +#define A_CONTROL1 2 +#define A_CONTROL2 4 +#define A_CONTROL3 6 +#define A_INT1 8 +#define A_INT2 10 +#define A_TCR 12 +#define A_TORH 14 +#define A_TORL 16 +#define A_PARAM1 18 +#define A_PARAM2 20 +#define A_PARAM3 22 +#define A_PRESENT1 24 +#define A_PRESENT2 26 +#define A_PRESENT3 28 +#define A_PRESENT4 30 +#define A_PRESENT5 32 +#define A_FIFO 34 +#define A_CB_STATUS 36 +#define A_CB_WRITE 38 +#define A_CB_READ 40 + static int fmv_mask; static uae_u8 *rom; static int rom_size = 262144; @@ -31,7 +59,7 @@ static uae_u8 fmv_bget2 (uaecptr addr) #ifdef FMV_DEBUG write_log (L"fmv_bget2 %08X PC=%8X\n", addr, M68K_GETPC); #endif - if (addr >= rom_size) { + if (addr >= rom_size && addr < 0x80000) { write_log (L"fmv_bget2 %08X PC=%8X\n", addr, M68K_GETPC); return 0; } @@ -39,8 +67,8 @@ static uae_u8 fmv_bget2 (uaecptr addr) } static void fmv_bput2 (uaecptr addr, uae_u8 v) { - if (addr >= rom_size && addr < 0xf0000) { - ;//write_log (L"fmv_bput2 %08X=%02X PC=%8X\n", addr, v & 0xff, M68K_GETPC); + if (addr >= rom_size && addr < 0x80000) { + write_log (L"fmv_bput2 %08X=%02X PC=%8X\n", addr, v & 0xff, M68K_GETPC); } } diff --git a/cdtv.c b/cdtv.c index 159897da..f6cb63ed 100644 --- a/cdtv.c +++ b/cdtv.c @@ -684,7 +684,7 @@ static void *dev_thread (void *p) sys_command_cd_pause (DF_IOCTL, unitnum, 0); break; case 0x0104: // stop - cdaudiostop(); + cdaudiostop (); break; case 0x0110: // do_play! 
do_play (); diff --git a/cfgfile.c b/cfgfile.c index 7cac94bb..7c9c92c3 100644 --- a/cfgfile.c +++ b/cfgfile.c @@ -1197,27 +1197,27 @@ static int cfgfile_parse_host (struct uae_prefs *p, TCHAR *option, TCHAR *value) } if (_tcscmp (option, L"joyportfriendlyname0") == 0 || _tcscmp (option, L"joyportfriendlyname1") == 0) { - inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportfriendlyname0") == 0 ? 0 : 1, 2); + inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportfriendlyname0") == 0 ? 0 : 1, 0, 2); return 1; } if (_tcscmp (option, L"joyportfriendlyname2") == 0 || _tcscmp (option, L"joyportfriendlyname3") == 0) { - inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportfriendlyname2") == 0 ? 2 : 3, 2); + inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportfriendlyname2") == 0 ? 2 : 3, 0, 2); return 1; } if (_tcscmp (option, L"joyportname0") == 0 || _tcscmp (option, L"joyportname1") == 0) { - inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportname0") == 0 ? 0 : 1, 1); + inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportname0") == 0 ? 0 : 1, 0, 1); return 1; } if (_tcscmp (option, L"joyportname2") == 0 || _tcscmp (option, L"joyportname3") == 0) { - inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportname2") == 0 ? 2 : 3, 1); + inputdevice_joyport_config (p, value, _tcscmp (option, L"joyportname2") == 0 ? 2 : 3, 0, 1); return 1; } if (_tcscmp (option, L"joyport0") == 0 || _tcscmp (option, L"joyport1") == 0) { - inputdevice_joyport_config (p, value, _tcscmp (option, L"joyport0") == 0 ? 0 : 1, 0); + inputdevice_joyport_config (p, value, _tcscmp (option, L"joyport0") == 0 ? 0 : 1, 0, 0); return 1; } if (_tcscmp (option, L"joyport2") == 0 || _tcscmp (option, L"joyport3") == 0) { - inputdevice_joyport_config (p, value, _tcscmp (option, L"joyport2") == 0 ? 2 : 3, 0); + inputdevice_joyport_config (p, value, _tcscmp (option, L"joyport2") == 0 ? 
2 : 3, 0, 0); return 1; } if (cfgfile_strval (option, value, L"joyport0mode", &p->jports[0].mode, joyportmodes, 0)) diff --git a/drawing.c b/drawing.c index 508ef9c1..3e361fa1 100644 --- a/drawing.c +++ b/drawing.c @@ -2408,9 +2408,12 @@ STATIC_INLINE void putpixel (uae_u8 *buf, int bpp, int x, xcolnr c8, int opaq) } } -STATIC_INLINE uae_u32 ledcolor (uae_u32 c, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc) +STATIC_INLINE uae_u32 ledcolor (uae_u32 c, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc, uae_u32 *a) { - return rc[(c >> 16) & 0xff] | gc[(c >> 8) & 0xff] | bc[(c >> 0) & 0xff]; + uae_u32 v = rc[(c >> 16) & 0xff] | gc[(c >> 8) & 0xff] | bc[(c >> 0) & 0xff]; + if (a) + v |= a[255 - ((c >> 24) & 0xff)]; + return v; } static void write_tdnumber (uae_u8 *buf, int bpp, int x, int y, int num, uae_u32 c1, uae_u32 c2) @@ -2428,14 +2431,14 @@ static void write_tdnumber (uae_u8 *buf, int bpp, int x, int y, int num, uae_u32 } } -void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc) +void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc, uae_u32 *alpha) { int x_start, j, led, border; uae_u32 c1, c2, cb; - c1 = ledcolor (0xffffff, rc, gc, bc); - c2 = ledcolor (0x000000, rc, gc, bc); - cb = ledcolor (TD_BORDER, rc, gc, bc); + c1 = ledcolor (0x00ffffff, rc, gc, bc, alpha); + c2 = ledcolor (0x00000000, rc, gc, bc, alpha); + cb = ledcolor (TD_BORDER, rc, gc, bc, alpha); if (td_pos & TD_RIGHT) x_start = totalwidth - TD_PADX - NUM_LEDS * TD_WIDTH; @@ -2530,10 +2533,12 @@ void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u off_rgb = 0x000000; am = 3; } - c = ledcolor (on ? on_rgb : off_rgb, rc, gc, bc); + on_rgb |= 0x33000000; + off_rgb |= 0x33000000; + c = ledcolor (on ? 
on_rgb : off_rgb, rc, gc, bc, alpha); border = 0; if (y == 0 || y == TD_TOTAL_HEIGHT - 1) { - c = ledcolor (TD_BORDER, rc, gc, bc); + c = ledcolor (TD_BORDER, rc, gc, bc, alpha); border = 1; } @@ -2576,7 +2581,7 @@ static void draw_status_line (int line) if (xlinebuffer == 0) xlinebuffer = row_map[line]; buf = xlinebuffer; - draw_status_line_single (buf, bpp, y, gfxvidinfo.width, xredcolors, xgreencolors, xbluecolors); + draw_status_line_single (buf, bpp, y, gfxvidinfo.width, xredcolors, xgreencolors, xbluecolors, NULL); } #define LIGHTPEN_HEIGHT 12 diff --git a/driveclick.c b/driveclick.c index feb809d3..aabd48ef 100644 --- a/driveclick.c +++ b/driveclick.c @@ -305,7 +305,7 @@ static int clickcnt; static void mix (void) { - int total = ((uae_u8*)paula_sndbufpt - (uae_u8*)paula_sndbuffer) / (get_audio_nativechannels () * 2); + int total = ((uae_u8*)paula_sndbufpt - (uae_u8*)paula_sndbuffer) / (get_audio_nativechannels (currprefs.sound_stereo) * 2); if (currprefs.dfxclickvolume > 0) { while (clickcnt < total) { @@ -335,7 +335,7 @@ void driveclick_mix (uae_s16 *sndbuffer, int size) return; mix (); clickcnt = 0; - switch (get_audio_nativechannels ()) + switch (get_audio_nativechannels (currprefs.sound_stereo)) { case 6: for (i = 0; i < size / 6; i++) { diff --git a/filesys.c b/filesys.c index df38f39b..58666515 100644 --- a/filesys.c +++ b/filesys.c @@ -4843,7 +4843,7 @@ static void *filesys_thread (void *unit_v) { UnitInfo *ui = (UnitInfo *)unit_v; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 1); for (;;) { uae_u8 *pck; uae_u8 *msg; @@ -5154,9 +5154,11 @@ static uae_u32 REGPARAM2 filesys_dev_bootfilesys (TrapContext *context) while (get_long (fsnode)) { dostype2 = get_long (fsnode + 14); if (dostype2 == dostype) { - if (get_long (fsnode + 22) & (1 << 7)) { - put_long (devicenode + 32, get_long (fsnode + 54)); /* dn_SegList */ - put_long (devicenode + 36, -1); /* dn_GlobalVec */ + int i; + uae_u32 pf = get_long (fsnode + 22); // fse_PatchFlags + for 
(i = 0; i < 32; i++) { + if (pf & (1 << i)) + put_long (devicenode + 4 + i * 4, get_long (fsnode + 22 + 4 + i * 4)); } return 1; } diff --git a/gayle.c b/gayle.c index ff60de9c..1f5f2ca5 100644 --- a/gayle.c +++ b/gayle.c @@ -7,7 +7,7 @@ */ #define GAYLE_LOG 0 -#define IDE_LOG 1 +#define IDE_LOG 0 #define MBRES_LOG 0 #define PCMCIA_LOG 1 @@ -207,6 +207,7 @@ static void ps (int offset, TCHAR *src, int max) ide->secbuf[offset ^ 1] = c; offset++; } + xfree (s); } static int isideirq (void) @@ -612,6 +613,7 @@ static void ide_read_sectors (int flags) put_lbachs (ide, lba, cyl, head, sec, nsec, lba48); ide->data_multi = multi ? ide->multiple_mode : 1; } + static void ide_write_sectors (int flags) { unsigned int cyl, head, sec, nsec; @@ -815,7 +817,7 @@ static uae_u32 ide_read (uaecptr addr) switch (ide_reg) { case IDE_DRVADDR: - v = 0; + v = ((ide_drv ? 2 : 1) | ((ide_select & 15) << 2)) ^ 0xff; break; case IDE_DATA: break; @@ -1893,7 +1895,7 @@ uae_u8 *save_gayle (int *len) if (currprefs.cs_ide <= 0) return NULL; - dstbak = dst = (uae_u8*)malloc (1000); + dstbak = dst = malloc (1000); save_u8 (currprefs.cs_ide); save_u8 (gayle_int); save_u8 (gayle_irq); @@ -1916,7 +1918,7 @@ uae_u8 *save_ide (int num, int *len) ide = idedrive[num]; if (ide->hdhfd.size == 0) return NULL; - dstbak = dst = (uae_u8*)malloc (1000); + dstbak = dst = malloc (1000); save_u32 (num); save_u64 (ide->hdhfd.size); save_string (ide->hdhfd.path); diff --git a/hardfile.c b/hardfile.c index 26c4a739..317ef05f 100644 --- a/hardfile.c +++ b/hardfile.c @@ -406,7 +406,7 @@ static int hdf_open2 (struct hardfiledata *hfd, const TCHAR *pname) hfd->vhd_bitmapsize = ((hfd->vhd_blocksize / (8 * 512)) + 511) & ~511; } write_log (L"HDF is VHD %s image, virtual size=%dK\n", - hfd->vhd_type == 2 ? "fixed" : "dynamic", + hfd->vhd_type == 2 ? 
L"fixed" : L"dynamic", hfd->virtsize / 1024); return 1; nonvhd: @@ -942,8 +942,8 @@ int scsi_emulate (struct hardfiledata *hfd, struct hd_hardfiledata *hdhfd, uae_u char *ss; *reply_len = *sense_len = 0; - memset(r, 0, 256); - memset(s, 0, 256); + memset (r, 0, 256); + memset (s, 0, 256); switch (cmdbuf[0]) { case 0x00: /* TEST UNIT READY */ @@ -956,7 +956,8 @@ int scsi_emulate (struct hardfiledata *hfd, struct hd_hardfiledata *hdhfd, uae_u offset = ((cmdbuf[1] & 31) << 16) | (cmdbuf[2] << 8) | cmdbuf[3]; offset *= hfd->blocksize; len = cmdbuf[4]; - if (!len) len = 256; + if (!len) + len = 256; len *= hfd->blocksize; if (checkbounds(hfd, offset, len)) scsi_len = (uae_u32)cmd_readx (hfd, scsi_data, offset, len); @@ -964,10 +965,13 @@ int scsi_emulate (struct hardfiledata *hfd, struct hd_hardfiledata *hdhfd, uae_u case 0x0a: /* WRITE (6) */ if (nodisk (hfd)) goto nodisk; + if (hfd->readonly || hfd->dangerous) + goto readprot; offset = ((cmdbuf[1] & 31) << 16) | (cmdbuf[2] << 8) | cmdbuf[3]; offset *= hfd->blocksize; len = cmdbuf[4]; - if (!len) len = 256; + if (!len) + len = 256; len *= hfd->blocksize; if (checkbounds(hfd, offset, len)) scsi_len = (uae_u32)cmd_writex (hfd, scsi_data, offset, len); @@ -1132,6 +1136,8 @@ int scsi_emulate (struct hardfiledata *hfd, struct hd_hardfiledata *hdhfd, uae_u case 0x2a: /* WRITE (10) */ if (nodisk (hfd)) goto nodisk; + if (hfd->readonly || hfd->dangerous) + goto readprot; offset = rl (cmdbuf + 2); offset *= hfd->blocksize; len = rl (cmdbuf + 7 - 2) & 0xffff; @@ -1182,6 +1188,8 @@ int scsi_emulate (struct hardfiledata *hfd, struct hd_hardfiledata *hdhfd, uae_u case 0xaa: /* WRITE (12) */ if (nodisk (hfd)) goto nodisk; + if (hfd->readonly || hfd->dangerous) + goto readprot; offset = rl (cmdbuf + 2); offset *= hfd->blocksize; len = rl (cmdbuf + 6); @@ -1192,13 +1200,19 @@ int scsi_emulate (struct hardfiledata *hfd, struct hd_hardfiledata *hdhfd, uae_u case 0x37: /* READ DEFECT DATA */ if (nodisk (hfd)) goto nodisk; - write_log 
(L"UAEHF: READ DEFECT DATA\n"); status = 2; /* CHECK CONDITION */ s[0] = 0x70; s[2] = 0; /* NO SENSE */ s[12] = 0x1c; /* DEFECT LIST NOT FOUND */ ls = 12; break; +readprot: + status = 2; /* CHECK CONDITION */ + s[0] = 0x70; + s[2] = 7; /* DATA PROTECT */ + s[12] = 0x27; /* WRITE PROTECTED */ + ls = 12; + break; nodisk: status = 2; /* CHECK CONDITION */ s[0] = 0x70; @@ -1266,7 +1280,7 @@ static int handle_scsi (uaecptr request, struct hardfiledata *hfd) } scsi_log ("\n"); - status = scsi_emulate(hfd, NULL, cmdbuf, scsi_cmd_len, scsi_data_ptr, &scsi_len, reply, &reply_len, sense, &sense_len); + status = scsi_emulate (hfd, NULL, cmdbuf, scsi_cmd_len, scsi_data_ptr, &scsi_len, reply, &reply_len, sense, &sense_len); put_word (acmd + 18, status != 0 ? 0 : scsi_cmd_len); /* fake scsi_CmdActual */ put_byte (acmd + 21, status); /* scsi_Status */ @@ -1547,7 +1561,7 @@ static uae_u32 hardfile_do_io (struct hardfiledata *hfd, struct hardfileprivdata case CMD_FORMAT: /* Format */ if (nodisk (hfd)) goto no_disk; - if (hfd->readonly) { + if (hfd->readonly || hfd->dangerous) { error = 28; /* write protect */ } else { offset = get_long (request + 44); @@ -1574,7 +1588,7 @@ static uae_u32 hardfile_do_io (struct hardfiledata *hfd, struct hardfileprivdata case NSCMD_TD_FORMAT64: if (nodisk (hfd)) goto no_disk; - if (hfd->readonly) { + if (hfd->readonly || hfd->dangerous) { error = 28; /* write protect */ } else { offset64 = get_long (request + 44) | ((uae_u64)get_long (request + 32) << 32); @@ -1646,7 +1660,7 @@ static uae_u32 hardfile_do_io (struct hardfiledata *hfd, struct hardfileprivdata break; case CMD_PROTSTATUS: - if (hfd->readonly) + if (hfd->readonly || hfd->dangerous) actual = -1; else actual = 0; @@ -1783,7 +1797,7 @@ static void *hardfile_thread (void *devs) { struct hardfileprivdata *hfpd = (struct hardfileprivdata*)devs; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 1); hfpd->thread_running = 1; uae_sem_post (&hfpd->sync_sem); for (;;) { diff --git 
a/include/audio.h b/include/audio.h index 0217f284..01286899 100644 --- a/include/audio.h +++ b/include/audio.h @@ -41,19 +41,34 @@ extern void write_wavheader (struct zfile *wavfile, uae_u32 size, uae_u32 freq); enum { SND_MONO, SND_STEREO, SND_4CH_CLONEDSTEREO, SND_4CH, SND_6CH_CLONEDSTEREO, SND_6CH, SND_NONE }; -STATIC_INLINE int get_audio_nativechannels (void) +STATIC_INLINE int get_audio_stereomode (int channels) +{ + switch (channels) + { + case 1: + return SND_MONO; + case 2: + return SND_STEREO; + case 4: + return SND_4CH; + case 6: + return SND_6CH; + } + return SND_STEREO; +} +STATIC_INLINE int get_audio_nativechannels (int stereomode) { int ch[] = { 1, 2, 4, 4, 6, 6, 0 }; - return ch[currprefs.sound_stereo]; + return ch[stereomode]; } -STATIC_INLINE int get_audio_amigachannels (void) +STATIC_INLINE int get_audio_amigachannels (int stereomode) { int ch[] = { 1, 2, 2, 4, 2, 4, 0 }; - return ch[currprefs.sound_stereo]; + return ch[stereomode]; } -STATIC_INLINE int get_audio_ismono (void) +STATIC_INLINE int get_audio_ismono (int stereomode) { - if (currprefs.sound_stereo == 0) + if (stereomode == 0) return 1; return 0; } diff --git a/include/filesys.h b/include/filesys.h index 588b4949..8d5a11ba 100644 --- a/include/filesys.h +++ b/include/filesys.h @@ -18,6 +18,7 @@ struct hardfiledata { void *handle; int handle_valid; int readonly; + int dangerous; int flags; uae_u8 *cache; int cache_valid; @@ -30,7 +31,6 @@ struct hardfiledata { unsigned int cylinders; unsigned int sectors; unsigned int heads; - int warned; uae_u8 *virtual_rdb; uae_u64 virtual_size; int unitnum; @@ -100,7 +100,7 @@ extern void hdf_close (struct hardfiledata *hfd); extern int hdf_read (struct hardfiledata *hfd, void *buffer, uae_u64 offset, int len); extern int hdf_write (struct hardfiledata *hfd, void *buffer, uae_u64 offset, int len); extern int hdf_getnumharddrives (void); -extern TCHAR *hdf_getnameharddrive (int index, int flags, int *sectorsize); +extern TCHAR *hdf_getnameharddrive 
(int index, int flags, int *sectorsize, int *dangerousdrive); extern int isspecialdrive(const TCHAR *name); extern int get_native_path(uae_u32 lock, TCHAR *out); extern void hardfile_do_disk_change (struct uaedev_config_info *uci, int insert); diff --git a/include/gfxfilter.h b/include/gfxfilter.h index a5da1614..a9397e3d 100644 --- a/include/gfxfilter.h +++ b/include/gfxfilter.h @@ -44,13 +44,13 @@ extern void _cdecl hq4x_32 (unsigned char*, unsigned char*, DWORD, DWORD, DWORD) #define UAE_FILTER_NULL 1 #define UAE_FILTER_DIRECT3D 2 -#define UAE_FILTER_OPENGL 3 -#define UAE_FILTER_SCALE2X 4 -#define UAE_FILTER_SUPEREAGLE 5 -#define UAE_FILTER_SUPER2XSAI 6 -#define UAE_FILTER_2XSAI 7 -#define UAE_FILTER_PAL 8 -#define UAE_FILTER_HQ 9 +//#define UAE_FILTER_OPENGL 3 +#define UAE_FILTER_SCALE2X 3 +#define UAE_FILTER_SUPEREAGLE 4 +#define UAE_FILTER_SUPER2XSAI 5 +#define UAE_FILTER_2XSAI 6 +#define UAE_FILTER_PAL 7 +#define UAE_FILTER_HQ 8 #define UAE_FILTER_MODE_16 16 #define UAE_FILTER_MODE_16_16 16 diff --git a/include/inputdevice.h b/include/inputdevice.h index 7390c19d..949bdcba 100644 --- a/include/inputdevice.h +++ b/include/inputdevice.h @@ -131,7 +131,7 @@ extern void inputdevice_reset (void); extern void write_inputdevice_config (struct uae_prefs *p, struct zfile *f); extern void read_inputdevice_config (struct uae_prefs *p, TCHAR *option, TCHAR *value); extern void reset_inputdevice_config (struct uae_prefs *pr); -extern int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, int type); +extern int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, int mode, int type); extern int inputdevice_getjoyportdevice (int jport); extern void inputdevice_init (void); diff --git a/include/memory.h b/include/memory.h index 46cb79b0..9bdfe993 100644 --- a/include/memory.h +++ b/include/memory.h @@ -157,7 +157,7 @@ extern uae_u8 *baseaddr[MEMORY_BANKS]; if ((b)->baseaddr) \ baseaddr[bankindex(addr)] = (b)->baseaddr - 
(realstart); \ else \ - baseaddr[bankindex(addr)] = (uae_u8*)(((long)b)+1); \ + baseaddr[bankindex(addr)] = (uae_u8*)(((uae_u8*)b)+1); \ } while (0) #else #define put_mem_bank(addr, b, realstart) \ diff --git a/include/options.h b/include/options.h index ff578fd2..db902ac4 100644 --- a/include/options.h +++ b/include/options.h @@ -9,7 +9,7 @@ #define UAEMAJOR 1 #define UAEMINOR 6 -#define UAESUBREV 1 +#define UAESUBREV 2 typedef enum { KBD_LANG_US, KBD_LANG_DK, KBD_LANG_DE, KBD_LANG_SE, KBD_LANG_FR, KBD_LANG_IT, KBD_LANG_ES } KbdLang; diff --git a/include/statusline.h b/include/statusline.h index 3ec82393..feacab46 100644 --- a/include/statusline.h +++ b/include/statusline.h @@ -23,5 +23,5 @@ static int td_pos = (TD_RIGHT|TD_BOTTOM); #define STATUSLINE_RTG 2 #define STATUSLINE_TARGET 0x80 -extern void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc); +extern void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc, uae_u32 *alpha); diff --git a/include/xwin.h b/include/xwin.h index c494b079..1e158241 100644 --- a/include/xwin.h +++ b/include/xwin.h @@ -19,6 +19,7 @@ extern int graphics_setup (void); extern int graphics_init (void); extern void graphics_leave (void); extern void handle_events (void); +extern int handle_msgpump (void); extern void setup_brkhandler (void); extern int isfullscreen (void); extern void toggle_fullscreen (void); diff --git a/inputdevice.c b/inputdevice.c index 60f9ce17..f57a3b69 100644 --- a/inputdevice.c +++ b/inputdevice.c @@ -434,6 +434,7 @@ static struct input_queue_struct input_queue[INPUT_QUEUE_SIZE]; static void freejport (struct uae_prefs *dst, int num) { memset (&dst->jports[num], 0, sizeof (struct jport)); + dst->jports[num].id = -1; } static void copyjport (const struct uae_prefs *src, struct uae_prefs *dst, int num) { @@ -1930,6 +1931,11 @@ void inputdevice_hsync (void) #ifdef CATWEASEL catweasel_hsync (); #endif 
+ if ((vpos & 31) == 31 && handle_msgpump ()) { + idev[IDTYPE_MOUSE].read (); + idev[IDTYPE_JOYSTICK].read (); + idev[IDTYPE_KEYBOARD].read (); + } if (inputdelay > 0) { inputdelay--; if (inputdelay == 0) { @@ -2149,7 +2155,7 @@ void inputdevice_handle_inputcode (void) gui_display (-1); break; case AKS_SCREENSHOT: - screenshot(1, 1); + screenshot (1, 1); break; #ifdef ACTION_REPLAY case AKS_FREEZEBUTTON: @@ -2670,7 +2676,7 @@ static int switchdevice (struct uae_input_device *id, int num, int button) return 0; if (name) { write_log (L"inputdevice change '%s':%d->%d\n", name, num, newport); - inputdevice_joyport_config (&changed_prefs, name, newport, 2); + inputdevice_joyport_config (&changed_prefs, name, newport, -1, 2); inputdevice_copyconfig (&changed_prefs, &currprefs); return 1; } @@ -3282,21 +3288,32 @@ void inputdevice_updateconfig (struct uae_prefs *prefs) #endif } -/* called when devices get inserted or removed */ +/* called when devices get inserted or removed + * store old devices temporarily, enumerate all devices + * restore old devices back (order may have changed) + */ void inputdevice_devicechange (struct uae_prefs *prefs) { int acc = input_acquired; int i, idx; TCHAR *jports[MAX_JPORTS]; + int jportskb[MAX_JPORTS], jportsmode[MAX_JPORTS]; for (i = 0; i < MAX_JPORTS; i++) { - jports[i] = 0; - idx = inputdevice_getjoyportdevice (prefs->jports[i].id) - JSEM_LASTKBD; - if (idx >= 0) { - struct inputdevice_functions *idf = getidf (idx); - int devidx = inputdevice_get_device_index (idx); + jports[i] = NULL; + jportskb[i] = -1; + idx = inputdevice_getjoyportdevice (prefs->jports[i].id); + if (idx >= JSEM_LASTKBD) { + struct inputdevice_functions *idf; + int devidx; + idx -= JSEM_LASTKBD; + idf = getidf (idx); + devidx = inputdevice_get_device_index (idx); jports[i] = my_strdup (idf->get_uniquename (devidx)); + } else { + jportskb[i] = idx; } + jportsmode[i] = prefs->jports[i].mode; } inputdevice_unacquire (); @@ -3312,9 +3329,14 @@ void 
inputdevice_devicechange (struct uae_prefs *prefs) for (i = 0; i < MAX_JPORTS; i++) { freejport (prefs, i); - if (jports[i]) - inputdevice_joyport_config (prefs, jports[i], i, 2); - xfree (jports[i]); + if (jports[i]) { + inputdevice_joyport_config (prefs, jports[i], i, jportsmode[i], 2); + xfree (jports[i]); + } else if (jportskb[i] >= 0) { + TCHAR tmp[10]; + _stprintf (tmp, L"kbd%d", jportskb[i]); + inputdevice_joyport_config (prefs, tmp, i, jportsmode[i], 0); + } } if (prefs == &changed_prefs) @@ -4210,7 +4232,7 @@ int jsem_iskbdjoy (int port, const struct uae_prefs *p) return v; } -int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, int type) +int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, int mode, int type) { switch (type) { @@ -4232,7 +4254,8 @@ int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, TCHAR *name2 = idf->get_uniquename (i); if ((name1 && !_tcscmp (name1, value)) || (name2 && !_tcscmp (name2, value))) { p->jports[portnum].id = idnum + i; - p->jports[portnum].mode = 0; + if (mode >= 0) + p->jports[portnum].mode = mode; return 1; } } @@ -4272,7 +4295,8 @@ int inputdevice_joyport_config (struct uae_prefs *p, TCHAR *value, int portnum, } if (got == 2) { p->jports[portnum].id = start; - p->jports[portnum].mode = 0; + if (mode >= 0) + p->jports[portnum].mode = mode; return 1; } } diff --git a/jit2/codegen_x86.cpp b/jit2/codegen_x86.cpp new file mode 100644 index 00000000..4885e083 --- /dev/null +++ b/jit2/codegen_x86.cpp @@ -0,0 +1,4754 @@ +/* + * compiler/codegen_x86.cpp - IA-32 code generator + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2005 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2008 Christian Bauer + * + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * This program is free software; you can redistribute it and/or 
modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* This should eventually end up in machdep/, but for now, x86 is the + only target, and it's easier this way... */ + +#include "flags_x86.h" + +/************************************************************************* + * Some basic information about the the target CPU * + *************************************************************************/ + +#define EAX_INDEX 0 +#define ECX_INDEX 1 +#define EDX_INDEX 2 +#define EBX_INDEX 3 +#define ESP_INDEX 4 +#define EBP_INDEX 5 +#define ESI_INDEX 6 +#define EDI_INDEX 7 +#if defined(__x86_64__) +#define R8_INDEX 8 +#define R9_INDEX 9 +#define R10_INDEX 10 +#define R11_INDEX 11 +#define R12_INDEX 12 +#define R13_INDEX 13 +#define R14_INDEX 14 +#define R15_INDEX 15 +#endif +/* XXX this has to match X86_Reg8H_Base + 4 */ +#define AH_INDEX (0x10+4+EAX_INDEX) +#define CH_INDEX (0x10+4+ECX_INDEX) +#define DH_INDEX (0x10+4+EDX_INDEX) +#define BH_INDEX (0x10+4+EBX_INDEX) + +/* The register in which subroutines return an integer return value */ +#define REG_RESULT EAX_INDEX + +/* The registers subroutines take their first and second argument in */ +#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION ) +/* Handle the _fastcall parameters of ECX and EDX */ +#define REG_PAR1 ECX_INDEX +#define REG_PAR2 EDX_INDEX +#elif defined(__x86_64__) +#define REG_PAR1 
EDI_INDEX +#define REG_PAR2 ESI_INDEX +#else +#define REG_PAR1 EAX_INDEX +#define REG_PAR2 EDX_INDEX +#endif + +#define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */ +#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION ) +#define REG_PC_TMP EAX_INDEX +#else +#define REG_PC_TMP ECX_INDEX /* Another register that is not the above */ +#endif + +#define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount. + -1 if any reg will do */ +#define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */ +#define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */ + +#define STACK_ALIGN 16 +#define STACK_OFFSET sizeof(void *) + +uae_s8 always_used[]={4,-1}; +#if defined(__x86_64__) +uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; +uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1}; +#else +uae_s8 can_byte[]={0,1,2,3,-1}; +uae_s8 can_word[]={0,1,2,3,5,6,7,-1}; +#endif + +#if USE_OPTIMIZED_CALLS +/* Make sure interpretive core does not use cpuopti */ +uae_u8 call_saved[]={0,0,0,1,1,1,1,1}; +#error FIXME: code not ready +#else +/* cpuopti mutate instruction handlers to assume registers are saved + by the caller */ +uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0}; +#endif + +/* This *should* be the same as call_saved. 
But: + - We might not really know which registers are saved, and which aren't, + so we need to preserve some, but don't want to rely on everyone else + also saving those registers + - Special registers (such like the stack pointer) should not be "preserved" + by pushing, even though they are "saved" across function calls +*/ +#if defined(__x86_64__) +/* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */ +/* preserve r11 because it's generally used to hold pointers to functions */ +static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1}; +#else +/* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */ +static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1}; +#endif + +/* Whether classes of instructions do or don't clobber the native flags */ +#define CLOBBER_MOV +#define CLOBBER_LEA +#define CLOBBER_CMOV +#define CLOBBER_POP +#define CLOBBER_PUSH +#define CLOBBER_SUB clobber_flags() +#define CLOBBER_SBB clobber_flags() +#define CLOBBER_CMP clobber_flags() +#define CLOBBER_ADD clobber_flags() +#define CLOBBER_ADC clobber_flags() +#define CLOBBER_AND clobber_flags() +#define CLOBBER_OR clobber_flags() +#define CLOBBER_XOR clobber_flags() + +#define CLOBBER_ROL clobber_flags() +#define CLOBBER_ROR clobber_flags() +#define CLOBBER_SHLL clobber_flags() +#define CLOBBER_SHRL clobber_flags() +#define CLOBBER_SHRA clobber_flags() +#define CLOBBER_TEST clobber_flags() +#define CLOBBER_CL16 +#define CLOBBER_CL8 +#define CLOBBER_SE32 +#define CLOBBER_SE16 +#define CLOBBER_SE8 +#define CLOBBER_ZE32 +#define CLOBBER_ZE16 +#define CLOBBER_ZE8 +#define CLOBBER_SW16 clobber_flags() +#define CLOBBER_SW32 +#define CLOBBER_SETCC +#define CLOBBER_MUL clobber_flags() +#define CLOBBER_BT clobber_flags() +#define CLOBBER_BSF clobber_flags() + +/* The older code generator is now deprecated. 
*/ +#define USE_NEW_RTASM 1 + +#if USE_NEW_RTASM + +#if defined(__x86_64__) +#define X86_TARGET_64BIT 1 +/* The address override prefix causes a 5 cycles penalty on Intel Core + processors. Another solution would be to decompose the load in an LEA, + MOV (to zero-extend), MOV (from memory): is it better? */ +#define ADDR32 x86_emit_byte(0x67), +#else +#define ADDR32 /**/ +#endif +#define X86_FLAT_REGISTERS 0 +#define X86_OPTIMIZE_ALU 1 +#define X86_OPTIMIZE_ROTSHI 1 +#include "codegen_x86.h" + +#define x86_emit_byte(B) emit_byte(B) +#define x86_emit_word(W) emit_word(W) +#define x86_emit_long(L) emit_long(L) +#define x86_emit_quad(Q) emit_quad(Q) +#define x86_get_target() get_target() +#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) + +static void jit_fail(const char *msg, const char *file, int line, const char *function) +{ + fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n", + function, file, line, msg); + abort(); +} + +LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) +{ +#if defined(__x86_64__) + PUSHQr(r); +#else + PUSHLr(r); +#endif +} +LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) +{ +#if defined(__x86_64__) + POPQr(r); +#else + POPLr(r); +#endif +} +LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) +{ +#if defined(__x86_64__) + POPQm(d, X86_NOREG, X86_NOREG, 1); +#else + POPLm(d, X86_NOREG, X86_NOREG, 1); +#endif +} +LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) +{ + BTLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) +{ + BTLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) +{ + BTCLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) +{ + BTCLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 
r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) +{ + BTRLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) +{ + BTRLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) +{ + BTSLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) +{ + BTSLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) +{ + SUBWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) + +LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) +{ + MOVLmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) +{ + MOVLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) +{ + MOVWim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) +{ + MOVBim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) +{ + ROLBim(i, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) +{ + ROLBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) +{ + ROLWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) +{ + ROLLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) +{ + ROLLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) +{ + ROLWrr(r, d); +} 
+LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) +{ + ROLBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) +{ + SHLLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) +{ + SHLWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) +{ + SHLBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) +{ + RORBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) +{ + RORWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) +{ + ORLmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) +{ + RORLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) +{ + RORLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) +{ + RORWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) +{ + RORBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) +{ + SHRLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) +{ + SHRWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) +{ + SHRBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) +{ + SARLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) + 
+LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) +{ + SARWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) +{ + SARBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) +{ + SHLLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) +{ + SHLWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) +{ + SHLBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) +{ + SHRLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) +{ + SHRWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) +{ + SHRBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) +{ + SARLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) +{ + SARWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) +{ + SARBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) +{ + SAHF(); +} +LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) + +LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) +{ + CPUID(); +} +LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) + +LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) +{ + LAHF(); +} +LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) + +LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) +{ + SETCCir(cc, d); +} +LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) + +LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) +{ + SETCCim(cc, d, X86_NOREG, X86_NOREG, 1); +} 
+LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) +{ + /* replacement using branch and mov */ + int8 *target_p = (int8 *)x86_get_target() + 1; + JCCSii(cc^1, 0); + MOVBrr(s, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); +} +LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) +{ + if (have_cmov) + CMOVWrr(cc, s, d); + else { /* replacement using branch and mov */ + int8 *target_p = (int8 *)x86_get_target() + 1; + JCCSii(cc^1, 0); + MOVWrr(s, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) +{ + if (have_cmov) + CMOVLrr(cc, s, d); + else { /* replacement using branch and mov */ + int8 *target_p = (int8 *)x86_get_target() + 1; + JCCSii(cc^1, 0); + MOVLrr(s, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) + +LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) +{ + BSFLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) +{ + MOVSLQrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) +{ + MOVSWLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) +{ + MOVSBLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) +{ + MOVZWLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) +{ + MOVZBLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) +{ + IMULLrr(s, d); 
+} +LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + write_log("Bad register in IMUL: d=%d, s=%d\n",d,s); + abort(); + } + IMULLr(s); +} +LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + write_log("Bad register in MUL: d=%d, s=%d\n",d,s); + abort(); + } + MULLr(s); +} +LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) +{ + abort(); /* %^$&%^$%#^ x86! */ +} +LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) +{ + MOVBrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) +{ + MOVWrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVLmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVWmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVBmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) +{ + ADDR32 MOVLrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) +{ + ADDR32 MOVWrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, 
R2 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) +{ + ADDR32 MOVBrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) +{ + ADDR32 MOVLrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) +{ + ADDR32 MOVWrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) +{ + ADDR32 MOVBrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVLmr(base, baser, index, factor, d); +} +LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVWmr(base, baser, index, factor, d); +} +LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVBmr(base, baser, index, factor, d); +} +LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) +{ + ADDR32 MOVLmr(base, X86_NOREG, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 
index, IMM factor, IMM cond)) +{ + if (have_cmov) + ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d); + else { /* replacement using branch and mov */ + int8 *target_p = (int8 *)x86_get_target() + 1; + JCCSii(cond^1, 0); + ADDR32 MOVLmr(base, X86_NOREG, index, factor, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) +{ + if (have_cmov) + CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d); + else { /* replacement using branch and mov */ + int8 *target_p = (int8 *)x86_get_target() + 1; + JCCSii(cond^1, 0); + MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) +{ + ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) +{ + ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) +{ + ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) +{ + ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) +{ + 
ADDR32 MOVLim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) +{ + ADDR32 MOVWim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) +{ + ADDR32 MOVBim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) +{ + ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) +{ + ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) +{ + LEALmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) +{ + LEALmr(offset, s, index, factor, d); +} +LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) +{ + LEALmr(0, s, index, factor, d); +} +LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor)) +{ + LEALmr(0, X86_NOREG, index, factor, d); +} +LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) + 
+LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) /* New-RTASM wrappers: register/memory moves, byte swaps, immediates. */
+{ /* Store the 16-bit register s at [d + offset]; d is the base register, no index. ADDR32 emits the 0x67 prefix on x86-64 builds. */
+    ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
+
+LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+{ /* Store the 8-bit register s at [d + offset]. */
+    ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
+
+LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+{ /* Reverse the byte order of the 32-bit register r in place. */
+    BSWAPLr(r);
+}
+LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
+
+LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+{ /* Swap the two bytes of the 16-bit register r by rotating it left by 8; a rotate is used, hence the WRITE flags tag (CLOBBER_SW16 is clobber_flags()). */
+    ROLWir(8, r);
+}
+LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+{ /* Copy the 32-bit register s into d. */
+    MOVLrr(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+{ /* Store the 32-bit register s at the absolute address d (no base, no index). */
+    MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+{ /* Store the 16-bit register s at the absolute address d. */
+    MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+{ /* Load 16 bits from the absolute address s into register d. */
+    MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+{ /* Store the 8-bit register s at the absolute address d. */
+    MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
+
+LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+{ /* Load 8 bits from the absolute address s into register d. */
+    MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
+}
+LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+{ /* Load the 32-bit immediate s into register d. */
+    MOVLir(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+{ /* Load the 16-bit immediate s into register d. */
+    MOVWir(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+{ /* Load the 8-bit immediate s into register d. */
+    MOVBir(s, d);
+}
+LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
+
+LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+{ /* Add-with-carry the immediate s into the 32-bit value at absolute address d; tagged RMW for both flags and memory. */
+    ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) +{ + ADDLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) +{ + ADDWim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) +{ + ADDBim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) +{ + TESTLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) +{ + TESTLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) +{ + TESTWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) +{ + TESTBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) +{ + XORLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) +{ + ANDLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) +{ + ANDWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) +{ + ANDLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) +{ + ANDWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) +{ + ANDBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) +{ + ORLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) +{ + ORLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) +{ + ORWrr(s, d); 
+} +LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) +{ + ORBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) +{ + ADCLrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) +{ + ADCWrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) +{ + ADCBrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) +{ + ADDLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) +{ + ADDWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) +{ + ADDBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) +{ + SUBLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) +{ + SUBBir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) +{ + ADDLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) +{ + ADDWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) +{ + ADDBir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) + +LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) +{ + SBBLrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) +{ + SBBWrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) +{ + SBBBrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) +{ + SUBLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) + 
+LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) /* New-RTASM wrappers: subtract, compare, xor, exchange. */
+{ /* 16-bit subtract of register s from register d; native flags are written. */
+    SUBWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+{ /* 8-bit subtract of register s from register d. */
+    SUBBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+{ /* 32-bit compare of registers d and s; both are read-only, only flags change. */
+    CMPLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+{ /* 32-bit compare of register r with the immediate i. */
+    CMPLir(i, r);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+{ /* 16-bit compare of registers d and s. */
+    CMPWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
+
+LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
+{ /* 8-bit compare of the byte at absolute address d with the immediate s. */
+    CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) /* was raw_cmp_l_mi: the closing marker must name the function it closes */
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+{ /* 8-bit compare of register d with the immediate i. */
+    CMPBir(i, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+{ /* 8-bit compare of registers d and s. */
+    CMPBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
+
+LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+{ /* 32-bit compare of register d with memory at [offset + index*factor]; no base register. */
+    ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
+}
+LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+{ /* 32-bit exclusive-or of register s into register d. */
+    XORLrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+{ /* 16-bit exclusive-or of register s into register d. */
+    XORWrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
+
+LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+{ /* 8-bit exclusive-or of register s into register d. */
+    XORBrr(s, d);
+}
+LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
+
+LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+{ /* Subtract the immediate s from the 32-bit value at absolute address d. */
+    SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
+
+LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+{ /* 32-bit compare of the value at absolute address d with the immediate s. */
+    CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
+}
+LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
+
+LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
+{ /* Exchange the 32-bit registers r1 and r2; tagged NONE for flags. */
+    XCHGLrr(r2, r1);
+}
+LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) + +LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) +{ + XCHGBrr(r2, r1); +} +LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) + +LOWFUNC(READ,WRITE,0,raw_pushfl,(void)) +{ + PUSHF(); +} +LENDFUNC(READ,WRITE,0,raw_pushfl,(void)) + +LOWFUNC(WRITE,READ,0,raw_popfl,(void)) +{ + POPF(); +} +LENDFUNC(WRITE,READ,0,raw_popfl,(void)) + +/* Generate floating-point instructions */ +static inline void x86_fadd_m(MEMR s) +{ + FADDDm(s,X86_NOREG,X86_NOREG,1); +} + +#else + +const bool optimize_accum = true; +const bool optimize_imm8 = true; +const bool optimize_shift_once = true; + +/************************************************************************* + * Actual encoding of the instructions on the target CPU * + *************************************************************************/ + +static __inline__ int isaccum(int r) +{ + return (r == EAX_INDEX); +} + +static __inline__ int isbyte(uae_s32 x) +{ + return (x>=-128 && x<=127); +} + +static __inline__ int isword(uae_s32 x) +{ + return (x>=-32768 && x<=32767); +} + +LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) +{ + emit_byte(0x50+r); +} +LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) +{ + emit_byte(0x58+r); +} +LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) +{ + emit_byte(0x8f); + emit_byte(0x05); + emit_long(d); +} +LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xe0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) +{ + emit_byte(0x0f); + emit_byte(0xa3); + emit_byte(0xc0+8*b+r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) +{ + emit_byte(0x0f); + emit_byte(0xba); + emit_byte(0xf8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) + 
+LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) /* Old byte-level emitter: bit ops, 16-bit SUB immediate, absolute-address moves. */
+{ /* BTC r32, r32: 0F BB /r -- complement the bit of r selected by register b. */
+    emit_byte(0x0f);
+    emit_byte(0xbb);
+    emit_byte(0xc0+8*b+r); /* ModRM: register-direct, reg field = b, r/m field = r */
+}
+LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
+
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
+{ /* BTR r32, imm8: 0F BA /6 ib -- clear bit i of r. */
+    emit_byte(0x0f);
+    emit_byte(0xba);
+    emit_byte(0xf0+r); /* ModRM: register-direct, opcode extension /6 */
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
+{ /* BTR r32, r32: 0F B3 /r -- clear the bit of r selected by register b. */
+    emit_byte(0x0f);
+    emit_byte(0xb3);
+    emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
+{ /* BTS r32, imm8: 0F BA /5 ib -- set bit i of r. */
+    emit_byte(0x0f);
+    emit_byte(0xba);
+    emit_byte(0xe8+r); /* ModRM: register-direct, opcode extension /5 */
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
+{ /* BTS r32, r32: 0F AB /r -- set the bit of r selected by register b. */
+    emit_byte(0x0f);
+    emit_byte(0xab);
+    emit_byte(0xc0+8*b+r);
+}
+LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
+
+LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
+{ /* 16-bit SUB of immediate i from register d, choosing the shortest of three encodings. */
+    emit_byte(0x66); /* operand-size prefix: 16-bit operation */
+    if (isbyte(i)) { /* i fits in -128..127: 83 /5 ib, one-byte immediate */
+        emit_byte(0x83);
+        emit_byte(0xe8+d);
+        emit_byte(i);
+    }
+    else {
+        if (optimize_accum && isaccum(d))
+            emit_byte(0x2d); /* accumulator short form: 2D iw, no ModRM byte */
+        else {
+            emit_byte(0x81); /* general form: 81 /5 iw */
+            emit_byte(0xe8+d);
+        }
+        emit_word(i); /* both long forms carry a 16-bit immediate */
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
+
+
+LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+{ /* MOV r32, [disp32]: 8B /r -- load register d from the absolute address s. */
+    emit_byte(0x8b);
+    emit_byte(0x05+8*d); /* ModRM: mod=00, r/m=101 selects a bare 32-bit displacement */
+    emit_long(s);
+}
+LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
+{ /* MOV dword [disp32], imm32: C7 /0 -- store the immediate s at the absolute address d. */
+    emit_byte(0xc7);
+    emit_byte(0x05);
+    emit_long(d); /* address first, then the immediate */
+    emit_long(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+{ /* MOV word [disp32], imm16: 66 C7 /0 -- 16-bit variant of raw_mov_l_mi. */
+    emit_byte(0x66);
+    emit_byte(0xc7);
+    emit_byte(0x05);
+    emit_long(d);
+    emit_word(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
+
+LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+{ /* MOV byte [disp32], imm8: C6 /0 -- 8-bit variant of raw_mov_l_mi. */
+    emit_byte(0xc6);
+    emit_byte(0x05);
+    emit_long(d);
+    emit_byte(s);
+}
+LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
+
+LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0x05); + emit_long(d); + } + else { + emit_byte(0xc0); + emit_byte(0x05); + emit_long(d); + emit_byte(i); + } +} +LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xc0+r); + } + else { + emit_byte(0xc0); + emit_byte(0xc0+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xc0+r); + } + else { + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xc0+d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xe0+d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) 
+{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xc8+r); + } + else { + emit_byte(0xc0); + emit_byte(0xc8+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xc8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) + +// gb-- used for making an fpcr value in compemu_fpp.cpp +LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) +{ + emit_byte(0x0b); + emit_byte(0x05+8*d); + emit_long(s); +} +LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xc8+r); + } + else { + emit_byte(0xc1); + emit_byte(0xc8+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xc8+d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) +{ + emit_byte(0x66); + emit_byte(0xd3); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) +{ + emit_byte(0xd2); + emit_byte(0xe8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) +{ + emit_byte(0xd3); + emit_byte(0xf8+d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) + 
+LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) /* Old byte-level emitter: arithmetic/logical shifts by register and by immediate. */
+{ /* SAR r16, CL: 66 D3 /7. The count register r is never encoded -- this opcode form takes the count from CL. NOTE(review): presumably callers pin r to ECX; confirm. */
+    emit_byte(0x66);
+    emit_byte(0xd3);
+    emit_byte(0xf8+d); /* ModRM: register-direct, opcode extension /7 (SAR) */
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+{ /* SAR r8, CL: D2 /7. As above, r is not encoded. */
+    emit_byte(0xd2);
+    emit_byte(0xf8+d);
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+{ /* SHL r32 by immediate i. */
+    if (optimize_shift_once && (i == 1)) { /* shift-by-one short form: D1 /4, no immediate byte */
+        emit_byte(0xd1);
+        emit_byte(0xe0+r);
+    }
+    else { /* general form: C1 /4 ib */
+        emit_byte(0xc1);
+        emit_byte(0xe0+r);
+        emit_byte(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+{ /* SHL r16, imm8: 66 C1 /4 ib. The 16-bit variants always use the imm8 form, even for i == 1. */
+    emit_byte(0x66);
+    emit_byte(0xc1);
+    emit_byte(0xe0+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+{ /* SHL r8 by immediate i. */
+    if (optimize_shift_once && (i == 1)) { /* D0 /4 */
+        emit_byte(0xd0);
+        emit_byte(0xe0+r);
+    }
+    else { /* C0 /4 ib */
+        emit_byte(0xc0);
+        emit_byte(0xe0+r);
+        emit_byte(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+{ /* SHR (logical right shift) r32 by immediate i. */
+    if (optimize_shift_once && (i == 1)) { /* D1 /5 */
+        emit_byte(0xd1);
+        emit_byte(0xe8+r);
+    }
+    else { /* C1 /5 ib */
+        emit_byte(0xc1);
+        emit_byte(0xe8+r);
+        emit_byte(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+{ /* SHR r16, imm8: 66 C1 /5 ib. */
+    emit_byte(0x66);
+    emit_byte(0xc1);
+    emit_byte(0xe8+r);
+    emit_byte(i);
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+{ /* SHR r8 by immediate i. */
+    if (optimize_shift_once && (i == 1)) { /* D0 /5 */
+        emit_byte(0xd0);
+        emit_byte(0xe8+r);
+    }
+    else { /* C0 /5 ib */
+        emit_byte(0xc0);
+        emit_byte(0xe8+r);
+        emit_byte(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
+
+LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
+{ /* SAR (arithmetic right shift) r32 by immediate i. */
+    if (optimize_shift_once && (i == 1)) { /* D1 /7 */
+        emit_byte(0xd1);
+        emit_byte(0xf8+r);
+    }
+    else { /* C1 /7 ib */
+        emit_byte(0xc1);
+        emit_byte(0xf8+r);
+        emit_byte(i);
+    }
+}
+LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xf8+r); + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) +{ + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xf8+r); + } + else { + emit_byte(0xc0); + emit_byte(0xf8+r); + emit_byte(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) +{ + emit_byte(0x9e); +} +LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) + +LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) +{ + emit_byte(0x0f); + emit_byte(0xa2); +} +LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) + +LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) +{ + emit_byte(0x9f); +} +LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) + +LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) +{ + emit_byte(0x0f); + emit_byte(0x90+cc); + emit_byte(0xc0+d); +} +LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) + +LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) +{ + emit_byte(0x0f); + emit_byte(0x90+cc); + emit_byte(0x05); + emit_long(d); +} +LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) +{ + /* replacement using branch and mov */ + int uncc=(cc^1); + emit_byte(0x70+uncc); + emit_byte(3); /* skip next 2 bytes if not cc=true */ + emit_byte(0x88); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) +{ + if (have_cmov) { + emit_byte(0x66); + emit_byte(0x0f); + emit_byte(0x40+cc); + emit_byte(0xc0+8*d+s); + } + else { /* replacement using branch and mov */ + int uncc=(cc^1); + emit_byte(0x70+uncc); + emit_byte(3); /* skip next 3 bytes if not cc=true */ + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0xc0+8*s+d); + } +} +LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc)) + 
+LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) +{ + if (have_cmov) { + emit_byte(0x0f); + emit_byte(0x40+cc); + emit_byte(0xc0+8*d+s); + } + else { /* replacement using branch and mov */ + int uncc=(cc^1); + emit_byte(0x70+uncc); + emit_byte(2); /* skip next 2 bytes if not cc=true */ + emit_byte(0x89); + emit_byte(0xc0+8*s+d); + } +} +LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) + +LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) +{ + emit_byte(0x0f); + emit_byte(0xbc); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) +{ + emit_byte(0x0f); + emit_byte(0xbf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) +{ + emit_byte(0x0f); + emit_byte(0xbe); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) +{ + emit_byte(0x0f); + emit_byte(0xb7); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) +{ + emit_byte(0x0f); + emit_byte(0xb6); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) +{ + emit_byte(0x0f); + emit_byte(0xaf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) + abort(); + emit_byte(0xf7); + emit_byte(0xea); +} +LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + printf("Bad register in MUL: d=%d, s=%d\n",d,s); + abort(); + } + emit_byte(0xf7); + emit_byte(0xe2); +} +LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) +{ + 
abort(); /* %^$&%^$%#^ x86! */ + emit_byte(0x0f); + emit_byte(0xaf); + emit_byte(0xc0+8*d+s); +} +LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) +{ + emit_byte(0x88); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) +{ + int isebp=(baser==5)?0x40:0; + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + + emit_byte(0x8b); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x8a); + emit_byte(0x04+8*d+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 
s)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + + isebp=(baser==5)?0x40:0; + + emit_byte(0x89); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) +{ + int fi; + int isebp; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + isebp=(baser==5)?0x40:0; + + emit_byte(0x88); + emit_byte(0x04+8*s+isebp); + emit_byte(baser+8*index+0x40*fi); + if (isebp) + emit_byte(0x00); +} +LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x89); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: 
fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x88); + emit_byte(0x84+8*s); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x8b); + emit_byte(0x84+8*d); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x84+8*d); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} +LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x8a); + emit_byte(0x84+8*d); + emit_byte(baser+8*index+0x40*fi); + emit_long(base); +} 
+LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) +{ + int fi; + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: + fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor); + abort(); + } + emit_byte(0x8b); + emit_byte(0x04+8*d); + emit_byte(0x05+8*index+64*fi); + emit_long(base); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) +{ + int fi; + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: + fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor); + abort(); + } + if (have_cmov) { + emit_byte(0x0f); + emit_byte(0x40+cond); + emit_byte(0x04+8*d); + emit_byte(0x05+8*index+64*fi); + emit_long(base); + } + else { /* replacement using branch and mov */ + int uncc=(cond^1); + emit_byte(0x70+uncc); + emit_byte(7); /* skip next 7 bytes if not cc=true */ + emit_byte(0x8b); + emit_byte(0x04+8*d); + emit_byte(0x05+8*index+64*fi); + emit_long(base); + } +} +LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) +{ + if (have_cmov) { + emit_byte(0x0f); + emit_byte(0x40+cond); + emit_byte(0x05+8*d); + emit_long(mem); + } + else { /* replacement using branch and mov */ + int uncc=(cond^1); + emit_byte(0x70+uncc); + emit_byte(6); /* skip next 6 bytes if not cc=true */ + emit_byte(0x8b); + emit_byte(0x05+8*d); + emit_long(mem); + } +} +LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x8b); + emit_byte(0x40+8*d+s); + emit_byte(offset); +} 
+LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x40+8*d+s); + emit_byte(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x8a); + emit_byte(0x40+8*d+s); + emit_byte(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) +{ + emit_byte(0x8b); + emit_byte(0x80+8*d+s); + emit_long(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) +{ + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x80+8*d+s); + emit_long(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) +{ + emit_byte(0x8a); + emit_byte(0x80+8*d+s); + emit_long(offset); +} +LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0xc7); + emit_byte(0x40+d); + emit_byte(offset); + emit_long(i); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x66); + emit_byte(0xc7); + emit_byte(0x40+d); + emit_byte(offset); + emit_word(i); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0xc6); + emit_byte(0x40+d); + emit_byte(offset); + emit_byte(i); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + 
emit_byte(0x89); + emit_byte(0x40+8*s+d); + emit_byte(offset); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x40+8*s+d); + emit_byte(offset); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) +{ + Dif(!isbyte(offset)) abort(); + emit_byte(0x88); + emit_byte(0x40+8*s+d); + emit_byte(offset); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) +{ + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x8d); + emit_byte(0x40+8*d+s); + emit_byte(offset); + } + else { + emit_byte(0x8d); + emit_byte(0x80+8*d+s); + emit_long(offset); + } +} +LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x8d); + emit_byte(0x44+8*d); + emit_byte(0x40*fi+8*index+s); + emit_byte(offset); + } + else { + emit_byte(0x8d); + emit_byte(0x84+8*d); + emit_byte(0x40*fi+8*index+s); + emit_long(offset); + } +} +LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) +{ + int isebp=(s==5)?0x40:0; + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + + emit_byte(0x8d); + emit_byte(0x04+8*d+isebp); + emit_byte(0x40*fi+8*index+s); + if (isebp) + emit_byte(0); +} +LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) + 
+LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) +{ + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x89); + emit_byte(0x40+8*s+d); + emit_byte(offset); + } + else { + emit_byte(0x89); + emit_byte(0x80+8*s+d); + emit_long(offset); + } +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) +{ + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x80+8*s+d); + emit_long(offset); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) +{ + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x88); + emit_byte(0x40+8*s+d); + emit_byte(offset); + } + else { + emit_byte(0x88); + emit_byte(0x80+8*s+d); + emit_long(offset); + } +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) +{ + emit_byte(0x0f); + emit_byte(0xc8+r); +} +LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) + +LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) +{ + emit_byte(0x66); + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(0x08); +} +LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) +{ + emit_byte(0x89); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) +{ + emit_byte(0x89); + emit_byte(0x05+8*s); + emit_long(d); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x89); + emit_byte(0x05+8*s); + emit_long(d); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) + +LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0x8b); + emit_byte(0x05+8*d); + emit_long(s); +} +LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) +{ + emit_byte(0x88); + emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 
0x10+4) and others */ + emit_long(d); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) + +LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) +{ + emit_byte(0x8a); + emit_byte(0x05+8*d); + emit_long(s); +} +LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) +{ + emit_byte(0xb8+d); + emit_long(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0xb8+d); + emit_word(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) +{ + emit_byte(0xb0+d); + emit_byte(s); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) + +LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) +{ + emit_byte(0x81); + emit_byte(0x15); + emit_long(d); + emit_long(s); +} +LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) +{ + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x05); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + emit_byte(0x05); + emit_long(d); + emit_long(s); + } +} +LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) +{ + emit_byte(0x66); + emit_byte(0x81); + emit_byte(0x05); + emit_long(d); + emit_word(s); +} +LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) +{ + emit_byte(0x80); + emit_byte(0x05); + emit_long(d); + emit_byte(s); +} +LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) +{ + if (optimize_accum && isaccum(d)) + emit_byte(0xa9); + else { + emit_byte(0xf7); + emit_byte(0xc0+d); + } + emit_long(i); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) +{ + emit_byte(0x85); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, 
R2 s)) +{ + emit_byte(0x66); + emit_byte(0x85); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) +{ + emit_byte(0x84); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) +{ + emit_byte(0x81); + emit_byte(0xf0+d); + emit_long(i); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) +{ + if (optimize_imm8 && isbyte(i)) { + emit_byte(0x83); + emit_byte(0xe0+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x25); + else { + emit_byte(0x81); + emit_byte(0xe0+d); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) +{ + emit_byte(0x66); + if (optimize_imm8 && isbyte(i)) { + emit_byte(0x83); + emit_byte(0xe0+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x25); + else { + emit_byte(0x81); + emit_byte(0xe0+d); + } + emit_word(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) +{ + emit_byte(0x21); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x21); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) +{ + emit_byte(0x20); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) +{ + if (optimize_imm8 && isbyte(i)) { + emit_byte(0x83); + emit_byte(0xc8+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x0d); + else { + emit_byte(0x81); + emit_byte(0xc8+d); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 
s)) +{ + emit_byte(0x09); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x09); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) +{ + emit_byte(0x08); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) +{ + emit_byte(0x11); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x11); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) +{ + emit_byte(0x10); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) +{ + emit_byte(0x01); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x01); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) +{ + emit_byte(0x00); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) +{ + if (isbyte(i)) { + emit_byte(0x83); + emit_byte(0xe8+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x2d); + else { + emit_byte(0x81); + emit_byte(0xe8+d); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) +{ + if (optimize_accum && isaccum(d)) + emit_byte(0x2c); + else { + emit_byte(0x80); + emit_byte(0xe8+d); + } + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) +{ + if (isbyte(i)) { + emit_byte(0x83); + emit_byte(0xc0+d); + 
emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x05); + else { + emit_byte(0x81); + emit_byte(0xc0+d); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) +{ + emit_byte(0x66); + if (isbyte(i)) { + emit_byte(0x83); + emit_byte(0xc0+d); + emit_byte(i); + } + else { + if (optimize_accum && isaccum(d)) + emit_byte(0x05); + else { + emit_byte(0x81); + emit_byte(0xc0+d); + } + emit_word(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) +{ + if (optimize_accum && isaccum(d)) + emit_byte(0x04); + else { + emit_byte(0x80); + emit_byte(0xc0+d); + } + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) + +LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) +{ + emit_byte(0x19); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x19); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) +{ + emit_byte(0x18); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) +{ + emit_byte(0x29); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x29); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) +{ + emit_byte(0x28); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) +{ + emit_byte(0x39); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) +{ + if (optimize_imm8 && isbyte(i)) { + emit_byte(0x83); + emit_byte(0xf8+r); + emit_byte(i); + } + 
else { + if (optimize_accum && isaccum(r)) + emit_byte(0x3d); + else { + emit_byte(0x81); + emit_byte(0xf8+r); + } + emit_long(i); + } +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x39); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) + +LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) +{ + emit_byte(0x80); + emit_byte(0x3d); + emit_long(d); + emit_byte(s); +} +LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) +{ + if (optimize_accum && isaccum(d)) + emit_byte(0x3c); + else { + emit_byte(0x80); + emit_byte(0xf8+d); + } + emit_byte(i); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) +{ + emit_byte(0x38); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) + +LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) +{ + int fi; + + switch(factor) { + case 1: fi=0; break; + case 2: fi=1; break; + case 4: fi=2; break; + case 8: fi=3; break; + default: abort(); + } + emit_byte(0x39); + emit_byte(0x04+8*d); + emit_byte(5+8*index+0x40*fi); + emit_long(offset); +} +LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) +{ + emit_byte(0x31); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) +{ + emit_byte(0x66); + emit_byte(0x31); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) +{ + emit_byte(0x30); + emit_byte(0xc0+8*s+d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) +{ + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x2d); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + 
emit_byte(0x2d); + emit_long(d); + emit_long(s); + } +} +LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) +{ + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x3d); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + emit_byte(0x3d); + emit_long(d); + emit_long(s); + } +} +LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) +{ + emit_byte(0x87); + emit_byte(0xc0+8*r1+r2); +} +LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) + +LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) +{ + emit_byte(0x86); + emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */ +} +LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) + +/************************************************************************* + * FIXME: mem access modes probably wrong * + *************************************************************************/ + +LOWFUNC(READ,WRITE,0,raw_pushfl,(void)) +{ + emit_byte(0x9c); +} +LENDFUNC(READ,WRITE,0,raw_pushfl,(void)) + +LOWFUNC(WRITE,READ,0,raw_popfl,(void)) +{ + emit_byte(0x9d); +} +LENDFUNC(WRITE,READ,0,raw_popfl,(void)) + +/* Generate floating-point instructions */ +static inline void x86_fadd_m(MEMR s) +{ + emit_byte(0xdc); + emit_byte(0x05); + emit_long(s); +} + +#endif + +/************************************************************************* + * Unoptimizable stuff --- jump * + *************************************************************************/ + +static __inline__ void raw_call_r(R4 r) +{ +#if USE_NEW_RTASM + CALLsr(r); +#else + emit_byte(0xff); + emit_byte(0xd0+r); +#endif +} + +static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +{ +#if USE_NEW_RTASM + CALLsm(base, X86_NOREG, r, m); +#else + int mu; + switch(m) { + case 1: mu=0; break; + case 2: mu=1; break; + case 4: mu=2; break; + case 8: mu=3; break; + default: 
abort(); + } + emit_byte(0xff); + emit_byte(0x14); + emit_byte(0x05+8*r+0x40*mu); + emit_long(base); +#endif +} + +static __inline__ void raw_jmp_r(R4 r) +{ +#if USE_NEW_RTASM + JMPsr(r); +#else + emit_byte(0xff); + emit_byte(0xe0+r); +#endif +} + +static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +{ +#if USE_NEW_RTASM + JMPsm(base, X86_NOREG, r, m); +#else + int mu; + switch(m) { + case 1: mu=0; break; + case 2: mu=1; break; + case 4: mu=2; break; + case 8: mu=3; break; + default: abort(); + } + emit_byte(0xff); + emit_byte(0x24); + emit_byte(0x05+8*r+0x40*mu); + emit_long(base); +#endif +} + +static __inline__ void raw_jmp_m(uae_u32 base) +{ + emit_byte(0xff); + emit_byte(0x25); + emit_long(base); +} + + +static __inline__ void raw_call(uae_u32 t) +{ +#if USE_NEW_RTASM + CALLm(t); +#else + emit_byte(0xe8); + emit_long(t-(uae_u32)target-4); +#endif +} + +static __inline__ void raw_jmp(uae_u32 t) +{ +#if USE_NEW_RTASM + JMPm(t); +#else + emit_byte(0xe9); + emit_long(t-(uae_u32)target-4); +#endif +} + +static __inline__ void raw_jl(uae_u32 t) +{ + emit_byte(0x0f); + emit_byte(0x8c); + emit_long(t-(uintptr)target-4); +} + +static __inline__ void raw_jz(uae_u32 t) +{ + emit_byte(0x0f); + emit_byte(0x84); + emit_long(t-(uintptr)target-4); +} + +static __inline__ void raw_jnz(uae_u32 t) +{ + emit_byte(0x0f); + emit_byte(0x85); + emit_long(t-(uintptr)target-4); +} + +static __inline__ void raw_jnz_l_oponly(void) +{ + emit_byte(0x0f); + emit_byte(0x85); +} + +static __inline__ void raw_jcc_l_oponly(int cc) +{ + emit_byte(0x0f); + emit_byte(0x80+cc); +} + +static __inline__ void raw_jnz_b_oponly(void) +{ + emit_byte(0x75); +} + +static __inline__ void raw_jz_b_oponly(void) +{ + emit_byte(0x74); +} + +static __inline__ void raw_jcc_b_oponly(int cc) +{ + emit_byte(0x70+cc); +} + +static __inline__ void raw_jmp_l_oponly(void) +{ + emit_byte(0xe9); +} + +static __inline__ void raw_jmp_b_oponly(void) +{ + emit_byte(0xeb); +} + +static __inline__ void 
raw_ret(void) +{ + emit_byte(0xc3); +} + +static __inline__ void raw_nop(void) +{ + emit_byte(0x90); +} + +static __inline__ void raw_emit_nop_filler(int nbytes) +{ + /* Source: GNU Binutils 2.12.90.0.15 */ + /* Various efficient no-op patterns for aligning code labels. + Note: Don't try to assemble the instructions in the comments. + 0L and 0w are not legal. */ + static const uae_u8 f32_1[] = + {0x90}; /* nop */ + static const uae_u8 f32_2[] = + {0x89,0xf6}; /* movl %esi,%esi */ + static const uae_u8 f32_3[] = + {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */ + static const uae_u8 f32_4[] = + {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uae_u8 f32_5[] = + {0x90, /* nop */ + 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uae_u8 f32_6[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */ + static const uae_u8 f32_7[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uae_u8 f32_8[] = + {0x90, /* nop */ + 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uae_u8 f32_9[] = + {0x89,0xf6, /* movl %esi,%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_10[] = + {0x8d,0x76,0x00, /* leal 0(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_11[] = + {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_12[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */ + static const uae_u8 f32_13[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_14[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_15[] = + 
{0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uae_u8 f32_16[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uae_u8 *const f32_patt[] = { + f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8, + f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15 + }; + static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 }; + +#if defined(__x86_64__) + /* The recommended way to pad 64bit code is to use NOPs preceded by + maximally four 0x66 prefixes. Balance the size of nops. */ + if (nbytes == 0) + return; + + int i; + int nnops = (nbytes + 3) / 4; + int len = nbytes / nnops; + int remains = nbytes - nnops * len; + + for (i = 0; i < remains; i++) { + emit_block(prefixes, len); + raw_nop(); + } + for (; i < nnops; i++) { + emit_block(prefixes, len - 1); + raw_nop(); + } +#else + int nloops = nbytes / 16; + while (nloops-- > 0) + emit_block(f32_16, sizeof(f32_16)); + + nbytes %= 16; + if (nbytes) + emit_block(f32_patt[nbytes - 1], nbytes); +#endif +} + + +/************************************************************************* + * Flag handling, to and fro UAE flag register * + *************************************************************************/ + +static __inline__ void raw_flags_evicted(int r) +{ + //live.state[FLAGTMP].status=CLEAN; + live.state[FLAGTMP].status=INMEM; + live.state[FLAGTMP].realreg=-1; + /* We just "evicted" FLAGTMP. */ + if (live.nat[r].nholds!=1) { + /* Huh? 
*/ + abort(); + } + live.nat[r].nholds=0; +} + +#define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */ +static __inline__ void raw_flags_to_reg_FLAGREG(int r) +{ + raw_lahf(0); /* Most flags in AH */ + //raw_setcc(r,0); /* V flag in AL */ + raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0); + +#if 1 /* Let's avoid those nasty partial register stalls */ + //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r); + raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX); + raw_flags_evicted(r); +#endif +} + +#define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */ +static __inline__ void raw_reg_to_flags_FLAGREG(int r) +{ + raw_cmp_b_ri(r,-127); /* set V */ + raw_sahf(0); +} + +#define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */ +static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp) +{ + raw_mov_l_rr(tmp,s); + raw_lahf(s); /* flags into ah */ + raw_and_l_ri(s,0xffffbfff); + raw_and_l_ri(tmp,0x00004000); + raw_xor_l_ri(tmp,0x00004000); + raw_or_l(s,tmp); + raw_sahf(s); +} + +static __inline__ void raw_flags_init_FLAGREG(void) { } + +#define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */ +static __inline__ void raw_flags_to_reg_FLAGSTK(int r) +{ + raw_pushfl(); + raw_pop_l_r(r); + raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r); + raw_flags_evicted(r); +} + +#define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */ +static __inline__ void raw_reg_to_flags_FLAGSTK(int r) +{ + raw_push_l_r(r); + raw_popfl(); +} + +#define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */ +static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp) +{ + raw_mov_l_rr(tmp,s); + raw_pushfl(); + raw_pop_l_r(s); + raw_and_l_ri(s,0xffffffbf); + raw_and_l_ri(tmp,0x00000040); + raw_xor_l_ri(tmp,0x00000040); + raw_or_l(s,tmp); + raw_push_l_r(s); + raw_popfl(); +} + +static __inline__ void raw_flags_init_FLAGSTK(void) { } + +#if defined(__x86_64__) +/* Try to use the LAHF/SETO method on x86_64 since it 
is faster. + This can't be the default because some older CPUs don't support + LAHF/SAHF in long mode. */ +static int FLAG_NREG1_FLAGGEN = 0; +static __inline__ void raw_flags_to_reg_FLAGGEN(int r) +{ + if (have_lahf_lm) { + // NOTE: the interpreter uses the normal EFLAGS layout + // pushf/popf CF(0) ZF( 6) SF( 7) OF(11) + // sahf/lahf CF(8) ZF(14) SF(15) OF( 0) + assert(r == 0); + raw_setcc(r,0); /* V flag in AL */ + raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */ + raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0); + raw_lahf(0); /* most flags in AH */ + raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX); + raw_flags_evicted(r); + } + else + raw_flags_to_reg_FLAGSTK(r); +} + +static int FLAG_NREG2_FLAGGEN = 0; +static __inline__ void raw_reg_to_flags_FLAGGEN(int r) +{ + if (have_lahf_lm) { + raw_xchg_b_rr(0,AH_INDEX); + raw_cmp_b_ri(r,-120); /* set V */ + raw_sahf(0); + } + else + raw_reg_to_flags_FLAGSTK(r); +} + +static int FLAG_NREG3_FLAGGEN = 0; +static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp) +{ + if (have_lahf_lm) + raw_flags_set_zero_FLAGREG(s, tmp); + else + raw_flags_set_zero_FLAGSTK(s, tmp); +} + +/* Pick the FLAG_NREGn register constraints that belong to the flag + method selected at run time by have_lahf_lm. All three constraints + must be set: FLAG_NREG3 was previously never assigned (FLAG_NREG1 + was written twice), leaving it at 0 in the pushf/popf fallback. */ +static __inline__ void raw_flags_init_FLAGGEN(void) +{ + if (have_lahf_lm) { + FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG; + FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG; + FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGREG; + } + else { + FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK; + FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK; + FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGSTK; + } +} +#endif + +#ifdef SAHF_SETO_PROFITABLE +#define FLAG_SUFFIX FLAGREG +#elif defined __x86_64__ +#define FLAG_SUFFIX FLAGGEN +#else +#define FLAG_SUFFIX FLAGSTK +#endif + +#define FLAG_GLUE_2(x, y) x ## _ ## y +#define FLAG_GLUE_1(x, y) FLAG_GLUE_2(x, y) +#define FLAG_GLUE(x) FLAG_GLUE_1(x, FLAG_SUFFIX) + +#define raw_flags_init FLAG_GLUE(raw_flags_init) +#define FLAG_NREG1 FLAG_GLUE(FLAG_NREG1) +#define raw_flags_to_reg FLAG_GLUE(raw_flags_to_reg) +#define FLAG_NREG2 
FLAG_GLUE(FLAG_NREG2) +#define raw_reg_to_flags FLAG_GLUE(raw_reg_to_flags) +#define FLAG_NREG3 FLAG_GLUE(FLAG_NREG3) +#define raw_flags_set_zero FLAG_GLUE(raw_flags_set_zero) + +/* Apparently, there are enough instructions between flag store and + flag reload to avoid the partial memory stall */ +static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r) +{ +#if 1 + raw_mov_l_rm(target,(uintptr)live.state[r].mem); +#else + raw_mov_b_rm(target,(uintptr)live.state[r].mem); + raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1); +#endif +} + +/* FLAGX is byte sized, and we *do* write it at that size */ +static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r) +{ + if (live.nat[target].canbyte) + raw_mov_b_rm(target,(uintptr)live.state[r].mem); + else if (live.nat[target].canword) + raw_mov_w_rm(target,(uintptr)live.state[r].mem); + else + raw_mov_l_rm(target,(uintptr)live.state[r].mem); +} + +static __inline__ void raw_dec_sp(int off) +{ + if (off) raw_sub_l_ri(ESP_INDEX,off); +} + +static __inline__ void raw_inc_sp(int off) +{ + if (off) raw_add_l_ri(ESP_INDEX,off); +} + +/************************************************************************* + * Handling mistaken direct memory access * + *************************************************************************/ + +// gb-- I don't need that part for JIT Basilisk II +#if defined(NATMEM_OFFSET) && 0 +#include +#include + +#define SIG_READ 1 +#define SIG_WRITE 2 + +static int in_handler=0; +static uae_u8 veccode[256]; + +static void vec(int x, struct sigcontext sc) +{ + uae_u8* i=(uae_u8*)sc.eip; + uae_u32 addr=sc.cr2; + int r=-1; + int size=4; + int dir=-1; + int len=0; + int j; + + write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip); + if (!canbang) + write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n"); + if (in_handler) + write_log("Argh --- Am already in a handler. 
Shouldn't happen!\n"); + + if (canbang && i>=compiled_code && i<=current_compile_p) { + if (*i==0x66) { + i++; + size=2; + len++; + } + + switch(i[0]) { + case 0x8a: + if ((i[1]&0xc0)==0x80) { + r=(i[1]>>3)&7; + dir=SIG_READ; + size=1; + len+=6; + break; + } + break; + case 0x88: + if ((i[1]&0xc0)==0x80) { + r=(i[1]>>3)&7; + dir=SIG_WRITE; + size=1; + len+=6; + break; + } + break; + case 0x8b: + if ((i[1]&0xc0)==0x80) { + r=(i[1]>>3)&7; + dir=SIG_READ; + len+=6; + break; + } + if ((i[1]&0xc0)==0x40) { + r=(i[1]>>3)&7; + dir=SIG_READ; + len+=3; + break; + } + break; + case 0x89: + if ((i[1]&0xc0)==0x80) { + r=(i[1]>>3)&7; + dir=SIG_WRITE; + len+=6; + break; + } + if ((i[1]&0xc0)==0x40) { + r=(i[1]>>3)&7; + dir=SIG_WRITE; + len+=3; + break; + } + break; + } + } + + if (r!=-1) { + void* pr=NULL; + write_log("register was %d, direction was %d, size was %d\n",r,dir,size); + + switch(r) { + case 0: pr=&(sc.eax); break; + case 1: pr=&(sc.ecx); break; + case 2: pr=&(sc.edx); break; + case 3: pr=&(sc.ebx); break; + case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break; + case 5: pr=(size>1)? + (void*)(&(sc.ebp)): + (void*)(((uae_u8*)&(sc.ecx))+1); break; + case 6: pr=(size>1)? + (void*)(&(sc.esi)): + (void*)(((uae_u8*)&(sc.edx))+1); break; + case 7: pr=(size>1)? 
+ (void*)(&(sc.edi)): + (void*)(((uae_u8*)&(sc.ebx))+1); break; + default: abort(); + } + if (pr) { + blockinfo* bi; + + if (currprefs.comp_oldsegv) { + addr-=NATMEM_OFFSET; + + if ((addr>=0x10000000 && addr<0x40000000) || + (addr>=0x50000000)) { + write_log("Suspicious address in %x SEGV handler.\n",addr); + } + if (dir==SIG_READ) { + switch(size) { + case 1: *((uae_u8*)pr)=get_byte(addr); break; + case 2: *((uae_u16*)pr)=get_word(addr); break; + case 4: *((uae_u32*)pr)=get_long(addr); break; + default: abort(); + } + } + else { /* write */ + switch(size) { + case 1: put_byte(addr,*((uae_u8*)pr)); break; + case 2: put_word(addr,*((uae_u16*)pr)); break; + case 4: put_long(addr,*((uae_u32*)pr)); break; + default: abort(); + } + } + write_log("Handled one access!\n"); + fflush(stdout); + segvcount++; + sc.eip+=len; + } + else { + void* tmp=target; + int i; + uae_u8 vecbuf[5]; + + addr-=NATMEM_OFFSET; + + if ((addr>=0x10000000 && addr<0x40000000) || + (addr>=0x50000000)) { + write_log("Suspicious address in %x SEGV handler.\n",addr); + } + + target=(uae_u8*)sc.eip; + for (i=0;i<5;i++) + vecbuf[i]=target[i]; + emit_byte(0xe9); + emit_long((uintptr)veccode-(uintptr)target-4); + write_log("Create jump to %p\n",veccode); + + write_log("Handled one access!\n"); + fflush(stdout); + segvcount++; + + target=veccode; + + if (dir==SIG_READ) { + /* Read the value at the faulting operand's full width, as the + comp_oldsegv path does; get_byte() for the 2- and 4-byte + cases fetched only a single byte. */ + switch(size) { + case 1: raw_mov_b_ri(r,get_byte(addr)); break; + case 2: raw_mov_w_ri(r,get_word(addr)); break; + case 4: raw_mov_l_ri(r,get_long(addr)); break; + default: abort(); + } + } + else { /* write */ + switch(size) { + case 1: put_byte(addr,*((uae_u8*)pr)); break; + case 2: put_word(addr,*((uae_u16*)pr)); break; + case 4: put_long(addr,*((uae_u32*)pr)); break; + default: abort(); + } + } + for (i=0;i<5;i++) + raw_mov_b_mi(sc.eip+i,vecbuf[i]); + raw_mov_l_mi((uintptr)&in_handler,0); + emit_byte(0xe9); + emit_long(sc.eip+len-(uintptr)target-4); + in_handler=1; + target=tmp; + } + bi=active; + while (bi) { + if (bi->handler && + 
(uae_u8*)bi->direct_handler<=i && + (uae_u8*)bi->nexthandler>i) { + write_log("deleted trigger (%p<%p<%p) %p\n", + bi->handler, + i, + bi->nexthandler, + bi->pc_p); + invalidate_block(bi); + raise_in_cl_list(bi); + set_special(0); + return; + } + bi=bi->next; + } + /* Not found in the active list. Might be a rom routine that + is in the dormant list */ + bi=dormant; + while (bi) { + if (bi->handler && + (uae_u8*)bi->direct_handler<=i && + (uae_u8*)bi->nexthandler>i) { + write_log("deleted trigger (%p<%p<%p) %p\n", + bi->handler, + i, + bi->nexthandler, + bi->pc_p); + invalidate_block(bi); + raise_in_cl_list(bi); + set_special(0); + return; + } + bi=bi->next; + } + write_log("Huh? Could not find trigger!\n"); + return; + } + } + write_log("Can't handle access!\n"); + for (j=0;j<10;j++) { + write_log("instruction byte %2d is %02x\n",j,i[j]); + } + write_log("Please send the above info (starting at \"fault address\") to\n" + "bmeyer@csse.monash.edu.au\n" + "This shouldn't happen ;-)\n"); + fflush(stdout); + signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */ +} +#endif + + +/************************************************************************* + * Checking for CPU features * + *************************************************************************/ + +struct cpuinfo_x86 { + uae_u8 x86; // CPU family + uae_u8 x86_vendor; // CPU vendor + uae_u8 x86_processor; // CPU canonical processor type + uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise + uae_u32 x86_hwcap; + uae_u8 x86_model; + uae_u8 x86_mask; + int cpuid_level; // Maximum supported CPUID level, -1=no CPUID + char x86_vendor_id[16]; +}; +struct cpuinfo_x86 cpuinfo; + +enum { + X86_VENDOR_INTEL = 0, + X86_VENDOR_CYRIX = 1, + X86_VENDOR_AMD = 2, + X86_VENDOR_UMC = 3, + X86_VENDOR_NEXGEN = 4, + X86_VENDOR_CENTAUR = 5, + X86_VENDOR_RISE = 6, + X86_VENDOR_TRANSMETA = 7, + X86_VENDOR_NSC = 8, + X86_VENDOR_UNKNOWN = 0xff +}; + +enum { + X86_PROCESSOR_I386, /* 80386 */ + 
X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */ + X86_PROCESSOR_PENTIUM, + X86_PROCESSOR_PENTIUMPRO, + X86_PROCESSOR_K6, + X86_PROCESSOR_ATHLON, + X86_PROCESSOR_PENTIUM4, + X86_PROCESSOR_X86_64, + X86_PROCESSOR_max +}; + +static const char * x86_processor_string_table[X86_PROCESSOR_max] = { + "80386", + "80486", + "Pentium", + "PentiumPro", + "K6", + "Athlon", + "Pentium4", + "x86-64" +}; + +static struct ptt { + const int align_loop; + const int align_loop_max_skip; + const int align_jump; + const int align_jump_max_skip; + const int align_func; +} +x86_alignments[X86_PROCESSOR_max] = { + { 4, 3, 4, 3, 4 }, + { 16, 15, 16, 15, 16 }, + { 16, 7, 16, 7, 16 }, + { 16, 15, 16, 7, 16 }, + { 32, 7, 32, 7, 32 }, + { 16, 7, 16, 7, 16 }, + { 0, 0, 0, 0, 0 }, + { 16, 7, 16, 7, 16 } +}; + +static void +x86_get_cpu_vendor(struct cpuinfo_x86 *c) +{ + char *v = c->x86_vendor_id; + + if (!strcmp(v, "GenuineIntel")) + c->x86_vendor = X86_VENDOR_INTEL; + else if (!strcmp(v, "AuthenticAMD")) + c->x86_vendor = X86_VENDOR_AMD; + else if (!strcmp(v, "CyrixInstead")) + c->x86_vendor = X86_VENDOR_CYRIX; + else if (!strcmp(v, "Geode by NSC")) + c->x86_vendor = X86_VENDOR_NSC; + else if (!strcmp(v, "UMC UMC UMC ")) + c->x86_vendor = X86_VENDOR_UMC; + else if (!strcmp(v, "CentaurHauls")) + c->x86_vendor = X86_VENDOR_CENTAUR; + else if (!strcmp(v, "NexGenDriven")) + c->x86_vendor = X86_VENDOR_NEXGEN; + else if (!strcmp(v, "RiseRiseRise")) + c->x86_vendor = X86_VENDOR_RISE; + else if (!strcmp(v, "GenuineTMx86") || + !strcmp(v, "TransmetaCPU")) + c->x86_vendor = X86_VENDOR_TRANSMETA; + else + c->x86_vendor = X86_VENDOR_UNKNOWN; +} + +static void +cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +{ + const int CPUID_SPACE = 4096; + uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE); + if (cpuid_space == VM_MAP_FAILED) + abort(); + vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE); + + static uae_u32 s_op, s_eax, s_ebx, 
s_ecx, s_edx; + uae_u8* tmp=get_target(); + + s_op = op; + set_target(cpuid_space); + raw_push_l_r(0); /* eax */ + raw_push_l_r(1); /* ecx */ + raw_push_l_r(2); /* edx */ + raw_push_l_r(3); /* ebx */ + raw_mov_l_rm(0,(uintptr)&s_op); + raw_cpuid(0); + raw_mov_l_mr((uintptr)&s_eax,0); + raw_mov_l_mr((uintptr)&s_ebx,3); + raw_mov_l_mr((uintptr)&s_ecx,1); + raw_mov_l_mr((uintptr)&s_edx,2); + raw_pop_l_r(3); + raw_pop_l_r(2); + raw_pop_l_r(1); + raw_pop_l_r(0); + raw_ret(); + set_target(tmp); + + ((cpuop_func*)cpuid_space)(0); + if (eax != NULL) *eax = s_eax; + if (ebx != NULL) *ebx = s_ebx; + if (ecx != NULL) *ecx = s_ecx; + if (edx != NULL) *edx = s_edx; + + vm_release(cpuid_space, CPUID_SPACE); +} + +static void +raw_init_cpu(void) +{ + struct cpuinfo_x86 *c = &cpuinfo; + + /* Defaults */ + c->x86_processor = X86_PROCESSOR_max; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_hwcap = 0; + + /* Get vendor name */ + c->x86_vendor_id[12] = '\0'; + cpuid(0x00000000, + (uae_u32 *)&c->cpuid_level, + (uae_u32 *)&c->x86_vendor_id[0], + (uae_u32 *)&c->x86_vendor_id[8], + (uae_u32 *)&c->x86_vendor_id[4]); + x86_get_cpu_vendor(c); + + /* Intel-defined flags: level 0x00000001 */ + c->x86_brand_id = 0; + if ( c->cpuid_level >= 0x00000001 ) { + uae_u32 tfms, brand_id; + cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap); + c->x86 = (tfms >> 8) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; /* extended family */ + c->x86_model = (tfms >> 4) & 15; + if (c->x86_model == 0xf) + c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */ + c->x86_brand_id = brand_id & 0xff; + c->x86_mask = tfms & 15; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + uae_u32 xlvl; + cpuid(0x80000000, &xlvl, NULL, NULL, NULL); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + 
if ( xlvl >= 0x80000001 ) { + uae_u32 features, extra_features; + cpuid(0x80000001, NULL, NULL, &extra_features, &features); + if (features & (1 << 29)) { + /* Assume x86-64 if long mode is supported */ + c->x86_processor = X86_PROCESSOR_X86_64; + } + if (extra_features & (1 << 0)) + have_lahf_lm = true; + } + } + + /* Canonicalize processor ID */ + switch (c->x86) { + case 3: + c->x86_processor = X86_PROCESSOR_I386; + break; + case 4: + c->x86_processor = X86_PROCESSOR_I486; + break; + case 5: + if (c->x86_vendor == X86_VENDOR_AMD) + c->x86_processor = X86_PROCESSOR_K6; + else + c->x86_processor = X86_PROCESSOR_PENTIUM; + break; + case 6: + if (c->x86_vendor == X86_VENDOR_AMD) + c->x86_processor = X86_PROCESSOR_ATHLON; + else + c->x86_processor = X86_PROCESSOR_PENTIUMPRO; + break; + case 15: + if (c->x86_processor == X86_PROCESSOR_max) { + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + c->x86_processor = X86_PROCESSOR_PENTIUM4; + break; + case X86_VENDOR_AMD: + /* Assume a 32-bit Athlon processor if not in long mode */ + c->x86_processor = X86_PROCESSOR_ATHLON; + break; + } + } + break; + } + if (c->x86_processor == X86_PROCESSOR_max) { + c->x86_processor = X86_PROCESSOR_I386; + fprintf(stderr, "Error: unknown processor type, assuming i386\n"); + fprintf(stderr, " Family : %d\n", c->x86); + fprintf(stderr, " Model : %d\n", c->x86_model); + fprintf(stderr, " Mask : %d\n", c->x86_mask); + fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor); + if (c->x86_brand_id) + fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id); + } + + /* Have CMOV support? */ + have_cmov = c->x86_hwcap & (1 << 15); +#if defined(__x86_64__) + if (!have_cmov) { + write_log("x86-64 implementations are bound to have CMOV!\n"); + abort(); + } +#endif + + /* Can the host CPU suffer from partial register stalls? 
*/ + have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL); +#if 1 + /* It appears that partial register writes are a bad idea even on + AMD K7 cores, even though they are not supposed to have the + dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */ + if (c->x86_processor == X86_PROCESSOR_ATHLON) + have_rat_stall = true; +#endif + + /* Alignments */ + if (tune_alignment) { + align_loops = x86_alignments[c->x86_processor].align_loop; + align_jumps = x86_alignments[c->x86_processor].align_jump; + } + + write_log("Max CPUID level=%d Processor is %s [%s]\n", + c->cpuid_level, c->x86_vendor_id, + x86_processor_string_table[c->x86_processor]); + + raw_flags_init(); +} + +static bool target_check_bsf(void) +{ + bool mismatch = false; + for (int g_ZF = 0; g_ZF <= 1; g_ZF++) { + for (int g_CF = 0; g_CF <= 1; g_CF++) { + for (int g_OF = 0; g_OF <= 1; g_OF++) { + for (int g_SF = 0; g_SF <= 1; g_SF++) { + for (int value = -1; value <= 1; value++) { + unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF; + unsigned long tmp = value; + __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0" + : "+r" (flags), "+r" (tmp) : : "cc"); + int OF = (flags >> 11) & 1; + int SF = (flags >> 7) & 1; + int ZF = (flags >> 6) & 1; + int CF = flags & 1; + tmp = (value == 0); + if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF) + mismatch = true; + } + }}}} + if (mismatch) + write_log("Target CPU defines all flags on BSF instruction\n"); + return !mismatch; +} + + +/************************************************************************* + * FPU stuff * + *************************************************************************/ + + +static __inline__ void raw_fp_init(void) +{ + int i; + + for (i=0;i1) { + emit_byte(0x9b); + emit_byte(0xdb); + emit_byte(0xe3); + live.tos=-1; + } +#endif + while (live.tos>=1) { + emit_byte(0xde); + emit_byte(0xd9); + live.tos-=2; + } + while (live.tos>=0) { + emit_byte(0xdd); + emit_byte(0xd8); + live.tos--; + } + 
raw_fp_init(); +} + +static __inline__ void make_tos(int r) +{ + int p,q; + + if (live.spos[r]<0) { /* Register not yet on stack */ + emit_byte(0xd9); + emit_byte(0xe8); /* Push '1' on the stack, just to grow it */ + live.tos++; + live.spos[r]=live.tos; + live.onstack[live.tos]=r; + return; + } + /* Register is on stack */ + if (live.tos==live.spos[r]) + return; + p=live.spos[r]; + q=live.onstack[live.tos]; + + emit_byte(0xd9); + emit_byte(0xc8+live.tos-live.spos[r]); /* exchange it with top of stack */ + live.onstack[live.tos]=r; + live.spos[r]=live.tos; + live.onstack[p]=q; + live.spos[q]=p; +} + +static __inline__ void make_tos2(int r, int r2) +{ + int q; + + make_tos(r2); /* Put the reg that's supposed to end up in position2 + on top */ + + if (live.spos[r]<0) { /* Register not yet on stack */ + make_tos(r); /* This will extend the stack */ + return; + } + /* Register is on stack */ + emit_byte(0xd9); + emit_byte(0xc9); /* Move r2 into position 2 */ + + q=live.onstack[live.tos-1]; + live.onstack[live.tos]=q; + live.spos[q]=live.tos; + live.onstack[live.tos-1]=r2; + live.spos[r2]=live.tos-1; + + make_tos(r); /* And r into 1 */ +} + +static __inline__ int stackpos(int r) +{ + if (live.spos[r]<0) + abort(); + if (live.tos=0) { + /* source is on top of stack, and we already have the dest */ + int dd=stackpos(d); + emit_byte(0xdd); + emit_byte(0xd0+dd); + } + else { + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source on tos */ + tos_make(d); /* store to destination, pop if necessary */ + } +} +LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s)) + +LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base)) +{ + emit_byte(0xd9); + emit_byte(0xa8+index); + emit_long(base); +} +LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base)) + + +LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xfa); /* take 
square root */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xfa); /* take square root */ + } +} +LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe1); /* take fabs */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xe1); /* take fabs */ + } +} +LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xfc); /* take frndint */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xfc); /* take frndint */ + } +} +LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xff); /* take cos */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xff); /* take cos */ + } +} +LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xfe); /* take sin */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xfe); /* take sin */ + } +} +LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s)) + +static const double one=1; +LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) +{ + int ds; + + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + 
emit_byte(0xc0+ds); /* duplicate source */ + + emit_byte(0xd9); + emit_byte(0xc0); /* duplicate top of stack. Now up to 8 high */ + emit_byte(0xd9); + emit_byte(0xfc); /* rndint */ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* subtract rounded from original */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 */ + x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */ + emit_byte(0xd9); + emit_byte(0xfd); /* and scale it */ + emit_byte(0xdd); + emit_byte(0xd9); /* take he rounded value off */ + tos_make(d); /* store to destination */ +} +LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) +{ + int ds; + + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xea); /* fldl2e */ + emit_byte(0xde); + emit_byte(0xc9); /* fmulp --- multiply source by log2(e) */ + + emit_byte(0xd9); + emit_byte(0xc0); /* duplicate top of stack. 
Now up to 8 high */ + emit_byte(0xd9); + emit_byte(0xfc); /* rndint */ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two elements */ + emit_byte(0xd8); + emit_byte(0xe1); /* subtract rounded from original */ + emit_byte(0xd9); + emit_byte(0xf0); /* f2xm1 */ + x86_fadd_m((uintptr)&one); /* Add '1' without using extra stack space */ + emit_byte(0xd9); + emit_byte(0xfd); /* and scale it */ + emit_byte(0xdd); + emit_byte(0xd9); /* take he rounded value off */ + tos_make(d); /* store to destination */ +} +LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s)) +{ + int ds; + + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe8); /* push '1' */ + emit_byte(0xd9); + emit_byte(0xc9); /* swap top two */ + emit_byte(0xd9); + emit_byte(0xf1); /* take 1*log2(x) */ + tos_make(d); /* store to destination */ +} +LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s)) + + +LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) +{ + int ds; + + if (d!=s) { + usereg(s); + ds=stackpos(s); + emit_byte(0xd9); + emit_byte(0xc0+ds); /* duplicate source */ + emit_byte(0xd9); + emit_byte(0xe0); /* take fchs */ + tos_make(d); /* store to destination */ + } + else { + make_tos(d); + emit_byte(0xd9); + emit_byte(0xe0); /* take fchs */ + } +} +LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xc0+ds); /* add source to dest*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xc0+ds); /* add source to dest*/ + } +} +LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + 
emit_byte(0xe8+ds); /* sub source from dest*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xe0+ds); /* sub src from dest */ + } +} +LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + make_tos(d); + ds=stackpos(s); + + emit_byte(0xdd); + emit_byte(0xe0+ds); /* cmp dest with source*/ +} +LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xc8+ds); /* mul dest by source*/ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xc8+ds); /* mul dest by source*/ + } +} +LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + if (live.spos[s]==live.tos) { + /* Source is on top of stack */ + ds=stackpos(d); + emit_byte(0xdc); + emit_byte(0xf8+ds); /* div dest by source */ + } + else { + make_tos(d); + ds=stackpos(s); + + emit_byte(0xd8); + emit_byte(0xf0+ds); /* div dest by source*/ + } +} +LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + make_tos2(d,s); + ds=stackpos(s); + + if (ds!=1) { + printf("Failed horribly in raw_frem_rr! ds is %d\n",ds); + abort(); + } + emit_byte(0xd9); + emit_byte(0xf8); /* take rem from dest by source */ +} +LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s)) + +LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) +{ + int ds; + + usereg(s); + usereg(d); + + make_tos2(d,s); + ds=stackpos(s); + + if (ds!=1) { + printf("Failed horribly in raw_frem1_rr! 
ds is %d\n",ds); + abort(); + } + emit_byte(0xd9); + emit_byte(0xf5); /* take rem1 from dest by source */ +} +LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) + + +LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) +{ + make_tos(r); + emit_byte(0xd9); /* ftst */ + emit_byte(0xe4); +} +LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) + +/* %eax register is clobbered if target processor doesn't support fucomi */ +#define FFLAG_NREG_CLOBBER_CONDITION !have_cmov +#define FFLAG_NREG EAX_INDEX + +static __inline__ void raw_fflags_into_flags(int r) +{ + int p; + + usereg(r); + p=stackpos(r); + + emit_byte(0xd9); + emit_byte(0xee); /* Push 0 */ + emit_byte(0xd9); + emit_byte(0xc9+p); /* swap top two around */ + if (have_cmov) { + // gb-- fucomi is for P6 cores only, not K6-2 then... + emit_byte(0xdb); + emit_byte(0xe9+p); /* fucomi them */ + } + else { + emit_byte(0xdd); + emit_byte(0xe1+p); /* fucom them */ + emit_byte(0x9b); + emit_byte(0xdf); + emit_byte(0xe0); /* fstsw ax */ + raw_sahf(0); /* sahf */ + } + emit_byte(0xdd); + emit_byte(0xd9+p); /* store value back, and get rid of 0 */ +} diff --git a/jit2/codegen_x86.h b/jit2/codegen_x86.h new file mode 100644 index 00000000..febee58b --- /dev/null +++ b/jit2/codegen_x86.h @@ -0,0 +1,2565 @@ +/******************** -*- mode: C; tab-width: 8 -*- ******************** + * + * Run-time assembler for IA-32 and AMD64 + * + ***********************************************************************/ + + +/*********************************************************************** + * + * This file is derived from CCG. 
+ * + * Copyright 1999, 2000, 2001, 2002, 2003 Ian Piumarta + * + * Adaptations and enhancements for AMD64 support, Copyright 2003-2008 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2008 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ***********************************************************************/ + +#ifndef X86_RTASM_H +#define X86_RTASM_H + +/* NOTES + * + * o Best viewed on a 1024x768 screen with fixed-6x10 font ;-) + * + * TODO + * + * o Fix FIXMEs + * o SSE instructions + * o Optimize for cases where register numbers are not integral constants + */ + +/* --- Configuration ------------------------------------------------------- */ + +/* Define to settle a "flat" register set, i.e. different regno for + each size variant. */ +#ifndef X86_FLAT_REGISTERS +#define X86_FLAT_REGISTERS 1 +#endif + +/* Define to generate x86-64 code. */ +#ifndef X86_TARGET_64BIT +#define X86_TARGET_64BIT 0 +#endif + +/* Define to optimize ALU instructions. */ +#ifndef X86_OPTIMIZE_ALU +#define X86_OPTIMIZE_ALU 1 +#endif + +/* Define to optimize rotate/shift instructions. */ +#ifndef X86_OPTIMIZE_ROTSHI +#define X86_OPTIMIZE_ROTSHI 1 +#endif + +/* Define to optimize absolute addresses for RIP relative addressing. 
*/ +#ifndef X86_RIP_RELATIVE_ADDR +#define X86_RIP_RELATIVE_ADDR 1 +#endif + + +/* --- Macros -------------------------------------------------------------- */ + +/* Functions used to emit code. + * + * x86_emit_byte(B) + * x86_emit_word(W) + * x86_emit_long(L) + */ + +/* Get pointer to current code + * + * x86_get_target() + */ + +/* Abort assembler, fatal failure. + * + * x86_emit_failure(MSG) + */ + +#define x86_emit_failure0(MSG) (x86_emit_failure(MSG),0) + + +/* --- Register set -------------------------------------------------------- */ + +enum { + X86_RIP = -2, +#if X86_FLAT_REGISTERS + X86_NOREG = 0, + X86_Reg8L_Base = 0x10, + X86_Reg8H_Base = 0x20, + X86_Reg16_Base = 0x30, + X86_Reg32_Base = 0x40, + X86_Reg64_Base = 0x50, + X86_RegMMX_Base = 0x60, + X86_RegXMM_Base = 0x70, + X86_RegFPU_Base = 0x80 +#else + X86_NOREG = -1, + X86_Reg8L_Base = 0, + X86_Reg8H_Base = 16, + X86_Reg16_Base = 0, + X86_Reg32_Base = 0, + X86_Reg64_Base = 0, + X86_RegMMX_Base = 0, + X86_RegXMM_Base = 0, + X86_RegFPU_Base = 0 +#endif +}; + +enum { + X86_AL = X86_Reg8L_Base, + X86_CL, X86_DL, X86_BL, + X86_SPL, X86_BPL, X86_SIL, X86_DIL, + X86_R8B, X86_R9B, X86_R10B, X86_R11B, + X86_R12B, X86_R13B, X86_R14B, X86_R15B, + X86_AH = X86_Reg8H_Base + 4, + X86_CH, X86_DH, X86_BH +}; + +enum { + X86_AX = X86_Reg16_Base, + X86_CX, X86_DX, X86_BX, + X86_SP, X86_BP, X86_SI, X86_DI, + X86_R8W, X86_R9W, X86_R10W, X86_R11W, + X86_R12W, X86_R13W, X86_R14W, X86_R15W +}; + +enum { + X86_EAX = X86_Reg32_Base, + X86_ECX, X86_EDX, X86_EBX, + X86_ESP, X86_EBP, X86_ESI, X86_EDI, + X86_R8D, X86_R9D, X86_R10D, X86_R11D, + X86_R12D, X86_R13D, X86_R14D, X86_R15D +}; + +enum { + X86_RAX = X86_Reg64_Base, + X86_RCX, X86_RDX, X86_RBX, + X86_RSP, X86_RBP, X86_RSI, X86_RDI, + X86_R8, X86_R9, X86_R10, X86_R11, + X86_R12, X86_R13, X86_R14, X86_R15 +}; + +enum { + X86_MM0 = X86_RegMMX_Base, + X86_MM1, X86_MM2, X86_MM3, + X86_MM4, X86_MM5, X86_MM6, X86_MM7, +}; + +enum { + X86_XMM0 = X86_RegXMM_Base, + X86_XMM1, 
X86_XMM2, X86_XMM3, + X86_XMM4, X86_XMM5, X86_XMM6, X86_XMM7, + X86_XMM8, X86_XMM9, X86_XMM10, X86_XMM11, + X86_XMM12, X86_XMM13, X86_XMM14, X86_XMM15 +}; + +enum { + X86_ST0 = X86_RegFPU_Base, + X86_ST1, X86_ST2, X86_ST3, + X86_ST4, X86_ST5, X86_ST6, X86_ST7 +}; + +/* Register control and access + * + * _r0P(R) Null register? + * _rIP(R) RIP register? + * _rXP(R) Extended register? + * + * _rC(R) Class of register (only valid if X86_FLAT_REGISTERS) + * _rR(R) Full register number + * _rN(R) Short register number for encoding + * + * _r1(R) 8-bit register ID + * _r2(R) 16-bit register ID + * _r4(R) 32-bit register ID + * _r8(R) 64-bit register ID + * _rM(R) MMX register ID + * _rX(R) XMM register ID + * _rF(R) FPU register ID + * _rA(R) Address register ID used for EA calculation + */ + +#define _rST0P(R) ((int)(R) == (int)X86_ST0) +#define _r0P(R) ((int)(R) == (int)X86_NOREG) +#define _rIP(R) (X86_TARGET_64BIT ? ((int)(R) == (int)X86_RIP) : 0) + +#if X86_FLAT_REGISTERS +#define _rC(R) ((R) & 0xf0) +#define _rR(R) ((R) & 0x0f) +#define _rN(R) ((R) & 0x07) +#define _rXP(R) ((R) > 0 && _rR(R) > 7) +#else +#define _rN(R) ((R) & 0x07) +#define _rR(R) (int(R)) +#define _rXP(R) (_rR(R) > 7 && _rR(R) < 16) +#endif + +#if !defined(_ASM_SAFETY) || ! X86_FLAT_REGISTERS +#define _r1(R) _rN(R) +#define _r2(R) _rN(R) +#define _r4(R) _rN(R) +#define _r8(R) _rN(R) +#define _rA(R) _rN(R) +#define _rM(R) _rN(R) +#define _rX(R) _rN(R) +#define _rF(R) _rN(R) +#else +#define _r1(R) ( ((_rC(R) & (X86_Reg8L_Base | X86_Reg8H_Base)) != 0) ? _rN(R) : x86_emit_failure0( "8-bit register required")) +#define _r2(R) ( (_rC(R) == X86_Reg16_Base) ? _rN(R) : x86_emit_failure0("16-bit register required")) +#define _r4(R) ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("32-bit register required")) +#define _r8(R) ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("64-bit register required")) +#define _rA(R) ( X86_TARGET_64BIT ? \ + ( (_rC(R) == X86_Reg64_Base) ? 
_rN(R) : x86_emit_failure0("not a valid 64-bit base/index expression")) : \ + ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("not a valid 32-bit base/index expression")) ) +#define _rM(R) ( (_rC(R) == X86_RegMMX_Base) ? _rN(R) : x86_emit_failure0("MMX register required")) +#define _rX(R) ( (_rC(R) == X86_RegXMM_Base) ? _rN(R) : x86_emit_failure0("SSE register required")) +#define _rF(R) ( (_rC(R) == X86_RegFPU_Base) ? _rN(R) : x86_emit_failure0("FPU register required")) +#endif + +#define _rSP() (X86_TARGET_64BIT ? (int)X86_RSP : (int)X86_ESP) +#define _r1e8lP(R) (int(R) >= X86_SPL && int(R) <= X86_DIL) +#define _rbpP(R) (_rR(R) == _rR(X86_RBP)) +#define _rspP(R) (_rR(R) == _rR(X86_RSP)) +#define _rbp13P(R) (_rN(R) == _rN(X86_RBP)) +#define _rsp12P(R) (_rN(R) == _rN(X86_RSP)) + + +/* ========================================================================= */ +/* --- UTILITY ------------------------------------------------------------- */ +/* ========================================================================= */ + +typedef signed char _sc; +typedef unsigned char _uc; +typedef signed short _ss; +typedef unsigned short _us; +typedef signed int _sl; +typedef unsigned int _ul; + +#define _UC(X) ((_uc )(unsigned long)(X)) +#define _US(X) ((_us )(unsigned long)(X)) +#define _SL(X) ((_sl )(unsigned long)(X)) +#define _UL(X) ((_ul )(unsigned long)(X)) + +#define _PUC(X) ((_uc *)(X)) +#define _PUS(X) ((_us *)(X)) +#define _PSL(X) ((_sl *)(X)) +#define _PUL(X) ((_ul *)(X)) + +#define _B(B) x86_emit_byte((B)) +#define _W(W) x86_emit_word((W)) +#define _L(L) x86_emit_long((L)) +#define _Q(Q) x86_emit_quad((Q)) + +#define _MASK(N) ((unsigned)((1<<(N)))-1) +#define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N))) +#define _uiP(N,I) (!(((unsigned)(I))&~_MASK(N))) +#define _suiP(N,I) (_siP(N,I) | _uiP(N,I)) + +#ifndef _ASM_SAFETY +#define _ck_s(W,I) (_UL(I) & _MASK(W)) +#define _ck_u(W,I) (_UL(I) & _MASK(W)) +#define _ck_su(W,I) (_UL(I) & _MASK(W)) 
+#define _ck_d(W,I) (_UL(I) & _MASK(W)) +#else /* _ASM_SAFETY defined: range-check I first, call x86_emit_failure0() if it does not fit in W bits */ +#define _ck_s(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "signed integer `"#I"' too large for "#W"-bit field")) +#define _ck_u(W,I) (_uiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0("unsigned integer `"#I"' too large for "#W"-bit field")) +#define _ck_su(W,I) (_suiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "integer `"#I"' too large for "#W"-bit field")) +#define _ck_d(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "displacement `"#I"' too large for "#W"-bit field")) +#endif /* _ASM_SAFETY */ + +#define _s0P(I) ((I)==0) /* I is zero? */ +#define _s8P(I) _siP(8,I) /* I fits in a signed 8-bit field? */ +#define _s16P(I) _siP(16,I) /* I fits in a signed 16-bit field? */ +#define _u8P(I) _uiP(8,I) /* I fits in an unsigned 8-bit field? */ +#define _u16P(I) _uiP(16,I) /* I fits in an unsigned 16-bit field? */ + +#define _su8(I) _ck_su(8,I) /* I masked to 8 bits; signed or unsigned range accepted */ +#define _su16(I) _ck_su(16,I) /* I masked to 16 bits; signed or unsigned range accepted */ + +#define _s1(I) _ck_s( 1,I) /* _sN(I): I masked to an N-bit field, checked as signed when _ASM_SAFETY is defined */ +#define _s2(I) _ck_s( 2,I) +#define _s3(I) _ck_s( 3,I) +#define _s4(I) _ck_s( 4,I) +#define _s5(I) _ck_s( 5,I) +#define _s6(I) _ck_s( 6,I) +#define _s7(I) _ck_s( 7,I) +#define _s8(I) _ck_s( 8,I) +#define _s9(I) _ck_s( 9,I) +#define _s10(I) _ck_s(10,I) +#define _s11(I) _ck_s(11,I) +#define _s12(I) _ck_s(12,I) +#define _s13(I) _ck_s(13,I) +#define _s14(I) _ck_s(14,I) +#define _s15(I) _ck_s(15,I) +#define _s16(I) _ck_s(16,I) +#define _s17(I) _ck_s(17,I) +#define _s18(I) _ck_s(18,I) +#define _s19(I) _ck_s(19,I) +#define _s20(I) _ck_s(20,I) +#define _s21(I) _ck_s(21,I) +#define _s22(I) _ck_s(22,I) +#define _s23(I) _ck_s(23,I) +#define _s24(I) _ck_s(24,I) +#define _s25(I) _ck_s(25,I) +#define _s26(I) _ck_s(26,I) +#define _s27(I) _ck_s(27,I) +#define _s28(I) _ck_s(28,I) +#define _s29(I) _ck_s(29,I) +#define _s30(I) _ck_s(30,I) +#define _s31(I) _ck_s(31,I) +#define _u1(I) _ck_u( 1,I) /* _uN(I): I masked to an N-bit field, checked as unsigned when _ASM_SAFETY is defined */ +#define _u2(I) _ck_u( 2,I) +#define _u3(I) _ck_u( 3,I) +#define _u4(I) _ck_u( 4,I) +#define _u5(I) _ck_u( 5,I) +#define _u6(I) _ck_u( 6,I) +#define _u7(I) _ck_u( 7,I) +#define _u8(I) _ck_u( 8,I) +#define _u9(I) _ck_u( 9,I) +#define _u10(I) _ck_u(10,I) +#define _u11(I) _ck_u(11,I) +#define _u12(I) _ck_u(12,I) 
+#define _u13(I) _ck_u(13,I) +#define _u14(I) _ck_u(14,I) +#define _u15(I) _ck_u(15,I) +#define _u16(I) _ck_u(16,I) +#define _u17(I) _ck_u(17,I) +#define _u18(I) _ck_u(18,I) +#define _u19(I) _ck_u(19,I) +#define _u20(I) _ck_u(20,I) +#define _u21(I) _ck_u(21,I) +#define _u22(I) _ck_u(22,I) +#define _u23(I) _ck_u(23,I) +#define _u24(I) _ck_u(24,I) +#define _u25(I) _ck_u(25,I) +#define _u26(I) _ck_u(26,I) +#define _u27(I) _ck_u(27,I) +#define _u28(I) _ck_u(28,I) +#define _u29(I) _ck_u(29,I) +#define _u30(I) _ck_u(30,I) +#define _u31(I) _ck_u(31,I) + +/* ========================================================================= */ +/* --- ASSEMBLER ----------------------------------------------------------- */ +/* ========================================================================= */ + +#define _b00 0 +#define _b01 1 +#define _b10 2 +#define _b11 3 + +#define _b000 0 +#define _b001 1 +#define _b010 2 +#define _b011 3 +#define _b100 4 +#define _b101 5 +#define _b110 6 +#define _b111 7 + +#define _OFF4(D) (_UL(D) - _UL(x86_get_target())) +#define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) ) + +#define _D8(D) (_B(0), ((*(_PUC(x86_get_target())-1))= _CKD8(D))) +#define _D32(D) (_L(0), ((*(_PUL(x86_get_target())-1))= _OFF4(D))) + +#ifndef _ASM_SAFETY +# define _M(M) (M) +# define _r(R) (R) +# define _m(M) (M) +# define _s(S) (S) +# define _i(I) (I) +# define _b(B) (B) +#else +# define _M(M) (((M)>3) ? x86_emit_failure0("internal error: mod = " #M) : (M)) +# define _r(R) (((R)>7) ? x86_emit_failure0("internal error: reg = " #R) : (R)) +# define _m(M) (((M)>7) ? x86_emit_failure0("internal error: r/m = " #M) : (M)) +# define _s(S) (((S)>3) ? x86_emit_failure0("internal error: memory scale = " #S) : (S)) +# define _i(I) (((I)>7) ? x86_emit_failure0("internal error: memory index = " #I) : (I)) +# define _b(B) (((B)>7) ? 
x86_emit_failure0("internal error: memory base = " #B) : (B)) +#endif + +#define _Mrm(Md,R,M) _B((_M(Md)<<6)|(_r(R)<<3)|_m(M)) +#define _SIB(Sc,I, B) _B((_s(Sc)<<6)|(_i(I)<<3)|_b(B)) + +#define _SCL(S) ((((S)==1) ? _b00 : \ + (((S)==2) ? _b01 : \ + (((S)==4) ? _b10 : \ + (((S)==8) ? _b11 : x86_emit_failure0("illegal scale: " #S)))))) + + +/* --- Memory subformats - urgh! ------------------------------------------- */ + +/* _r_D() is RIP addressing mode if X86_TARGET_64BIT, use _r_DSIB() instead */ +#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_L((_sl)(D))) +#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((_sl)(D))) +#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) ) +#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) ) +#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_B((_sc)(D))) +#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((_sc)(D))) +#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_L((_sl)(D))) +#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((_sl)(D))) +#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((_sl)(D))) + +#define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B )))) +#define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S)))) + +/* Use RIP-addressing in 64-bit mode, if possible */ +#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \ + ((uintptr)x86_get_target() + 4 + (O) - (D) <= 0xffffffff)) + +#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \ + (_x86_RIP_addressing_possible(D, O) ? \ + _r_D(R, (D) - ((uintptr)x86_get_target() + 4 + (O))) : \ + _r_DSIB(R,D))) : \ + (_rIP(B) ? _r_D (R,D ) : \ + (_rsp12P(B) ? _r_DBIS(R,D,_rSP(),_rSP(),1) : \ + _r_DB (R,D, B )))) : \ + (_r0P(B) ? 
_r_4IS (R,D, I,S) : \ + (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \ + x86_emit_failure("illegal index register: %esp")))) + + +/* --- Instruction formats ------------------------------------------------- */ + +#define _m32only(X) (! X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 64-bit mode")) +#define _m64only(X) ( X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 32-bit mode")) +#define _m64(X) ( X86_TARGET_64BIT ? X : ((void)0) ) + +/* _format Opcd ModR/M dN(rB,rI,Sc) imm... */ + +#define _d16() ( _B(0x66 ) ) +#define _O( OP ) ( _B( OP ) ) +#define _Or( OP,R ) ( _B( (OP)|_r(R)) ) +#define _OO( OP ) ( _B((OP)>>8), _B(( (OP) )&0xff) ) +#define _OOr( OP,R ) ( _B((OP)>>8), _B(( (OP)|_r(R))&0xff) ) +#define _Os( OP,B ) ( _s8P(B) ? _B(((OP)|_b10)) : _B(OP) ) +#define _sW( W ) ( _s8P(W) ? _B(W):_W(W) ) +#define _sL( L ) ( _s8P(L) ? _B(L):_L(L) ) +#define _sWO( W ) ( _s8P(W) ? 1 : 2 ) +#define _sLO( L ) ( _s8P(L) ? 1 : 4 ) +#define _O_B( OP ,B ) ( _O ( OP ) ,_B(B) ) +#define _O_W( OP ,W ) ( _O ( OP ) ,_W(W) ) +#define _O_L( OP ,L ) ( _O ( OP ) ,_L(L) ) +#define _OO_L( OP ,L ) ( _OO ( OP ) ,_L(L) ) +#define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) ) +#define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) ) +#define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) ) +#define _Os_sW( OP ,W ) ( _Os ( OP,W) ,_sW(W) ) +#define _Os_sL( OP ,L ) ( _Os ( OP,L) ,_sL(L) ) +#define _O_W_B( OP ,W,B) ( _O ( OP ) ,_W(W),_B(B)) +#define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_B(B) ) +#define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_W(W) ) +#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_L(L) ) +#define _Or_Q( OP,R ,Q ) ( _Or ( OP,R) ,_Q(Q) ) +#define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) ) +#define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) ) +#define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_B(B) ) +#define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_W(W) ) +#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_L(L) ) +#define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP 
),_Mrm(MO,R,M ) ,_B(B) ) +#define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) ) +#define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) ) +#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) +#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) +#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) ) +#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,2) ,_W(W) ) +#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,4) ,_L(L) ) +#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) ) +#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS,_sWO(W)),_sW(W)) +#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS,_sLO(L)),_sL(L)) +#define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) ) +#define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) ) +#define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) ) + + +/* --- REX prefixes -------------------------------------------------------- */ + +#define _VOID() ((void)0) +#define _BIT(X) (!!(X)) +#define _d64(W,R,X,B) (_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B))) + +#define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? 
_d64(W,R,X,B) : _VOID()) +#define __REXwrx_(L,W,R,X,MR) (__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR)))) +#define __REXw_x_(L,W,R,X,MR) (__REXwrx_(L,W,_BIT(_rXP(R)),X,MR)) +#define __REX_reg(RR) (__REXwrxb(0,0,0,00,_BIT(_rXP(RR)))) +#define __REX_mem(MB,MI) (__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB)))) + +// FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH) +#define _REXBrr(RR,MR) _m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR)) +#define _REXBmr(MB,MI,RD) _m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB)) +#define _REXBrm(RS,MB,MI) _REXBmr(MB,MI,RS) + +#define _REXBLrr(RR,MR) _m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR)) +#define _REXLrr(RR,MR) _m64(__REXw_x_(0,0,RR,0,MR)) +#define _REXLmr(MB,MI,RD) _m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB)) +#define _REXLrm(RS,MB,MI) _REXLmr(MB,MI,RS) +#define _REXLr(RR) _m64(__REX_reg(RR)) +#define _REXLm(MB,MI) _m64(__REX_mem(MB,MI)) + +#define _REXQrr(RR,MR) _m64only(__REXw_x_(0,1,RR,0,MR)) +#define _REXQmr(MB,MI,RD) _m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB)) +#define _REXQrm(RS,MB,MI) _REXQmr(MB,MI,RS) +#define _REXQr(RR) _m64only(__REX_reg(RR)) +#define _REXQm(MB,MI) _m64only(__REX_mem(MB,MI)) + + +/* ========================================================================= */ +/* --- Fully-qualified intrinsic instructions ------------------------------ */ +/* ========================================================================= */ + +/* OPCODE + i = immediate operand + * + r = register operand + * + m = memory operand (disp,base,index,scale) + * + sr/sm = a star preceding a register or memory + * + 0 = top of stack register (for FPU instructions) + * + * NOTE in x86-64 mode: a memory operand with only a valid + * displacement value will lead to the expected absolute mode. If + * RIP addressing is necessary, X86_RIP shall be used as the base + * register argument. 
+ */ + +/* --- ALU instructions ---------------------------------------------------- */ + +enum { + X86_ADD = 0, + X86_OR = 1, + X86_ADC = 2, + X86_SBB = 3, + X86_AND = 4, + X86_SUB = 5, + X86_XOR = 6, + X86_CMP = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) )) +#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2 ,_r1(RD) ,MD,MB,MI,MS )) +#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) ,_r1(RS) ,MD,MB,MI,MS )) +#define _ALUBir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \ + (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \ + (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) ) +#define _ALUBim(OP, IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0x80 ,OP ,MD,MB,MI,MS ,_su8(IM))) + +#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) )) +#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS )) +#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS )) +#define _ALUWir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \ + (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \ + (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) ) +#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM))) + +#define _ALULrr(OP, RS, RD) (_REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r4(RS),_r4(RD) )) +#define _ALULmr(OP, MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r4(RD) ,MD,MB,MI,MS )) +#define _ALULrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r4(RS) ,MD,MB,MI,MS )) +#define _ALULir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? 
\ + (_REXLrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \ + (_REXLrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r4(RD) ,IM )) ) +#define _ALULim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) + +#define _ALUQrr(OP, RS, RD) (_REXQrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r8(RS),_r8(RD) )) +#define _ALUQmr(OP, MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r8(RD) ,MD,MB,MI,MS )) +#define _ALUQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r8(RS) ,MD,MB,MI,MS )) +#define _ALUQir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \ + (_REXQrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \ + (_REXQrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r8(RD) ,IM )) ) +#define _ALUQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM )) + +#define ADCBrr(RS, RD) _ALUBrr(X86_ADC, RS, RD) +#define ADCBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCBir(IM, RD) _ALUBir(X86_ADC, IM, RD) +#define ADCBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCWrr(RS, RD) _ALUWrr(X86_ADC, RS, RD) +#define ADCWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCWir(IM, RD) _ALUWir(X86_ADC, IM, RD) +#define ADCWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCLrr(RS, RD) _ALULrr(X86_ADC, RS, RD) +#define ADCLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADC, RS, MD, MB, MI, MS) +#define ADCLir(IM, RD) _ALULir(X86_ADC, IM, RD) +#define ADCLim(IM, MD, MB, MI, MS) _ALULim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADCQrr(RS, RD) _ALUQrr(X86_ADC, RS, RD) +#define ADCQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADC, MD, MB, MI, MS, RD) +#define ADCQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADC, RS, MD, MB, MI, MS) +#define 
ADCQir(IM, RD) _ALUQir(X86_ADC, IM, RD) +#define ADCQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADC, IM, MD, MB, MI, MS) + +#define ADDBrr(RS, RD) _ALUBrr(X86_ADD, RS, RD) +#define ADDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDBir(IM, RD) _ALUBir(X86_ADD, IM, RD) +#define ADDBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDWrr(RS, RD) _ALUWrr(X86_ADD, RS, RD) +#define ADDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDWir(IM, RD) _ALUWir(X86_ADD, IM, RD) +#define ADDWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDLrr(RS, RD) _ALULrr(X86_ADD, RS, RD) +#define ADDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDLir(IM, RD) _ALULir(X86_ADD, IM, RD) +#define ADDLim(IM, MD, MB, MI, MS) _ALULim(X86_ADD, IM, MD, MB, MI, MS) + +#define ADDQrr(RS, RD) _ALUQrr(X86_ADD, RS, RD) +#define ADDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADD, MD, MB, MI, MS, RD) +#define ADDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADD, RS, MD, MB, MI, MS) +#define ADDQir(IM, RD) _ALUQir(X86_ADD, IM, RD) +#define ADDQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADD, IM, MD, MB, MI, MS) + +#define ANDBrr(RS, RD) _ALUBrr(X86_AND, RS, RD) +#define ANDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDBir(IM, RD) _ALUBir(X86_AND, IM, RD) +#define ANDBim(IM, MD, MB, MI, MS) _ALUBim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDWrr(RS, RD) _ALUWrr(X86_AND, RS, RD) +#define ANDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDWir(IM, RD) _ALUWir(X86_AND, IM, RD) +#define ANDWim(IM, MD, MB, MI, MS) 
_ALUWim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDLrr(RS, RD) _ALULrr(X86_AND, RS, RD) +#define ANDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDLir(IM, RD) _ALULir(X86_AND, IM, RD) +#define ANDLim(IM, MD, MB, MI, MS) _ALULim(X86_AND, IM, MD, MB, MI, MS) + +#define ANDQrr(RS, RD) _ALUQrr(X86_AND, RS, RD) +#define ANDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_AND, MD, MB, MI, MS, RD) +#define ANDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_AND, RS, MD, MB, MI, MS) +#define ANDQir(IM, RD) _ALUQir(X86_AND, IM, RD) +#define ANDQim(IM, MD, MB, MI, MS) _ALUQim(X86_AND, IM, MD, MB, MI, MS) + +#define CMPBrr(RS, RD) _ALUBrr(X86_CMP, RS, RD) +#define CMPBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPBir(IM, RD) _ALUBir(X86_CMP, IM, RD) +#define CMPBim(IM, MD, MB, MI, MS) _ALUBim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPWrr(RS, RD) _ALUWrr(X86_CMP, RS, RD) +#define CMPWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPWir(IM, RD) _ALUWir(X86_CMP, IM, RD) +#define CMPWim(IM, MD, MB, MI, MS) _ALUWim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPLrr(RS, RD) _ALULrr(X86_CMP, RS, RD) +#define CMPLmr(MD, MB, MI, MS, RD) _ALULmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPLrm(RS, MD, MB, MI, MS) _ALULrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPLir(IM, RD) _ALULir(X86_CMP, IM, RD) +#define CMPLim(IM, MD, MB, MI, MS) _ALULim(X86_CMP, IM, MD, MB, MI, MS) + +#define CMPQrr(RS, RD) _ALUQrr(X86_CMP, RS, RD) +#define CMPQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_CMP, MD, MB, MI, MS, RD) +#define CMPQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_CMP, RS, MD, MB, MI, MS) +#define CMPQir(IM, RD) _ALUQir(X86_CMP, IM, RD) +#define CMPQim(IM, MD, MB, MI, MS) _ALUQim(X86_CMP, IM, MD, MB, MI, MS) + +#define ORBrr(RS, RD) _ALUBrr(X86_OR, RS, RD) 
+#define ORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_OR, MD, MB, MI, MS, RD) /* naming: op + size (B/W/L/Q = 8/16/32/64-bit) + operand kinds (r register, m memory, i immediate) */ +#define ORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_OR, RS, MD, MB, MI, MS) +#define ORBir(IM, RD) _ALUBir(X86_OR, IM, RD) +#define ORBim(IM, MD, MB, MI, MS) _ALUBim(X86_OR, IM, MD, MB, MI, MS) + +#define ORWrr(RS, RD) _ALUWrr(X86_OR, RS, RD) /* OR, 16-bit */ +#define ORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_OR, MD, MB, MI, MS, RD) +#define ORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_OR, RS, MD, MB, MI, MS) +#define ORWir(IM, RD) _ALUWir(X86_OR, IM, RD) +#define ORWim(IM, MD, MB, MI, MS) _ALUWim(X86_OR, IM, MD, MB, MI, MS) + +#define ORLrr(RS, RD) _ALULrr(X86_OR, RS, RD) /* OR, 32-bit */ +#define ORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_OR, MD, MB, MI, MS, RD) +#define ORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_OR, RS, MD, MB, MI, MS) +#define ORLir(IM, RD) _ALULir(X86_OR, IM, RD) +#define ORLim(IM, MD, MB, MI, MS) _ALULim(X86_OR, IM, MD, MB, MI, MS) + +#define ORQrr(RS, RD) _ALUQrr(X86_OR, RS, RD) /* OR, 64-bit */ +#define ORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_OR, MD, MB, MI, MS, RD) +#define ORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_OR, RS, MD, MB, MI, MS) +#define ORQir(IM, RD) _ALUQir(X86_OR, IM, RD) +#define ORQim(IM, MD, MB, MI, MS) _ALUQim(X86_OR, IM, MD, MB, MI, MS) + +#define SBBBrr(RS, RD) _ALUBrr(X86_SBB, RS, RD) /* SBB, 8-bit */ +#define SBBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBBir(IM, RD) _ALUBir(X86_SBB, IM, RD) +#define SBBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBWrr(RS, RD) _ALUWrr(X86_SBB, RS, RD) /* SBB, 16-bit */ +#define SBBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBWir(IM, RD) _ALUWir(X86_SBB, IM, RD) +#define SBBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBLrr(RS, RD) _ALULrr(X86_SBB, RS, RD) /* SBB, 32-bit */ +#define SBBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBLrm(RS, MD, MB, MI, MS) 
_ALULrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBLir(IM, RD) _ALULir(X86_SBB, IM, RD) +#define SBBLim(IM, MD, MB, MI, MS) _ALULim(X86_SBB, IM, MD, MB, MI, MS) + +#define SBBQrr(RS, RD) _ALUQrr(X86_SBB, RS, RD) /* SBB, 64-bit */ +#define SBBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SBB, MD, MB, MI, MS, RD) +#define SBBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SBB, RS, MD, MB, MI, MS) +#define SBBQir(IM, RD) _ALUQir(X86_SBB, IM, RD) +#define SBBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SBB, IM, MD, MB, MI, MS) + +#define SUBBrr(RS, RD) _ALUBrr(X86_SUB, RS, RD) /* SUB, 8-bit */ +#define SUBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBBir(IM, RD) _ALUBir(X86_SUB, IM, RD) +#define SUBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBWrr(RS, RD) _ALUWrr(X86_SUB, RS, RD) /* SUB, 16-bit */ +#define SUBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBWir(IM, RD) _ALUWir(X86_SUB, IM, RD) +#define SUBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBLrr(RS, RD) _ALULrr(X86_SUB, RS, RD) /* SUB, 32-bit */ +#define SUBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBLir(IM, RD) _ALULir(X86_SUB, IM, RD) +#define SUBLim(IM, MD, MB, MI, MS) _ALULim(X86_SUB, IM, MD, MB, MI, MS) + +#define SUBQrr(RS, RD) _ALUQrr(X86_SUB, RS, RD) /* SUB, 64-bit */ +#define SUBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SUB, MD, MB, MI, MS, RD) +#define SUBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SUB, RS, MD, MB, MI, MS) +#define SUBQir(IM, RD) _ALUQir(X86_SUB, IM, RD) +#define SUBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SUB, IM, MD, MB, MI, MS) + +#define XORBrr(RS, RD) _ALUBrr(X86_XOR, RS, RD) /* XOR, 8-bit */ +#define XORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORBir(IM, RD) _ALUBir(X86_XOR, IM, RD) 
+#define XORBim(IM, MD, MB, MI, MS) _ALUBim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORWrr(RS, RD) _ALUWrr(X86_XOR, RS, RD) +#define XORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORWir(IM, RD) _ALUWir(X86_XOR, IM, RD) +#define XORWim(IM, MD, MB, MI, MS) _ALUWim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORLrr(RS, RD) _ALULrr(X86_XOR, RS, RD) +#define XORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORLir(IM, RD) _ALULir(X86_XOR, IM, RD) +#define XORLim(IM, MD, MB, MI, MS) _ALULim(X86_XOR, IM, MD, MB, MI, MS) + +#define XORQrr(RS, RD) _ALUQrr(X86_XOR, RS, RD) +#define XORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_XOR, MD, MB, MI, MS, RD) +#define XORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_XOR, RS, MD, MB, MI, MS) +#define XORQir(IM, RD) _ALUQir(X86_XOR, IM, RD) +#define XORQim(IM, MD, MB, MI, MS) _ALUQim(X86_XOR, IM, MD, MB, MI, MS) + + +/* --- Shift/Rotate instructions ------------------------------------------- */ + +enum { + X86_ROL = 0, + X86_ROR = 1, + X86_RCL = 2, + X86_RCR = 3, + X86_SHL = 4, + X86_SHR = 5, + X86_SAR = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _ROTSHIBir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXBrr(0, RD), _O_Mrm (0xd0 ,_b11,OP,_r1(RD) )) : \ + (_REXBrr(0, RD), _O_Mrm_B (0xc0 ,_b11,OP,_r1(RD) ,_u8(IM))) ) +#define _ROTSHIBim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXBrm(0, MB, MI), _O_r_X (0xd0 ,OP ,MD,MB,MI,MS )) : \ + (_REXBrm(0, MB, MI), _O_r_X_B (0xc0 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIBrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXBrr(RS, RD), _O_Mrm (0xd2 ,_b11,OP,_r1(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIBrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? 
\ + (_REXBrm(RS, MB, MI), _O_r_X (0xd2 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHIWir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r2(RD) ,_u8(IM))) ) +#define _ROTSHIWim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_d16(), _REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIWrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_d16(), _REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r2(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIWrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHILir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r4(RD) )) : \ + (_REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r4(RD) ,_u8(IM))) ) +#define _ROTSHILim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHILrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r4(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHILrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define _ROTSHIQir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \ + (_REXQrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r8(RD) )) : \ + (_REXQrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r8(RD) ,_u8(IM))) ) +#define _ROTSHIQim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? 
\ + (_REXQrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \ + (_REXQrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) ) +#define _ROTSHIQrr(OP,RS,RD) (((RS) == X86_CL) ? \ + (_REXQrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r8(RD) )) : \ + x86_emit_failure("source register must be CL" ) ) +#define _ROTSHIQrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \ + (_REXQrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \ + x86_emit_failure("source register must be CL" ) ) + +#define ROLBir(IM, RD) _ROTSHIBir(X86_ROL, IM, RD) +#define ROLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLBrr(RS, RD) _ROTSHIBrr(X86_ROL, RS, RD) +#define ROLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLWir(IM, RD) _ROTSHIWir(X86_ROL, IM, RD) +#define ROLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLWrr(RS, RD) _ROTSHIWrr(X86_ROL, RS, RD) +#define ROLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLLir(IM, RD) _ROTSHILir(X86_ROL, IM, RD) +#define ROLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLLrr(RS, RD) _ROTSHILrr(X86_ROL, RS, RD) +#define ROLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS) + +#define ROLQir(IM, RD) _ROTSHIQir(X86_ROL, IM, RD) +#define ROLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS) +#define ROLQrr(RS, RD) _ROTSHIQrr(X86_ROL, RS, RD) +#define ROLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS) + +#define RORBir(IM, RD) _ROTSHIBir(X86_ROR, IM, RD) +#define RORBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS) +#define RORBrr(RS, RD) _ROTSHIBrr(X86_ROR, RS, RD) +#define RORBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORWir(IM, RD) _ROTSHIWir(X86_ROR, IM, RD) +#define RORWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS) +#define RORWrr(RS, RD) _ROTSHIWrr(X86_ROR, RS, RD) +#define RORWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROR, RS, 
MD, MB, MI, MS) + +#define RORLir(IM, RD) _ROTSHILir(X86_ROR, IM, RD) +#define RORLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROR, IM, MD, MB, MI, MS) +#define RORLrr(RS, RD) _ROTSHILrr(X86_ROR, RS, RD) +#define RORLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RORQir(IM, RD) _ROTSHIQir(X86_ROR, IM, RD) +#define RORQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS) +#define RORQrr(RS, RD) _ROTSHIQrr(X86_ROR, RS, RD) +#define RORQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS) + +#define RCLBir(IM, RD) _ROTSHIBir(X86_RCL, IM, RD) +#define RCLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLBrr(RS, RD) _ROTSHIBrr(X86_RCL, RS, RD) +#define RCLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLWir(IM, RD) _ROTSHIWir(X86_RCL, IM, RD) +#define RCLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLWrr(RS, RD) _ROTSHIWrr(X86_RCL, RS, RD) +#define RCLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLLir(IM, RD) _ROTSHILir(X86_RCL, IM, RD) +#define RCLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLLrr(RS, RD) _ROTSHILrr(X86_RCL, RS, RD) +#define RCLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCLQir(IM, RD) _ROTSHIQir(X86_RCL, IM, RD) +#define RCLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS) +#define RCLQrr(RS, RD) _ROTSHIQrr(X86_RCL, RS, RD) +#define RCLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS) + +#define RCRBir(IM, RD) _ROTSHIBir(X86_RCR, IM, RD) +#define RCRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRBrr(RS, RD) _ROTSHIBrr(X86_RCR, RS, RD) +#define RCRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRWir(IM, RD) _ROTSHIWir(X86_RCR, IM, RD) +#define RCRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRWrr(RS, RD) _ROTSHIWrr(X86_RCR, RS, 
RD) +#define RCRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRLir(IM, RD) _ROTSHILir(X86_RCR, IM, RD) +#define RCRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRLrr(RS, RD) _ROTSHILrr(X86_RCR, RS, RD) +#define RCRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS) + +#define RCRQir(IM, RD) _ROTSHIQir(X86_RCR, IM, RD) +#define RCRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS) +#define RCRQrr(RS, RD) _ROTSHIQrr(X86_RCR, RS, RD) +#define RCRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS) + +#define SHLBir(IM, RD) _ROTSHIBir(X86_SHL, IM, RD) +#define SHLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLBrr(RS, RD) _ROTSHIBrr(X86_SHL, RS, RD) +#define SHLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLWir(IM, RD) _ROTSHIWir(X86_SHL, IM, RD) +#define SHLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLWrr(RS, RD) _ROTSHIWrr(X86_SHL, RS, RD) +#define SHLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLLir(IM, RD) _ROTSHILir(X86_SHL, IM, RD) +#define SHLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLLrr(RS, RD) _ROTSHILrr(X86_SHL, RS, RD) +#define SHLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHLQir(IM, RD) _ROTSHIQir(X86_SHL, IM, RD) +#define SHLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS) +#define SHLQrr(RS, RD) _ROTSHIQrr(X86_SHL, RS, RD) +#define SHLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS) + +#define SHRBir(IM, RD) _ROTSHIBir(X86_SHR, IM, RD) +#define SHRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRBrr(RS, RD) _ROTSHIBrr(X86_SHR, RS, RD) +#define SHRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRWir(IM, RD) _ROTSHIWir(X86_SHR, IM, RD) +#define SHRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHR, IM, 
MD, MB, MI, MS) +#define SHRWrr(RS, RD) _ROTSHIWrr(X86_SHR, RS, RD) +#define SHRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRLir(IM, RD) _ROTSHILir(X86_SHR, IM, RD) +#define SHRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRLrr(RS, RD) _ROTSHILrr(X86_SHR, RS, RD) +#define SHRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SHRQir(IM, RD) _ROTSHIQir(X86_SHR, IM, RD) +#define SHRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS) +#define SHRQrr(RS, RD) _ROTSHIQrr(X86_SHR, RS, RD) +#define SHRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS) + +#define SALBir SHLBir +#define SALBim SHLBim +#define SALBrr SHLBrr +#define SALBrm SHLBrm + +#define SALWir SHLWir +#define SALWim SHLWim +#define SALWrr SHLWrr +#define SALWrm SHLWrm + +#define SALLir SHLLir +#define SALLim SHLLim +#define SALLrr SHLLrr +#define SALLrm SHLLrm + +#define SALQir SHLQir +#define SALQim SHLQim +#define SALQrr SHLQrr +#define SALQrm SHLQrm + +#define SARBir(IM, RD) _ROTSHIBir(X86_SAR, IM, RD) +#define SARBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS) +#define SARBrr(RS, RD) _ROTSHIBrr(X86_SAR, RS, RD) +#define SARBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARWir(IM, RD) _ROTSHIWir(X86_SAR, IM, RD) +#define SARWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS) +#define SARWrr(RS, RD) _ROTSHIWrr(X86_SAR, RS, RD) +#define SARWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARLir(IM, RD) _ROTSHILir(X86_SAR, IM, RD) +#define SARLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SAR, IM, MD, MB, MI, MS) +#define SARLrr(RS, RD) _ROTSHILrr(X86_SAR, RS, RD) +#define SARLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS) + +#define SARQir(IM, RD) _ROTSHIQir(X86_SAR, IM, RD) +#define SARQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS) +#define SARQrr(RS, RD) _ROTSHIQrr(X86_SAR, RS, RD) 
+#define SARQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS) + + +/* --- Bit test instructions ----------------------------------------------- */ + +enum { + X86_BT = 4, + X86_BTS = 5, + X86_BTR = 6, + X86_BTC = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _BTWir(OP, IM, RD) (_d16(), _REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r2(RD) ,_u8(IM))) +#define _BTWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD) )) +#define _BTWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r2(RS) ,MD,MB,MI,MS )) + +#define _BTLir(OP, IM, RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r4(RD) ,_u8(IM))) +#define _BTLim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTLrr(OP, RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD) )) +#define _BTLrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r4(RS) ,MD,MB,MI,MS )) + +#define _BTQir(OP, IM, RD) (_REXQrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r8(RD) ,_u8(IM))) +#define _BTQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM))) +#define _BTQrr(OP, RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD) )) +#define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS )) + +#define BTWir(IM, RD) _BTWir(X86_BT, IM, RD) +#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS) +#define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD) +#define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTLir(IM, RD) _BTLir(X86_BT, IM, RD) +#define BTLim(IM, MD, MB, MI, MS) _BTLim(X86_BT, IM, MD, MB, MI, MS) +#define BTLrr(RS, RD) _BTLrr(X86_BT, RS, RD) +#define BTLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BT, 
RS, MD, MB, MI, MS) + +#define BTQir(IM, RD) _BTQir(X86_BT, IM, RD) +#define BTQim(IM, MD, MB, MI, MS) _BTQim(X86_BT, IM, MD, MB, MI, MS) +#define BTQrr(RS, RD) _BTQrr(X86_BT, RS, RD) +#define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS) + +#define BTCWir(IM, RD) _BTWir(X86_BTC, IM, RD) +#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD) +#define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTCLir(IM, RD) _BTLir(X86_BTC, IM, RD) +#define BTCLim(IM, MD, MB, MI, MS) _BTLim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCLrr(RS, RD) _BTLrr(X86_BTC, RS, RD) +#define BTCLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTCQir(IM, RD) _BTQir(X86_BTC, IM, RD) +#define BTCQim(IM, MD, MB, MI, MS) _BTQim(X86_BTC, IM, MD, MB, MI, MS) +#define BTCQrr(RS, RD) _BTQrr(X86_BTC, RS, RD) +#define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS) + +#define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD) +#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD) +#define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTRLir(IM, RD) _BTLir(X86_BTR, IM, RD) +#define BTRLim(IM, MD, MB, MI, MS) _BTLim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRLrr(RS, RD) _BTLrr(X86_BTR, RS, RD) +#define BTRLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTRQir(IM, RD) _BTQir(X86_BTR, IM, RD) +#define BTRQim(IM, MD, MB, MI, MS) _BTQim(X86_BTR, IM, MD, MB, MI, MS) +#define BTRQrr(RS, RD) _BTQrr(X86_BTR, RS, RD) +#define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS) + +#define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD) +#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD) +#define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS) + +#define BTSLir(IM, RD) _BTLir(X86_BTS, IM, RD) 
+#define BTSLim(IM, MD, MB, MI, MS) _BTLim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSLrr(RS, RD) _BTLrr(X86_BTS, RS, RD) +#define BTSLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTS, RS, MD, MB, MI, MS) + +#define BTSQir(IM, RD) _BTQir(X86_BTS, IM, RD) +#define BTSQim(IM, MD, MB, MI, MS) _BTQim(X86_BTS, IM, MD, MB, MI, MS) +#define BTSQrr(RS, RD) _BTQrr(X86_BTS, RS, RD) +#define BTSQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTS, RS, MD, MB, MI, MS) + + +/* --- Move instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define MOVBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) )) +#define MOVBmr(MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS )) +#define MOVBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS )) +#define MOVBir(IM, R) (_REXBrr(0, R), _Or_B (0xb0,_r1(R) ,_su8(IM))) +#define MOVBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM))) + +#define MOVWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) )) +#define MOVWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS )) +#define MOVWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS )) +#define MOVWir(IM, R) (_d16(), _REXLrr(0, R), _Or_W (0xb8,_r2(R) ,_su16(IM))) +#define MOVWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM))) + +#define MOVLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) )) +#define MOVLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS )) +#define MOVLir(IM, R) (_REXLrr(0, R), _Or_L (0xb8,_r4(R) ,IM )) +#define MOVLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )) + +#define MOVQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x89 
,_b11,_r8(RS),_r8(RD) )) +#define MOVQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS )) +#define MOVQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS )) +#define MOVQir(IM, R) (_REXQrr(0, R), _Or_Q (0xb8,_r8(R) ,IM )) +#define MOVQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM )) + + +/* --- Unary and Multiply/Divide instructions ------------------------------ */ + +enum { + X86_NOT = 2, + X86_NEG = 3, + X86_MUL = 4, + X86_IMUL = 5, + X86_DIV = 6, + X86_IDIV = 7, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define _UNARYBr(OP, RS) (_REXBrr(0, RS), _O_Mrm (0xf6 ,_b11,OP ,_r1(RS) )) +#define _UNARYBm(OP, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xf6 ,OP ,MD,MB,MI,MS )) +#define _UNARYWr(OP, RS) (_d16(), _REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r2(RS) )) +#define _UNARYWm(OP, MD, MB, MI, MS) (_d16(), _REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) +#define _UNARYLr(OP, RS) (_REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r4(RS) )) +#define _UNARYLm(OP, MD, MB, MI, MS) (_REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) +#define _UNARYQr(OP, RS) (_REXQrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r8(RS) )) +#define _UNARYQm(OP, MD, MB, MI, MS) (_REXQmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS )) + +#define NOTBr(RS) _UNARYBr(X86_NOT, RS) +#define NOTBm(MD, MB, MI, MS) _UNARYBm(X86_NOT, MD, MB, MI, MS) +#define NOTWr(RS) _UNARYWr(X86_NOT, RS) +#define NOTWm(MD, MB, MI, MS) _UNARYWm(X86_NOT, MD, MB, MI, MS) +#define NOTLr(RS) _UNARYLr(X86_NOT, RS) +#define NOTLm(MD, MB, MI, MS) _UNARYLm(X86_NOT, MD, MB, MI, MS) +#define NOTQr(RS) _UNARYQr(X86_NOT, RS) +#define NOTQm(MD, MB, MI, MS) _UNARYQm(X86_NOT, MD, MB, MI, MS) + +#define NEGBr(RS) _UNARYBr(X86_NEG, RS) +#define NEGBm(MD, MB, MI, MS) _UNARYBm(X86_NEG, MD, MB, MI, MS) +#define NEGWr(RS) _UNARYWr(X86_NEG, RS) +#define NEGWm(MD, MB, MI, MS) _UNARYWm(X86_NEG, MD, MB, MI, MS) +#define NEGLr(RS) _UNARYLr(X86_NEG, RS) 
+#define NEGLm(MD, MB, MI, MS) _UNARYLm(X86_NEG, MD, MB, MI, MS) +#define NEGQr(RS) _UNARYQr(X86_NEG, RS) +#define NEGQm(MD, MB, MI, MS) _UNARYQm(X86_NEG, MD, MB, MI, MS) + +#define MULBr(RS) _UNARYBr(X86_MUL, RS) +#define MULBm(MD, MB, MI, MS) _UNARYBm(X86_MUL, MD, MB, MI, MS) +#define MULWr(RS) _UNARYWr(X86_MUL, RS) +#define MULWm(MD, MB, MI, MS) _UNARYWm(X86_MUL, MD, MB, MI, MS) +#define MULLr(RS) _UNARYLr(X86_MUL, RS) +#define MULLm(MD, MB, MI, MS) _UNARYLm(X86_MUL, MD, MB, MI, MS) +#define MULQr(RS) _UNARYQr(X86_MUL, RS) +#define MULQm(MD, MB, MI, MS) _UNARYQm(X86_MUL, MD, MB, MI, MS) + +#define IMULBr(RS) _UNARYBr(X86_IMUL, RS) +#define IMULBm(MD, MB, MI, MS) _UNARYBm(X86_IMUL, MD, MB, MI, MS) +#define IMULWr(RS) _UNARYWr(X86_IMUL, RS) +#define IMULWm(MD, MB, MI, MS) _UNARYWm(X86_IMUL, MD, MB, MI, MS) +#define IMULLr(RS) _UNARYLr(X86_IMUL, RS) +#define IMULLm(MD, MB, MI, MS) _UNARYLm(X86_IMUL, MD, MB, MI, MS) +#define IMULQr(RS) _UNARYQr(X86_IMUL, RS) +#define IMULQm(MD, MB, MI, MS) _UNARYQm(X86_IMUL, MD, MB, MI, MS) + +#define DIVBr(RS) _UNARYBr(X86_DIV, RS) +#define DIVBm(MD, MB, MI, MS) _UNARYBm(X86_DIV, MD, MB, MI, MS) +#define DIVWr(RS) _UNARYWr(X86_DIV, RS) +#define DIVWm(MD, MB, MI, MS) _UNARYWm(X86_DIV, MD, MB, MI, MS) +#define DIVLr(RS) _UNARYLr(X86_DIV, RS) +#define DIVLm(MD, MB, MI, MS) _UNARYLm(X86_DIV, MD, MB, MI, MS) +#define DIVQr(RS) _UNARYQr(X86_DIV, RS) +#define DIVQm(MD, MB, MI, MS) _UNARYQm(X86_DIV, MD, MB, MI, MS) + +#define IDIVBr(RS) _UNARYBr(X86_IDIV, RS) +#define IDIVBm(MD, MB, MI, MS) _UNARYBm(X86_IDIV, MD, MB, MI, MS) +#define IDIVWr(RS) _UNARYWr(X86_IDIV, RS) +#define IDIVWm(MD, MB, MI, MS) _UNARYWm(X86_IDIV, MD, MB, MI, MS) +#define IDIVLr(RS) _UNARYLr(X86_IDIV, RS) +#define IDIVLm(MD, MB, MI, MS) _UNARYLm(X86_IDIV, MD, MB, MI, MS) +#define IDIVQr(RS) _UNARYQr(X86_IDIV, RS) +#define IDIVQm(MD, MB, MI, MS) _UNARYQm(X86_IDIV, MD, MB, MI, MS) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define IMULWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) )) +#define IMULWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS )) +/* NOTE(review): the *irr forms put RS in the ModRM reg field and RD in r/m, the reverse of the IMUL?rr and IMUL?imr forms -- confirm the intended operand order. */ +#define IMULWirr(IM,RS,RD) (_d16(), _REXLrr(RS, RD), _Os_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) )) +#define IMULWimr(IM,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _Os_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) )) +/* NOTE(review): IMUL?ir names RD in both ModRM fields yet passes 0 as the first _REX?rr argument, unlike the other two-register forms here -- confirm this is right when RD is an extended register. */ +#define IMULLir(IM, RD) (_REXLrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM )) +#define IMULLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) )) +#define IMULLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS )) + +#define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RD),_r8(RD) ,IM )) +#define IMULQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r8(RD),_r8(RS) )) +#define IMULQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0faf ,_r8(RD) ,MD,MB,MI,MS )) + +#define IMULLirr(IM,RS,RD) (_REXLrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM )) +#define IMULLimr(IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM )) + +#define IMULQirr(IM,RS,RD) (_REXQrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RS),_r8(RD) ,IM )) +#define IMULQimr(IM,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r8(RD) ,MD,MB,MI,MS ,IM )) + + +/* --- Control Flow related instructions ----------------------------------- */ +/* Condition codes: the value is OR-ed into the low bits of the Jcc (0x70, 0x0f80), SETcc (0x0f90) and CMOVcc (0x0f40) opcodes by the macros in this file; several mnemonics alias the same value. */ +enum { + X86_CC_O = 0x0, + X86_CC_NO = 0x1, + X86_CC_NAE = 0x2, + X86_CC_B = 0x2, + X86_CC_C = 0x2, + X86_CC_AE = 0x3, + X86_CC_NB = 0x3, + X86_CC_NC = 0x3, + X86_CC_E = 0x4, + X86_CC_Z = 0x4, + X86_CC_NE = 0x5, + X86_CC_NZ = 0x5, + X86_CC_BE = 0x6, + X86_CC_NA = 0x6, + X86_CC_A = 0x7, + X86_CC_NBE = 0x7, + X86_CC_S = 0x8, + X86_CC_NS = 0x9, + X86_CC_P = 0xa, + X86_CC_PE = 0xa, + X86_CC_NP = 0xb, + X86_CC_PO = 0xb, + X86_CC_L = 0xc, + X86_CC_NGE = 0xc, + X86_CC_GE = 0xd, + X86_CC_NL = 0xd, + X86_CC_LE = 0xe, + X86_CC_NG =
0xe, + X86_CC_G = 0xf, + X86_CC_NLE = 0xf, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +// FIXME: no prefix is available to encode a 32-bit operand size in 64-bit mode +#define CALLm(M) _O_D32 (0xe8 ,(int)(M) ) +#define _CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) )) +#define _CALLQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) )) +#define CALLsr(R) ( X86_TARGET_64BIT ? _CALLQsr(R) : _CALLLsr(R)) +#define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S )) + +// FIXME: no prefix is available to encode a 32-bit operand size in 64-bit mode +#define JMPSm(M) _O_D8 (0xeb ,(int)(M) ) +#define JMPm(M) _O_D32 (0xe9 ,(int)(M) ) +#define _JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) )) +#define _JMPQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) )) +#define JMPsr(R) ( X86_TARGET_64BIT ? _JMPQsr(R) : _JMPLsr(R)) +#define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S )) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) ) +#define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) ) +#define JOSm(D) JCCSim(X86_CC_O, D) +#define JNOSm(D) JCCSim(X86_CC_NO, D) +#define JBSm(D) JCCSim(X86_CC_B, D) +#define JNAESm(D) JCCSim(X86_CC_NAE, D) +#define JNBSm(D) JCCSim(X86_CC_NB, D) +#define JAESm(D) JCCSim(X86_CC_AE, D) +#define JESm(D) JCCSim(X86_CC_E, D) +#define JZSm(D) JCCSim(X86_CC_Z, D) +#define JNESm(D) JCCSim(X86_CC_NE, D) +#define JNZSm(D) JCCSim(X86_CC_NZ, D) +#define JBESm(D) JCCSim(X86_CC_BE, D) +#define JNASm(D) JCCSim(X86_CC_NA, D) +#define JNBESm(D) JCCSim(X86_CC_NBE, D) +#define JASm(D) JCCSim(X86_CC_A, D) +#define JSSm(D) JCCSim(X86_CC_S, D) +#define JNSSm(D) JCCSim(X86_CC_NS, D) +#define JPSm(D) JCCSim(X86_CC_P, D) +#define JPESm(D) JCCSim(X86_CC_PE, D) +#define JNPSm(D) JCCSim(X86_CC_NP, D) +#define JPOSm(D) JCCSim(X86_CC_PO, D) +#define JLSm(D) JCCSim(X86_CC_L, D) +#define JNGESm(D) JCCSim(X86_CC_NGE, D) +#define JNLSm(D) JCCSim(X86_CC_NL, D) +#define JGESm(D) JCCSim(X86_CC_GE, D) +#define JLESm(D) JCCSim(X86_CC_LE, D) +#define JNGSm(D) JCCSim(X86_CC_NG, D) +#define JNLESm(D) JCCSim(X86_CC_NLE, D) +#define JGSm(D) JCCSim(X86_CC_G, D) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define JCCii(CC, D) _OO_L (0x0f80|(CC) ,(int)(D) ) +#define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(int)(D) ) +#define JOm(D) JCCim(X86_CC_O, D) +#define JNOm(D) JCCim(X86_CC_NO, D) +#define JBm(D) JCCim(X86_CC_B, D) +#define JNAEm(D) JCCim(X86_CC_NAE, D) +#define JNBm(D) JCCim(X86_CC_NB, D) +#define JAEm(D) JCCim(X86_CC_AE, D) +#define JEm(D) JCCim(X86_CC_E, D) +#define JZm(D) JCCim(X86_CC_Z, D) +#define JNEm(D) JCCim(X86_CC_NE, D) +#define JNZm(D) JCCim(X86_CC_NZ, D) +#define JBEm(D) JCCim(X86_CC_BE, D) +#define JNAm(D) JCCim(X86_CC_NA, D) +#define JNBEm(D) JCCim(X86_CC_NBE, D) +#define JAm(D) JCCim(X86_CC_A, D) +#define JSm(D) JCCim(X86_CC_S, D) +#define JNSm(D) JCCim(X86_CC_NS, D) +#define JPm(D) JCCim(X86_CC_P, D) +#define JPEm(D) JCCim(X86_CC_PE, D) +#define JNPm(D) JCCim(X86_CC_NP, D) +#define JPOm(D) JCCim(X86_CC_PO, D) +#define JLm(D) JCCim(X86_CC_L, D) +#define JNGEm(D) JCCim(X86_CC_NGE, D) +#define JNLm(D) JCCim(X86_CC_NL, D) +#define JGEm(D) JCCim(X86_CC_GE, D) +#define JLEm(D) JCCim(X86_CC_LE, D) +#define JNGm(D) JCCim(X86_CC_NG, D) +#define JNLEm(D) JCCim(X86_CC_NLE, D) +#define JGm(D) JCCim(X86_CC_G, D) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) )) +#define SETOr(RD) SETCCir(X86_CC_O, RD) +#define SETNOr(RD) SETCCir(X86_CC_NO, RD) +#define SETBr(RD) SETCCir(X86_CC_B, RD) +#define SETNAEr(RD) SETCCir(X86_CC_NAE, RD) +#define SETNBr(RD) SETCCir(X86_CC_NB, RD) +#define SETAEr(RD) SETCCir(X86_CC_AE, RD) +#define SETEr(RD) SETCCir(X86_CC_E, RD) +#define SETZr(RD) SETCCir(X86_CC_Z, RD) +#define SETNEr(RD) SETCCir(X86_CC_NE, RD) +#define SETNZr(RD) SETCCir(X86_CC_NZ, RD) +#define SETBEr(RD) SETCCir(X86_CC_BE, RD) +#define SETNAr(RD) SETCCir(X86_CC_NA, RD) +#define SETNBEr(RD) SETCCir(X86_CC_NBE, RD) +#define SETAr(RD) SETCCir(X86_CC_A, RD) +#define SETSr(RD) SETCCir(X86_CC_S, RD) +#define SETNSr(RD) SETCCir(X86_CC_NS, RD) +#define SETPr(RD) SETCCir(X86_CC_P, RD) +#define SETPEr(RD) SETCCir(X86_CC_PE, RD) +#define SETNPr(RD) SETCCir(X86_CC_NP, RD) +#define SETPOr(RD) SETCCir(X86_CC_PO, RD) +#define SETLr(RD) SETCCir(X86_CC_L, RD) +#define SETNGEr(RD) SETCCir(X86_CC_NGE, RD) +#define SETNLr(RD) SETCCir(X86_CC_NL, RD) +#define SETGEr(RD) SETCCir(X86_CC_GE, RD) +#define SETLEr(RD) SETCCir(X86_CC_LE, RD) +#define SETNGr(RD) SETCCir(X86_CC_NG, RD) +#define SETNLEr(RD) SETCCir(X86_CC_NLE, RD) +#define SETGr(RD) SETCCir(X86_CC_G, RD) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS )) +#define SETOm(D, B, I, S) SETCCim(X86_CC_O, D, B, I, S) +#define SETNOm(D, B, I, S) SETCCim(X86_CC_NO, D, B, I, S) +#define SETBm(D, B, I, S) SETCCim(X86_CC_B, D, B, I, S) +#define SETNAEm(D, B, I, S) SETCCim(X86_CC_NAE, D, B, I, S) +#define SETNBm(D, B, I, S) SETCCim(X86_CC_NB, D, B, I, S) +#define SETAEm(D, B, I, S) SETCCim(X86_CC_AE, D, B, I, S) +#define SETEm(D, B, I, S) SETCCim(X86_CC_E, D, B, I, S) +#define SETZm(D, B, I, S) SETCCim(X86_CC_Z, D, B, I, S) +#define SETNEm(D, B, I, S) SETCCim(X86_CC_NE, D, B, I, S) +#define SETNZm(D, B, I, S) SETCCim(X86_CC_NZ, D, B, I, S) +#define SETBEm(D, B, I, S) SETCCim(X86_CC_BE, D, B, I, S) +#define SETNAm(D, B, I, S) SETCCim(X86_CC_NA, D, B, I, S) +#define SETNBEm(D, B, I, S) SETCCim(X86_CC_NBE, D, B, I, S) +#define SETAm(D, B, I, S) SETCCim(X86_CC_A, D, B, I, S) +#define SETSm(D, B, I, S) SETCCim(X86_CC_S, D, B, I, S) +#define SETNSm(D, B, I, S) SETCCim(X86_CC_NS, D, B, I, S) +#define SETPm(D, B, I, S) SETCCim(X86_CC_P, D, B, I, S) +#define SETPEm(D, B, I, S) SETCCim(X86_CC_PE, D, B, I, S) +#define SETNPm(D, B, I, S) SETCCim(X86_CC_NP, D, B, I, S) +#define SETPOm(D, B, I, S) SETCCim(X86_CC_PO, D, B, I, S) +#define SETLm(D, B, I, S) SETCCim(X86_CC_L, D, B, I, S) +#define SETNGEm(D, B, I, S) SETCCim(X86_CC_NGE, D, B, I, S) +#define SETNLm(D, B, I, S) SETCCim(X86_CC_NL, D, B, I, S) +#define SETGEm(D, B, I, S) SETCCim(X86_CC_GE, D, B, I, S) +#define SETLEm(D, B, I, S) SETCCim(X86_CC_LE, D, B, I, S) +#define SETNGm(D, B, I, S) SETCCim(X86_CC_NG, D, B, I, S) +#define SETNLEm(D, B, I, S) SETCCim(X86_CC_NLE, D, B, I, S) +#define SETGm(D, B, I, S) SETCCim(X86_CC_G, D, B, I, S) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ +#define CMOVWrr(CC,RS,RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) )) +#define CMOVWmr(CC,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r2(RD) ,MD,MB,MI,MS )) +#define CMOVLrr(CC,RS,RD) (_REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r4(RD),_r4(RS) )) +#define CMOVLmr(CC,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r4(RD) ,MD,MB,MI,MS )) +#define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r8(RD),_r8(RS) )) +#define CMOVQmr(CC,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r8(RD) ,MD,MB,MI,MS )) + + +/* --- Push/Pop instructions ----------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ +/* W/L forms are wrapped in _m32only and Q forms in _m64only; presumably these reject use on the other target -- confirm against their definitions. */ +#define POPWr(RD) _m32only((_d16(), _Or (0x58,_r2(RD) ))) +#define POPWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))) + +#define POPLr(RD) _m32only( _Or (0x58,_r4(RD) )) +#define POPLm(MD, MB, MI, MS) _m32only( _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )) + +#define POPQr(RD) _m64only((_REXQr(RD), _Or (0x58,_r8(RD) ))) +#define POPQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))) +/* Every PUSH?m form passes opcode 0xff with _b110 in the r slot; PUSHWm differs from PUSHLm only by the _d16() prefix. */ +#define PUSHWr(RS) _m32only((_d16(), _Or (0x50,_r2(RS) ))) +#define PUSHWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))) +#define PUSHWi(IM) _m32only((_d16(), _Os_sW (0x68 ,IM ))) + +#define PUSHLr(RS) _m32only( _Or (0x50,_r4(RS) )) +#define PUSHLm(MD, MB, MI, MS) _m32only( _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )) +#define PUSHLi(IM) _m32only( _Os_sL (0x68 ,IM )) + +#define PUSHQr(RS) _m64only((_REXQr(RS), _Or (0x50,_r8(RS) ))) +#define PUSHQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))) +#define PUSHQi(IM) _m64only( _Os_sL (0x68 ,IM )) + +#define POPA() (_d16(), _O (0x61 )) +#define POPAD() _O (0x61 ) + +#define PUSHA() (_d16(), _O (0x60 )) +#define PUSHAD() _O (0x60 ) + +#define POPF() _O (0x9d ) +#define PUSHF() _O (0x9c ) + + +/* --- Test 
instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define TESTBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) )) +#define TESTBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS )) +#define TESTBir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \ + (_REXBrr(0, RD), _O_B (0xa8 ,_u8(IM))) : \ + (_REXBrr(0, RD), _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))) ) +#define TESTBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM))) + +#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) )) +#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS )) +#define TESTWir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \ + (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) ) +#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM))) + +#define TESTLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) )) +#define TESTLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS )) +#define TESTLir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \ + (_REXLrr(0, RD), _O_L (0xa9 ,IM )) : \ + (_REXLrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )) ) +#define TESTLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )) + +#define TESTQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x85 ,_b11,_r8(RS),_r8(RD) )) +#define TESTQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x85 ,_r8(RS) ,MD,MB,MI,MS )) +#define TESTQir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? 
\ + (_REXQrr(0, RD), _O_L (0xa9 ,IM )) : \ + (_REXQrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r8(RD) ,IM )) ) +#define TESTQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM )) + + +/* --- Exchange instructions ----------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define CMPXCHGBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) )) +#define CMPXCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) )) +#define CMPXCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) )) +#define CMPXCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS )) + +#define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r8(RS),_r8(RD) )) +#define CMPXCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r8(RS) ,MD,MB,MI,MS )) + +#define XADDBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) )) +#define XADDBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS )) + +#define XADDWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) )) +#define XADDWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS )) + +#define XADDLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) )) +#define XADDLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS )) + +#define XADDQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r8(RS),_r8(RD) )) +#define XADDQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r8(RS) ,MD,MB,MI,MS )) + +#define XCHGBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) )) +#define XCHGBrm(RS, MD, 
MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS )) + +#define XCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) )) +#define XCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS )) + +#define XCHGLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) )) +#define XCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS )) + +#define XCHGQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x87 ,_b11,_r8(RS),_r8(RD) )) +#define XCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x87 ,_r8(RS) ,MD,MB,MI,MS )) + + +/* --- Increment/Decrement instructions ------------------------------------ */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define DECBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS )) +#define DECBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) )) + +#define DECWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x48,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r2(RD) ))) + +#define DECLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECLr(RD) (! X86_TARGET_64BIT ? _Or (0x48,_r4(RD) ) : \ + (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r4(RD) ))) + +#define DECQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS )) +#define DECQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r8(RD) )) + +#define INCBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS )) +#define INCBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) )) + +#define INCWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCWr(RD) (! X86_TARGET_64BIT ? 
(_d16(), _Or (0x40,_r2(RD) )) : \ + (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r2(RD) )) ) + +#define INCLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCLr(RD) (! X86_TARGET_64BIT ? _Or (0x40,_r4(RD) ) : \ + (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r4(RD) ))) + +#define INCQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS )) +#define INCQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r8(RD) )) + + +/* --- Misc instructions --------------------------------------------------- */ + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define BSFWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r2(RD),_r2(RS) )) +#define BSFWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r2(RD) ,MD,MB,MI,MS )) +#define BSRWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r2(RD),_r2(RS) )) +#define BSRWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r2(RD) ,MD,MB,MI,MS )) + +#define BSFLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r4(RD),_r4(RS) )) +#define BSFLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r4(RD) ,MD,MB,MI,MS )) +#define BSRLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r4(RD),_r4(RS) )) +#define BSRLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r4(RD) ,MD,MB,MI,MS )) + +#define BSFQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r8(RD),_r8(RS) )) +#define BSFQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r8(RD) ,MD,MB,MI,MS )) +#define BSRQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r8(RD),_r8(RS) )) +#define BSRQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r8(RD) ,MD,MB,MI,MS )) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define MOVSBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) )) +#define MOVSBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS )) +#define MOVZBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) )) +#define MOVZBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS )) + +#define MOVSBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) )) +#define MOVSBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVZBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) )) +#define MOVZBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS )) + +#define MOVSBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r8(RD),_r1(RS) )) +#define MOVSBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r8(RD) ,MD,MB,MI,MS )) +#define MOVZBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r8(RD),_r1(RS) )) +#define MOVZBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r8(RD) ,MD,MB,MI,MS )) + +#define MOVSWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) )) +#define MOVSWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS )) +#define MOVZWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) )) +#define MOVZWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS )) + +#define MOVSWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) ))) +#define MOVSWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS ))) +#define MOVZWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) ))) +#define MOVZWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS ))) + +#define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, 
RS), _O_Mrm (0x63 ,_b11,_r8(RD),_r4(RS) ))) +#define MOVSLQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS ))) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */ + +#define LEALmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS )) +/* LEAQ targets a 64-bit register, so RD goes through _r8 like the other Q-suffixed forms. */ +#define LEAQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8d ,_r8(RD) ,MD,MB,MI,MS )) + +#define BSWAPLr(R) (_REXLrr(0, R), _OOr (0x0fc8,_r4(R) )) +#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) )) + +#define CLC() _O (0xf8 ) +#define STC() _O (0xf9 ) +#define CMC() _O (0xf5 ) + +#define CLD() _O (0xfc ) +#define STD() _O (0xfd ) + +#define CBTW() (_d16(), _O (0x98 )) +#define CWTL() _O (0x98 ) +/* The REX prefix and opcode are parenthesized together so _m64only receives a single argument, as in MOVSLQmr. */ +#define CLTQ() _m64only((_REXQrr(0, 0), _O (0x98 ))) + +#define CBW CBTW +#define CWDE CWTL +#define CDQE CLTQ + +#define CWTD() (_d16(), _O (0x99 )) +#define CLTD() _O (0x99 ) +/* Same single-argument wrapping as CLTQ. */ +#define CQTO() _m64only((_REXQrr(0, 0), _O (0x99 ))) + +#define CWD CWTD +#define CDQ CLTD +#define CQO CQTO + +#define LAHF() _O (0x9f ) +#define SAHF() _O (0x9e ) + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +#define CPUID() _OO (0x0fa2 ) +/* RDTSC is the two-byte opcode 0F 31. */ +#define RDTSC() _OO (0x0f31 ) + +#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B)) + +#define LEAVE() _O (0xc9 ) +#define RET() _O (0xc3 ) +#define RETi(IM) _O_W (0xc2 ,_su16(IM)) + +#define NOP() _O (0x90 ) + + +/* --- Media 64-bit instructions ------------------------------------------- */ + +enum { + X86_MMX_PABSB = 0x1c, // 2P + X86_MMX_PABSW = 0x1d, // 2P + X86_MMX_PABSD = 0x1e, // 2P + X86_MMX_PACKSSWB = 0x63, + X86_MMX_PACKSSDW = 0x6b, + X86_MMX_PACKUSWB = 0x67, + X86_MMX_PADDB = 0xfc, + X86_MMX_PADDW = 0xfd, + X86_MMX_PADDD = 0xfe, + X86_MMX_PADDQ = 0xd4, + X86_MMX_PADDSB = 0xec, + X86_MMX_PADDSW = 0xed, + X86_MMX_PADDUSB = 0xdc, + X86_MMX_PADDUSW = 0xdd, + X86_MMX_PAND = 0xdb, + X86_MMX_PANDN = 0xdf, + X86_MMX_PAVGB = 0xe0, + X86_MMX_PAVGW = 0xe3, + X86_MMX_PCMPEQB = 0x74, + X86_MMX_PCMPEQW = 0x75, + X86_MMX_PCMPEQD = 0x76, + X86_MMX_PCMPGTB = 0x64, + X86_MMX_PCMPGTW = 0x65, + X86_MMX_PCMPGTD = 0x66, + X86_MMX_PEXTRW = 0xc5, // 64, /r ib + X86_MMX_PHADDW = 0x01, // 2P + X86_MMX_PHADDD = 0x02, // 2P + X86_MMX_PHADDSW = 0x03, // 2P + X86_MMX_PHSUBW = 0x05, // 2P + X86_MMX_PHSUBD = 0x06, // 2P + X86_MMX_PHSUBSW = 0x07, // 2P + X86_MMX_PINSRW = 0xc4, // 64, /r ib + X86_MMX_PMADDUBSW = 0x04, // 2P + X86_MMX_PMADDWD = 0xf5, + X86_MMX_PMAXSW = 0xee, + X86_MMX_PMAXUB = 0xde, + X86_MMX_PMINSW = 0xea, + X86_MMX_PMINUB = 0xda, + X86_MMX_PMOVMSKB = 0xd7, // 64 + X86_MMX_PMULHRSW = 0x0b, // 2P + X86_MMX_PMULHUW = 0xe4, + X86_MMX_PMULHW = 0xe5, + X86_MMX_PMULLW = 0xd5, + X86_MMX_PMULUDQ = 0xf4, + X86_MMX_POR = 0xeb, + X86_MMX_PSADBW = 0xf6, + X86_MMX_PSHUFB = 0x00, // 2P + X86_MMX_PSHUFW = 0x70, // /r ib + X86_MMX_PSIGNB = 0x08, // 2P + X86_MMX_PSIGNW = 0x09, // 2P + X86_MMX_PSIGND = 0x0a, // 2P + X86_MMX_PSLLW = 0xf1, + X86_MMX_PSLLWi = 0x71, // /6 ib + X86_MMX_PSLLD = 0xf2, + X86_MMX_PSLLDi = 0x72, // /6 ib + X86_MMX_PSLLQ = 0xf3, + X86_MMX_PSLLQi = 0x73, // /6 ib + X86_MMX_PSRAW = 0xe1, + X86_MMX_PSRAWi = 0x71, // /4 ib + 
X86_MMX_PSRAD = 0xe2, + X86_MMX_PSRADi = 0x72, // /4 ib + X86_MMX_PSRLW = 0xd1, + X86_MMX_PSRLWi = 0x71, // /2 ib + X86_MMX_PSRLD = 0xd2, + X86_MMX_PSRLDi = 0x72, // /2 ib + X86_MMX_PSRLQ = 0xd3, + X86_MMX_PSRLQi = 0x73, // /2 ib + X86_MMX_PSUBB = 0xf8, + X86_MMX_PSUBW = 0xf9, + X86_MMX_PSUBD = 0xfa, + X86_MMX_PSUBQ = 0xfb, + X86_MMX_PSUBSB = 0xe8, + X86_MMX_PSUBSW = 0xe9, + X86_MMX_PSUBUSB = 0xd8, + X86_MMX_PSUBUSW = 0xd9, + X86_MMX_PUNPCKHBW = 0x68, + X86_MMX_PUNPCKHWD = 0x69, + X86_MMX_PUNPCKHDQ = 0x6a, + X86_MMX_PUNPCKLBW = 0x60, + X86_MMX_PUNPCKLWD = 0x61, + X86_MMX_PUNPCKLDQ = 0x62, + X86_MMX_PXOR = 0xef, +}; + +#define __MMXLrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __MMXLmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __MMXLrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) +#define __MMXLirr(OP,IM,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM))) +/* The imr forms have no RS parameter: the REX prefix is built from the memory operand and RD, as in the mr forms. */ +#define __MMXLimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM))) +#define __MMXQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __MMXQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __MMXQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) +#define __MMXQirr(OP,IM,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM))) +#define __MMXQimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM))) +#define __MMX1Lrr(PX,OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _B(0x0f),_OO_Mrm(((PX)<<8)|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __MMX1Lmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define 
__MMX1Lrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define _MMXLrr(OP,RS,RD) __MMXLrr(OP,RS,_rM,RD,_rM) +#define _MMXLmr(OP,MD,MB,MI,MS,RD) __MMXLmr(OP,MD,MB,MI,MS,RD,_rM) +#define _MMXLrm(OP,RS,MD,MB,MI,MS) __MMXLrm(OP,RS,_rM,MD,MB,MI,MS) +#define _MMXQrr(OP,RS,RD) __MMXQrr(OP,RS,_rM,RD,_rM) +#define _MMXQmr(OP,MD,MB,MI,MS,RD) __MMXQmr(OP,MD,MB,MI,MS,RD,_rM) +#define _MMXQrm(OP,RS,MD,MB,MI,MS) __MMXQrm(OP,RS,_rM,MD,MB,MI,MS) +#define _2P_MMXLrr(OP,RS,RD) __MMX1Lrr(0x38, OP,RS,_rM,RD,_rM) +#define _2P_MMXLmr(OP,MD,MB,MI,MS,RD) __MMX1Lmr(0x38, OP,MD,MB,MI,MS,RD,_rM) +#define _2P_MMXLrm(OP,RS,MD,MB,MI,MS) __MMX1Lrm(0x38, OP,RS,_rM,MD,MB,MI,MS) + +#define MMX_MOVDMDrr(RS, RD) __MMXLrr(0x6e, RS,_r4, RD,_rM) +#define MMX_MOVQMDrr(RS, RD) __MMXQrr(0x6e, RS,_r8, RD,_rM) +#define MMX_MOVDMSrr(RS, RD) __MMXLrr(0x7e, RD,_r4, RS,_rM) +#define MMX_MOVQMSrr(RS, RD) __MMXQrr(0x7e, RD,_r8, RS,_rM) + +#define MMX_MOVDmr(MD, MB, MI, MS, RD) _MMXLmr(0x6e, MD, MB, MI, MS, RD) +#define MMX_MOVDrm(RS, MD, MB, MI, MS) _MMXLrm(0x7e, RS, MD, MB, MI, MS) +#define MMX_MOVQrr(RS, RD) _MMXLrr(0x6f, RS, RD) +#define MMX_MOVQmr(MD, MB, MI, MS, RD) _MMXLmr(0x6f, MD, MB, MI, MS, RD) +#define MMX_MOVQrm(RS, MD, MB, MI, MS) _MMXLrm(0x7f, RS, MD, MB, MI, MS) + +// Original MMX instructions +#define MMX_PACKSSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKSSWB,RS,RD) +#define MMX_PACKSSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSWB, MD, MB, MI, MS, RD) +#define MMX_PACKSSDWrr(RS, RD) _MMXLrr(X86_MMX_PACKSSDW,RS,RD) +#define MMX_PACKSSDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSDW, MD, MB, MI, MS, RD) +#define MMX_PACKUSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKUSWB,RS,RD) +#define MMX_PACKUSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKUSWB, MD, MB, MI, MS, RD) +#define MMX_PADDBrr(RS, RD) _MMXLrr(X86_MMX_PADDB,RS,RD) +#define MMX_PADDBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDB, MD, MB, MI, MS, RD) +#define MMX_PADDWrr(RS, RD) _MMXLrr(X86_MMX_PADDW,RS,RD) +#define 
MMX_PADDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDW, MD, MB, MI, MS, RD) +#define MMX_PADDDrr(RS, RD) _MMXLrr(X86_MMX_PADDD,RS,RD) +#define MMX_PADDDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDD, MD, MB, MI, MS, RD) +#define MMX_PADDQrr(RS, RD) _MMXLrr(X86_MMX_PADDQ,RS,RD) +#define MMX_PADDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDQ, MD, MB, MI, MS, RD) +#define MMX_PADDSBrr(RS, RD) _MMXLrr(X86_MMX_PADDSB,RS,RD) +#define MMX_PADDSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSB, MD, MB, MI, MS, RD) +#define MMX_PADDSWrr(RS, RD) _MMXLrr(X86_MMX_PADDSW,RS,RD) +#define MMX_PADDSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSW, MD, MB, MI, MS, RD) +#define MMX_PADDUSBrr(RS, RD) _MMXLrr(X86_MMX_PADDUSB,RS,RD) +#define MMX_PADDUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSB, MD, MB, MI, MS, RD) +#define MMX_PADDUSWrr(RS, RD) _MMXLrr(X86_MMX_PADDUSW,RS,RD) +#define MMX_PADDUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSW, MD, MB, MI, MS, RD) +#define MMX_PANDrr(RS, RD) _MMXLrr(X86_MMX_PAND,RS,RD) +#define MMX_PANDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAND, MD, MB, MI, MS, RD) +#define MMX_PANDNrr(RS, RD) _MMXLrr(X86_MMX_PANDN,RS,RD) +#define MMX_PANDNmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PANDN, MD, MB, MI, MS, RD) +#define MMX_PAVGBrr(RS, RD) _MMXLrr(X86_MMX_PAVGB,RS,RD) +#define MMX_PAVGBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGB, MD, MB, MI, MS, RD) +#define MMX_PAVGWrr(RS, RD) _MMXLrr(X86_MMX_PAVGW,RS,RD) +#define MMX_PAVGWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGW, MD, MB, MI, MS, RD) +#define MMX_PCMPEQBrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQB,RS,RD) +#define MMX_PCMPEQBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQB, MD, MB, MI, MS, RD) +#define MMX_PCMPEQWrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQW,RS,RD) +#define MMX_PCMPEQWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQW, MD, MB, MI, MS, RD) +#define MMX_PCMPEQDrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQD,RS,RD) +#define MMX_PCMPEQDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQD, MD, MB, MI, MS, RD) +#define MMX_PCMPGTBrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTB,RS,RD) +#define 
MMX_PCMPGTBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTB, MD, MB, MI, MS, RD) +#define MMX_PCMPGTWrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTW,RS,RD) +#define MMX_PCMPGTWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTW, MD, MB, MI, MS, RD) +#define MMX_PCMPGTDrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTD,RS,RD) +#define MMX_PCMPGTDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTD, MD, MB, MI, MS, RD) +#define MMX_PMADDWDrr(RS, RD) _MMXLrr(X86_MMX_PMADDWD,RS,RD) +#define MMX_PMADDWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMADDWD, MD, MB, MI, MS, RD) +#define MMX_PMAXSWrr(RS, RD) _MMXLrr(X86_MMX_PMAXSW,RS,RD) +#define MMX_PMAXSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXSW, MD, MB, MI, MS, RD) +#define MMX_PMAXUBrr(RS, RD) _MMXLrr(X86_MMX_PMAXUB,RS,RD) +#define MMX_PMAXUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXUB, MD, MB, MI, MS, RD) +#define MMX_PMINSWrr(RS, RD) _MMXLrr(X86_MMX_PMINSW,RS,RD) +#define MMX_PMINSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINSW, MD, MB, MI, MS, RD) +#define MMX_PMINUBrr(RS, RD) _MMXLrr(X86_MMX_PMINUB,RS,RD) +#define MMX_PMINUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINUB, MD, MB, MI, MS, RD) +#define MMX_PMULHUWrr(RS, RD) _MMXLrr(X86_MMX_PMULHUW,RS,RD) +#define MMX_PMULHUWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHUW, MD, MB, MI, MS, RD) +#define MMX_PMULHWrr(RS, RD) _MMXLrr(X86_MMX_PMULHW,RS,RD) +#define MMX_PMULHWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHW, MD, MB, MI, MS, RD) +#define MMX_PMULLWrr(RS, RD) _MMXLrr(X86_MMX_PMULLW,RS,RD) +#define MMX_PMULLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULLW, MD, MB, MI, MS, RD) +#define MMX_PMULUDQrr(RS, RD) _MMXLrr(X86_MMX_PMULUDQ,RS,RD) +#define MMX_PMULUDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULUDQ, MD, MB, MI, MS, RD) +#define MMX_PORrr(RS, RD) _MMXLrr(X86_MMX_POR,RS,RD) +#define MMX_PORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_POR, MD, MB, MI, MS, RD) +#define MMX_PSADBWrr(RS, RD) _MMXLrr(X86_MMX_PSADBW,RS,RD) +#define MMX_PSADBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSADBW, MD, MB, MI, MS, RD) +#define MMX_PSLLWir(IM, RD) __MMXLirr(X86_MMX_PSLLWi, IM, RD,_rM, 
_b110,_rN) +#define MMX_PSLLWrr(RS, RD) _MMXLrr(X86_MMX_PSLLW,RS,RD) +#define MMX_PSLLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLW, MD, MB, MI, MS, RD) +#define MMX_PSLLDir(IM, RD) __MMXLirr(X86_MMX_PSLLDi, IM, RD,_rM, _b110,_rN) +#define MMX_PSLLDrr(RS, RD) _MMXLrr(X86_MMX_PSLLD,RS,RD) +#define MMX_PSLLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLD, MD, MB, MI, MS, RD) +#define MMX_PSLLQir(IM, RD) __MMXLirr(X86_MMX_PSLLQi, IM, RD,_rM, _b110,_rN) +#define MMX_PSLLQrr(RS, RD) _MMXLrr(X86_MMX_PSLLQ,RS,RD) +#define MMX_PSLLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLQ, MD, MB, MI, MS, RD) +#define MMX_PSRAWir(IM, RD) __MMXLirr(X86_MMX_PSRAWi, IM, RD,_rM, _b100,_rN) +#define MMX_PSRAWrr(RS, RD) _MMXLrr(X86_MMX_PSRAW,RS,RD) +#define MMX_PSRAWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAW, MD, MB, MI, MS, RD) +#define MMX_PSRADir(IM, RD) __MMXLirr(X86_MMX_PSRADi, IM, RD,_rM, _b100,_rN) +#define MMX_PSRADrr(RS, RD) _MMXLrr(X86_MMX_PSRAD,RS,RD) +#define MMX_PSRADmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAD, MD, MB, MI, MS, RD) +#define MMX_PSRLWir(IM, RD) __MMXLirr(X86_MMX_PSRLWi, IM, RD,_rM, _b010,_rN) +#define MMX_PSRLWrr(RS, RD) _MMXLrr(X86_MMX_PSRLW,RS,RD) +#define MMX_PSRLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLW, MD, MB, MI, MS, RD) +#define MMX_PSRLDir(IM, RD) __MMXLirr(X86_MMX_PSRLDi, IM, RD,_rM, _b010,_rN) +#define MMX_PSRLDrr(RS, RD) _MMXLrr(X86_MMX_PSRLD,RS,RD) +#define MMX_PSRLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLD, MD, MB, MI, MS, RD) +#define MMX_PSRLQir(IM, RD) __MMXLirr(X86_MMX_PSRLQi, IM, RD,_rM, _b010,_rN) +#define MMX_PSRLQrr(RS, RD) _MMXLrr(X86_MMX_PSRLQ,RS,RD) +#define MMX_PSRLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLQ, MD, MB, MI, MS, RD) +#define MMX_PSUBBrr(RS, RD) _MMXLrr(X86_MMX_PSUBB,RS,RD) +#define MMX_PSUBBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBB, MD, MB, MI, MS, RD) +#define MMX_PSUBWrr(RS, RD) _MMXLrr(X86_MMX_PSUBW,RS,RD) +#define MMX_PSUBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBW, MD, MB, MI, MS, RD) +#define MMX_PSUBDrr(RS, RD) _MMXLrr(X86_MMX_PSUBD,RS,RD) 
+#define MMX_PSUBDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBD, MD, MB, MI, MS, RD) +#define MMX_PSUBQrr(RS, RD) _MMXLrr(X86_MMX_PSUBQ,RS,RD) +#define MMX_PSUBQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBQ, MD, MB, MI, MS, RD) +#define MMX_PSUBSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBSB,RS,RD) +#define MMX_PSUBSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSB, MD, MB, MI, MS, RD) +#define MMX_PSUBSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBSW,RS,RD) +#define MMX_PSUBSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSW, MD, MB, MI, MS, RD) +#define MMX_PSUBUSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSB,RS,RD) +#define MMX_PSUBUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSB, MD, MB, MI, MS, RD) +#define MMX_PSUBUSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSW,RS,RD) +#define MMX_PSUBUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSW, MD, MB, MI, MS, RD) +#define MMX_PUNPCKHBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHBW,RS,RD) +#define MMX_PUNPCKHBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHBW, MD, MB, MI, MS, RD) +#define MMX_PUNPCKHWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHWD,RS,RD) +#define MMX_PUNPCKHWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHWD, MD, MB, MI, MS, RD) +#define MMX_PUNPCKHDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHDQ,RS,RD) +#define MMX_PUNPCKHDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHDQ, MD, MB, MI, MS, RD) +#define MMX_PUNPCKLBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLBW,RS,RD) +#define MMX_PUNPCKLBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLBW, MD, MB, MI, MS, RD) +#define MMX_PUNPCKLWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLWD,RS,RD) +#define MMX_PUNPCKLWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLWD, MD, MB, MI, MS, RD) +#define MMX_PUNPCKLDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLDQ,RS,RD) +#define MMX_PUNPCKLDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLDQ, MD, MB, MI, MS, RD) +#define MMX_PXORrr(RS, RD) _MMXLrr(X86_MMX_PXOR,RS,RD) +#define MMX_PXORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PXOR, MD, MB, MI, MS, RD) + +#define MMX_PSHUFWirr(IM, RS, RD) __MMXLirr(X86_MMX_PSHUFW, IM, RS,_rM, RD,_rM) +#define MMX_PSHUFWimr(IM, MD, MB, MI, MS, RD) 
__MMXLimr(X86_MMX_PSHUFW, IM, MD, MB, MI, MS, RD,_rM) +#define MMX_PEXTRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r4) +#define MMX_PEXTRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r8) +#define MMX_PINSRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM) +/* PINSRW from memory still writes an MMX register, so RD uses the _rM accessor like the rr forms. */ +#define MMX_PINSRWLimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_rM) +#define MMX_PINSRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM) +#define MMX_PINSRWQimr(IM, MD, MB, MI, MS, RD) __MMXQimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_rM) + +// Additional MMX instructions, brought by SSSE3 ISA +#define MMX_PABSBrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSB,RS,RD) +#define MMX_PABSBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSB, MD, MB, MI, MS, RD) +#define MMX_PABSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSW,RS,RD) +#define MMX_PABSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSW, MD, MB, MI, MS, RD) +#define MMX_PABSDrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSD,RS,RD) +#define MMX_PABSDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSD, MD, MB, MI, MS, RD) +#define MMX_PHADDWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDW,RS,RD) +#define MMX_PHADDWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDW, MD, MB, MI, MS, RD) +#define MMX_PHADDDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDD,RS,RD) +#define MMX_PHADDDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDD, MD, MB, MI, MS, RD) +#define MMX_PHADDSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDSW,RS,RD) +#define MMX_PHADDSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDSW, MD, MB, MI, MS, RD) +#define MMX_PHSUBWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBW,RS,RD) +#define MMX_PHSUBWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBW, MD, MB, MI, MS, RD) +#define MMX_PHSUBDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBD,RS,RD) +#define MMX_PHSUBDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBD, MD, MB, MI, MS, RD) +#define MMX_PHSUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBSW,RS,RD) +#define MMX_PHSUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBSW, MD, MB, MI, MS, RD) +#define MMX_PMADDUBSWrr(RS, 
RD) _2P_MMXLrr(X86_MMX_PMADDUBSW,RS,RD) +#define MMX_PMADDUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMADDUBSW, MD, MB, MI, MS, RD) +#define MMX_PMULHRSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMULHRSW,RS,RD) +#define MMX_PMULHRSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMULHRSW, MD, MB, MI, MS, RD) +#define MMX_PSHUFBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSHUFB,RS,RD) +#define MMX_PSHUFBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSHUFB, MD, MB, MI, MS, RD) +#define MMX_PSIGNBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNB,RS,RD) +#define MMX_PSIGNBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNB, MD, MB, MI, MS, RD) +#define MMX_PSIGNWrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNW,RS,RD) +#define MMX_PSIGNWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNW, MD, MB, MI, MS, RD) +#define MMX_PSIGNDrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGND,RS,RD) +#define MMX_PSIGNDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGND, MD, MB, MI, MS, RD) + +#define EMMS() _OO (0x0f77 ) + + +/* --- Media 128-bit instructions ------------------------------------------ */ + +enum { + X86_SSE_CC_EQ = 0, + X86_SSE_CC_LT = 1, + X86_SSE_CC_GT = 1, + X86_SSE_CC_LE = 2, + X86_SSE_CC_GE = 2, + X86_SSE_CC_U = 3, + X86_SSE_CC_NEQ = 4, + X86_SSE_CC_NLT = 5, + X86_SSE_CC_NGT = 5, + X86_SSE_CC_NLE = 6, + X86_SSE_CC_NGE = 6, + X86_SSE_CC_O = 7 +}; + +enum { + X86_SSE_UCOMI = 0x2e, + X86_SSE_COMI = 0x2f, + X86_SSE_CMP = 0xc2, + X86_SSE_SQRT = 0x51, + X86_SSE_RSQRT = 0x52, + X86_SSE_RCP = 0x53, + X86_SSE_AND = 0x54, + X86_SSE_ANDN = 0x55, + X86_SSE_OR = 0x56, + X86_SSE_XOR = 0x57, + X86_SSE_ADD = 0x58, + X86_SSE_MUL = 0x59, + X86_SSE_SUB = 0x5c, + X86_SSE_MIN = 0x5d, + X86_SSE_DIV = 0x5e, + X86_SSE_MAX = 0x5f, + X86_SSE_CVTDQ2PD = 0xe6, + X86_SSE_CVTDQ2PS = 0x5b, + X86_SSE_CVTPD2DQ = 0xe6, + X86_SSE_CVTPD2PI = 0x2d, + X86_SSE_CVTPD2PS = 0x5a, + X86_SSE_CVTPI2PD = 0x2a, + X86_SSE_CVTPI2PS = 0x2a, + X86_SSE_CVTPS2DQ = 0x5b, + X86_SSE_CVTPS2PD = 0x5a, + X86_SSE_CVTPS2PI = 0x2d, + X86_SSE_CVTSD2SI = 0x2d, + X86_SSE_CVTSD2SS = 0x5a, + X86_SSE_CVTSI2SD = 0x2a, + 
X86_SSE_CVTSI2SS = 0x2a, + X86_SSE_CVTSS2SD = 0x5a, + X86_SSE_CVTSS2SI = 0x2d, + X86_SSE_CVTTPD2PI = 0x2c, + X86_SSE_CVTTPD2DQ = 0xe6, + X86_SSE_CVTTPS2DQ = 0x5b, + X86_SSE_CVTTPS2PI = 0x2c, + X86_SSE_CVTTSD2SI = 0x2c, + X86_SSE_CVTTSS2SI = 0x2c, + X86_SSE_MOVMSK = 0x50, + X86_SSE_PACKSSDW = 0x6b, + X86_SSE_PACKSSWB = 0x63, + X86_SSE_PACKUSWB = 0x67, + X86_SSE_PADDB = 0xfc, + X86_SSE_PADDD = 0xfe, + X86_SSE_PADDQ = 0xd4, + X86_SSE_PADDSB = 0xec, + X86_SSE_PADDSW = 0xed, + X86_SSE_PADDUSB = 0xdc, + X86_SSE_PADDUSW = 0xdd, + X86_SSE_PADDW = 0xfd, + X86_SSE_PAND = 0xdb, + X86_SSE_PANDN = 0xdf, + X86_SSE_PAVGB = 0xe0, + X86_SSE_PAVGW = 0xe3, + X86_SSE_PCMPEQB = 0x74, + X86_SSE_PCMPEQD = 0x76, + X86_SSE_PCMPEQW = 0x75, + X86_SSE_PCMPGTB = 0x64, + X86_SSE_PCMPGTD = 0x66, + X86_SSE_PCMPGTW = 0x65, + X86_SSE_PMADDWD = 0xf5, + X86_SSE_PMAXSW = 0xee, + X86_SSE_PMAXUB = 0xde, + X86_SSE_PMINSW = 0xea, + X86_SSE_PMINUB = 0xda, + X86_SSE_PMOVMSKB = 0xd7, + X86_SSE_PMULHUW = 0xe4, + X86_SSE_PMULHW = 0xe5, + X86_SSE_PMULLW = 0xd5, + X86_SSE_PMULUDQ = 0xf4, + X86_SSE_POR = 0xeb, + X86_SSE_PSADBW = 0xf6, + X86_SSE_PSLLD = 0xf2, + X86_SSE_PSLLQ = 0xf3, + X86_SSE_PSLLW = 0xf1, + X86_SSE_PSRAD = 0xe2, + X86_SSE_PSRAW = 0xe1, + X86_SSE_PSRLD = 0xd2, + X86_SSE_PSRLQ = 0xd3, + X86_SSE_PSRLW = 0xd1, + X86_SSE_PSUBB = 0xf8, + X86_SSE_PSUBD = 0xfa, + X86_SSE_PSUBQ = 0xfb, + X86_SSE_PSUBSB = 0xe8, + X86_SSE_PSUBSW = 0xe9, + X86_SSE_PSUBUSB = 0xd8, + X86_SSE_PSUBUSW = 0xd9, + X86_SSE_PSUBW = 0xf9, + X86_SSE_PUNPCKHBW = 0x68, + X86_SSE_PUNPCKHDQ = 0x6a, + X86_SSE_PUNPCKHQDQ = 0x6d, + X86_SSE_PUNPCKHWD = 0x69, + X86_SSE_PUNPCKLBW = 0x60, + X86_SSE_PUNPCKLDQ = 0x62, + X86_SSE_PUNPCKLQDQ = 0x6c, + X86_SSE_PUNPCKLWD = 0x61, + X86_SSE_PXOR = 0xef, + X86_SSSE3_PSHUFB = 0x00, +}; + +/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... 
*/ + +/* The REX prefix must account for both registers: RD in the reg field and RS in the r/m field, as in _SSSE3Lirr. */ +#define _SSSE3Lrr(OP1,OP2,RS,RSA,RD,RDA) (_B(0x66), _REXLrr(RD, RS), _B(0x0f), _OO_Mrm (((OP1)<<8)|(OP2) ,_b11,RDA(RD),RSA(RS) )) +#define _SSSE3Lmr(OP1,OP2,MD,MB,MI,MS,RD,RDA) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X (((OP1)<<8)|(OP2) ,RDA(RD) ,MD,MB,MI,MS )) +#define _SSSE3Lirr(OP1,OP2,IM,RS,RD) (_B(0x66), _REXLrr(RD, RS), _B(0x0f), _OO_Mrm_B (((OP1)<<8)|(OP2) ,_b11,_rX(RD),_rX(RS) ,_u8(IM))) +#define _SSSE3Limr(OP1,OP2,IM,MD,MB,MI,MS,RD) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X_B (((OP1)<<8)|(OP2) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM))) + +#define __SSELir(OP,MO,IM,RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0f00|(OP) ,_b11,MO ,_rX(RD) ,_u8(IM))) +#define __SSELim(OP,MO,IM,MD,MB,MI,MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0f00|(OP) ,MO ,MD,MB,MI,MS ,_u8(IM))) +#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) +#define __SSELirr(OP,IM,RS,RD) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,_rX(RD),_rX(RS) ,_u8(IM))) +#define __SSELimr(OP,IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM))) + +#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) )) +#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS )) +#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS )) + +#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSELrr(OP, RS, RSA, RD, RDA)) +#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS)) +#define _SSELir(PX,OP,MO,IM,RD) (_B(PX), __SSELir(OP, MO, IM, RD)) +#define 
_SSELim(PX,OP,MO,IM,MD,MB,MI,MS) (_B(PX), __SSELim(OP, MO, IM, MD, MB, MI, MS)) +#define _SSELirr(PX,OP,IM,RS,RD) (_B(PX), __SSELirr(OP, IM, RS, RD)) +#define _SSELimr(PX,OP,IM,MD,MB,MI,MS,RD) (_B(PX), __SSELimr(OP, IM, MD, MB, MI, MS, RD)) + +#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA)) +#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA)) +#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS)) + +#define _SSEPSrr(OP,RS,RD) __SSELrr( OP, RS,_rX, RD,_rX) +#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr( OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm( OP, RS,_rX, MD, MB, MI, MS) +#define _SSEPSirr(OP,IM,RS,RD) __SSELirr( OP, IM, RS, RD) +#define _SSEPSimr(OP,IM,MD,MB,MI,MS,RD) __SSELimr( OP, IM, MD, MB, MI, MS, RD) + +#define _SSEPDrr(OP,RS,RD) _SSELrr(0x66, OP, RS,_rX, RD,_rX) +#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0x66, OP, MD, MB, MI, MS, RD,_rX) +#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, MS) +#define _SSEPDirr(OP,IM,RS,RD) _SSELirr(0x66, OP, IM, RS, RD) +#define _SSEPDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0x66, OP, IM, MD, MB, MI, MS, RD) + +#define _SSESSrr(OP,RS,RD) _SSELrr(0xf3, OP, RS,_rX, RD,_rX) +#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf3, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, MS) +#define _SSESSirr(OP,IM,RS,RD) _SSELirr(0xf3, OP, IM, RS, RD) +#define _SSESSimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf3, OP, IM, MD, MB, MI, MS, RD) + +#define _SSESDrr(OP,RS,RD) _SSELrr(0xf2, OP, RS,_rX, RD,_rX) +#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf2, OP, MD, MB, MI, MS, RD,_rX) +#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, MS) +#define _SSESDirr(OP,IM,RS,RD) _SSELirr(0xf2, OP, IM, RS, RD) +#define _SSESDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf2, OP, IM, MD, MB, MI, MS, RD) + +#define ADDPSrr(RS, RD) 
_SSEPSrr(X86_SSE_ADD, RS, RD) +#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD) +#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD) +#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD) +#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD) +#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD) + +#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD) +#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) +#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD) +#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD) + +#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD) +#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD) +#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD) +#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD) + +#define CMPPSrr(IM, RS, RD) _SSEPSirr(X86_SSE_CMP, IM, RS, RD) +#define CMPPSmr(IM, MD, MB, MI, MS, RD) _SSEPSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) +#define CMPPDrr(IM, RS, RD) _SSEPDirr(X86_SSE_CMP, IM, RS, RD) +#define CMPPDmr(IM, MD, MB, MI, MS, RD) _SSEPDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) + +#define CMPSSrr(IM, RS, RD) _SSESSirr(X86_SSE_CMP, IM, RS, RD) +#define CMPSSmr(IM, MD, MB, MI, MS, RD) _SSESSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) +#define CMPSDrr(IM, RS, RD) _SSESDirr(X86_SSE_CMP, IM, RS, RD) +#define CMPSDmr(IM, MD, MB, MI, MS, RD) _SSESDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD) + +#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD) +#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD) +#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD) 
+#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD) +#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD) +#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD) + +#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD) +#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) +#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD) +#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD) +#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD) +#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD) +#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD) + +#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD) +#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD) +#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD) +#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD) +#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD) +#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD) + +#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD) +#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD) +#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD) +#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD) +#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD) +#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD) + +#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD) +#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD) +#define ORPDrr(RS, RD) 
_SSEPDrr(X86_SSE_OR, RS, RD) +#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD) + +#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD) +#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) +#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD) +#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD) + +#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) +#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD) +#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD) + +#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD) +#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD) +#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD) +#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) +#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD) +#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD) + +#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD) +#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD) +#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD) +#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD) +#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD) +#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD) + +#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD) +#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD) +#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD) +#define XORPDmr(MD, MB, MI, MS, RD) 
_SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD) + +#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD) +#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD) +#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD) +#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD) + +#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) +#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD) +#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD) + +#define MOVAPSrr(RS, RD) _SSEPSrr(0x28, RS, RD) +#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr(0x28, MD, MB, MI, MS, RD) +#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPSrm(0x29, RS, MD, MB, MI, MS) + +#define MOVAPDrr(RS, RD) _SSEPDrr(0x28, RS, RD) +#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr(0x28, MD, MB, MI, MS, RD) +#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPDrm(0x29, RS, MD, MB, MI, MS) + +#define CVTDQ2PDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTDQ2PD, RS,_rX, RD,_rX) +#define CVTDQ2PDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTDQ2PD, MD, MB, MI, MS, RD,_rX) +#define CVTDQ2PSrr(RS, RD) __SSELrr( X86_SSE_CVTDQ2PS, RS,_rX, RD,_rX) +#define CVTDQ2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTDQ2PS, MD, MB, MI, MS, RD,_rX) +#define CVTPD2DQrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTPD2DQ, RS,_rX, RD,_rX) +#define CVTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTPD2DQ, MD, MB, MI, MS, RD,_rX) +#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PI, RS,_rX, RD,_rM) +#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PI, MD, MB, MI, MS, RD,_rM) +#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PS, RS,_rX, RD,_rX) +#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PS, MD, MB, MI, MS, RD,_rX) +#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPI2PD, RS,_rM, RD,_rX) +#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 
X86_SSE_CVTPI2PD, MD, MB, MI, MS, RD,_rX) +#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTPI2PS, RS,_rM, RD,_rX) +#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPI2PS, MD, MB, MI, MS, RD,_rX) +#define CVTPS2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPS2DQ, RS,_rX, RD,_rX) +#define CVTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPS2DQ, MD, MB, MI, MS, RD,_rX) +#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PD, RS,_rX, RD,_rX) +#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PD, MD, MB, MI, MS, RD,_rX) +#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PI, RS,_rX, RD,_rM) +#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PI, MD, MB, MI, MS, RD,_rM) +#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r4) +#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r4) +#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r8) +#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r8) +#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SS, RS,_rX, RD,_rX) +#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI2SD, RS,_r4, RD,_rX) +#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI2SD, RS,_r8, RD,_rX) +#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI2SS, RS,_r4, RD,_rX) +#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX) +#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI2SS, RS,_r8, RD,_rX) +#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX) +#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SD, RS,_rX, RD,_rX) 
+#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SD, MD, MB, MI, MS, RD,_rX) +#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r4) +#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r4) +#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r8) +#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r8) +#define CVTTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2PI, RS,_rX, RD,_rM) +#define CVTTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2PI, MD, MB, MI, MS, RD,_rM) +#define CVTTPD2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2DQ, RS,_rX, RD,_rX) +#define CVTTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2DQ, MD, MB, MI, MS, RD,_rX) +#define CVTTPS2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTPS2DQ, RS,_rX, RD,_rX) +#define CVTTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTPS2DQ, MD, MB, MI, MS, RD,_rX) +#define CVTTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTTPS2PI, RS,_rX, RD,_rM) +#define CVTTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTTPS2PI, MD, MB, MI, MS, RD,_rM) +#define CVTTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r4) +#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r4) +#define CVTTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r8) +#define CVTTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r8) +#define CVTTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r4) +#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r4) +#define CVTTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r8) +#define CVTTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r8) + +#define MOVDXDrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX) +#define MOVDXDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) 
+#define MOVQXDrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX) +#define MOVQXDmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX) + +#define MOVDXSrr(RS, RD) _SSELrr(0x66, 0x7e, RD,_r4, RS,_rX) +#define MOVDXSrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) +#define MOVQXSrr(RS, RD) _SSEQrr(0x66, 0x7e, RD,_r8, RS,_rX) +#define MOVQXSrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS) + +#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM) +#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM) +#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM) +#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM) + +#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RS,_rM, RD,_r4) +#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS) +#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RS,_rM, RD,_r8) +#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS) + +#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM) +#define MOVMSKPSrr(RS, RD) __SSELrr( 0x50, RS,_rX, RD,_r4) +#define MOVMSKPDrr(RS, RD) _SSELrr(0x66, 0x50, RS,_rX, RD,_r4) + +#define MOVHLPSrr(RS, RD) __SSELrr( 0x12, RS,_rX, RD,_rX) +#define MOVLHPSrr(RS, RD) __SSELrr( 0x16, RS,_rX, RD,_rX) + +#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX) +#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX) +#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX) +#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS) + +#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x16, MD, MB, MI, MS, RD,_rX) +#define MOVHPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x17, RS,_rX, MD, MB, MI, MS) +#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x16, MD, MB, MI, MS, RD,_rX) +#define 
MOVHPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x17, RS,_rX, MD, MB, MI, MS) + +#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x12, MD, MB, MI, MS, RD,_rX) +#define MOVLPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x13, RS,_rX, MD, MB, MI, MS) +#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x12, MD, MB, MI, MS, RD,_rX) +#define MOVLPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x13, RS,_rX, MD, MB, MI, MS) + + +/* --- Floating-Point instructions ----------------------------------------- */ + +enum { + X86_F2XM1 = 0xd9f0, + X86_FABS = 0xd9e1, + X86_FADD = 0xd8c0, // m32fp, m64fp, sti0, st0i, pst0i + X86_FIADD = 0xda00, // m32int, m16int + X86_FBLD = 0xdf04, // mem + X86_FBSTP = 0xdf06, // mem + X86_FCHS = 0xd9e0, + X86_FCMOVB = 0xdac0, // sti0 + X86_FCMOVE = 0xdac8, // sti0 + X86_FCMOVBE = 0xdad0, // sti0 + X86_FCMOVU = 0xdad8, // sti0 + X86_FCMOVNB = 0xdbc0, // sti0 + X86_FCMOVNE = 0xdbc8, // sti0 + X86_FCMOVNBE = 0xdbd0, // sti0 + X86_FCMOVNU = 0xdbd8, // sti0 + X86_FCOM = 0xd8d2, // m32fp, m64fp, sti + X86_FCOMP = 0xd8db, // m32fp, m64fp, sti + X86_FCOMPP = 0xded9, + X86_FCOMI = 0xdbf0, // sti0 + X86_FCOMIP = 0xdff0, // sti0 + X86_FUCOMI = 0xdbe8, // sti0 + X86_FUCOMIP = 0xdfe8, // sti0 + X86_FCOS = 0xd9ff, + X86_FDECSTP = 0xd9f6, + X86_FDIV = 0xd8f6, // m32fp, m64fp, sti0, st0i, pst0i + X86_FIDIV = 0xda06, // m32int, m16int + X86_FDIVR = 0xd8ff, // m32fp, m64fp, sti0, st0i, pst0i + X86_FIDIVR = 0xda07, // m32int, m16int + X86_FFREE = 0xddc0, // sti + X86_FICOM = 0xda02, // m32int, m16int + X86_FICOMP = 0xda03, // m32int, m16int + X86_FILD = 0xdb00, // m32int, m16int + X86_FILDQ = 0xdf05, // mem + X86_FINCSTP = 0xd9f7, + X86_FIST = 0xdb02, // m32int, m16int + X86_FISTP = 0xdb03, // m32int, m16int + X86_FISTPQ = 0xdf07, // mem + X86_FISTTP = 0xdb01, // m32int, m16int + X86_FISTTPQ = 0xdd01, // mem + X86_FLD = 0xd900, // m32fp, m64fp + X86_FLDT = 0xdb05, // mem + X86_FLD1 = 0xd9e8, + X86_FLDL2T = 0xd9e9, + X86_FLDL2E = 0xd9ea, + X86_FLDPI = 0xd9eb, + X86_FLDLG2 = 0xd9ec, + X86_FLDLN2 
= 0xd9ed, + X86_FLDZ = 0xd9ee, + X86_FMUL = 0xd8c9, // m32fp, m64fp, sti0, st0i, pst0i + X86_FIMUL = 0xda01, // m32int, m16int + X86_FNOP = 0xd9d0, + X86_FPATAN = 0xd9f3, + X86_FPREM = 0xd9f8, + X86_FPREM1 = 0xd9f5, + X86_FPTAN = 0xd9f2, + X86_FRNDINT = 0xd9fc, + X86_FSCALE = 0xd9fd, + X86_FSIN = 0xd9fe, + X86_FSINCOS = 0xd9fb, + X86_FSQRT = 0xd9fa, + X86_FSTS = 0xd902, // mem + X86_FSTD = 0xdd02, // mem + X86_FST = 0xddd0, // sti + X86_FSTPS = 0xd903, // mem + X86_FSTPD = 0xdd03, // mem + X86_FSTPT = 0xdb07, // mem + X86_FSTP = 0xddd8, // sti + X86_FSUB = 0xd8e4, // m32fp, m64fp, sti0, st0i, pst0i + X86_FISUB = 0xda04, // m32int, m16int + X86_FSUBR = 0xd8ed, // m32fp, m64fp, sti0, st0i, pst0i + X86_FISUBR = 0xda05, // m32int, m16int + X86_FTST = 0xd9e4, + X86_FUCOM = 0xdde0, // sti + X86_FUCOMP = 0xdde8, // sti + X86_FUCOMPP = 0xdae9, + X86_FXAM = 0xd9e5, + X86_FXCH = 0xd9c8, // sti + X86_FXTRACT = 0xd9f4, + X86_FYL2X = 0xd9f1, + X86_FYL2XP1 = 0xd9f9, +}; + +#define _FPU(OP) _OO(OP) +#define _FPUm(OP, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X((OP)>>8, (OP)&7, MD, MB, MI, MS)) +#define _FPUSm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS) +#define _FPUDm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS) +#define _FPULm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS) +#define _FPUWm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS) +#define _FPUr(OP, RR) _OOr((OP)&0xfff8, _rF(RR)) +#define _FPU0r(OP, RD) _FPUr((OP)|0x400, RD) +#define _FPUr0(OP, RS) _FPUr((OP) , RS) +#define _FPUrr(OP, RS, RD) (_rST0P(RS) ? _FPU0r(OP, RD) : (_rST0P(RD) ? 
_FPUr0(OP, RS) : x86_emit_failure("FPU instruction without st0"))) +#define _FPUP0r(OP, RD) _FPU0r((OP)|0x200, RD) + +#define F2XM1() _FPU(X86_F2XM1) +#define FABS() _FPU(X86_FABS) +#define FADDSm(MD, MB, MI, MS) _FPUSm(X86_FADD, MD, MB, MI, MS) +#define FADDDm(MD, MB, MI, MS) _FPUDm(X86_FADD, MD, MB, MI, MS) +#define FADDP0r(RD) _FPUP0r(X86_FADD, RD) +#define FADDrr(RS, RD) _FPUrr(X86_FADD, RS, RD) +#define FADD0r(RD) _FPU0r(X86_FADD, RD) +#define FADDr0(RS) _FPUr0(X86_FADD, RS) +#define FIADDWm(MD, MB, MI, MS) _FPUWm(X86_FIADD, MD, MB, MI, MS) +#define FIADDLm(MD, MB, MI, MS) _FPULm(X86_FIADD, MD, MB, MI, MS) +#define FBLDm(MD, MB, MI, MS) _FPUm(X86_FBLD, MD, MB, MI, MS) +#define FBSTPm(MD, MB, MI, MS) _FPUm(X86_FBSTP, MD, MB, MI, MS) +#define FCHS() _FPU(X86_FCHS) +#define FCMOVBr0(RS) _FPUr0(X86_FCMOVB, RS) +#define FCMOVEr0(RS) _FPUr0(X86_FCMOVE, RS) +#define FCMOVBEr0(RS) _FPUr0(X86_FCMOVBE, RS) +#define FCMOVUr0(RS) _FPUr0(X86_FCMOVU, RS) +#define FCMOVNBr0(RS) _FPUr0(X86_FCMOVNB, RS) +#define FCMOVNEr0(RS) _FPUr0(X86_FCMOVNE, RS) +#define FCMOVNBEr0(RS) _FPUr0(X86_FCMOVNBE, RS) +#define FCMOVNUr0(RS) _FPUr0(X86_FCMOVNU, RS) +#define FCOMSm(MD, MB, MI, MS) _FPUSm(X86_FCOM, MD, MB, MI, MS) +#define FCOMDm(MD, MB, MI, MS) _FPUDm(X86_FCOM, MD, MB, MI, MS) +#define FCOMr(RD) _FPUr(X86_FCOM, RD) +#define FCOMPSm(MD, MB, MI, MS) _FPUSm(X86_FCOMP, MD, MB, MI, MS) +#define FCOMPDm(MD, MB, MI, MS) _FPUDm(X86_FCOMP, MD, MB, MI, MS) +#define FCOMPr(RD) _FPUr(X86_FCOMP, RD) +#define FCOMPP() _FPU(X86_FCOMPP) +#define FCOMIr0(RS) _FPUr0(X86_FCOMI, RS) +#define FCOMIPr0(RS) _FPUr0(X86_FCOMIP, RS) +#define FUCOMIr0(RS) _FPUr0(X86_FUCOMI, RS) +#define FUCOMIPr0(RS) _FPUr0(X86_FUCOMIP, RS) +#define FCOS() _FPU(X86_FCOS) +#define FDECSTP() _FPU(X86_FDECSTP) +#define FDIVSm(MD, MB, MI, MS) _FPUSm(X86_FDIV, MD, MB, MI, MS) +#define FDIVDm(MD, MB, MI, MS) _FPUDm(X86_FDIV, MD, MB, MI, MS) +#define FDIVP0r(RD) _FPUP0r(X86_FDIV, RD) +#define FDIVrr(RS, RD) _FPUrr(X86_FDIV, RS, RD) 
+#define FDIV0r(RD) _FPU0r(X86_FDIV, RD) +#define FDIVr0(RS) _FPUr0(X86_FDIV, RS) +#define FIDIVWm(MD, MB, MI, MS) _FPUWm(X86_FIDIV, MD, MB, MI, MS) +#define FIDIVLm(MD, MB, MI, MS) _FPULm(X86_FIDIV, MD, MB, MI, MS) +#define FDIVRSm(MD, MB, MI, MS) _FPUSm(X86_FDIVR, MD, MB, MI, MS) +#define FDIVRDm(MD, MB, MI, MS) _FPUDm(X86_FDIVR, MD, MB, MI, MS) +#define FDIVRP0r(RD) _FPUP0r(X86_FDIVR, RD) +#define FDIVRrr(RS, RD) _FPUrr(X86_FDIVR, RS, RD) +#define FDIVR0r(RD) _FPU0r(X86_FDIVR, RD) +#define FDIVRr0(RS) _FPUr0(X86_FDIVR, RS) +#define FIDIVRWm(MD, MB, MI, MS) _FPUWm(X86_FIDIVR, MD, MB, MI, MS) +#define FIDIVRLm(MD, MB, MI, MS) _FPULm(X86_FIDIVR, MD, MB, MI, MS) +#define FFREEr(RD) _FPUr(X86_FFREE, RD) +#define FICOMWm(MD, MB, MI, MS) _FPUWm(X86_FICOM, MD, MB, MI, MS) +#define FICOMLm(MD, MB, MI, MS) _FPULm(X86_FICOM, MD, MB, MI, MS) +#define FICOMPWm(MD, MB, MI, MS) _FPUWm(X86_FICOMP, MD, MB, MI, MS) +#define FICOMPLm(MD, MB, MI, MS) _FPULm(X86_FICOMP, MD, MB, MI, MS) +#define FILDWm(MD, MB, MI, MS) _FPUWm(X86_FILD, MD, MB, MI, MS) +#define FILDLm(MD, MB, MI, MS) _FPULm(X86_FILD, MD, MB, MI, MS) +#define FILDQm(MD, MB, MI, MS) _FPUm(X86_FILDQ, MD, MB, MI, MS) +#define FINCSTP() _FPU(X86_FINCSTP) +#define FISTWm(MD, MB, MI, MS) _FPUWm(X86_FIST, MD, MB, MI, MS) +#define FISTLm(MD, MB, MI, MS) _FPULm(X86_FIST, MD, MB, MI, MS) +#define FISTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTP, MD, MB, MI, MS) +#define FISTPLm(MD, MB, MI, MS) _FPULm(X86_FISTP, MD, MB, MI, MS) +#define FISTPQm(MD, MB, MI, MS) _FPUm(X86_FISTPQ, MD, MB, MI, MS) +#define FISTTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTTP, MD, MB, MI, MS) +#define FISTTPLm(MD, MB, MI, MS) _FPULm(X86_FISTTP, MD, MB, MI, MS) +#define FISTTPQm(MD, MB, MI, MS) _FPUm(X86_FISTTPQ, MD, MB, MI, MS) +#define FLDSm(MD, MB, MI, MS) _FPUSm(X86_FLD, MD, MB, MI, MS) +#define FLDDm(MD, MB, MI, MS) _FPUDm(X86_FLD, MD, MB, MI, MS) +#define FLDTm(MD, MB, MI, MS) _FPUm(X86_FLDT, MD, MB, MI, MS) +#define FLD1() _FPU(X86_FLD1) +#define FLDL2T() 
_FPU(X86_FLDL2T) +#define FLDL2E() _FPU(X86_FLDL2E) +#define FLDPI() _FPU(X86_FLDPI) +#define FLDLG2() _FPU(X86_FLDLG2) +#define FLDLN2() _FPU(X86_FLDLN2) +#define FLDZ() _FPU(X86_FLDZ) +#define FMULSm(MD, MB, MI, MS) _FPUSm(X86_FMUL, MD, MB, MI, MS) +#define FMULDm(MD, MB, MI, MS) _FPUDm(X86_FMUL, MD, MB, MI, MS) +#define FMULP0r(RD) _FPUP0r(X86_FMUL, RD) +#define FMULrr(RS, RD) _FPUrr(X86_FMUL, RS, RD) +#define FMUL0r(RD) _FPU0r(X86_FMUL, RD) +#define FMULr0(RS) _FPUr0(X86_FMUL, RS) +#define FIMULWm(MD, MB, MI, MS) _FPUWm(X86_FIMUL, MD, MB, MI, MS) +#define FIMULLm(MD, MB, MI, MS) _FPULm(X86_FIMUL, MD, MB, MI, MS) +#define FNOP() _FPU(X86_FNOP) +#define FPATAN() _FPU(X86_FPATAN) +#define FPREM() _FPU(X86_FPREM) +#define FPREM1() _FPU(X86_FPREM1) +#define FPTAN() _FPU(X86_FPTAN) +#define FRNDINT() _FPU(X86_FRNDINT) +#define FSCALE() _FPU(X86_FSCALE) +#define FSIN() _FPU(X86_FSIN) +#define FSINCOS() _FPU(X86_FSINCOS) +#define FSQRT() _FPU(X86_FSQRT) +#define FSTSm(MD, MB, MI, MS) _FPUm(X86_FSTS, MD, MB, MI, MS) +#define FSTDm(MD, MB, MI, MS) _FPUm(X86_FSTD, MD, MB, MI, MS) +#define FSTr(RD) _FPUr(X86_FST, RD) +#define FSTPSm(MD, MB, MI, MS) _FPUm(X86_FSTPS, MD, MB, MI, MS) +#define FSTPDm(MD, MB, MI, MS) _FPUm(X86_FSTPD, MD, MB, MI, MS) +#define FSTPTm(MD, MB, MI, MS) _FPUm(X86_FSTPT, MD, MB, MI, MS) +#define FSTPr(RD) _FPUr(X86_FSTP, RD) +#define FSUBSm(MD, MB, MI, MS) _FPUSm(X86_FSUB, MD, MB, MI, MS) +#define FSUBDm(MD, MB, MI, MS) _FPUDm(X86_FSUB, MD, MB, MI, MS) +#define FSUBP0r(RD) _FPUP0r(X86_FSUB, RD) +#define FSUBrr(RS, RD) _FPUrr(X86_FSUB, RS, RD) +#define FSUB0r(RD) _FPU0r(X86_FSUB, RD) +#define FSUBr0(RS) _FPUr0(X86_FSUB, RS) +#define FISUBWm(MD, MB, MI, MS) _FPUWm(X86_FISUB, MD, MB, MI, MS) +#define FISUBLm(MD, MB, MI, MS) _FPULm(X86_FISUB, MD, MB, MI, MS) +#define FSUBRSm(MD, MB, MI, MS) _FPUSm(X86_FSUBR, MD, MB, MI, MS) +#define FSUBRDm(MD, MB, MI, MS) _FPUDm(X86_FSUBR, MD, MB, MI, MS) +#define FSUBRP0r(RD) _FPUP0r(X86_FSUBR, RD) +#define FSUBRrr(RS, 
RD) _FPUrr(X86_FSUBR, RS, RD) +#define FSUBR0r(RD) _FPU0r(X86_FSUBR, RD) +#define FSUBRr0(RS) _FPUr0(X86_FSUBR, RS) +#define FISUBRWm(MD, MB, MI, MS) _FPUWm(X86_FISUBR, MD, MB, MI, MS) +#define FISUBRLm(MD, MB, MI, MS) _FPULm(X86_FISUBR, MD, MB, MI, MS) +#define FTST() _FPU(X86_FTST) +#define FUCOMr(RD) _FPUr(X86_FUCOM, RD) +#define FUCOMPr(RD) _FPUr(X86_FUCOMP, RD) +#define FUCOMPP() _FPU(X86_FUCOMPP) +#define FXAM() _FPU(X86_FXAM) +#define FXCHr(RD) _FPUr(X86_FXCH, RD) +#define FXTRACT() _FPU(X86_FXTRACT) +#define FYL2X() _FPU(X86_FYL2X) +#define FYL2XP1() _FPU(X86_FYL2XP1) + +#endif /* X86_RTASM_H */ diff --git a/jit2/compemu.h b/jit2/compemu.h new file mode 100644 index 00000000..9a612fb2 --- /dev/null +++ b/jit2/compemu.h @@ -0,0 +1,609 @@ +/* + * compiler/compemu.h - Public interface and definitions + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2005 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2008 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef COMPEMU_H
+#define COMPEMU_H
+
+#include "newcpu.h"
+
+#if USE_JIT
+
+#if defined __i386__ || defined __x86_64__
+#include "flags_x86.h"
+#else
+#error "Unsupported JIT compiler for this architecture"
+#endif
+
+#if JIT_DEBUG
+/* dump some information (m68k block, x86 block addresses) about the compiler state */
+extern void compiler_dumpstate(void);
+#endif
+
+/* Now that we do block chaining, and also have linked lists on each tag,
+   TAGMASK can be much smaller and still do its job. Saves several megs
+   of memory! */
+#define TAGMASK 0x0000ffff
+#define TAGSIZE (TAGMASK+1) /* number of distinct tag values selected by TAGMASK */
+#define MAXRUN 1024
+#define cacheline(x) (((uintptr)(x))&TAGMASK) /* low TAGMASK bits of x; the argument is parenthesized so an expression such as p+1 is cast as a whole */
+
+extern uae_u8* start_pc_p;
+extern uae_u32 start_pc;
+
+struct blockinfo_t;
+
+struct cpu_history {
+	uae_u16 * location;
+};
+
+union cacheline { /* one slot, viewed either as a handler pointer or as a blockinfo_t pointer */
+	cpuop_func * handler;
+	blockinfo_t * bi;
+};
+
+/* Use new spill/reload strategy when calling external functions */
+#define USE_OPTIMIZED_CALLS 0
+#if USE_OPTIMIZED_CALLS
+#error implementation in progress
+#endif
+
+/* (gb) When on, this option can save up to 30% compilation time
+ * when many lazy flushes occur (e.g. apps in MacOS 8.x).
+ */
+#define USE_SEPARATE_BIA 1
+
+/* Use chain of checksum_info_t to compute the block checksum */
+#define USE_CHECKSUM_INFO 1
+
+/* Use code inlining, aka follow-up of constant jumps */
+#define USE_INLINING 1
+
+/* Inlining requires the chained checksumming information */
+#if USE_INLINING
+#undef USE_CHECKSUM_INFO
+#define USE_CHECKSUM_INFO 1
+#endif
+
+/* Does flush_icache_range() only check for blocks falling in the requested range? 
*/ +#define LAZY_FLUSH_ICACHE_RANGE 0 + +#define USE_F_ALIAS 1 +#define USE_OFFSET 1 +#define COMP_DEBUG 1 + +#if COMP_DEBUG +#define Dif(x) if (x) +#else +#define Dif(x) if (0) +#endif + +#define SCALE 2 + +#define BYTES_PER_INST 10240 /* paranoid ;-) */ +#define LONGEST_68K_INST 16 /* The number of bytes the longest possible + 68k instruction takes */ +#define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums + for. Anything larger will be flushed + unconditionally even with SOFT_FLUSH */ +#define MAX_HOLD_BI 3 /* One for the current block, and up to two + for jump targets */ + +#define INDIVIDUAL_INST 0 +#if 1 +// gb-- my format from readcpu.cpp is not the same +#define FLAG_X 0x0010 +#define FLAG_N 0x0008 +#define FLAG_Z 0x0004 +#define FLAG_V 0x0002 +#define FLAG_C 0x0001 +#else +#define FLAG_C 0x0010 +#define FLAG_V 0x0008 +#define FLAG_Z 0x0004 +#define FLAG_N 0x0002 +#define FLAG_X 0x0001 +#endif +#define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V) +#define FLAG_ZNV (FLAG_Z | FLAG_N | FLAG_V) + +#define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */ + +#if defined(__x86_64__) +#define N_REGS 16 /* really only 15, but they are numbered 0-3,5-15 */ +#else +#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */ +#endif +#define N_FREGS 6 /* That leaves us two positions on the stack to play with */ + +/* Functions exposed to newcpu, or to what was moved from newcpu.c to + * compemu_support.c */ +extern void compiler_init(void); +extern void compiler_exit(void); +extern bool compiler_use_jit(void); +extern void init_comp(void); +extern void flush(int save_regs); +extern void small_flush(int save_regs); +extern void set_target(uae_u8* t); +extern uae_u8* get_target(void); +extern void freescratch(void); +extern void build_comp(void); +extern void set_cache_state(int enabled); +extern int get_cache_state(void); +extern uae_u32 get_jitted_size(void); +extern void (*flush_icache)(int n); +extern void 
alloc_cache(void); +extern int check_for_cache_miss(void); + +/* JIT FPU compilation */ +extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra); +extern void comp_fbcc_opp (uae_u32 opcode); +extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra); + +extern uae_u32 needed_flags; +extern cacheline cache_tags[]; +extern uae_u8* comp_pc_p; +extern void* pushall_call_handler; + +#define VREGS 32 +#define VFREGS 16 + +#define INMEM 1 +#define CLEAN 2 +#define DIRTY 3 +#define UNDEF 4 +#define ISCONST 5 + +typedef struct { + uae_u32* mem; + uae_u32 val; + uae_u8 is_swapped; + uae_u8 status; + uae_s8 realreg; /* gb-- realreg can hold -1 */ + uae_u8 realind; /* The index in the holds[] array */ + uae_u8 needflush; + uae_u8 validsize; + uae_u8 dirtysize; + uae_u8 dummy; +} reg_status; + +typedef struct { + uae_u32* mem; + double val; + uae_u8 status; + uae_s8 realreg; /* gb-- realreg can hold -1 */ + uae_u8 realind; + uae_u8 needflush; +} freg_status; + +#define PC_P 16 +#define FLAGX 17 +#define FLAGTMP 18 +#define NEXT_HANDLER 19 +#define S1 20 +#define S2 21 +#define S3 22 +#define S4 23 +#define S5 24 +#define S6 25 +#define S7 26 +#define S8 27 +#define S9 28 +#define S10 29 +#define S11 30 +#define S12 31 + +#define FP_RESULT 8 +#define FS1 9 +#define FS2 10 +#define FS3 11 + +typedef struct { + uae_u32 touched; + uae_s8 holds[VREGS]; + uae_u8 nholds; + uae_u8 canbyte; + uae_u8 canword; + uae_u8 locked; +} n_status; + +typedef struct { + uae_u32 touched; + uae_s8 holds[VFREGS]; + uae_u8 nholds; + uae_u8 locked; +} fn_status; + +/* For flag handling */ +#define NADA 1 +#define TRASH 2 +#define VALID 3 + +/* needflush values */ +#define NF_SCRATCH 0 +#define NF_TOMEM 1 +#define NF_HANDLER 2 + +typedef struct { + /* Integer part */ + reg_status state[VREGS]; + n_status nat[N_REGS]; + uae_u32 flags_on_stack; + uae_u32 flags_in_flags; + uae_u32 flags_are_important; + /* FPU part */ + freg_status fate[VFREGS]; + fn_status fat[N_FREGS]; + + /* x86 FPU part */ + uae_s8 
spos[N_FREGS]; + uae_s8 onstack[6]; + uae_s8 tos; +} bigstate; + +typedef struct { + /* Integer part */ + char virt[VREGS]; + char nat[N_REGS]; +} smallstate; + +extern bigstate live; +extern int touchcnt; + + +#define IMM uae_s32 +#define R1 uae_u32 +#define R2 uae_u32 +#define R4 uae_u32 +#define W1 uae_u32 +#define W2 uae_u32 +#define W4 uae_u32 +#define RW1 uae_u32 +#define RW2 uae_u32 +#define RW4 uae_u32 +#define MEMR uae_u32 +#define MEMW uae_u32 +#define MEMRW uae_u32 + +#define FW uae_u32 +#define FR uae_u32 +#define FRW uae_u32 + +#define MIDFUNC(nargs,func,args) void func args +#define MENDFUNC(nargs,func,args) +#define COMPCALL(func) func + +#define LOWFUNC(flags,mem,nargs,func,args) static __inline__ void func args +#define LENDFUNC(flags,mem,nargs,func,args) + +/* What we expose to the outside */ +#define DECLARE_MIDFUNC(func) extern void func +DECLARE_MIDFUNC(bt_l_ri(R4 r, IMM i)); +DECLARE_MIDFUNC(bt_l_rr(R4 r, R4 b)); +DECLARE_MIDFUNC(btc_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(btc_l_rr(RW4 r, R4 b)); +DECLARE_MIDFUNC(bts_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(bts_l_rr(RW4 r, R4 b)); +DECLARE_MIDFUNC(btr_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(btr_l_rr(RW4 r, R4 b)); +DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s)); +DECLARE_MIDFUNC(call_r(R4 r)); +DECLARE_MIDFUNC(sub_l_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s)); +DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(rol_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(rol_l_rr(RW4 d, R1 r)); +DECLARE_MIDFUNC(rol_w_rr(RW2 d, R1 r)); +DECLARE_MIDFUNC(rol_b_rr(RW1 d, R1 r)); +DECLARE_MIDFUNC(shll_l_rr(RW4 d, R1 r)); +DECLARE_MIDFUNC(shll_w_rr(RW2 d, R1 r)); +DECLARE_MIDFUNC(shll_b_rr(RW1 d, R1 r)); +DECLARE_MIDFUNC(ror_b_ri(R1 r, IMM i)); +DECLARE_MIDFUNC(ror_w_ri(R2 r, IMM i)); +DECLARE_MIDFUNC(ror_l_ri(R4 r, IMM i)); +DECLARE_MIDFUNC(ror_l_rr(R4 d, R1 r)); 
+DECLARE_MIDFUNC(ror_w_rr(R2 d, R1 r)); +DECLARE_MIDFUNC(ror_b_rr(R1 d, R1 r)); +DECLARE_MIDFUNC(shrl_l_rr(RW4 d, R1 r)); +DECLARE_MIDFUNC(shrl_w_rr(RW2 d, R1 r)); +DECLARE_MIDFUNC(shrl_b_rr(RW1 d, R1 r)); +DECLARE_MIDFUNC(shra_l_rr(RW4 d, R1 r)); +DECLARE_MIDFUNC(shra_w_rr(RW2 d, R1 r)); +DECLARE_MIDFUNC(shra_b_rr(RW1 d, R1 r)); +DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i)); +DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i)); +DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i)); +DECLARE_MIDFUNC(setcc(W1 d, IMM cc)); +DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc)); +DECLARE_MIDFUNC(cmov_b_rr(RW1 d, R1 s, IMM cc)); +DECLARE_MIDFUNC(cmov_w_rr(RW2 d, R2 s, IMM cc)); +DECLARE_MIDFUNC(cmov_l_rr(RW4 d, R4 s, IMM cc)); +DECLARE_MIDFUNC(cmov_l_rm(RW4 d, IMM s, IMM cc)); +DECLARE_MIDFUNC(bsf_l_rr(W4 d, R4 s)); +DECLARE_MIDFUNC(pop_m(IMM d)); +DECLARE_MIDFUNC(push_m(IMM d)); +DECLARE_MIDFUNC(pop_l(W4 d)); +DECLARE_MIDFUNC(push_l_i(IMM i)); +DECLARE_MIDFUNC(push_l(R4 s)); +DECLARE_MIDFUNC(clear_16(RW4 r)); +DECLARE_MIDFUNC(clear_8(RW4 r)); +DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, R2 s)); +DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, R1 s)); +DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, R2 s)); +DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, R1 s)); +DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s)); +DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s)); +DECLARE_MIDFUNC(imul_32_32(RW4 d, R4 s)); +DECLARE_MIDFUNC(mul_32_32(RW4 d, R4 s)); +DECLARE_MIDFUNC(mov_b_rr(W1 d, R1 s)); +DECLARE_MIDFUNC(mov_w_rr(W2 d, R2 s)); +DECLARE_MIDFUNC(mov_l_rrm_indexed(W4 d,R4 baser, R4 index, IMM factor)); +DECLARE_MIDFUNC(mov_w_rrm_indexed(W2 d, R4 baser, R4 index, IMM factor)); +DECLARE_MIDFUNC(mov_b_rrm_indexed(W1 d, R4 baser, R4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_mrr_indexed(R4 baser, R4 index, 
IMM factor, R4 s)); +DECLARE_MIDFUNC(mov_w_mrr_indexed(R4 baser, R4 index, IMM factor, R2 s)); +DECLARE_MIDFUNC(mov_b_mrr_indexed(R4 baser, R4 index, IMM factor, R1 s)); +DECLARE_MIDFUNC(mov_l_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R4 s)); +DECLARE_MIDFUNC(mov_w_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R2 s)); +DECLARE_MIDFUNC(mov_b_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R1 s)); +DECLARE_MIDFUNC(mov_l_brrm_indexed(W4 d, IMM base, R4 baser, R4 index, IMM factor)); +DECLARE_MIDFUNC(mov_w_brrm_indexed(W2 d, IMM base, R4 baser, R4 index, IMM factor)); +DECLARE_MIDFUNC(mov_b_brrm_indexed(W1 d, IMM base, R4 baser, R4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, R4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_rR(W4 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_rR(W2 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_rR(W1 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(mov_l_brR(W4 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_brR(W2 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_brR(W1 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(mov_l_Ri(R4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_w_Ri(R4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_b_Ri(R4 d, IMM i, IMM offset)); +DECLARE_MIDFUNC(mov_l_Rr(R4 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_Rr(R4 d, R2 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_Rr(R4 d, R1 s, IMM offset)); +DECLARE_MIDFUNC(lea_l_brr(W4 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, R4 s, R4 index, IMM factor, IMM offset)); +DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, R4 s, R4 index, IMM factor)); +DECLARE_MIDFUNC(mov_l_bRr(R4 d, R4 s, IMM offset)); +DECLARE_MIDFUNC(mov_w_bRr(R4 d, R2 s, IMM offset)); +DECLARE_MIDFUNC(mov_b_bRr(R4 d, R1 s, IMM offset)); +DECLARE_MIDFUNC(bswap_32(RW4 r)); +DECLARE_MIDFUNC(bswap_16(RW2 r)); +DECLARE_MIDFUNC(mov_l_rr(W4 d, R4 s)); +DECLARE_MIDFUNC(mov_l_mr(IMM d, R4 s)); +DECLARE_MIDFUNC(mov_w_mr(IMM d, R2 s)); +DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s)); 
+DECLARE_MIDFUNC(mov_b_mr(IMM d, R1 s)); +DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s)); +DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s)); +DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s)); +DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s)); +DECLARE_MIDFUNC(add_l_mi(IMM d, IMM s) ); +DECLARE_MIDFUNC(add_w_mi(IMM d, IMM s) ); +DECLARE_MIDFUNC(add_b_mi(IMM d, IMM s) ); +DECLARE_MIDFUNC(test_l_ri(R4 d, IMM i)); +DECLARE_MIDFUNC(test_l_rr(R4 d, R4 s)); +DECLARE_MIDFUNC(test_w_rr(R2 d, R2 s)); +DECLARE_MIDFUNC(test_b_rr(R1 d, R1 s)); +DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(and_l(RW4 d, R4 s)); +DECLARE_MIDFUNC(and_w(RW2 d, R2 s)); +DECLARE_MIDFUNC(and_b(RW1 d, R1 s)); +DECLARE_MIDFUNC(or_l_rm(RW4 d, IMM s)); +DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(or_l(RW4 d, R4 s)); +DECLARE_MIDFUNC(or_w(RW2 d, R2 s)); +DECLARE_MIDFUNC(or_b(RW1 d, R1 s)); +DECLARE_MIDFUNC(adc_l(RW4 d, R4 s)); +DECLARE_MIDFUNC(adc_w(RW2 d, R2 s)); +DECLARE_MIDFUNC(adc_b(RW1 d, R1 s)); +DECLARE_MIDFUNC(add_l(RW4 d, R4 s)); +DECLARE_MIDFUNC(add_w(RW2 d, R2 s)); +DECLARE_MIDFUNC(add_b(RW1 d, R1 s)); +DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i)); +DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i)); +DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i)); +DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i)); +DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i)); +DECLARE_MIDFUNC(sbb_l(RW4 d, R4 s)); +DECLARE_MIDFUNC(sbb_w(RW2 d, R2 s)); +DECLARE_MIDFUNC(sbb_b(RW1 d, R1 s)); +DECLARE_MIDFUNC(sub_l(RW4 d, R4 s)); +DECLARE_MIDFUNC(sub_w(RW2 d, R2 s)); +DECLARE_MIDFUNC(sub_b(RW1 d, R1 s)); +DECLARE_MIDFUNC(cmp_l(R4 d, R4 s)); +DECLARE_MIDFUNC(cmp_l_ri(R4 r, IMM i)); +DECLARE_MIDFUNC(cmp_w(R2 d, R2 s)); +DECLARE_MIDFUNC(cmp_b(R1 d, R1 s)); +DECLARE_MIDFUNC(xor_l(RW4 d, R4 s)); +DECLARE_MIDFUNC(xor_w(RW2 d, R2 s)); +DECLARE_MIDFUNC(xor_b(RW1 d, R1 s)); +DECLARE_MIDFUNC(live_flags(void)); +DECLARE_MIDFUNC(dont_care_flags(void)); +DECLARE_MIDFUNC(duplicate_carry(void)); +DECLARE_MIDFUNC(restore_carry(void)); 
+DECLARE_MIDFUNC(start_needflags(void)); +DECLARE_MIDFUNC(end_needflags(void)); +DECLARE_MIDFUNC(make_flags_live(void)); +DECLARE_MIDFUNC(call_r_11(R4 r, W4 out1, R4 in1, IMM osize, IMM isize)); +DECLARE_MIDFUNC(call_r_02(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)); +DECLARE_MIDFUNC(forget_about(W4 r)); +DECLARE_MIDFUNC(nop(void)); + +DECLARE_MIDFUNC(f_forget_about(FW r)); +DECLARE_MIDFUNC(fmov_pi(FW r)); +DECLARE_MIDFUNC(fmov_log10_2(FW r)); +DECLARE_MIDFUNC(fmov_log2_e(FW r)); +DECLARE_MIDFUNC(fmov_loge_2(FW r)); +DECLARE_MIDFUNC(fmov_1(FW r)); +DECLARE_MIDFUNC(fmov_0(FW r)); +DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmovi_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmovi_mr(MEMW m, FR r)); +DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmovs_mr(MEMW m, FR r)); +DECLARE_MIDFUNC(fmov_mr(MEMW m, FR r)); +DECLARE_MIDFUNC(fmov_ext_mr(MEMW m, FR r)); +DECLARE_MIDFUNC(fmov_ext_rm(FW r, MEMR m)); +DECLARE_MIDFUNC(fmov_rr(FW d, FR s)); +DECLARE_MIDFUNC(fldcw_m_indexed(R4 index, IMM base)); +DECLARE_MIDFUNC(ftst_r(FR r)); +DECLARE_MIDFUNC(dont_care_fflags(void)); +DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s)); +DECLARE_MIDFUNC(fabs_rr(FW d, FR s)); +DECLARE_MIDFUNC(frndint_rr(FW d, FR s)); +DECLARE_MIDFUNC(fsin_rr(FW d, FR s)); +DECLARE_MIDFUNC(fcos_rr(FW d, FR s)); +DECLARE_MIDFUNC(ftwotox_rr(FW d, FR s)); +DECLARE_MIDFUNC(fetox_rr(FW d, FR s)); +DECLARE_MIDFUNC(flog2_rr(FW d, FR s)); +DECLARE_MIDFUNC(fneg_rr(FW d, FR s)); +DECLARE_MIDFUNC(fadd_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fsub_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fmul_rr(FRW d, FR s)); +DECLARE_MIDFUNC(frem_rr(FRW d, FR s)); +DECLARE_MIDFUNC(frem1_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s)); +DECLARE_MIDFUNC(fcmp_rr(FR d, FR s)); +DECLARE_MIDFUNC(fflags_into_flags(W2 tmp)); +#undef DECLARE_MIDFUNC + +extern int failure; +#define FAIL(x) do { failure|=x; } while (0) + +/* Convenience functions exposed to gencomp */ +extern uae_u32 m68k_pc_offset; +extern void readbyte(int address, int 
dest, int tmp); +extern void readword(int address, int dest, int tmp); +extern void readlong(int address, int dest, int tmp); +extern void writebyte(int address, int source, int tmp); +extern void writeword(int address, int source, int tmp); +extern void writelong(int address, int source, int tmp); +extern void writeword_clobber(int address, int source, int tmp); +extern void writelong_clobber(int address, int source, int tmp); +extern void get_n_addr(int address, int dest, int tmp); +extern void get_n_addr_jmp(int address, int dest, int tmp); +extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp); +/* Set native Z flag only if register is zero */ +extern void set_zero(int r, int tmp); +extern int kill_rodent(int r); +extern void sync_m68k_pc(void); +extern uae_u32 get_const(int r); +extern int is_const(int r); +extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond); + +#define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1)) +#define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o))) +#define comp_get_ilong(o) do_get_mem_long((uae_u32 *)(comp_pc_p + (o))) + +struct blockinfo_t; + +typedef struct dep_t { + uae_u32* jmp_off; + struct blockinfo_t* target; + struct blockinfo_t* source; + struct dep_t** prev_p; + struct dep_t* next; +} dependency; + +typedef struct checksum_info_t { + uae_u8 *start_p; + uae_u32 length; + struct checksum_info_t *next; +} checksum_info; + +typedef struct blockinfo_t { + uae_s32 count; + cpuop_func* direct_handler_to_use; + cpuop_func* handler_to_use; + /* The direct handler does not check for the correct address */ + + cpuop_func* handler; + cpuop_func* direct_handler; + + cpuop_func* direct_pen; + cpuop_func* direct_pcc; + + uae_u8* pc_p; + + uae_u32 c1; + uae_u32 c2; +#if USE_CHECKSUM_INFO + checksum_info *csi; +#else + uae_u32 len; + uae_u32 min_pcp; +#endif + + struct blockinfo_t* next_same_cl; + struct blockinfo_t** prev_same_cl_p; + struct blockinfo_t* next; + 
struct blockinfo_t** prev_p; + + uae_u8 optlevel; + uae_u8 needed_flags; + uae_u8 status; + uae_u8 havestate; + + dependency dep[2]; /* Holds things we depend on */ + dependency* deplist; /* List of things that depend on this */ + smallstate env; + +#if JIT_DEBUG + /* (gb) size of the compiled block (direct handler) */ + uae_u32 direct_handler_size; +#endif +} blockinfo; + +#define BI_INVALID 0 +#define BI_ACTIVE 1 +#define BI_NEED_RECOMP 2 +#define BI_NEED_CHECK 3 +#define BI_CHECKING 4 +#define BI_COMPILING 5 +#define BI_FINALIZING 6 + +void execute_normal(void); +void exec_nostats(void); +void do_nothing(void); + +#else + +static __inline__ void flush_icache(int) { } +static __inline__ void build_comp() { } + +#endif /* !USE_JIT */ + +#endif /* COMPEMU_H */ diff --git a/jit2/compemu_fpp.cpp b/jit2/compemu_fpp.cpp new file mode 100644 index 00000000..bb536634 --- /dev/null +++ b/jit2/compemu_fpp.cpp @@ -0,0 +1,1637 @@ +/* + * compiler/compemu_fpp.cpp - Dynamic translation of FPU instructions + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2005 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2008 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * UAE - The Un*x Amiga Emulator + * + * MC68881 emulation + * + * Copyright 1996 Herman ten Brugge + * Adapted for JIT compilation (c) Bernd Meyer, 2000 + */ + +#include "sysdeps.h" + +#include +#include + +#include "memory.h" +#include "readcpu.h" +#include "newcpu.h" +#include "main.h" +#include "compiler/compemu.h" +#include "fpu/fpu.h" +#include "fpu/flags.h" +#include "fpu/exceptions.h" +#include "fpu/rounding.h" + +#define DEBUG 0 +#include "debug.h" + +// gb-- WARNING: get_fpcr() and set_fpcr() support is experimental +#define HANDLE_FPCR 0 + +// - IEEE-based fpu core must be used +#if defined(FPU_IEEE) +# define CAN_HANDLE_FPCR +#endif + +// - Generic rounding mode and precision modes are supported if set together +#if defined(FPU_USE_GENERIC_ROUNDING_MODE) && defined(FPU_USE_GENERIC_ROUNDING_PRECISION) +# define CAN_HANDLE_FPCR +#endif + +// - X86 rounding mode and precision modes are *not* supported but might work (?!) 
+#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) +# define CAN_HANDLE_FPCR +#endif + +#if HANDLE_FPCR && !defined(CAN_HANDLE_FPCR) +# warning "Can't handle FPCR, will FAIL(1) at runtime" +# undef HANDLE_FPCR +# define HANDLE_FPCR 0 +#endif + +#define STATIC_INLINE static inline +#define MAKE_FPSR(r) do { fmov_rr(FP_RESULT,r); } while (0) + +#define delay nop() ;nop() +#define delay2 nop() ;nop() + +#define UNKNOWN_EXTRA 0xFFFFFFFF +static void fpuop_illg(uae_u32 opcode, uae_u32 extra) +{ +/* + if (extra == UNKNOWN_EXTRA) + printf("FPU opcode %x, extra UNKNOWN_EXTRA\n",opcode & 0xFFFF); + else + printf("FPU opcode %x, extra %x\n",opcode & 0xFFFF,extra & 0xFFFF); +*/ + op_illg(opcode); +} + +static uae_s32 temp_fp[4]; /* To convert between FP/integer */ + +/* return register number, or -1 for failure */ +STATIC_INLINE int get_fp_value (uae_u32 opcode, uae_u16 extra) +{ + uaecptr tmppc; + uae_u16 tmp; + int size; + int mode; + int reg; + double* src; + uae_u32 ad = 0; + static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + + if ((extra & 0x4000) == 0) { + return ((extra >> 10) & 7); + } + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + switch (mode) { + case 0: + switch (size) { + case 6: + sign_extend_8_rr(S1,reg); + mov_l_mr((uintptr)temp_fp,S1); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + return FS1; + case 4: + sign_extend_16_rr(S1,reg); + mov_l_mr((uintptr)temp_fp,S1); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + return FS1; + case 0: + mov_l_mr((uintptr)temp_fp,reg); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + return FS1; + case 1: + mov_l_mr((uintptr)temp_fp,reg); + delay2; + fmovs_rm(FS1,(uintptr)temp_fp); + return FS1; + default: + return -1; + } + return -1; /* Should be unreachable */ + case 1: + return -1; /* Genuine invalid instruction */ + default: + break; + } + /* OK, we *will* have to load something from an address. 
Let's make + sure we know how to handle that, or quit early --- i.e. *before* + we do any postincrement/predecrement that we may regret */ + + switch (size) { + case 3: + return -1; + case 0: + case 1: + case 2: + case 4: + case 5: + case 6: + break; + default: + return -1; + } + + switch (mode) { + case 2: + ad=S1; /* We will change it, anyway ;-) */ + mov_l_rr(ad,reg+8); + break; + case 3: + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size])); + break; + case 4: + ad=S1; + + lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size])); + mov_l_rr(ad,reg+8); + break; + case 5: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(ad,ad,off); + break; + } + case 6: + { + uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + calc_disp_ea_020(reg+8,dp,ad,S2); + break; + } + case 7: + switch (reg) { + case 0: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 1: + { + uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 2: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2) +-2); + ad=S1; + mov_l_ri(ad,address+PC16off); + break; + } + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ m68k_pc_offset; + ad=S1; + // Immediate addressing mode && Operation Length == Byte -> + // Use the low-order byte of the extension word. 
+ if (size == 6) address++; + mov_l_ri(ad,address); + m68k_pc_offset+=sz2[size]; + break; + } + default: + return -1; + } + } + + switch (size) { + case 0: + readlong(ad,S2,S3); + mov_l_mr((uintptr)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + break; + case 1: + readlong(ad,S2,S3); + mov_l_mr((uintptr)temp_fp,S2); + delay2; + fmovs_rm(FS1,(uintptr)temp_fp); + break; + case 2: + readword(ad,S2,S3); + mov_w_mr(((uintptr)temp_fp)+8,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp),S2); + delay2; + fmov_ext_rm(FS1,(uintptr)(temp_fp)); + break; + case 3: + return -1; /* Some silly "packed" stuff */ + case 4: + readword(ad,S2,S3); + sign_extend_16_rr(S2,S2); + mov_l_mr((uintptr)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + break; + case 5: + readlong(ad,S2,S3); + mov_l_mr(((uintptr)temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp),S2); + delay2; + fmov_rm(FS1,(uintptr)(temp_fp)); + break; + case 6: + readbyte(ad,S2,S3); + sign_extend_8_rr(S2,S2); + mov_l_mr((uintptr)temp_fp,S2); + delay2; + fmovi_rm(FS1,(uintptr)temp_fp); + break; + default: + return -1; + } + return FS1; +} + +/* return of -1 means failure, >=0 means OK */ +STATIC_INLINE int put_fp_value (int val, uae_u32 opcode, uae_u16 extra) +{ + uae_u16 tmp; + uaecptr tmppc; + int size; + int mode; + int reg; + uae_u32 ad; + static int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 }; + static int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 }; + + if ((extra & 0x4000) == 0) { + const int dest_reg = (extra >> 10) & 7; + fmov_rr(dest_reg, val); + // gb-- status register is affected + MAKE_FPSR(dest_reg); + return 0; + } + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + size = (extra >> 10) & 7; + ad = (uae_u32)-1; + switch (mode) { + case 0: + switch (size) { + case 6: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_b_rm(reg,(uintptr)temp_fp); + return 0; + case 4: + 
fmovi_mr((uintptr)temp_fp,val); + delay; + mov_w_rm(reg,(uintptr)temp_fp); + return 0; + case 0: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(reg,(uintptr)temp_fp); + return 0; + case 1: + fmovs_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(reg,(uintptr)temp_fp); + return 0; + default: + return -1; + } + case 1: + return -1; /* genuine invalid instruction */ + default: break; + } + + /* Let's make sure we get out *before* doing something silly if + we can't handle the size */ + switch (size) { + case 0: + case 4: + case 5: + case 6: + case 2: + case 1: + break; + case 3: + default: + return -1; + } + + switch (mode) { + case 2: + ad=S1; + mov_l_rr(ad,reg+8); + break; + case 3: + ad=S1; + mov_l_rr(ad,reg+8); + lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size])); + break; + case 4: + ad=S1; + lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size])); + mov_l_rr(ad,reg+8); + break; + case 5: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_rr(ad,reg+8); + add_l_ri(ad,off); + break; + } + case 6: + { + uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + calc_disp_ea_020(reg+8,dp,ad,S2); + break; + } + case 7: + switch (reg) { + case 0: + { + uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 1: + { + uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4); + ad=S1; + mov_l_ri(ad,off); + break; + } + case 2: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + ad=S1; + mov_l_ri(ad,address+PC16off); + break; + } + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + ad = get_disp_ea_020 (tmppc, tmp); + break; + case 4: + { + uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ + m68k_pc_offset; + ad=S1; + mov_l_ri(ad,address); + m68k_pc_offset+=sz2[size]; + break; + } + default: + return -1; + } + } + switch 
(size) { + case 0: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 1: + fmovs_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 2: + fmov_ext_mr((uintptr)temp_fp,val); + delay; + mov_w_rm(S2,(uintptr)temp_fp+8); + writeword_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 3: return -1; /* Packed */ + + case 4: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp); + writeword_clobber(ad,S2,S3); + break; + case 5: + fmov_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + break; + case 6: + fmovi_mr((uintptr)temp_fp,val); + delay; + mov_l_rm(S2,(uintptr)temp_fp); + writebyte(ad,S2,S3); + break; + default: + return -1; + } + return 0; +} + +/* return -1 for failure, or register number for success */ +STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad) +{ + uae_u16 tmp; + uaecptr tmppc; + int mode; + int reg; + uae_s32 off; + + mode = (opcode >> 3) & 7; + reg = opcode & 7; + switch (mode) { + case 0: + case 1: + return -1; + case 2: + case 3: + case 4: + mov_l_rr(S1,8+reg); + return S1; + *ad = m68k_areg (regs, reg); + break; + case 5: + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + + mov_l_rr(S1,8+reg); + add_l_ri(S1,off); + return S1; + case 6: + return -1; + break; + case 7: + switch (reg) { + case 0: + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + mov_l_ri(S1,off); + return S1; + case 1: + off=comp_get_ilong((m68k_pc_offset+=4)-4); + mov_l_ri(S1,off); + return S1; + case 2: + return -1; +// *ad = m68k_getpc (); +// *ad += (uae_s32) (uae_s16) next_iword (); + off=start_pc+((char 
*)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset; + off+=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + mov_l_ri(S1,off); + return S1; + case 3: + return -1; + tmppc = m68k_getpc (); + tmp = next_iword (); + *ad = get_disp_ea_020 (tmppc, tmp); + break; + default: + return -1; + } + } + abort(); +} + +void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra) +{ + FAIL(1); + return; +} + +void comp_fscc_opp (uae_u32 opcode, uae_u16 extra) +{ + uae_u32 ad; + int cc; + int reg; + +#if DEBUG_FPP + printf ("fscc_opp at %08lx\n", m68k_getpc ()); + fflush (stdout); +#endif + + + if (extra&0x20) { /* only cc from 00 to 1f are defined */ + FAIL(1); + return; + } + if ((opcode & 0x38) != 0) { /* We can only do to integer register */ + FAIL(1); + return; + } + + fflags_into_flags(S2); + reg=(opcode&7); + + mov_l_ri(S1,255); + mov_l_ri(S4,0); + switch(extra&0x0f) { /* according to fpp.c, the 0x10 bit is ignored + */ + case 0: break; /* set never */ + case 1: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,4); + cmov_l_rr(S4,S2,10); break; + case 2: cmov_l_rr(S4,S1,7); break; + case 3: cmov_l_rr(S4,S1,3); break; + case 4: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,2); + cmov_l_rr(S4,S2,10); break; + case 5: mov_l_rr(S2,S4); + cmov_l_rr(S4,S1,6); + cmov_l_rr(S4,S2,10); break; + case 6: cmov_l_rr(S4,S1,5); break; + case 7: cmov_l_rr(S4,S1,11); break; + case 8: cmov_l_rr(S4,S1,10); break; + case 9: cmov_l_rr(S4,S1,4); break; + case 10: cmov_l_rr(S4,S1,10); cmov_l_rr(S4,S1,7); break; + case 11: cmov_l_rr(S4,S1,4); cmov_l_rr(S4,S1,3); break; + case 12: cmov_l_rr(S4,S1,2); break; + case 13: cmov_l_rr(S4,S1,6); break; + case 14: cmov_l_rr(S4,S1,5); cmov_l_rr(S4,S1,10); break; + case 15: mov_l_rr(S4,S1); break; + } + + if ((opcode & 0x38) == 0) { + mov_b_rr(reg,S4); + } else { + abort(); + if (get_fp_ad (opcode, &ad) == 0) { + m68k_setpc (m68k_getpc () - 4); + fpuop_illg (opcode,extra); + } else + put_byte (ad, cc ? 
0xff : 0x00); + } +} + +void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc) +{ + int cc; + + FAIL(1); + return; +} + +void comp_fbcc_opp (uae_u32 opcode) +{ + uae_u32 start_68k_offset=m68k_pc_offset; + uae_u32 off; + uae_u32 v1; + uae_u32 v2; + uae_u32 nh; + int cc; + + // comp_pc_p is expected to be bound to 32-bit addresses + assert((uintptr)comp_pc_p <= 0xffffffffUL); + + if (opcode&0x20) { /* only cc from 00 to 1f are defined */ + FAIL(1); + return; + } + if ((opcode&0x40)==0) { + off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + } + else { + off=comp_get_ilong((m68k_pc_offset+=4)-4); + } + mov_l_ri(S1,(uintptr) + (comp_pc_p+off-(m68k_pc_offset-start_68k_offset))); + mov_l_ri(PC_P,(uintptr)comp_pc_p); + + /* Now they are both constant. Might as well fold in m68k_pc_offset */ + add_l_ri(S1,m68k_pc_offset); + add_l_ri(PC_P,m68k_pc_offset); + m68k_pc_offset=0; + + /* according to fpp.c, the 0x10 bit is ignored + (it handles exception handling, which we don't + do, anyway ;-) */ + cc=opcode&0x0f; + v1=get_const(PC_P); + v2=get_const(S1); + fflags_into_flags(S2); + + switch(cc) { + case 0: break; /* jump never */ + case 1: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,4); + cmov_l_rr(PC_P,S2,10); break; + case 2: register_branch(v1,v2,7); break; + case 3: register_branch(v1,v2,3); break; + case 4: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,2); + cmov_l_rr(PC_P,S2,10); break; + case 5: + mov_l_rr(S2,PC_P); + cmov_l_rr(PC_P,S1,6); + cmov_l_rr(PC_P,S2,10); break; + case 6: register_branch(v1,v2,5); break; + case 7: register_branch(v1,v2,11); break; + case 8: register_branch(v1,v2,10); break; + case 9: register_branch(v1,v2,4); break; + case 10: + cmov_l_rr(PC_P,S1,10); + cmov_l_rr(PC_P,S1,7); break; + case 11: + cmov_l_rr(PC_P,S1,4); + cmov_l_rr(PC_P,S1,3); break; + case 12: register_branch(v1,v2,2); break; + case 13: register_branch(v1,v2,6); break; + case 14: + cmov_l_rr(PC_P,S1,5); + cmov_l_rr(PC_P,S1,10); break; + case 15: mov_l_rr(PC_P,S1); break; + } +} + 
+ /* Floating point conditions + The "NotANumber" part could be problematic; Howver, when NaN is + encountered, the ftst instruction sets bot N and Z to 1 on the x87, + so quite often things just fall into place. This is probably not + accurate wrt the 68k FPU, but it is *as* accurate as this was before. + However, some more thought should go into fixing this stuff up so + it accurately emulates the 68k FPU. +>==> 13) & 0x7) { + case 3: /* 2nd most common */ + if (put_fp_value ((extra >> 7)&7 , opcode, extra) < 0) { + FAIL(1); + return; + + } + return; + case 6: + case 7: + { + uae_u32 ad, list = 0; + int incr = 0; + if (extra & 0x2000) { + uae_u32 ad; + + /* FMOVEM FPP->memory */ + switch ((extra >> 11) & 3) { /* Get out early if failure */ + case 0: + case 2: + break; + case 1: + case 3: + default: + FAIL(1); return; + } + ad=get_fp_ad (opcode, &ad); + if (ad<0) { + abort(); + m68k_setpc (m68k_getpc () - 4); + fpuop_illg (opcode,extra); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort(); + } + if (incr < 0) { /* Predecrement */ + for (reg = 7; reg >= 0; reg--) { + if (list & 0x80) { + fmov_ext_mr((uintptr)temp_fp,reg); + delay; + sub_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); + sub_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp+4); + writelong_clobber(ad,S2,S3); + sub_l_ri(ad,4); + mov_w_rm(S2,(uintptr)temp_fp+8); + writeword_clobber(ad,S2,S3); + } + list <<= 1; + } + } + else { /* Postincrement */ + for (reg = 0; reg < 8; reg++) { + if (list & 0x80) { + fmov_ext_mr((uintptr)temp_fp,reg); + delay; + mov_w_rm(S2,(uintptr)temp_fp+8); + writeword_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp+4); + writelong_clobber(ad,S2,S3); + add_l_ri(ad,4); + mov_l_rm(S2,(uintptr)temp_fp); + writelong_clobber(ad,S2,S3); 
+ add_l_ri(ad,4); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) + mov_l_rr((opcode & 7)+8,ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr((opcode & 7)+8,ad); + } else { + /* FMOVEM memory->FPP */ + + uae_u32 ad; + switch ((extra >> 11) & 3) { /* Get out early if failure */ + case 0: + case 2: + break; + case 1: + case 3: + default: + FAIL(1); return; + } + ad=get_fp_ad (opcode, &ad); + if (ad<0) { + abort(); + m68k_setpc (m68k_getpc () - 4); + write_log("no ad\n"); + fpuop_illg (opcode,extra); + return; + } + switch ((extra >> 11) & 3) { + case 0: /* static pred */ + list = extra & 0xff; + incr = -1; + break; + case 2: /* static postinc */ + list = extra & 0xff; + incr = 1; + break; + case 1: /* dynamic pred */ + case 3: /* dynamic postinc */ + abort(); + } + + if (incr < 0) { + // not reached + for (reg = 7; reg >= 0; reg--) { + uae_u32 wrd1, wrd2, wrd3; + if (list & 0x80) { + sub_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp),S2); + sub_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp)+4,S2); + sub_l_ri(ad,4); + readword(ad,S2,S3); + mov_w_mr(((uintptr)temp_fp)+8,S2); + delay2; + fmov_ext_rm(reg,(uintptr)(temp_fp)); + } + list <<= 1; + } + } + else { + for (reg = 0; reg < 8; reg++) { + uae_u32 wrd1, wrd2, wrd3; + if (list & 0x80) { + readword(ad,S2,S3); + mov_w_mr(((uintptr)temp_fp)+8,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp)+4,S2); + add_l_ri(ad,4); + readlong(ad,S2,S3); + mov_l_mr((uintptr)(temp_fp),S2); + add_l_ri(ad,4); + delay2; + fmov_ext_rm(reg,(uintptr)(temp_fp)); + } + list <<= 1; + } + } + if ((opcode & 0x38) == 0x18) + mov_l_rr((opcode & 7)+8,ad); + if ((opcode & 0x38) == 0x20) + mov_l_rr((opcode & 7)+8,ad); + } + } + return; + + case 4: + case 5: /* rare */ + if ((opcode & 0x30) == 0) { + if (extra & 0x2000) { + if (extra & 0x1000) { +#if HANDLE_FPCR + mov_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_mode); + or_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_precision); +#else + 
FAIL(1); + return; +#endif + } + if (extra & 0x0800) { + FAIL(1); + return; + } + if (extra & 0x0400) { + mov_l_rm(opcode & 15,(uintptr)&fpu.instruction_address); + return; + } + } else { + // gb-- moved here so that we may FAIL() without generating any code + if (extra & 0x0800) { + // set_fpsr(m68k_dreg (regs, opcode & 15)); + FAIL(1); + return; + } + if (extra & 0x1000) { +#if HANDLE_FPCR +#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) + FAIL(1); + return; +#endif + mov_l_rr(S1,opcode & 15); + mov_l_rr(S2,opcode & 15); + and_l_ri(S1,FPCR_ROUNDING_PRECISION); + and_l_ri(S2,FPCR_ROUNDING_MODE); + mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1); + mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2); +#else + FAIL(1); + return; +#endif +// return; gb-- FMOVEM could also operate on fpiar + } + if (extra & 0x0400) { + mov_l_mr((uintptr)&fpu.instruction_address,opcode & 15); +// return; gb-- we have to process all FMOVEM bits before returning + } + return; + } + } else if ((opcode & 0x3f) == 0x3c) { + if ((extra & 0x2000) == 0) { + // gb-- moved here so that we may FAIL() without generating any code + if (extra & 0x0800) { + FAIL(1); + return; + } + if (extra & 0x1000) { + uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4); +#if HANDLE_FPCR +#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION) + FAIL(1); + return; +#endif +// mov_l_mi((uintptr)®s.fpcr,val); + mov_l_ri(S1,val); + mov_l_ri(S2,val); + and_l_ri(S1,FPCR_ROUNDING_PRECISION); + and_l_ri(S2,FPCR_ROUNDING_MODE); + mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1); + mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2); +#else + FAIL(1); + return; +#endif +// return; gb-- FMOVEM could also operate on fpiar + } + if (extra & 0x0400) { + uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4); + mov_l_mi((uintptr)&fpu.instruction_address,val); +// return; gb-- we have to process all FMOVEM bits before returning + } + return; + } + FAIL(1); + return; + } else if 
(extra & 0x2000) { + FAIL(1); + return; + } else { + FAIL(1); + return; + } + FAIL(1); + return; + + case 0: + case 2: /* Extremely common */ + reg = (extra >> 7) & 7; + if ((extra & 0xfc00) == 0x5c00) { + switch (extra & 0x7f) { + case 0x00: + fmov_pi(reg); + break; + case 0x0b: + fmov_log10_2(reg); + break; + case 0x0c: +#if USE_LONG_DOUBLE + fmov_ext_rm(reg,(uintptr)&const_e); +#else + fmov_rm(reg,(uintptr)&const_e); +#endif + break; + case 0x0d: + fmov_log2_e(reg); + break; + case 0x0e: +#if USE_LONG_DOUBLE + fmov_ext_rm(reg,(uintptr)&const_log10_e); +#else + fmov_rm(reg,(uintptr)&const_log10_e); +#endif + break; + case 0x0f: + fmov_0(reg); + break; + case 0x30: + fmov_loge_2(reg); + break; + case 0x31: +#if USE_LONG_DOUBLE + fmov_ext_rm(reg,(uintptr)&const_loge_10); +#else + fmov_rm(reg,(uintptr)&const_loge_10); +#endif + break; + case 0x32: + fmov_1(reg); + break; + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3a: + case 0x3b: +#if USE_LONG_DOUBLE + case 0x3c: + case 0x3d: + case 0x3e: + case 0x3f: + fmov_ext_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32)); +#else + fmov_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32)); +#endif + break; + default: + /* This is not valid, so we fail */ + FAIL(1); + return; + } + return; + } + + switch (extra & 0x7f) { + case 0x00: /* FMOVE */ + case 0x40: /* Explicit rounding. This is just a quick fix. 
Same + * for all other cases that have three choices */ + case 0x44: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(reg,src); + MAKE_FPSR (src); + break; + case 0x01: /* FINT */ + FAIL(1); + return; + dont_care_fflags(); + case 0x02: /* FSINH */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x03: /* FINTRZ */ +#if USE_X86_FPUCW + /* If we have control over the CW, we can do this */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + mov_l_ri(S1,16); /* Switch to "round to zero" mode */ + fldcw_m_indexed(S1,(uae_u32)x86_fpucw); + + frndint_rr(reg,src); + + /* restore control word */ + mov_l_rm(S1,(uintptr)®s.fpcr); + and_l_ri(S1,0x000000f0); + fldcw_m_indexed(S1,(uintptr)x86_fpucw); + + MAKE_FPSR (reg); + break; +#endif + FAIL(1); + return; + break; + case 0x04: /* FSQRT */ + case 0x41: + case 0x45: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsqrt_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x06: /* FLOGNP1 */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x08: /* FETOXM1 */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x09: /* FTANH */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x0a: /* FATAN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x0c: /* FASIN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x0d: /* FATANH */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x0e: /* FSIN */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsin_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x0f: /* FTAN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x10: /* FETOX */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 
0) { + FAIL(1); /* Illegal instruction */ + return; + } + fetox_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x11: /* FTWOTOX */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + ftwotox_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x12: /* FTENTOX */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x14: /* FLOGN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x15: /* FLOG10 */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x16: /* FLOG2 */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + flog2_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x18: /* FABS */ + case 0x58: + case 0x5c: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fabs_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x19: /* FCOSH */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x1a: /* FNEG */ + case 0x5a: + case 0x5e: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fneg_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x1c: /* FACOS */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x1d: /* FCOS */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fcos_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x1e: /* FGETEXP */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x1f: /* FGETMAN */ + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x20: /* FDIV */ + case 0x60: + case 0x64: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fdiv_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x21: /* FMOD */ + dont_care_fflags(); + src=get_fp_value (opcode, 
extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + frem_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x22: /* FADD */ + case 0x62: + case 0x66: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fadd_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x23: /* FMUL */ + case 0x63: + case 0x67: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmul_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x24: /* FSGLDIV */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fdiv_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x25: /* FREM */ + // gb-- disabled because the quotient byte must be computed + // otherwise, free rotation in ClarisWorks doesn't work. + FAIL(1); + return; + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + frem1_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x26: /* FSCALE */ + dont_care_fflags(); + FAIL(1); + return; + break; + case 0x27: /* FSGLMUL */ + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmul_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x28: /* FSUB */ + case 0x68: + case 0x6c: + dont_care_fflags(); + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fsub_rr(reg,src); + MAKE_FPSR (reg); + break; + case 0x30: /* FSINCOS */ + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + FAIL(1); + return; + dont_care_fflags(); + break; + case 0x38: /* FCMP */ + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(FP_RESULT,reg); + fsub_rr(FP_RESULT,src); /* Right way? 
*/ + break; + case 0x3a: /* FTST */ + src=get_fp_value (opcode, extra); + if (src < 0) { + FAIL(1); /* Illegal instruction */ + return; + } + fmov_rr(FP_RESULT,src); + break; + default: + FAIL(1); + return; + break; + } + return; + } + m68k_setpc (m68k_getpc () - 4); + fpuop_illg (opcode,extra); +} diff --git a/jit2/compemu_support.cpp b/jit2/compemu_support.cpp new file mode 100644 index 00000000..2448d3da --- /dev/null +++ b/jit2/compemu_support.cpp @@ -0,0 +1,7132 @@ +/* + * compiler/compemu_support.cpp - Core dynamic translation engine + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2005 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2008 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#if !REAL_ADDRESSING && !DIRECT_ADDRESSING +#error "Only Real or Direct Addressing is supported with the JIT Compiler" +#endif + +#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE +#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler" +#endif + +/* NOTE: support for AMD64 assumes translation cache and other code + * buffers are allocated into a 32-bit address space because (i) B2/JIT + * code is not 64-bit clean and (ii) it's faster to resolve branches + * that way. 
+ */ +#if !defined(__i386__) && !defined(__x86_64__) +#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler" +#endif + +#define USE_MATCH 0 + +/* kludge for Brian, so he can compile under MSVC++ */ +#define USE_NORMAL_CALLING_CONVENTION 0 + +#ifndef WIN32 +#include +#include +#include +#endif + +#include +#include +#include + +#include "sysdeps.h" +#include "cpu_emulation.h" +#include "main.h" +#include "prefs.h" +#include "user_strings.h" +#include "vm_alloc.h" + +#include "m68k.h" +#include "memory.h" +#include "readcpu.h" +#include "newcpu.h" +#include "comptbl.h" +#include "compiler/compemu.h" +#include "fpu/fpu.h" +#include "fpu/flags.h" + +#define DEBUG 1 +#include "debug.h" + +#ifdef ENABLE_MON +#include "mon.h" +#endif + +#ifndef WIN32 +#define PROFILE_COMPILE_TIME 1 +#define PROFILE_UNTRANSLATED_INSNS 1 +#endif + +#if defined(__x86_64__) && 0 +#define RECORD_REGISTER_USAGE 1 +#endif + +#ifdef WIN32 +#undef write_log +#define write_log dummy_write_log +static void dummy_write_log(const char *, ...) 
{ } +#endif + +#if JIT_DEBUG +#undef abort +#define abort() do { \ + fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ +} while (0) +#endif + +#if RECORD_REGISTER_USAGE +static uint64 reg_count[16]; +static int reg_count_local[16]; + +static int reg_count_compare(const void *ap, const void *bp) +{ + const int a = *((int *)ap); + const int b = *((int *)bp); + return reg_count[b] - reg_count[a]; +} +#endif + +#if PROFILE_COMPILE_TIME +#include +static uae_u32 compile_count = 0; +static clock_t compile_time = 0; +static clock_t emul_start_time = 0; +static clock_t emul_end_time = 0; +#endif + +#if PROFILE_UNTRANSLATED_INSNS +const int untranslated_top_ten = 20; +static uae_u32 raw_cputbl_count[65536] = { 0, }; +static uae_u16 opcode_nums[65536]; + +static int untranslated_compfn(const void *e1, const void *e2) +{ + return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]; +} +#endif + +static compop_func *compfunctbl[65536]; +static compop_func *nfcompfunctbl[65536]; +static cpuop_func *nfcpufunctbl[65536]; +uae_u8* comp_pc_p; + +// From newcpu.cpp +extern bool quit_program; + +// gb-- Extra data for Basilisk II/JIT +#if JIT_DEBUG +static bool JITDebug = false; // Enable runtime disassemblers through mon? +#else +const bool JITDebug = false; // Don't use JIT debug mode at all +#endif +#if USE_INLINING +static bool follow_const_jumps = true; // Flag: translation through constant jumps +#else +const bool follow_const_jumps = false; +#endif + +const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB) +static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks +static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already +static bool lazy_flush = true; // Flag: lazy translation cache invalidation +static bool avoid_fpu = true; // Flag: compile FPU instructions ? 
+static bool have_cmov = false; // target has CMOV instructions ? +static bool have_lahf_lm = true; // target has LAHF supported in long mode ? +static bool have_rat_stall = true; // target has partial register stalls ? +const bool tune_alignment = true; // Tune code alignments for running CPU ? +const bool tune_nop_fillers = true; // Tune no-op fillers for architecture +static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly? +static int align_loops = 32; // Align the start of loops +static int align_jumps = 32; // Align the start of jumps +static int optcount[10] = { + 10, // How often a block has to be executed before it is translated + 0, // How often to use naive translation + 0, 0, 0, 0, + -1, -1, -1, -1 +}; + +struct op_properties { + uae_u8 use_flags; + uae_u8 set_flags; + uae_u8 is_addx; + uae_u8 cflow; +}; +static op_properties prop[65536]; + +static inline int end_block(uae_u32 opcode) +{ + return (prop[opcode].cflow & fl_end_block); +} + +static inline bool is_const_jump(uae_u32 opcode) +{ + return (prop[opcode].cflow == fl_const_jump); +} + +static inline bool may_trap(uae_u32 opcode) +{ + return (prop[opcode].cflow & fl_trap); +} + +static inline unsigned int cft_map (unsigned int f) +{ +#ifndef HAVE_GET_WORD_UNSWAPPED + return f; +#else + return ((f >> 8) & 255) | ((f & 255) << 8); +#endif +} + +uae_u8* start_pc_p; +uae_u32 start_pc; +uae_u32 current_block_pc_p; +static uintptr current_block_start_target; +uae_u32 needed_flags; +static uintptr next_pc_p; +static uintptr taken_pc_p; +static int branch_cc; +static int redo_current_block; + +int segvcount=0; +int soft_flush_count=0; +int hard_flush_count=0; +int checksum_count=0; +static uae_u8* current_compile_p=NULL; +static uae_u8* max_compile_start; +static uae_u8* compiled_code=NULL; +static uae_s32 reg_alloc_run; +const int POPALLSPACE_SIZE = 1024; /* That should be enough space */ +static uae_u8* popallspace=NULL; + +void* 
pushall_call_handler=NULL; +static void* popall_do_nothing=NULL; +static void* popall_exec_nostats=NULL; +static void* popall_execute_normal=NULL; +static void* popall_cache_miss=NULL; +static void* popall_recompile_block=NULL; +static void* popall_check_checksum=NULL; + +/* The 68k only ever executes from even addresses. So right now, we + * waste half the entries in this array + * UPDATE: We now use those entries to store the start of the linked + * lists that we maintain for each hash result. + */ +cacheline cache_tags[TAGSIZE]; +int letit=0; +blockinfo* hold_bi[MAX_HOLD_BI]; +blockinfo* active; +blockinfo* dormant; + +/* 68040 */ +extern struct cputbl op_smalltbl_0_nf[]; +extern struct comptbl op_smalltbl_0_comp_nf[]; +extern struct comptbl op_smalltbl_0_comp_ff[]; + +/* 68020 + 68881 */ +extern struct cputbl op_smalltbl_1_nf[]; + +/* 68020 */ +extern struct cputbl op_smalltbl_2_nf[]; + +/* 68010 */ +extern struct cputbl op_smalltbl_3_nf[]; + +/* 68000 */ +extern struct cputbl op_smalltbl_4_nf[]; + +/* 68000 slow but compatible. */ +extern struct cputbl op_smalltbl_5_nf[]; + +static void flush_icache_hard(int n); +static void flush_icache_lazy(int n); +static void flush_icache_none(int n); +void (*flush_icache)(int n) = flush_icache_none; + + + +bigstate live; +smallstate empty_ss; +smallstate default_ss; +static int optlev; + +static int writereg(int r, int size); +static void unlock2(int r); +static void setlock(int r); +static int readreg_specific(int r, int size, int spec); +static int writereg_specific(int r, int size, int spec); +static void prepare_for_call_1(void); +static void prepare_for_call_2(void); +static void align_target(uae_u32 a); + +static uae_s32 nextused[VREGS]; + +uae_u32 m68k_pc_offset; + +/* Some arithmetic ooperations can be optimized away if the operands + * are known to be constant. But that's only a good idea when the + * side effects they would have on the flags are not important. 
This + * variable indicates whether we need the side effects or not + */ +uae_u32 needflags=0; + +/* Flag handling is complicated. + * + * x86 instructions create flags, which quite often are exactly what we + * want. So at times, the "68k" flags are actually in the x86 flags. + * + * Then again, sometimes we do x86 instructions that clobber the x86 + * flags, but don't represent a corresponding m68k instruction. In that + * case, we have to save them. + * + * We used to save them to the stack, but now store them back directly + * into the regflags.cznv of the traditional emulation. Thus some odd + * names. + * + * So flags can be in either of two places (used to be three; boy were + * things complicated back then!); And either place can contain either + * valid flags or invalid trash (and on the stack, there was also the + * option of "nothing at all", now gone). A couple of variables keep + * track of the respective states. + * + * To make things worse, we might or might not be interested in the flags. + * by default, we are, but a call to dont_care_flags can change that + * until the next call to live_flags. If we are not, pretty much whatever + * is in the register and/or the native flags is seen as valid. 
+ */ + +static __inline__ blockinfo* get_blockinfo(uae_u32 cl) +{ + return cache_tags[cl+1].bi; +} + +static __inline__ blockinfo* get_blockinfo_addr(void* addr) +{ + blockinfo* bi=get_blockinfo(cacheline(addr)); + + while (bi) { + if (bi->pc_p==addr) + return bi; + bi=bi->next_same_cl; + } + return NULL; +} + + +/******************************************************************* + * All sorts of list related functions for all of the lists * + *******************************************************************/ + +static __inline__ void remove_from_cl_list(blockinfo* bi) +{ + uae_u32 cl=cacheline(bi->pc_p); + + if (bi->prev_same_cl_p) + *(bi->prev_same_cl_p)=bi->next_same_cl; + if (bi->next_same_cl) + bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p; + if (cache_tags[cl+1].bi) + cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use; + else + cache_tags[cl].handler=(cpuop_func *)popall_execute_normal; +} + +static __inline__ void remove_from_list(blockinfo* bi) +{ + if (bi->prev_p) + *(bi->prev_p)=bi->next; + if (bi->next) + bi->next->prev_p=bi->prev_p; +} + +static __inline__ void remove_from_lists(blockinfo* bi) +{ + remove_from_list(bi); + remove_from_cl_list(bi); +} + +static __inline__ void add_to_cl_list(blockinfo* bi) +{ + uae_u32 cl=cacheline(bi->pc_p); + + if (cache_tags[cl+1].bi) + cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl); + bi->next_same_cl=cache_tags[cl+1].bi; + + cache_tags[cl+1].bi=bi; + bi->prev_same_cl_p=&(cache_tags[cl+1].bi); + + cache_tags[cl].handler=bi->handler_to_use; +} + +static __inline__ void raise_in_cl_list(blockinfo* bi) +{ + remove_from_cl_list(bi); + add_to_cl_list(bi); +} + +static __inline__ void add_to_active(blockinfo* bi) +{ + if (active) + active->prev_p=&(bi->next); + bi->next=active; + + active=bi; + bi->prev_p=&active; +} + +static __inline__ void add_to_dormant(blockinfo* bi) +{ + if (dormant) + dormant->prev_p=&(bi->next); + bi->next=dormant; + + dormant=bi; + bi->prev_p=&dormant; +} + +static 
__inline__ void remove_dep(dependency* d) +{ + if (d->prev_p) + *(d->prev_p)=d->next; + if (d->next) + d->next->prev_p=d->prev_p; + d->prev_p=NULL; + d->next=NULL; +} + +/* This block's code is about to be thrown away, so it no longer + depends on anything else */ +static __inline__ void remove_deps(blockinfo* bi) +{ + remove_dep(&(bi->dep[0])); + remove_dep(&(bi->dep[1])); +} + +static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a) +{ + *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4); +} + +/******************************************************************** + * Soft flush handling support functions * + ********************************************************************/ + +static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh) +{ + //write_log("bi is %p\n",bi); + if (dh!=bi->direct_handler_to_use) { + dependency* x=bi->deplist; + //write_log("bi->deplist=%p\n",bi->deplist); + while (x) { + //write_log("x is %p\n",x); + //write_log("x->next is %p\n",x->next); + //write_log("x->prev_p is %p\n",x->prev_p); + + if (x->jmp_off) { + adjust_jmpdep(x,dh); + } + x=x->next; + } + bi->direct_handler_to_use=dh; + } +} + +static __inline__ void invalidate_block(blockinfo* bi) +{ + int i; + + bi->optlevel=0; + bi->count=optcount[0]-1; + bi->handler=NULL; + bi->handler_to_use=(cpuop_func *)popall_execute_normal; + bi->direct_handler=NULL; + set_dhtu(bi,bi->direct_pen); + bi->needed_flags=0xff; + bi->status=BI_INVALID; + for (i=0;i<2;i++) { + bi->dep[i].jmp_off=NULL; + bi->dep[i].target=NULL; + } + remove_deps(bi); +} + +static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target) +{ + blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target); + + Dif(!tbi) { + write_log("Could not create jmpdep!\n"); + abort(); + } + bi->dep[i].jmp_off=jmpaddr; + bi->dep[i].source=bi; + bi->dep[i].target=tbi; + bi->dep[i].next=tbi->deplist; + if (bi->dep[i].next) + bi->dep[i].next->prev_p=&(bi->dep[i].next); + 
bi->dep[i].prev_p=&(tbi->deplist); + tbi->deplist=&(bi->dep[i]); +} + +static __inline__ void block_need_recompile(blockinfo * bi) +{ + uae_u32 cl = cacheline(bi->pc_p); + + set_dhtu(bi, bi->direct_pen); + bi->direct_handler = bi->direct_pen; + + bi->handler_to_use = (cpuop_func *)popall_execute_normal; + bi->handler = (cpuop_func *)popall_execute_normal; + if (bi == cache_tags[cl + 1].bi) + cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; + bi->status = BI_NEED_RECOMP; +} + +static __inline__ void mark_callers_recompile(blockinfo * bi) +{ + dependency *x = bi->deplist; + + while (x) { + dependency *next = x->next; /* This disappears when we mark for + * recompilation and thus remove the + * blocks from the lists */ + if (x->jmp_off) { + blockinfo *cbi = x->source; + + Dif(cbi->status == BI_INVALID) { + // write_log("invalid block in dependency list\n"); // FIXME? + // abort(); + } + if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) { + block_need_recompile(cbi); + mark_callers_recompile(cbi); + } + else if (cbi->status == BI_COMPILING) { + redo_current_block = 1; + } + else if (cbi->status == BI_NEED_RECOMP) { + /* nothing */ + } + else { + //write_log("Status %d in mark_callers\n",cbi->status); // FIXME? + } + } + x = next; + } +} + +static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate) +{ + blockinfo* bi=get_blockinfo_addr(addr); + int i; + + if (!bi) { + for (i=0;ipc_p=(uae_u8 *)addr; + invalidate_block(bi); + add_to_active(bi); + add_to_cl_list(bi); + + } + } + } + if (!bi) { + write_log("Looking for blockinfo, can't find free one\n"); + abort(); + } + return bi; +} + +static void prepare_block(blockinfo* bi); + +/* Managment of blockinfos. + + A blockinfo struct is allocated whenever a new block has to be + compiled. If the list of free blockinfos is empty, we allocate a new + pool of blockinfos and link the newly created blockinfos altogether + into the list of free blockinfos. 
Otherwise, we simply pop a structure + off the free list. + + Blockinfo are lazily deallocated, i.e. chained altogether in the + list of free blockinfos whenvever a translation cache flush (hard or + soft) request occurs. +*/ + +template< class T > +class LazyBlockAllocator +{ + enum { + kPoolSize = 1 + 4096 / sizeof(T) + }; + struct Pool { + T chunk[kPoolSize]; + Pool * next; + }; + Pool * mPools; + T * mChunks; +public: + LazyBlockAllocator() : mPools(0), mChunks(0) { } + ~LazyBlockAllocator(); + T * acquire(); + void release(T * const); +}; + +template< class T > +LazyBlockAllocator::~LazyBlockAllocator() +{ + Pool * currentPool = mPools; + while (currentPool) { + Pool * deadPool = currentPool; + currentPool = currentPool->next; + free(deadPool); + } +} + +template< class T > +T * LazyBlockAllocator::acquire() +{ + if (!mChunks) { + // There is no chunk left, allocate a new pool and link the + // chunks into the free list + Pool * newPool = (Pool *)malloc(sizeof(Pool)); + for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) { + chunk->next = mChunks; + mChunks = chunk; + } + newPool->next = mPools; + mPools = newPool; + } + T * chunk = mChunks; + mChunks = chunk->next; + return chunk; +} + +template< class T > +void LazyBlockAllocator::release(T * const chunk) +{ + chunk->next = mChunks; + mChunks = chunk; +} + +template< class T > +class HardBlockAllocator +{ +public: + T * acquire() { + T * data = (T *)current_compile_p; + current_compile_p += sizeof(T); + return data; + } + + void release(T * const chunk) { + // Deallocated on invalidation + } +}; + +#if USE_SEPARATE_BIA +static LazyBlockAllocator BlockInfoAllocator; +static LazyBlockAllocator ChecksumInfoAllocator; +#else +static HardBlockAllocator BlockInfoAllocator; +static HardBlockAllocator ChecksumInfoAllocator; +#endif + +static __inline__ checksum_info *alloc_checksum_info(void) +{ + checksum_info *csi = ChecksumInfoAllocator.acquire(); + csi->next = NULL; + return csi; +} 
+ +static __inline__ void free_checksum_info(checksum_info *csi) +{ + csi->next = NULL; + ChecksumInfoAllocator.release(csi); +} + +static __inline__ void free_checksum_info_chain(checksum_info *csi) +{ + while (csi != NULL) { + checksum_info *csi2 = csi->next; + free_checksum_info(csi); + csi = csi2; + } +} + +static __inline__ blockinfo *alloc_blockinfo(void) +{ + blockinfo *bi = BlockInfoAllocator.acquire(); +#if USE_CHECKSUM_INFO + bi->csi = NULL; +#endif + return bi; +} + +static __inline__ void free_blockinfo(blockinfo *bi) +{ +#if USE_CHECKSUM_INFO + free_checksum_info_chain(bi->csi); + bi->csi = NULL; +#endif + BlockInfoAllocator.release(bi); +} + +static __inline__ void alloc_blockinfos(void) +{ + int i; + blockinfo* bi; + + for (i=0;i>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000); +#endif +} + +/******************************************************************** + * Getting the information about the target CPU * + ********************************************************************/ + +#include "codegen_x86.cpp" + +void set_target(uae_u8* t) +{ + target=t; +} + +static __inline__ uae_u8* get_target_noopt(void) +{ + return target; +} + +__inline__ uae_u8* get_target(void) +{ + return get_target_noopt(); +} + + +/******************************************************************** + * Flags status handling. EMIT TIME! * + ********************************************************************/ + +static void bt_l_ri_noclobber(R4 r, IMM i); + +static void make_flags_live_internal(void) +{ + if (live.flags_in_flags==VALID) + return; + Dif (live.flags_on_stack==TRASH) { + write_log("Want flags, got something on stack, but it is TRASH\n"); + abort(); + } + if (live.flags_on_stack==VALID) { + int tmp; + tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2); + raw_reg_to_flags(tmp); + unlock2(tmp); + + live.flags_in_flags=VALID; + return; + } + write_log("Huh? 
live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n", + live.flags_in_flags,live.flags_on_stack); + abort(); +} + +static void flags_to_stack(void) +{ + if (live.flags_on_stack==VALID) + return; + if (!live.flags_are_important) { + live.flags_on_stack=VALID; + return; + } + Dif (live.flags_in_flags!=VALID) + abort(); + else { + int tmp; + tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1); + raw_flags_to_reg(tmp); + unlock2(tmp); + } + live.flags_on_stack=VALID; +} + +static __inline__ void clobber_flags(void) +{ + if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID) + flags_to_stack(); + live.flags_in_flags=TRASH; +} + +/* Prepare for leaving the compiled stuff */ +static __inline__ void flush_flags(void) +{ + flags_to_stack(); + return; +} + +int touchcnt; + +/******************************************************************** + * Partial register flushing for optimized calls * + ********************************************************************/ + +struct regusage { + uae_u16 rmask; + uae_u16 wmask; +}; + +static inline void ru_set(uae_u16 *mask, int reg) +{ +#if USE_OPTIMIZED_CALLS + *mask |= 1 << reg; +#endif +} + +static inline bool ru_get(const uae_u16 *mask, int reg) +{ +#if USE_OPTIMIZED_CALLS + return (*mask & (1 << reg)); +#else + /* Default: instruction reads & write to register */ + return true; +#endif +} + +static inline void ru_set_read(regusage *ru, int reg) +{ + ru_set(&ru->rmask, reg); +} + +static inline void ru_set_write(regusage *ru, int reg) +{ + ru_set(&ru->wmask, reg); +} + +static inline bool ru_read_p(const regusage *ru, int reg) +{ + return ru_get(&ru->rmask, reg); +} + +static inline bool ru_write_p(const regusage *ru, int reg) +{ + return ru_get(&ru->wmask, reg); +} + +static void ru_fill_ea(regusage *ru, int reg, amodes mode, + wordsizes size, int write_mode) +{ + switch (mode) { + case Areg: + reg += 8; + /* fall through */ + case Dreg: + ru_set(write_mode ? 
&ru->wmask : &ru->rmask, reg); + break; + case Ad16: + /* skip displacment */ + m68k_pc_offset += 2; + case Aind: + case Aipi: + case Apdi: + ru_set_read(ru, reg+8); + break; + case Ad8r: + ru_set_read(ru, reg+8); + /* fall through */ + case PC8r: { + uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2); + reg = (dp >> 12) & 15; + ru_set_read(ru, reg); + if (dp & 0x100) + m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2); + break; + } + case PC16: + case absw: + case imm0: + case imm1: + m68k_pc_offset += 2; + break; + case absl: + case imm2: + m68k_pc_offset += 4; + break; + case immi: + m68k_pc_offset += (size == sz_long) ? 4 : 2; + break; + } +} + +/* TODO: split into a static initialization part and a dynamic one + (instructions depending on extension words) */ +static void ru_fill(regusage *ru, uae_u32 opcode) +{ + m68k_pc_offset += 2; + + /* Default: no register is used or written to */ + ru->rmask = 0; + ru->wmask = 0; + + uae_u32 real_opcode = cft_map(opcode); + struct instr *dp = &table68k[real_opcode]; + + bool rw_dest = true; + bool handled = false; + + /* Handle some instructions specifically */ + uae_u16 reg, ext; + switch (dp->mnemo) { + case i_BFCHG: + case i_BFCLR: + case i_BFEXTS: + case i_BFEXTU: + case i_BFFFO: + case i_BFINS: + case i_BFSET: + case i_BFTST: + ext = comp_get_iword((m68k_pc_offset+=2)-2); + if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7); + if (ext & 0x020) ru_set_read(ru, ext & 7); + ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1); + if (dp->dmode == Dreg) + ru_set_read(ru, dp->dreg); + switch (dp->mnemo) { + case i_BFEXTS: + case i_BFEXTU: + case i_BFFFO: + ru_set_write(ru, (ext >> 12) & 7); + break; + case i_BFINS: + ru_set_read(ru, (ext >> 12) & 7); + /* fall through */ + case i_BFCHG: + case i_BFCLR: + case i_BSET: + if (dp->dmode == Dreg) + ru_set_write(ru, dp->dreg); + break; + } + handled = true; + rw_dest = false; + break; + + case i_BTST: + rw_dest = false; + break; + + case i_CAS: + { + ext 
= comp_get_iword((m68k_pc_offset+=2)-2); + int Du = ext & 7; + ru_set_read(ru, Du); + int Dc = (ext >> 6) & 7; + ru_set_read(ru, Dc); + ru_set_write(ru, Dc); + break; + } + case i_CAS2: + { + int Dc1, Dc2, Du1, Du2, Rn1, Rn2; + ext = comp_get_iword((m68k_pc_offset+=2)-2); + Rn1 = (ext >> 12) & 15; + Du1 = (ext >> 6) & 7; + Dc1 = ext & 7; + ru_set_read(ru, Rn1); + ru_set_read(ru, Du1); + ru_set_read(ru, Dc1); + ru_set_write(ru, Dc1); + ext = comp_get_iword((m68k_pc_offset+=2)-2); + Rn2 = (ext >> 12) & 15; + Du2 = (ext >> 6) & 7; + Dc2 = ext & 7; + ru_set_read(ru, Rn2); + ru_set_read(ru, Du2); + ru_set_write(ru, Dc2); + break; + } + case i_DIVL: case i_MULL: + m68k_pc_offset += 2; + break; + case i_LEA: + case i_MOVE: case i_MOVEA: case i_MOVE16: + rw_dest = false; + break; + case i_PACK: case i_UNPK: + rw_dest = false; + m68k_pc_offset += 2; + break; + case i_TRAPcc: + m68k_pc_offset += (dp->size == sz_long) ? 4 : 2; + break; + case i_RTR: + /* do nothing, just for coverage debugging */ + break; + /* TODO: handle EXG instruction */ + } + + /* Handle A-Traps better */ + if ((real_opcode & 0xf000) == 0xa000) { + handled = true; + } + + /* Handle EmulOps better */ + if ((real_opcode & 0xff00) == 0x7100) { + handled = true; + ru->rmask = 0xffff; + ru->wmask = 0; + } + + if (dp->suse && !handled) + ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0); + + if (dp->duse && !handled) + ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1); + + if (rw_dest) + ru->rmask |= ru->wmask; + + handled = handled || dp->suse || dp->duse; + + /* Mark all registers as used/written if the instruction may trap */ + if (may_trap(opcode)) { + handled = true; + ru->rmask = 0xffff; + ru->wmask = 0xffff; + } + + if (!handled) { + write_log("ru_fill: %04x = { %04x, %04x }\n", + real_opcode, ru->rmask, ru->wmask); + abort(); + } +} + +/******************************************************************** + * register allocation per block logging * + 
********************************************************************/ + +static uae_s8 vstate[VREGS]; +static uae_s8 vwritten[VREGS]; +static uae_s8 nstate[N_REGS]; + +#define L_UNKNOWN -127 +#define L_UNAVAIL -1 +#define L_NEEDED -2 +#define L_UNNEEDED -3 + +static __inline__ void big_to_small_state(bigstate * b, smallstate * s) +{ + int i; + + for (i = 0; i < VREGS; i++) + s->virt[i] = vstate[i]; + for (i = 0; i < N_REGS; i++) + s->nat[i] = nstate[i]; +} + +static __inline__ int callers_need_recompile(bigstate * b, smallstate * s) +{ + int i; + int reverse = 0; + + for (i = 0; i < VREGS; i++) { + if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED) + return 1; + if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED) + reverse++; + } + for (i = 0; i < N_REGS; i++) { + if (nstate[i] >= 0 && nstate[i] != s->nat[i]) + return 1; + if (nstate[i] < 0 && s->nat[i] >= 0) + reverse++; + } + if (reverse >= 2 && USE_MATCH) + return 1; /* In this case, it might be worth recompiling the + * callers */ + return 0; +} + +static __inline__ void log_startblock(void) +{ + int i; + + for (i = 0; i < VREGS; i++) { + vstate[i] = L_UNKNOWN; + vwritten[i] = 0; + } + for (i = 0; i < N_REGS; i++) + nstate[i] = L_UNKNOWN; +} + +/* Using an n-reg for a temp variable */ +static __inline__ void log_isused(int n) +{ + if (nstate[n] == L_UNKNOWN) + nstate[n] = L_UNAVAIL; +} + +static __inline__ void log_visused(int r) +{ + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_NEEDED; +} + +static __inline__ void do_load_reg(int n, int r) +{ + if (r == FLAGTMP) + raw_load_flagreg(n, r); + else if (r == FLAGX) + raw_load_flagx(n, r); + else + raw_mov_l_rm(n, (uintptr) live.state[r].mem); +} + +static __inline__ void check_load_reg(int n, int r) +{ + raw_mov_l_rm(n, (uintptr) live.state[r].mem); +} + +static __inline__ void log_vwrite(int r) +{ + vwritten[r] = 1; +} + +/* Using an n-reg to hold a v-reg */ +static __inline__ void log_isreg(int n, int r) +{ + static int count = 0; + + if (nstate[n] == 
L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH) + nstate[n] = r; + else { + do_load_reg(n, r); + if (nstate[n] == L_UNKNOWN) + nstate[n] = L_UNAVAIL; + } + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_NEEDED; +} + +static __inline__ void log_clobberreg(int r) +{ + if (vstate[r] == L_UNKNOWN) + vstate[r] = L_UNNEEDED; +} + +/* This ends all possibility of clever register allocation */ + +static __inline__ void log_flush(void) +{ + int i; + + for (i = 0; i < VREGS; i++) + if (vstate[i] == L_UNKNOWN) + vstate[i] = L_NEEDED; + for (i = 0; i < N_REGS; i++) + if (nstate[i] == L_UNKNOWN) + nstate[i] = L_UNAVAIL; +} + +static __inline__ void log_dump(void) +{ + int i; + + return; + + write_log("----------------------\n"); + for (i = 0; i < N_REGS; i++) { + switch (nstate[i]) { + case L_UNKNOWN: + write_log("Nat %d : UNKNOWN\n", i); + break; + case L_UNAVAIL: + write_log("Nat %d : UNAVAIL\n", i); + break; + default: + write_log("Nat %d : %d\n", i, nstate[i]); + break; + } + } + for (i = 0; i < VREGS; i++) { + if (vstate[i] == L_UNNEEDED) + write_log("Virt %d: UNNEEDED\n", i); + } +} + +/******************************************************************** + * register status handling. EMIT TIME! 
* + ********************************************************************/ + +static __inline__ void set_status(int r, int status) +{ + if (status == ISCONST) + log_clobberreg(r); + live.state[r].status=status; +} + +static __inline__ int isinreg(int r) +{ + return live.state[r].status==CLEAN || live.state[r].status==DIRTY; +} + +static __inline__ void adjust_nreg(int r, uae_u32 val) +{ + if (!val) + return; + raw_lea_l_brr(r,r,val); +} + +static void tomem(int r) +{ + int rr=live.state[r].realreg; + + if (isinreg(r)) { + if (live.state[r].val && live.nat[rr].nholds==1 + && !live.nat[rr].locked) { + // write_log("RemovingA offset %x from reg %d (%d) at %p\n", + // live.state[r].val,r,rr,target); + adjust_nreg(rr,live.state[r].val); + live.state[r].val=0; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + } + } + + if (live.state[r].status==DIRTY) { + switch (live.state[r].dirtysize) { + case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break; + case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break; + case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break; + default: abort(); + } + log_vwrite(r); + set_status(r,CLEAN); + live.state[r].dirtysize=0; + } +} + +static __inline__ int isconst(int r) +{ + return live.state[r].status==ISCONST; +} + +int is_const(int r) +{ + return isconst(r); +} + +static __inline__ void writeback_const(int r) +{ + if (!isconst(r)) + return; + Dif (live.state[r].needflush==NF_HANDLER) { + write_log("Trying to write back constant NF_HANDLER!\n"); + abort(); + } + + raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val); + log_vwrite(r); + live.state[r].val=0; + set_status(r,INMEM); +} + +static __inline__ void tomem_c(int r) +{ + if (isconst(r)) { + writeback_const(r); + } + else + tomem(r); +} + +static void evict(int r) +{ + int rr; + + if (!isinreg(r)) + return; + tomem(r); + rr=live.state[r].realreg; + + Dif (live.nat[rr].locked && + live.nat[rr].nholds==1) { + write_log("register %d in nreg %d is 
locked!\n",r,live.state[r].realreg); + abort(); + } + + live.nat[rr].nholds--; + if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */ + int topreg=live.nat[rr].holds[live.nat[rr].nholds]; + int thisind=live.state[r].realind; + + live.nat[rr].holds[thisind]=topreg; + live.state[topreg].realind=thisind; + } + live.state[r].realreg=-1; + set_status(r,INMEM); +} + +static __inline__ void free_nreg(int r) +{ + int i=live.nat[r].nholds; + + while (i) { + int vr; + + --i; + vr=live.nat[r].holds[i]; + evict(vr); + } + Dif (live.nat[r].nholds!=0) { + write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds); + abort(); + } +} + +/* Use with care! */ +static __inline__ void isclean(int r) +{ + if (!isinreg(r)) + return; + live.state[r].validsize=4; + live.state[r].dirtysize=0; + live.state[r].val=0; + set_status(r,CLEAN); +} + +static __inline__ void disassociate(int r) +{ + isclean(r); + evict(r); +} + +static __inline__ void set_const(int r, uae_u32 val) +{ + disassociate(r); + live.state[r].val=val; + set_status(r,ISCONST); +} + +static __inline__ uae_u32 get_offset(int r) +{ + return live.state[r].val; +} + +static int alloc_reg_hinted(int r, int size, int willclobber, int hint) +{ + int bestreg; + uae_s32 when; + int i; + uae_s32 badness=0; /* to shut up gcc */ + bestreg=-1; + when=2000000000; + + /* XXX use a regalloc_order table? 
*/ + for (i=0;i0) { + free_nreg(bestreg); + } + if (isinreg(r)) { + int rr=live.state[r].realreg; + /* This will happen if we read a partially dirty register at a + bigger size */ + Dif (willclobber || live.state[r].validsize>=size) + abort(); + Dif (live.nat[rr].nholds!=1) + abort(); + if (size==4 && live.state[r].validsize==2) { + log_isused(bestreg); + log_visused(r); + raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem); + raw_bswap_32(bestreg); + raw_zero_extend_16_rr(rr,rr); + raw_zero_extend_16_rr(bestreg,bestreg); + raw_bswap_32(bestreg); + raw_lea_l_brr_indexed(rr,rr,bestreg,1,0); + live.state[r].validsize=4; + live.nat[rr].touched=touchcnt++; + return rr; + } + if (live.state[r].validsize==1) { + /* Nothing yet */ + } + evict(r); + } + + if (!willclobber) { + if (live.state[r].status!=UNDEF) { + if (isconst(r)) { + raw_mov_l_ri(bestreg,live.state[r].val); + live.state[r].val=0; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + log_isused(bestreg); + } + else { + log_isreg(bestreg, r); /* This will also load it! */ + live.state[r].dirtysize=0; + set_status(r,CLEAN); + } + } + else { + live.state[r].val=0; + live.state[r].dirtysize=0; + set_status(r,CLEAN); + log_isused(bestreg); + } + live.state[r].validsize=4; + } + else { /* this is the easiest way, but not optimal. FIXME! 
*/ + /* Now it's trickier, but hopefully still OK */ + if (!isconst(r) || size==4) { + live.state[r].validsize=size; + live.state[r].dirtysize=size; + live.state[r].val=0; + set_status(r,DIRTY); + if (size == 4) { + log_clobberreg(r); + log_isused(bestreg); + } + else { + log_visused(r); + log_isused(bestreg); + } + } + else { + if (live.state[r].status!=UNDEF) + raw_mov_l_ri(bestreg,live.state[r].val); + live.state[r].val=0; + live.state[r].validsize=4; + live.state[r].dirtysize=4; + set_status(r,DIRTY); + log_isused(bestreg); + } + } + live.state[r].realreg=bestreg; + live.state[r].realind=live.nat[bestreg].nholds; + live.nat[bestreg].touched=touchcnt++; + live.nat[bestreg].holds[live.nat[bestreg].nholds]=r; + live.nat[bestreg].nholds++; + + return bestreg; +} + +static int alloc_reg(int r, int size, int willclobber) +{ + return alloc_reg_hinted(r,size,willclobber,-1); +} + +static void unlock2(int r) +{ + Dif (!live.nat[r].locked) + abort(); + live.nat[r].locked--; +} + +static void setlock(int r) +{ + live.nat[r].locked++; +} + + +static void mov_nregs(int d, int s) +{ + int ns=live.nat[s].nholds; + int nd=live.nat[d].nholds; + int i; + + if (s==d) + return; + + if (nd>0) + free_nreg(d); + + log_isused(d); + raw_mov_l_rr(d,s); + + for (i=0;i=size) { + n=live.state[r].realreg; + switch(size) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + answer=n; + } + break; + case 4: + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec); + } + + if (spec>=0 && spec!=answer) { + /* Too bad */ + mov_nregs(spec,answer); + answer=spec; + } + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + return answer; +} + + + +static int readreg(int r, int size) +{ + return readreg_general(r,size,-1,0); +} + 
+static int readreg_specific(int r, int size, int spec) +{ + return readreg_general(r,size,spec,0); +} + +static int readreg_offset(int r, int size) +{ + return readreg_general(r,size,-1,1); +} + +/* writereg_general(r, size, spec) + * + * INPUT + * - r : mid-layer register + * - size : requested size (1/2/4) + * - spec : -1 if find or make a register free, otherwise specifies + * the physical register to use in any case + * + * OUTPUT + * - hard (physical, x86 here) register allocated to virtual register r + */ +static __inline__ int writereg_general(int r, int size, int spec) +{ + int n; + int answer=-1; + + record_register(r); + if (size<4) { + remove_offset(r,spec); + } + + make_exclusive(r,size,spec); + if (isinreg(r)) { + int nvsize=size>live.state[r].validsize?size:live.state[r].validsize; + int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; + n=live.state[r].realreg; + + Dif (live.nat[n].nholds!=1) + abort(); + switch(size) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + } + break; + case 4: + live.state[r].dirtysize=ndsize; + live.state[r].validsize=nvsize; + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,size,1,spec); + } + if (spec>=0 && spec!=answer) { + mov_nregs(spec,answer); + answer=spec; + } + if (live.state[r].status==UNDEF) + live.state[r].validsize=4; + live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize; + live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize; + + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + if (size==4) { + live.state[r].val=0; + } + else 
{ + Dif (live.state[r].val) { + write_log("Problem with val\n"); + abort(); + } + } + set_status(r,DIRTY); + return answer; +} + +static int writereg(int r, int size) +{ + return writereg_general(r,size,-1); +} + +static int writereg_specific(int r, int size, int spec) +{ + return writereg_general(r,size,spec); +} + +static __inline__ int rmw_general(int r, int wsize, int rsize, int spec) +{ + int n; + int answer=-1; + + record_register(r); + if (live.state[r].status==UNDEF) { + write_log("WARNING: Unexpected read of undefined register %d\n",r); + } + remove_offset(r,spec); + make_exclusive(r,0,spec); + + Dif (wsize=rsize) { + n=live.state[r].realreg; + Dif (live.nat[n].nholds!=1) + abort(); + + switch(rsize) { + case 1: + if (live.nat[n].canbyte || spec>=0) { + answer=n; + } + break; + case 2: + if (live.nat[n].canword || spec>=0) { + answer=n; + } + break; + case 4: + answer=n; + break; + default: abort(); + } + if (answer<0) + evict(r); + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) { + answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec); + } + + if (spec>=0 && spec!=answer) { + /* Too bad */ + mov_nregs(spec,answer); + answer=spec; + } + if (wsize>live.state[r].dirtysize) + live.state[r].dirtysize=wsize; + if (wsize>live.state[r].validsize) + live.state[r].validsize=wsize; + set_status(r,DIRTY); + + live.nat[answer].locked++; + live.nat[answer].touched=touchcnt++; + + Dif (live.state[r].val) { + write_log("Problem with val(rmw)\n"); + abort(); + } + return answer; +} + +static int rmw(int r, int wsize, int rsize) +{ + return rmw_general(r,wsize,rsize,-1); +} + +static int rmw_specific(int r, int wsize, int rsize, int spec) +{ + return rmw_general(r,wsize,rsize,spec); +} + + +/* needed for restoring the carry flag on non-P6 cores */ +static void bt_l_ri_noclobber(R4 r, IMM i) +{ + int size=4; + if (i<16) + size=2; + r=readreg(r,size); + raw_bt_l_ri(r,i); + unlock2(r); +} + 
+/******************************************************************** + * FPU register status handling. EMIT TIME! * + ********************************************************************/ + +static void f_tomem(int r) +{ + if (live.fate[r].status==DIRTY) { +#if USE_LONG_DOUBLE + raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); +#else + raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg); +#endif + live.fate[r].status=CLEAN; + } +} + +static void f_tomem_drop(int r) +{ + if (live.fate[r].status==DIRTY) { +#if USE_LONG_DOUBLE + raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); +#else + raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg); +#endif + live.fate[r].status=INMEM; + } +} + + +static __inline__ int f_isinreg(int r) +{ + return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY; +} + +static void f_evict(int r) +{ + int rr; + + if (!f_isinreg(r)) + return; + rr=live.fate[r].realreg; + if (live.fat[rr].nholds==1) + f_tomem_drop(r); + else + f_tomem(r); + + Dif (live.fat[rr].locked && + live.fat[rr].nholds==1) { + write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg); + abort(); + } + + live.fat[rr].nholds--; + if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */ + int topreg=live.fat[rr].holds[live.fat[rr].nholds]; + int thisind=live.fate[r].realind; + live.fat[rr].holds[thisind]=topreg; + live.fate[topreg].realind=thisind; + } + live.fate[r].status=INMEM; + live.fate[r].realreg=-1; +} + +static __inline__ void f_free_nreg(int r) +{ + int i=live.fat[r].nholds; + + while (i) { + int vr; + + --i; + vr=live.fat[r].holds[i]; + f_evict(vr); + } + Dif (live.fat[r].nholds!=0) { + write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds); + abort(); + } +} + + +/* Use with care! 
*/ +static __inline__ void f_isclean(int r) +{ + if (!f_isinreg(r)) + return; + live.fate[r].status=CLEAN; +} + +static __inline__ void f_disassociate(int r) +{ + f_isclean(r); + f_evict(r); +} + + + +static int f_alloc_reg(int r, int willclobber) +{ + int bestreg; + uae_s32 when; + int i; + uae_s32 badness; + bestreg=-1; + when=2000000000; + for (i=N_FREGS;i--;) { + badness=live.fat[i].touched; + if (live.fat[i].nholds==0) + badness=0; + + if (!live.fat[i].locked && badness0) { + f_free_nreg(bestreg); + } + if (f_isinreg(r)) { + f_evict(r); + } + + if (!willclobber) { + if (live.fate[r].status!=UNDEF) { +#if USE_LONG_DOUBLE + raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem); +#else + raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem); +#endif + } + live.fate[r].status=CLEAN; + } + else { + live.fate[r].status=DIRTY; + } + live.fate[r].realreg=bestreg; + live.fate[r].realind=live.fat[bestreg].nholds; + live.fat[bestreg].touched=touchcnt++; + live.fat[bestreg].holds[live.fat[bestreg].nholds]=r; + live.fat[bestreg].nholds++; + + return bestreg; +} + +static void f_unlock(int r) +{ + Dif (!live.fat[r].locked) + abort(); + live.fat[r].locked--; +} + +static void f_setlock(int r) +{ + live.fat[r].locked++; +} + +static __inline__ int f_readreg(int r) +{ + int n; + int answer=-1; + + if (f_isinreg(r)) { + n=live.fate[r].realreg; + answer=n; + } + /* either the value was in memory to start with, or it was evicted and + is in memory now */ + if (answer<0) + answer=f_alloc_reg(r,0); + + live.fat[answer].locked++; + live.fat[answer].touched=touchcnt++; + return answer; +} + +static __inline__ void f_make_exclusive(int r, int clobber) +{ + freg_status oldstate; + int rr=live.fate[r].realreg; + int nr; + int nind; + int ndirt=0; + int i; + + if (!f_isinreg(r)) + return; + if (live.fat[rr].nholds==1) + return; + for (i=0;i>=i; + return; + } + CLOBBER_SHRL; + r=rmw(r,4,4); + raw_shrl_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shrl_w_ri,(RW2 r, 
IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,2,2); + raw_shrl_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRL; + r=rmw(r,1,1); + raw_shrl_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shra_l_ri,(RW4 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,4,4); + raw_shra_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shra_l_ri,(RW4 r, IMM i)) + +MIDFUNC(2,shra_w_ri,(RW2 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,2,2); + raw_shra_w_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shra_w_ri,(RW2 r, IMM i)) + +MIDFUNC(2,shra_b_ri,(RW1 r, IMM i)) +{ + if (!i && !needflags) + return; + CLOBBER_SHRA; + r=rmw(r,1,1); + raw_shra_b_ri(r,i); + unlock2(r); +} +MENDFUNC(2,shra_b_ri,(RW1 r, IMM i)) + +MIDFUNC(2,shra_l_rr,(RW4 d, R1 r)) +{ + if (isconst(r)) { + COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,4,4); + Dif (r!=1) { + write_log("Illegal register %d in raw_rol_b\n",r); + abort(); + } + raw_shra_l_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_l_rr,(RW4 d, R1 r)) + +MIDFUNC(2,shra_w_rr,(RW2 d, R1 r)) +{ /* Can only do this with r==1, i.e. cl */ + + if (isconst(r)) { + COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val); + return; + } + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,2,2); + Dif (r!=1) { + write_log("Illegal register %d in raw_shra_b\n",r); + abort(); + } + raw_shra_w_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_w_rr,(RW2 d, R1 r)) + +MIDFUNC(2,shra_b_rr,(RW1 d, R1 r)) +{ /* Can only do this with r==1, i.e. 
cl */ + + if (isconst(r)) { + COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val); + return; + } + + CLOBBER_SHRA; + r=readreg_specific(r,1,SHIFTCOUNT_NREG); + d=rmw(d,1,1); + Dif (r!=1) { + write_log("Illegal register %d in raw_shra_b\n",r); + abort(); + } + raw_shra_b_rr(d,r) ; + unlock2(r); + unlock2(d); +} +MENDFUNC(2,shra_b_rr,(RW1 d, R1 r)) + + +MIDFUNC(2,setcc,(W1 d, IMM cc)) +{ + CLOBBER_SETCC; + d=writereg(d,1); + raw_setcc(d,cc); + unlock2(d); +} +MENDFUNC(2,setcc,(W1 d, IMM cc)) + +MIDFUNC(2,setcc_m,(IMM d, IMM cc)) +{ + CLOBBER_SETCC; + raw_setcc_m(d,cc); +} +MENDFUNC(2,setcc_m,(IMM d, IMM cc)) + +MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc)) +{ + if (d==s) + return; + CLOBBER_CMOV; + s=readreg(s,1); + d=rmw(d,1,1); + raw_cmov_b_rr(d,s,cc); + unlock2(s); + unlock2(d); +} +MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc)) + +MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc)) +{ + if (d==s) + return; + CLOBBER_CMOV; + s=readreg(s,2); + d=rmw(d,2,2); + raw_cmov_w_rr(d,s,cc); + unlock2(s); + unlock2(d); +} +MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc)) + +MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc)) +{ + if (d==s) + return; + CLOBBER_CMOV; + s=readreg(s,4); + d=rmw(d,4,4); + raw_cmov_l_rr(d,s,cc); + unlock2(s); + unlock2(d); +} +MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc)) + +MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) +{ + CLOBBER_CMOV; + d=rmw(d,4,4); + raw_cmov_l_rm(d,s,cc); + unlock2(d); +} +MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) + +MIDFUNC(2,bsf_l_rr,(W4 d, W4 s)) +{ + CLOBBER_BSF; + s = readreg(s, 4); + d = writereg(d, 4); + raw_bsf_l_rr(d, s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,bsf_l_rr,(W4 d, W4 s)) + +/* Set the Z flag depending on the value in s. Note that the + value has to be 0 or -1 (or, more precisely, for non-zero + values, bit 14 must be set)! 
*/ +MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) +{ + CLOBBER_BSF; + s=rmw_specific(s,4,4,FLAG_NREG3); + tmp=writereg(tmp,4); + raw_flags_set_zero(s, tmp); + unlock2(tmp); + unlock2(s); +} +MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s)) + +MIDFUNC(2,imul_32_32,(RW4 d, R4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_imul_32_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,imul_32_32,(RW4 d, R4 s)) + +MIDFUNC(2,imul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_imul_64_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,imul_64_32,(RW4 d, RW4 s)) + +MIDFUNC(2,mul_64_32,(RW4 d, RW4 s)) +{ + CLOBBER_MUL; + s=rmw_specific(s,4,4,MUL_NREG2); + d=rmw_specific(d,4,4,MUL_NREG1); + raw_mul_64_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,mul_64_32,(RW4 d, RW4 s)) + +MIDFUNC(2,mul_32_32,(RW4 d, R4 s)) +{ + CLOBBER_MUL; + s=readreg(s,4); + d=rmw(d,4,4); + raw_mul_32_32(d,s); + unlock2(s); + unlock2(d); +} +MENDFUNC(2,mul_32_32,(RW4 d, R4 s)) + +#if SIZEOF_VOID_P == 8 +MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)live.state[s].val); + return; + } + + CLOBBER_SE32; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,4); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,4); + } + raw_sign_extend_32_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s)) +#endif + +MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s16)live.state[s].val); + return; + } + + CLOBBER_SE16; + isrmw=(s==d); + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,2); + } + raw_sign_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s)) + +MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_s32)(uae_s8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_SE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,1); + } + + raw_sign_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s)) + + +MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s)) +{ + int isrmw; + + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u16)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE16; + if (!isrmw) { + s=readreg(s,2); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! */ + s=d=rmw(s,4,2); + } + raw_zero_extend_16_rr(d,s); + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s)) + +MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s)) +{ + int isrmw; + if (isconst(s)) { + set_const(d,(uae_u32)(uae_u8)live.state[s].val); + return; + } + + isrmw=(s==d); + CLOBBER_ZE8; + if (!isrmw) { + s=readreg(s,1); + d=writereg(d,4); + } + else { /* If we try to lock this twice, with different sizes, we + are int trouble! 
*/ + s=d=rmw(s,4,1); + } + + raw_zero_extend_8_rr(d,s); + + if (!isrmw) { + unlock2(d); + unlock2(s); + } + else { + unlock2(s); + } +} +MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s)) + +MIDFUNC(2,mov_b_rr,(W1 d, R1 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=writereg(d,1); + raw_mov_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,mov_b_rr,(W1 d, R1 s)) + +MIDFUNC(2,mov_w_rr,(W2 d, R2 s)) +{ + if (d==s) + return; + if (isconst(s)) { + COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=writereg(d,2); + raw_mov_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,mov_w_rr,(W2 d, R2 s)) + + +MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_mov_l_rrm_indexed(d,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) + +MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,2); + + raw_mov_w_rrm_indexed(d,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) + +MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + d=writereg(d,1); + + raw_mov_b_rrm_indexed(d,baser,index,factor); + + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) + + +MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + s=readreg(s,4); + + Dif (baser==s || index==s) + abort(); + + + 
raw_mov_l_mrr_indexed(baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) + +MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) +{ + CLOBBER_MOV; + baser=readreg(baser,4); + index=readreg(index,4); + s=readreg(s,2); + + raw_mov_w_mrr_indexed(baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) + +MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) +{ + CLOBBER_MOV; + s=readreg(s,1); + baser=readreg(baser,4); + index=readreg(index,4); + + raw_mov_b_mrr_indexed(baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) + + +MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,4); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_l_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) + +MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,2); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_w_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) + +MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + s=readreg(s,1); + baser=readreg_offset(baser,4); 
+ index=readreg_offset(index,4); + + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + + raw_mov_b_bmrr_indexed(base,baser,index,factor,s); + unlock2(s); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) + + + +/* Read a long from base+baser+factor*index */ +MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,4); + raw_mov_l_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) + + +MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + remove_offset(d,-1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,2); + raw_mov_w_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) + + +MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + int basereg=baser; + int indexreg=index; + + CLOBBER_MOV; + remove_offset(d,-1); + baser=readreg_offset(baser,4); + index=readreg_offset(index,4); + base+=get_offset(basereg); + base+=factor*get_offset(indexreg); + d=writereg(d,1); + raw_mov_b_brrm_indexed(d,base,baser,index,factor); + unlock2(d); + unlock2(baser); + unlock2(index); +} +MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) + +/* Read a long from base+factor*index */ +MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) +{ + int indexreg=index; + + if 
(isconst(index)) { + COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val); + return; + } + + CLOBBER_MOV; + index=readreg_offset(index,4); + base+=get_offset(indexreg)*factor; + d=writereg(d,4); + + raw_mov_l_rm_indexed(d,base,index,factor); + unlock2(index); + unlock2(d); +} +MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) + + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,4); + + raw_mov_l_rR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,2); + + raw_mov_w_rR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + d=writereg(d,1); + + raw_mov_b_rR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset)) + +/* read the long at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_l_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,4); + + raw_mov_l_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) +{ 
+ int sreg=s; + if (isconst(s)) { + COMPCALL(mov_w_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,2); + + raw_mov_w_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset)) + +/* read the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) +{ + int sreg=s; + if (isconst(s)) { + COMPCALL(mov_b_rm)(d,live.state[s].val+offset); + return; + } + CLOBBER_MOV; + remove_offset(d,-1); + s=readreg_offset(s,4); + offset+=get_offset(sreg); + d=writereg(d,1); + + raw_mov_b_brR(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset)) + +MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_l_Ri(d,i,offset); + unlock2(d); +} +MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset)) + +MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_w_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_Ri(d,i,offset); + unlock2(d); +} +MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset)) + +MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mi)(live.state[d].val+offset,i); + return; + } + + CLOBBER_MOV; + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_Ri(d,i,offset); + unlock2(d); +} +MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset)) + + /* Warning! OFFSET is byte sized only! 
*/ +MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_l_Ri)(d,live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg(d,4); + + raw_mov_l_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset)) + +MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg(d,4); + raw_mov_w_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset)) + +MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset)) +{ + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + if (isconst(s)) { + COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg(d,4); + raw_mov_b_Rr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset)) + +MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset)) +{ + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val+offset); + return; + } +#if USE_OFFSET + if (d==s) { + add_offset(d,offset); + return; + } +#endif + CLOBBER_LEA; + s=readreg(s,4); + d=writereg(d,4); + raw_lea_l_brr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset)) + +MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) +{ + if (!offset) { + COMPCALL(lea_l_rr_indexed)(d,s,index,factor); + return; + } + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_brr_indexed(d,s,index,factor,offset); + unlock2(d); + unlock2(index); + unlock2(s); +} +MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) + +MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, 
IMM factor)) +{ + CLOBBER_LEA; + s=readreg(s,4); + index=readreg(index,4); + d=writereg(d,4); + + raw_lea_l_rr_indexed(d,s,index,factor); + unlock2(d); + unlock2(index); + unlock2(s); +} +MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) + +/* write d to the long at the address contained in s+offset */ +MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_l_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,4); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + + raw_mov_l_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset)) + +/* write the word at the address contained in s+offset and store in d */ +MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) +{ + int dreg=d; + + if (isconst(d)) { + COMPCALL(mov_w_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,2); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_w_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset)) + +MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset)) +{ + int dreg=d; + if (isconst(d)) { + COMPCALL(mov_b_mr)(live.state[d].val+offset,s); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + d=readreg_offset(d,4); + offset+=get_offset(dreg); + raw_mov_b_bRr(d,s,offset); + unlock2(d); + unlock2(s); +} +MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset)) + +MIDFUNC(1,bswap_32,(RW4 r)) +{ + int reg=r; + + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=reverse32(oldv); + return; + } + + CLOBBER_SW32; + r=rmw(r,4,4); + raw_bswap_32(r); + unlock2(r); +} +MENDFUNC(1,bswap_32,(RW4 r)) + +MIDFUNC(1,bswap_16,(RW2 r)) +{ + if (isconst(r)) { + uae_u32 oldv=live.state[r].val; + live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) | + (oldv&0xffff0000); + return; + } + + CLOBBER_SW16; + r=rmw(r,2,2); + + raw_bswap_16(r); + unlock2(r); +} +MENDFUNC(1,bswap_16,(RW2 r)) + + + 
+MIDFUNC(2,mov_l_rr,(W4 d, R4 s)) +{ + int olds; + + if (d==s) { /* How pointless! */ + return; + } + if (isconst(s)) { + COMPCALL(mov_l_ri)(d,live.state[s].val); + return; + } + olds=s; + disassociate(d); + s=readreg_offset(s,4); + live.state[d].realreg=s; + live.state[d].realind=live.nat[s].nholds; + live.state[d].val=live.state[olds].val; + live.state[d].validsize=4; + live.state[d].dirtysize=4; + set_status(d,DIRTY); + + live.nat[s].holds[live.nat[s].nholds]=d; + live.nat[s].nholds++; + log_clobberreg(d); + /* write_log("Added %d to nreg %d(%d), now holds %d regs\n", + d,s,live.state[d].realind,live.nat[s].nholds); */ + unlock2(s); +} +MENDFUNC(2,mov_l_rr,(W4 d, R4 s)) + +MIDFUNC(2,mov_l_mr,(IMM d, R4 s)) +{ + if (isconst(s)) { + COMPCALL(mov_l_mi)(d,live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,4); + + raw_mov_l_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_l_mr,(IMM d, R4 s)) + + +MIDFUNC(2,mov_w_mr,(IMM d, R2 s)) +{ + if (isconst(s)) { + COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val); + return; + } + CLOBBER_MOV; + s=readreg(s,2); + + raw_mov_w_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_w_mr,(IMM d, R2 s)) + +MIDFUNC(2,mov_w_rm,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_rm(d,s); + unlock2(d); +} +MENDFUNC(2,mov_w_rm,(W2 d, IMM s)) + +MIDFUNC(2,mov_b_mr,(IMM d, R1 s)) +{ + if (isconst(s)) { + COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_MOV; + s=readreg(s,1); + + raw_mov_b_mr(d,s); + unlock2(s); +} +MENDFUNC(2,mov_b_mr,(IMM d, R1 s)) + +MIDFUNC(2,mov_b_rm,(W1 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,1); + + raw_mov_b_rm(d,s); + unlock2(d); +} +MENDFUNC(2,mov_b_rm,(W1 d, IMM s)) + +MIDFUNC(2,mov_l_ri,(W4 d, IMM s)) +{ + set_const(d,s); + return; +} +MENDFUNC(2,mov_l_ri,(W4 d, IMM s)) + +MIDFUNC(2,mov_w_ri,(W2 d, IMM s)) +{ + CLOBBER_MOV; + d=writereg(d,2); + + raw_mov_w_ri(d,s); + unlock2(d); +} +MENDFUNC(2,mov_w_ri,(W2 d, IMM s)) + +MIDFUNC(2,mov_b_ri,(W1 d, IMM s)) +{ + CLOBBER_MOV; + 
d=writereg(d,1); + + raw_mov_b_ri(d,s); + unlock2(d); +} +MENDFUNC(2,mov_b_ri,(W1 d, IMM s)) + + +MIDFUNC(2,add_l_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_l_mi(d,s) ; +} +MENDFUNC(2,add_l_mi,(IMM d, IMM s)) + +MIDFUNC(2,add_w_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_w_mi(d,s) ; +} +MENDFUNC(2,add_w_mi,(IMM d, IMM s)) + +MIDFUNC(2,add_b_mi,(IMM d, IMM s)) +{ + CLOBBER_ADD; + raw_add_b_mi(d,s) ; +} +MENDFUNC(2,add_b_mi,(IMM d, IMM s)) + + +MIDFUNC(2,test_l_ri,(R4 d, IMM i)) +{ + CLOBBER_TEST; + d=readreg(d,4); + + raw_test_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,test_l_ri,(R4 d, IMM i)) + +MIDFUNC(2,test_l_rr,(R4 d, R4 s)) +{ + CLOBBER_TEST; + d=readreg(d,4); + s=readreg(s,4); + + raw_test_l_rr(d,s);; + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_l_rr,(R4 d, R4 s)) + +MIDFUNC(2,test_w_rr,(R2 d, R2 s)) +{ + CLOBBER_TEST; + d=readreg(d,2); + s=readreg(s,2); + + raw_test_w_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_w_rr,(R2 d, R2 s)) + +MIDFUNC(2,test_b_rr,(R1 d, R1 s)) +{ + CLOBBER_TEST; + d=readreg(d,1); + s=readreg(s,1); + + raw_test_b_rr(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,test_b_rr,(R1 d, R1 s)) + + +MIDFUNC(2,and_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val &= i; + return; + } + + CLOBBER_AND; + d=rmw(d,4,4); + + raw_and_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,and_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,and_l,(RW4 d, R4 s)) +{ + CLOBBER_AND; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_and_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_l,(RW4 d, R4 s)) + +MIDFUNC(2,and_w,(RW2 d, R2 s)) +{ + CLOBBER_AND; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_and_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_w,(RW2 d, R2 s)) + +MIDFUNC(2,and_b,(RW1 d, R1 s)) +{ + CLOBBER_AND; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_and_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,and_b,(RW1 d, R1 s)) + +// gb-- used for making an fpcr value in compemu_fpp.cpp +MIDFUNC(2,or_l_rm,(RW4 d, IMM s)) +{ + 
CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_rm(d,s); + unlock2(d); +} +MENDFUNC(2,or_l_rm,(RW4 d, IMM s)) + +MIDFUNC(2,or_l_ri,(RW4 d, IMM i)) +{ + if (isconst(d) && !needflags) { + live.state[d].val|=i; + return; + } + CLOBBER_OR; + d=rmw(d,4,4); + + raw_or_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,or_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,or_l,(RW4 d, R4 s)) +{ + if (isconst(d) && isconst(s) && !needflags) { + live.state[d].val|=live.state[s].val; + return; + } + CLOBBER_OR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_or_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_l,(RW4 d, R4 s)) + +MIDFUNC(2,or_w,(RW2 d, R2 s)) +{ + CLOBBER_OR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_or_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_w,(RW2 d, R2 s)) + +MIDFUNC(2,or_b,(RW1 d, R1 s)) +{ + CLOBBER_OR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_or_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,or_b,(RW1 d, R1 s)) + +MIDFUNC(2,adc_l,(RW4 d, R4 s)) +{ + CLOBBER_ADC; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_adc_l(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_l,(RW4 d, R4 s)) + +MIDFUNC(2,adc_w,(RW2 d, R2 s)) +{ + CLOBBER_ADC; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_adc_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_w,(RW2 d, R2 s)) + +MIDFUNC(2,adc_b,(RW1 d, R1 s)) +{ + CLOBBER_ADC; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_adc_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,adc_b,(RW1 d, R1 s)) + +MIDFUNC(2,add_l,(RW4 d, R4 s)) +{ + if (isconst(s)) { + COMPCALL(add_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_add_l(d,s); + + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_l,(RW4 d, R4 s)) + +MIDFUNC(2,add_w,(RW2 d, R2 s)) +{ + if (isconst(s)) { + COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_add_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_w,(RW2 d, R2 s)) + +MIDFUNC(2,add_b,(RW1 d, R1 s)) +{ + if (isconst(s)) { + 
COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_ADD; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_add_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,add_b,(RW1 d, R1 s)) + +MIDFUNC(2,sub_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val-=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,-i); + return; + } +#endif + + CLOBBER_SUB; + d=rmw(d,4,4); + + raw_sub_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,sub_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,sub_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,2,2); + + raw_sub_w_ri(d,i); + unlock2(d); +} +MENDFUNC(2,sub_w_ri,(RW2 d, IMM i)) + +MIDFUNC(2,sub_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_SUB; + d=rmw(d,1,1); + + raw_sub_b_ri(d,i); + + unlock2(d); +} +MENDFUNC(2,sub_b_ri,(RW1 d, IMM i)) + +MIDFUNC(2,add_l_ri,(RW4 d, IMM i)) +{ + if (!i && !needflags) + return; + if (isconst(d) && !needflags) { + live.state[d].val+=i; + return; + } +#if USE_OFFSET + if (!needflags) { + add_offset(d,i); + return; + } +#endif + CLOBBER_ADD; + d=rmw(d,4,4); + raw_add_l_ri(d,i); + unlock2(d); +} +MENDFUNC(2,add_l_ri,(RW4 d, IMM i)) + +MIDFUNC(2,add_w_ri,(RW2 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,2,2); + + raw_add_w_ri(d,i); + unlock2(d); +} +MENDFUNC(2,add_w_ri,(RW2 d, IMM i)) + +MIDFUNC(2,add_b_ri,(RW1 d, IMM i)) +{ + if (!i && !needflags) + return; + + CLOBBER_ADD; + d=rmw(d,1,1); + + raw_add_b_ri(d,i); + + unlock2(d); +} +MENDFUNC(2,add_b_ri,(RW1 d, IMM i)) + +MIDFUNC(2,sbb_l,(RW4 d, R4 s)) +{ + CLOBBER_SBB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sbb_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_l,(RW4 d, R4 s)) + +MIDFUNC(2,sbb_w,(RW2 d, R2 s)) +{ + CLOBBER_SBB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sbb_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_w,(RW2 d, R2 s)) + +MIDFUNC(2,sbb_b,(RW1 d, R1 s)) +{ + CLOBBER_SBB; 
+ s=readreg(s,1); + d=rmw(d,1,1); + + raw_sbb_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sbb_b,(RW1 d, R1 s)) + +MIDFUNC(2,sub_l,(RW4 d, R4 s)) +{ + if (isconst(s)) { + COMPCALL(sub_l_ri)(d,live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_sub_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sub_l,(RW4 d, R4 s)) + +MIDFUNC(2,sub_w,(RW2 d, R2 s)) +{ + if (isconst(s)) { + COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_sub_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sub_w,(RW2 d, R2 s)) + +MIDFUNC(2,sub_b,(RW1 d, R1 s)) +{ + if (isconst(s)) { + COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val); + return; + } + + CLOBBER_SUB; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_sub_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,sub_b,(RW1 d, R1 s)) + +MIDFUNC(2,cmp_l,(R4 d, R4 s)) +{ + CLOBBER_CMP; + s=readreg(s,4); + d=readreg(d,4); + + raw_cmp_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_l,(R4 d, R4 s)) + +MIDFUNC(2,cmp_l_ri,(R4 r, IMM i)) +{ + CLOBBER_CMP; + r=readreg(r,4); + + raw_cmp_l_ri(r,i); + unlock2(r); +} +MENDFUNC(2,cmp_l_ri,(R4 r, IMM i)) + +MIDFUNC(2,cmp_w,(R2 d, R2 s)) +{ + CLOBBER_CMP; + s=readreg(s,2); + d=readreg(d,2); + + raw_cmp_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_w,(R2 d, R2 s)) + +MIDFUNC(2,cmp_b,(R1 d, R1 s)) +{ + CLOBBER_CMP; + s=readreg(s,1); + d=readreg(d,1); + + raw_cmp_b(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,cmp_b,(R1 d, R1 s)) + + +MIDFUNC(2,xor_l,(RW4 d, R4 s)) +{ + CLOBBER_XOR; + s=readreg(s,4); + d=rmw(d,4,4); + + raw_xor_l(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_l,(RW4 d, R4 s)) + +MIDFUNC(2,xor_w,(RW2 d, R2 s)) +{ + CLOBBER_XOR; + s=readreg(s,2); + d=rmw(d,2,2); + + raw_xor_w(d,s); + unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_w,(RW2 d, R2 s)) + +MIDFUNC(2,xor_b,(RW1 d, R1 s)) +{ + CLOBBER_XOR; + s=readreg(s,1); + d=rmw(d,1,1); + + raw_xor_b(d,s); + 
unlock2(d); + unlock2(s); +} +MENDFUNC(2,xor_b,(RW1 d, R1 s)) + +MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize)) +{ + clobber_flags(); + remove_all_offsets(); + if (osize==4) { + if (out1!=in1 && out1!=r) { + COMPCALL(forget_about)(out1); + } + } + else { + tomem_c(out1); + } + + in1=readreg_specific(in1,isize,REG_PAR1); + r=readreg(r,4); + prepare_for_call_1(); /* This should ensure that there won't be + any need for swapping nregs in prepare_for_call_2 + */ +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(in1); +#endif + unlock2(in1); + unlock2(r); + + prepare_for_call_2(); + raw_call_r(r); + +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(4); +#endif + + + live.nat[REG_RESULT].holds[0]=out1; + live.nat[REG_RESULT].nholds=1; + live.nat[REG_RESULT].touched=touchcnt++; + + live.state[out1].realreg=REG_RESULT; + live.state[out1].realind=0; + live.state[out1].val=0; + live.state[out1].validsize=osize; + live.state[out1].dirtysize=osize; + set_status(out1,DIRTY); +} +MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize)) + +MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)) +{ + clobber_flags(); + remove_all_offsets(); + in1=readreg_specific(in1,isize1,REG_PAR1); + in2=readreg_specific(in2,isize2,REG_PAR2); + r=readreg(r,4); + prepare_for_call_1(); /* This should ensure that there won't be + any need for swapping nregs in prepare_for_call_2 + */ +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(in2); + raw_push_l_r(in1); +#endif + unlock2(r); + unlock2(in1); + unlock2(in2); + prepare_for_call_2(); + raw_call_r(r); +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(8); +#endif +} +MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2)) + +/* forget_about() takes a mid-layer register */ +MIDFUNC(1,forget_about,(W4 r)) +{ + if (isinreg(r)) + disassociate(r); + live.state[r].val=0; + set_status(r,UNDEF); +} +MENDFUNC(1,forget_about,(W4 r)) + +MIDFUNC(0,nop,(void)) +{ + raw_nop(); +} +MENDFUNC(0,nop,(void)) + + 
+MIDFUNC(1,f_forget_about,(FW r)) +{ + if (f_isinreg(r)) + f_disassociate(r); + live.fate[r].status=UNDEF; +} +MENDFUNC(1,f_forget_about,(FW r)) + +MIDFUNC(1,fmov_pi,(FW r)) +{ + r=f_writereg(r); + raw_fmov_pi(r); + f_unlock(r); +} +MENDFUNC(1,fmov_pi,(FW r)) + +MIDFUNC(1,fmov_log10_2,(FW r)) +{ + r=f_writereg(r); + raw_fmov_log10_2(r); + f_unlock(r); +} +MENDFUNC(1,fmov_log10_2,(FW r)) + +MIDFUNC(1,fmov_log2_e,(FW r)) +{ + r=f_writereg(r); + raw_fmov_log2_e(r); + f_unlock(r); +} +MENDFUNC(1,fmov_log2_e,(FW r)) + +MIDFUNC(1,fmov_loge_2,(FW r)) +{ + r=f_writereg(r); + raw_fmov_loge_2(r); + f_unlock(r); +} +MENDFUNC(1,fmov_loge_2,(FW r)) + +MIDFUNC(1,fmov_1,(FW r)) +{ + r=f_writereg(r); + raw_fmov_1(r); + f_unlock(r); +} +MENDFUNC(1,fmov_1,(FW r)) + +MIDFUNC(1,fmov_0,(FW r)) +{ + r=f_writereg(r); + raw_fmov_0(r); + f_unlock(r); +} +MENDFUNC(1,fmov_0,(FW r)) + +MIDFUNC(2,fmov_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmov_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmov_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovi_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmovi_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmovi_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovi_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmovi_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmovi_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmovs_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmovs_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmovs_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmovs_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmovs_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmovs_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmov_ext_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmov_mr,(MEMW m, FR r)) +{ + r=f_readreg(r); + raw_fmov_mr(m,r); + f_unlock(r); +} +MENDFUNC(2,fmov_mr,(MEMW m, FR r)) + +MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) +{ + r=f_writereg(r); + raw_fmov_ext_rm(r,m); + f_unlock(r); +} +MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m)) + +MIDFUNC(2,fmov_rr,(FW d, FR s)) 
+{ + if (d==s) { /* How pointless! */ + return; + } +#if USE_F_ALIAS + f_disassociate(d); + s=f_readreg(s); + live.fate[d].realreg=s; + live.fate[d].realind=live.fat[s].nholds; + live.fate[d].status=DIRTY; + live.fat[s].holds[live.fat[s].nholds]=d; + live.fat[s].nholds++; + f_unlock(s); +#else + s=f_readreg(s); + d=f_writereg(d); + raw_fmov_rr(d,s); + f_unlock(s); + f_unlock(d); +#endif +} +MENDFUNC(2,fmov_rr,(FW d, FR s)) + +MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base)) +{ + index=readreg(index,4); + + raw_fldcw_m_indexed(index,base); + unlock2(index); +} +MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base)) + +MIDFUNC(1,ftst_r,(FR r)) +{ + r=f_readreg(r); + raw_ftst_r(r); + f_unlock(r); +} +MENDFUNC(1,ftst_r,(FR r)) + +MIDFUNC(0,dont_care_fflags,(void)) +{ + f_disassociate(FP_RESULT); +} +MENDFUNC(0,dont_care_fflags,(void)) + +MIDFUNC(2,fsqrt_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsqrt_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsqrt_rr,(FW d, FR s)) + +MIDFUNC(2,fabs_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fabs_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fabs_rr,(FW d, FR s)) + +MIDFUNC(2,fsin_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fsin_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsin_rr,(FW d, FR s)) + +MIDFUNC(2,fcos_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fcos_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fcos_rr,(FW d, FR s)) + +MIDFUNC(2,ftwotox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_ftwotox_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,ftwotox_rr,(FW d, FR s)) + +MIDFUNC(2,fetox_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fetox_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fetox_rr,(FW d, FR s)) + +MIDFUNC(2,frndint_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_frndint_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frndint_rr,(FW d, FR s)) + 
+MIDFUNC(2,flog2_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_flog2_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,flog2_rr,(FW d, FR s)) + +MIDFUNC(2,fneg_rr,(FW d, FR s)) +{ + s=f_readreg(s); + d=f_writereg(d); + raw_fneg_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fneg_rr,(FW d, FR s)) + +MIDFUNC(2,fadd_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fadd_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fadd_rr,(FRW d, FR s)) + +MIDFUNC(2,fsub_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fsub_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fsub_rr,(FRW d, FR s)) + +MIDFUNC(2,fcmp_rr,(FR d, FR s)) +{ + d=f_readreg(d); + s=f_readreg(s); + raw_fcmp_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fcmp_rr,(FR d, FR s)) + +MIDFUNC(2,fdiv_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fdiv_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fdiv_rr,(FRW d, FR s)) + +MIDFUNC(2,frem_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_frem_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frem_rr,(FRW d, FR s)) + +MIDFUNC(2,frem1_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_frem1_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,frem1_rr,(FRW d, FR s)) + +MIDFUNC(2,fmul_rr,(FRW d, FR s)) +{ + s=f_readreg(s); + d=f_rmw(d); + raw_fmul_rr(d,s); + f_unlock(s); + f_unlock(d); +} +MENDFUNC(2,fmul_rr,(FRW d, FR s)) + +/******************************************************************** + * Support functions exposed to gencomp. 
CREATE time * + ********************************************************************/ + +void set_zero(int r, int tmp) +{ + if (setzflg_uses_bsf) + bsf_l_rr(r,r); + else + simulate_bsf(tmp,r); +} + +int kill_rodent(int r) +{ + return KILLTHERAT && + have_rat_stall && + (live.state[r].status==INMEM || + live.state[r].status==CLEAN || + live.state[r].status==ISCONST || + live.state[r].dirtysize==4); +} + +uae_u32 get_const(int r) +{ + Dif (!isconst(r)) { + write_log("Register %d should be constant, but isn't\n",r); + abort(); + } + return live.state[r].val; +} + +void sync_m68k_pc(void) +{ + if (m68k_pc_offset) { + add_l_ri(PC_P,m68k_pc_offset); + comp_pc_p+=m68k_pc_offset; + m68k_pc_offset=0; + } +} + +/******************************************************************** + * Scratch registers management * + ********************************************************************/ + +struct scratch_t { + uae_u32 regs[VREGS]; + fpu_register fregs[VFREGS]; +}; + +static scratch_t scratch; + +/******************************************************************** + * Support functions exposed to newcpu * + ********************************************************************/ + +static inline const char *str_on_off(bool b) +{ + return b ? "on" : "off"; +} + +void compiler_init(void) +{ + static bool initialized = false; + if (initialized) + return; + +#if JIT_DEBUG + // JIT debug mode ? + JITDebug = PrefsFindBool("jitdebug"); +#endif + write_log(" : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no"); + +#ifdef USE_JIT_FPU + // Use JIT compiler for FPU instructions ? + avoid_fpu = !PrefsFindBool("jitfpu"); +#else + // JIT FPU is always disabled + avoid_fpu = true; +#endif + write_log(" : compile FPU instructions : %s\n", !avoid_fpu ? 
"yes" : "no"); + + // Get size of the translation cache (in KB) + cache_size = PrefsFindInt32("jitcachesize"); + write_log(" : requested translation cache size : %d KB\n", cache_size); + + // Initialize target CPU (check for features, e.g. CMOV, rat stalls) + raw_init_cpu(); + setzflg_uses_bsf = target_check_bsf(); + write_log(" : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no"); + write_log(" : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no"); + write_log(" : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps); + + // Translation cache flush mechanism + lazy_flush = PrefsFindBool("jitlazyflush"); + write_log(" : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush)); + flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard; + + // Compiler features + write_log(" : register aliasing : %s\n", str_on_off(1)); + write_log(" : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS)); + write_log(" : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET)); +#if USE_INLINING + follow_const_jumps = PrefsFindBool("jitinline"); +#endif + write_log(" : translate through constant jumps : %s\n", str_on_off(follow_const_jumps)); + write_log(" : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA)); + + // Build compiler tables + build_comp(); + + initialized = true; + +#if PROFILE_UNTRANSLATED_INSNS + write_log(" : gather statistics on untranslated insns count\n"); +#endif + +#if PROFILE_COMPILE_TIME + write_log(" : gather statistics on translation time\n"); + emul_start_time = clock(); +#endif +} + +void compiler_exit(void) +{ +#if PROFILE_COMPILE_TIME + emul_end_time = clock(); +#endif + + // Deallocate translation cache + if (compiled_code) { + vm_release(compiled_code, cache_size * 1024); + compiled_code = 0; + } + + // Deallocate popallspace + if (popallspace) { + vm_release(popallspace, POPALLSPACE_SIZE); + popallspace = 0; + } + +#if 
PROFILE_COMPILE_TIME + write_log("### Compile Block statistics\n"); + write_log("Number of calls to compile_block : %d\n", compile_count); + uae_u32 emul_time = emul_end_time - emul_start_time; + write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC)); + write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC), + 100.0*double(compile_time)/double(emul_time)); + write_log("\n"); +#endif + +#if PROFILE_UNTRANSLATED_INSNS + uae_u64 untranslated_count = 0; + for (int i = 0; i < 65536; i++) { + opcode_nums[i] = i; + untranslated_count += raw_cputbl_count[i]; + } + write_log("Sorting out untranslated instructions count...\n"); + qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn); + write_log("\nRank Opc Count Name\n"); + for (int i = 0; i < untranslated_top_ten; i++) { + uae_u32 count = raw_cputbl_count[opcode_nums[i]]; + struct instr *dp; + struct mnemolookup *lookup; + if (!count) + break; + dp = table68k + opcode_nums[i]; + for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++) + ; + write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name); + } +#endif + +#if RECORD_REGISTER_USAGE + int reg_count_ids[16]; + uint64 tot_reg_count = 0; + for (int i = 0; i < 16; i++) { + reg_count_ids[i] = i; + tot_reg_count += reg_count[i]; + } + qsort(reg_count_ids, 16, sizeof(int), reg_count_compare); + uint64 cum_reg_count = 0; + for (int i = 0; i < 16; i++) { + int r = reg_count_ids[i]; + cum_reg_count += reg_count[r]; + printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 
'D' : 'A', r % 8, + reg_count[r], + 100.0*double(reg_count[r])/double(tot_reg_count), + 100.0*double(cum_reg_count)/double(tot_reg_count)); + } +#endif +} + +bool compiler_use_jit(void) +{ + // Check for the "jit" prefs item + if (!PrefsFindBool("jit")) + return false; + + // Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB + if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) { + write_log(" : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE); + return false; + } + + // Enable JIT for 68020+ emulation only + if (CPUType < 2) { + write_log(" : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType); + return false; + } + + return true; +} + +void init_comp(void) +{ + int i; + uae_s8* cb=can_byte; + uae_s8* cw=can_word; + uae_s8* au=always_used; + +#if RECORD_REGISTER_USAGE + for (i=0;i<16;i++) + reg_count_local[i] = 0; +#endif + + for (i=0;i= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize)); +} + +static void flush_all(void) +{ + int i; + + log_flush(); + for (i=0;i0) + free_nreg(i); + + for (i=0;i0) + f_free_nreg(i); + + live.flags_in_flags=TRASH; /* Note: We assume we already rescued the + flags at the very start of the call_r + functions! 
*/ +} + +/******************************************************************** + * Memory access and related functions, CREATE time * + ********************************************************************/ + +void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond) +{ + next_pc_p=not_taken; + taken_pc_p=taken; + branch_cc=cond; +} + + +static uae_u32 get_handler_address(uae_u32 addr) +{ + uae_u32 cl=cacheline(addr); + blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0); + return (uintptr)&(bi->direct_handler_to_use); +} + +static uae_u32 get_handler(uae_u32 addr) +{ + uae_u32 cl=cacheline(addr); + blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0); + return (uintptr)bi->direct_handler_to_use; +} + +static void load_handler(int reg, uae_u32 addr) +{ + mov_l_rm(reg,get_handler_address(addr)); +} + +/* This version assumes that it is writing *real* memory, and *will* fail + * if that assumption is wrong! No branches, no second chances, just + * straight go-for-it attitude */ + +static void writemem_real(int address, int source, int size, int tmp, int clobber) +{ + int f=tmp; + + if (clobber) + f=source; + + switch(size) { + case 1: mov_b_bRr(address,source,MEMBaseDiff); break; + case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break; + case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break; + } + forget_about(tmp); + forget_about(f); +} + +void writebyte(int address, int source, int tmp) +{ + writemem_real(address,source,1,tmp,0); +} + +static __inline__ void writeword_general(int address, int source, int tmp, + int clobber) +{ + writemem_real(address,source,2,tmp,clobber); +} + +void writeword_clobber(int address, int source, int tmp) +{ + writeword_general(address,source,tmp,1); +} + +void writeword(int address, int source, int tmp) +{ + writeword_general(address,source,tmp,0); +} + +static __inline__ void writelong_general(int address, int source, int tmp, + int clobber) +{ + 
writemem_real(address,source,4,tmp,clobber); +} + +void writelong_clobber(int address, int source, int tmp) +{ + writelong_general(address,source,tmp,1); +} + +void writelong(int address, int source, int tmp) +{ + writelong_general(address,source,tmp,0); +} + + + +/* This version assumes that it is reading *real* memory, and *will* fail + * if that assumption is wrong! No branches, no second chances, just + * straight go-for-it attitude */ + +static void readmem_real(int address, int dest, int size, int tmp) +{ + int f=tmp; + + if (size==4 && address!=dest) + f=dest; + + switch(size) { + case 1: mov_b_brR(dest,address,MEMBaseDiff); break; + case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break; + case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break; + } + forget_about(tmp); +} + +void readbyte(int address, int dest, int tmp) +{ + readmem_real(address,dest,1,tmp); +} + +void readword(int address, int dest, int tmp) +{ + readmem_real(address,dest,2,tmp); +} + +void readlong(int address, int dest, int tmp) +{ + readmem_real(address,dest,4,tmp); +} + +void get_n_addr(int address, int dest, int tmp) +{ + // a is the register containing the virtual address + // after the offset had been fetched + int a=tmp; + + // f is the register that will contain the offset + int f=tmp; + + // a == f == tmp if (address == dest) + if (address!=dest) { + a=address; + f=dest; + } + +#if REAL_ADDRESSING + mov_l_rr(dest, address); +#elif DIRECT_ADDRESSING + lea_l_brr(dest,address,MEMBaseDiff); +#endif + forget_about(tmp); +} + +void get_n_addr_jmp(int address, int dest, int tmp) +{ + /* For this, we need to get the same address as the rest of UAE + would --- otherwise we end up translating everything twice */ + get_n_addr(address,dest,tmp); +} + + +/* base is a register, but dp is an actual value. 
+ target is a register, as is tmp */ +void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp) +{ + int reg = (dp >> 12) & 15; + int regd_shift=(dp >> 9) & 3; + + if (dp & 0x100) { + int ignorebase=(dp&0x80); + int ignorereg=(dp&0x40); + int addbase=0; + int outer=0; + + if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4); + + if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); + if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4); + + if ((dp & 0x4) == 0) { /* add regd *before* the get_long */ + if (!ignorereg) { + if ((dp & 0x800) == 0) + sign_extend_16_rr(target,reg); + else + mov_l_rr(target,reg); + shll_l_ri(target,regd_shift); + } + else + mov_l_ri(target,0); + + /* target is now regd */ + if (!ignorebase) + add_l(target,base); + add_l_ri(target,addbase); + if (dp&0x03) readlong(target,target,tmp); + } else { /* do the getlong first, then add regd */ + if (!ignorebase) { + mov_l_rr(target,base); + add_l_ri(target,addbase); + } + else + mov_l_ri(target,addbase); + if (dp&0x03) readlong(target,target,tmp); + + if (!ignorereg) { + if ((dp & 0x800) == 0) + sign_extend_16_rr(tmp,reg); + else + mov_l_rr(tmp,reg); + shll_l_ri(tmp,regd_shift); + /* tmp is now regd */ + add_l(target,tmp); + } + } + add_l_ri(target,outer); + } + else { /* 68000 version */ + if ((dp & 0x800) == 0) { /* Sign extend */ + sign_extend_16_rr(target,reg); + lea_l_brr_indexed(target,base,target,1<= CODE_ALLOC_MAX_ATTEMPTS) + return NULL; + + return do_alloc_code(size, depth + 1); +#else + uint8 *code = (uint8 *)vm_acquire(size); + return code == VM_MAP_FAILED ? 
NULL : code; +#endif +} + +static inline uint8 *alloc_code(uint32 size) +{ + uint8 *ptr = do_alloc_code(size, 0); + /* allocated code must fit in 32-bit boundaries */ + assert((uintptr)ptr <= 0xffffffff); + return ptr; +} + +void alloc_cache(void) +{ + if (compiled_code) { + flush_icache_hard(6); + vm_release(compiled_code, cache_size * 1024); + compiled_code = 0; + } + + if (cache_size == 0) + return; + + while (!compiled_code && cache_size) { + if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) { + compiled_code = 0; + cache_size /= 2; + } + } + vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE); + + if (compiled_code) { + write_log(" : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code); + max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST; + current_compile_p = compiled_code; + current_cache_size = 0; + } +} + + + +extern void op_illg_1 (uae_u32 opcode) REGPARAM; + +static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2) +{ + uae_u32 k1 = 0; + uae_u32 k2 = 0; + +#if USE_CHECKSUM_INFO + checksum_info *csi = bi->csi; + Dif(!csi) abort(); + while (csi) { + uae_s32 len = csi->length; + uintptr tmp = (uintptr)csi->start_p; +#else + uae_s32 len = bi->len; + uintptr tmp = (uintptr)bi->min_pcp; +#endif + uae_u32*pos; + + len += (tmp & 3); + tmp &= ~((uintptr)3); + pos = (uae_u32 *)tmp; + + if (len >= 0 && len <= MAX_CHECKSUM_LEN) { + while (len > 0) { + k1 += *pos; + k2 ^= *pos; + pos++; + len -= 4; + } + } + +#if USE_CHECKSUM_INFO + csi = csi->next; + } +#endif + + *c1 = k1; + *c2 = k2; +} + +#if 0 +static void show_checksum(CSI_TYPE* csi) +{ + uae_u32 k1=0; + uae_u32 k2=0; + uae_s32 len=CSI_LENGTH(csi); + uae_u32 tmp=(uintptr)CSI_START_P(csi); + uae_u32* pos; + + len+=(tmp&3); + tmp&=(~3); + pos=(uae_u32*)tmp; + + if (len<0 || len>MAX_CHECKSUM_LEN) { + return; + } + else { + while (len>0) { + write_log("%08x ",*pos); + pos++; + len-=4; + } + write_log(" 
bla\n"); + } +} +#endif + + +int check_for_cache_miss(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + + if (bi) { + int cl=cacheline(regs.pc_p); + if (bi!=cache_tags[cl+1].bi) { + raise_in_cl_list(bi); + return 1; + } + } + return 0; +} + + +static void recompile_block(void) +{ + /* An existing block's countdown code has expired. We need to make + sure that execute_normal doesn't refuse to recompile due to a + perceived cache miss... */ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + + Dif (!bi) + abort(); + raise_in_cl_list(bi); + execute_normal(); + return; +} +static void cache_miss(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + uae_u32 cl=cacheline(regs.pc_p); + blockinfo* bi2=get_blockinfo(cl); + + if (!bi) { + execute_normal(); /* Compile this block now */ + return; + } + Dif (!bi2 || bi==bi2) { + write_log("Unexplained cache miss %p %p\n",bi,bi2); + abort(); + } + raise_in_cl_list(bi); + return; +} + +static int called_check_checksum(blockinfo* bi); + +static inline int block_check_checksum(blockinfo* bi) +{ + uae_u32 c1,c2; + bool isgood; + + if (bi->status!=BI_NEED_CHECK) + return 1; /* This block is in a checked state */ + + checksum_count++; + + if (bi->c1 || bi->c2) + calc_checksum(bi,&c1,&c2); + else { + c1=c2=1; /* Make sure it doesn't match */ + } + + isgood=(c1==bi->c1 && c2==bi->c2); + + if (isgood) { + /* This block is still OK. So we reactivate. Of course, that + means we have to move it into the needs-to-be-flushed list */ + bi->handler_to_use=bi->handler; + set_dhtu(bi,bi->direct_handler); + bi->status=BI_CHECKING; + isgood=called_check_checksum(bi); + } + if (isgood) { + /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p, + c1,c2,bi->c1,bi->c2);*/ + remove_from_list(bi); + add_to_active(bi); + raise_in_cl_list(bi); + bi->status=BI_ACTIVE; + } + else { + /* This block actually changed. 
We need to invalidate it, + and set it up to be recompiled */ + /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p, + c1,c2,bi->c1,bi->c2); */ + invalidate_block(bi); + raise_in_cl_list(bi); + } + return isgood; +} + +static int called_check_checksum(blockinfo* bi) +{ + dependency* x=bi->deplist; + int isgood=1; + int i; + + for (i=0;i<2 && isgood;i++) { + if (bi->dep[i].jmp_off) { + isgood=block_check_checksum(bi->dep[i].target); + } + } + return isgood; +} + +static void check_checksum(void) +{ + blockinfo* bi=get_blockinfo_addr(regs.pc_p); + uae_u32 cl=cacheline(regs.pc_p); + blockinfo* bi2=get_blockinfo(cl); + + /* These are not the droids you are looking for... */ + if (!bi) { + /* Whoever is the primary target is in a dormant state, but + calling it was accidental, and we should just compile this + new block */ + execute_normal(); + return; + } + if (bi!=bi2) { + /* The block was hit accidentally, but it does exist. Cache miss */ + cache_miss(); + return; + } + + if (!block_check_checksum(bi)) + execute_normal(); +} + +static __inline__ void match_states(blockinfo* bi) +{ + int i; + smallstate* s=&(bi->env); + + if (bi->status==BI_NEED_CHECK) { + block_check_checksum(bi); + } + if (bi->status==BI_ACTIVE || + bi->status==BI_FINALIZING) { /* Deal with the *promises* the + block makes (about not using + certain vregs) */ + for (i=0;i<16;i++) { + if (s->virt[i]==L_UNNEEDED) { + // write_log("unneeded reg %d at %p\n",i,target); + COMPCALL(forget_about)(i); // FIXME + } + } + } + flush(1); + + /* And now deal with the *demands* the block makes */ + for (i=0;inat[i]; + if (v>=0) { + // printf("Loading reg %d into %d at %p\n",v,i,target); + readreg_specific(v,4,i); + // do_load_reg(i,v); + // setlock(i); + } + } + for (i=0;inat[i]; + if (v>=0) { + unlock2(i); + } + } +} + +static __inline__ void create_popalls(void) +{ + int i,r; + + if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) { + write_log("FATAL: Could not allocate popallspace!\n"); + abort(); + 
} + vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE); + + int stack_space = STACK_OFFSET; + for (i=0;idirect_pen=(cpuop_func *)get_target(); + raw_mov_l_rm(0,(uintptr)&(bi->pc_p)); + raw_mov_l_mr((uintptr)®s.pc_p,0); + raw_jmp((uintptr)popall_execute_normal); + + align_target(align_jumps); + bi->direct_pcc=(cpuop_func *)get_target(); + raw_mov_l_rm(0,(uintptr)&(bi->pc_p)); + raw_mov_l_mr((uintptr)®s.pc_p,0); + raw_jmp((uintptr)popall_check_checksum); + current_compile_p=get_target(); + + bi->deplist=NULL; + for (i=0;i<2;i++) { + bi->dep[i].prev_p=NULL; + bi->dep[i].next=NULL; + } + bi->env=default_ss; + bi->status=BI_INVALID; + bi->havestate=0; + //bi->env=empty_ss; +} + +// OPCODE is in big endian format, use cft_map() beforehand, if needed. +static inline void reset_compop(int opcode) +{ + compfunctbl[opcode] = NULL; + nfcompfunctbl[opcode] = NULL; +} + +static int read_opcode(const char *p) +{ + int opcode = 0; + for (int i = 0; i < 4; i++) { + int op = p[i]; + switch (op) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + opcode = (opcode << 4) | (op - '0'); + break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + opcode = (opcode << 4) | ((op - 'a') + 10); + break; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + opcode = (opcode << 4) | ((op - 'A') + 10); + break; + default: + return -1; + } + } + return opcode; +} + +static bool merge_blacklist() +{ + const char *blacklist = PrefsFindString("jitblacklist"); + if (blacklist) { + const char *p = blacklist; + for (;;) { + if (*p == 0) + return true; + + int opcode1 = read_opcode(p); + if (opcode1 < 0) + return false; + p += 4; + + int opcode2 = opcode1; + if (*p == '-') { + p++; + opcode2 = read_opcode(p); + if (opcode2 < 0) + return false; + p += 4; + } + + if (*p == 0 || *p == ',' || *p == ';') { + write_log(" : blacklist opcodes : %04x-%04x\n", opcode1, opcode2); + for (int opcode = opcode1; opcode 
<= opcode2; opcode++) + reset_compop(cft_map(opcode)); + + if (*p == ',' || *p++ == ';') + continue; + + return true; + } + + return false; + } + } + return true; +} + +void build_comp(void) +{ + int i; + int jumpcount=0; + unsigned long opcode; + struct comptbl* tbl=op_smalltbl_0_comp_ff; + struct comptbl* nftbl=op_smalltbl_0_comp_nf; + int count; + int cpu_level = 0; // 68000 (default) + if (CPUType == 4) + cpu_level = 4; // 68040 with FPU + else { + if (FPUType) + cpu_level = 3; // 68020 with FPU + else if (CPUType >= 2) + cpu_level = 2; // 68020 + else if (CPUType == 1) + cpu_level = 1; + } + struct cputbl *nfctbl = ( + cpu_level == 4 ? op_smalltbl_0_nf + : cpu_level == 3 ? op_smalltbl_1_nf + : cpu_level == 2 ? op_smalltbl_2_nf + : cpu_level == 1 ? op_smalltbl_3_nf + : op_smalltbl_4_nf); + + write_log (" : building compiler function tables\n"); + + for (opcode = 0; opcode < 65536; opcode++) { + reset_compop(opcode); + nfcpufunctbl[opcode] = op_illg_1; + prop[opcode].use_flags = 0x1f; + prop[opcode].set_flags = 0x1f; + prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap + } + + for (i = 0; tbl[i].opcode < 65536; i++) { + int cflow = table68k[tbl[i].opcode].cflow; + if (follow_const_jumps && (tbl[i].specific & 16)) + cflow = fl_const_jump; + else + cflow &= ~fl_const_jump; + prop[cft_map(tbl[i].opcode)].cflow = cflow; + + int uses_fpu = tbl[i].specific & 32; + if (uses_fpu && avoid_fpu) + compfunctbl[cft_map(tbl[i].opcode)] = NULL; + else + compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler; + } + + for (i = 0; nftbl[i].opcode < 65536; i++) { + int uses_fpu = tbl[i].specific & 32; + if (uses_fpu && avoid_fpu) + nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL; + else + nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler; + + nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler; + } + + for (i = 0; nfctbl[i].handler; i++) { + nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler; + } + + for (opcode = 0; opcode < 65536; opcode++) { + 
compop_func *f; + compop_func *nff; + cpuop_func *nfcf; + int isaddx,cflow; + + if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level) + continue; + + if (table68k[opcode].handler != -1) { + f = compfunctbl[cft_map(table68k[opcode].handler)]; + nff = nfcompfunctbl[cft_map(table68k[opcode].handler)]; + nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)]; + cflow = prop[cft_map(table68k[opcode].handler)].cflow; + isaddx = prop[cft_map(table68k[opcode].handler)].is_addx; + prop[cft_map(opcode)].cflow = cflow; + prop[cft_map(opcode)].is_addx = isaddx; + compfunctbl[cft_map(opcode)] = f; + nfcompfunctbl[cft_map(opcode)] = nff; + Dif (nfcf == op_illg_1) + abort(); + nfcpufunctbl[cft_map(opcode)] = nfcf; + } + prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead; + prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive; + /* Unconditional jumps don't evaluate condition codes, so they + * don't actually use any flags themselves */ + if (prop[cft_map(opcode)].cflow & fl_const_jump) + prop[cft_map(opcode)].use_flags = 0; + } + for (i = 0; nfctbl[i].handler != NULL; i++) { + if (nfctbl[i].specific) + nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler; + } + + /* Merge in blacklist */ + if (!merge_blacklist()) + write_log(" : blacklist merge failure!\n"); + + count=0; + for (opcode = 0; opcode < 65536; opcode++) { + if (compfunctbl[cft_map(opcode)]) + count++; + } + write_log(" : supposedly %d compileable opcodes!\n",count); + + /* Initialise state */ + create_popalls(); + alloc_cache(); + reset_lists(); + + for (i=0;ipc_p)].handler=(cpuop_func *)popall_execute_normal; + cache_tags[cacheline(bi->pc_p)+1].bi=NULL; + dbi=bi; bi=bi->next; + free_blockinfo(dbi); + } + bi=dormant; + while(bi) { + cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal; + cache_tags[cacheline(bi->pc_p)+1].bi=NULL; + dbi=bi; bi=bi->next; + free_blockinfo(dbi); + } + + reset_lists(); + if (!compiled_code) + return; + 
current_compile_p=compiled_code; + SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */ +} + + +/* "Soft flushing" --- instead of actually throwing everything away, + we simply mark everything as "needs to be checked". +*/ + +static inline void flush_icache_lazy(int n) +{ + uae_u32 i; + blockinfo* bi; + blockinfo* bi2; + + soft_flush_count++; + if (!active) + return; + + bi=active; + while (bi) { + uae_u32 cl=cacheline(bi->pc_p); + if (bi->status==BI_INVALID || + bi->status==BI_NEED_RECOMP) { + if (bi==cache_tags[cl+1].bi) + cache_tags[cl].handler=(cpuop_func *)popall_execute_normal; + bi->handler_to_use=(cpuop_func *)popall_execute_normal; + set_dhtu(bi,bi->direct_pen); + bi->status=BI_INVALID; + } + else { + if (bi==cache_tags[cl+1].bi) + cache_tags[cl].handler=(cpuop_func *)popall_check_checksum; + bi->handler_to_use=(cpuop_func *)popall_check_checksum; + set_dhtu(bi,bi->direct_pcc); + bi->status=BI_NEED_CHECK; + } + bi2=bi; + bi=bi->next; + } + /* bi2 is now the last entry in the active list */ + bi2->next=dormant; + if (dormant) + dormant->prev_p=&(bi2->next); + + dormant=active; + active->prev_p=&dormant; + active=NULL; +} + +void flush_icache_range(uae_u8 *start_p, uae_u32 length) +{ + if (!active) + return; + +#if LAZY_FLUSH_ICACHE_RANGE + blockinfo *bi = active; + while (bi) { +#if USE_CHECKSUM_INFO + bool candidate = false; + for (checksum_info *csi = bi->csi; csi; csi = csi->next) { + if (((start_p - csi->start_p) < csi->length) || + ((csi->start_p - start_p) < length)) { + candidate = true; + break; + } + } +#else + // Assume system is consistent and would invalidate the right range + const bool candidate = (bi->pc_p - start_p) < length; +#endif + blockinfo *dbi = bi; + bi = bi->next; + if (candidate) { + uae_u32 cl = cacheline(dbi->pc_p); + if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) { + if (dbi == cache_tags[cl+1].bi) + cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; + dbi->handler_to_use = 
(cpuop_func *)popall_execute_normal; + set_dhtu(dbi, dbi->direct_pen); + dbi->status = BI_INVALID; + } + else { + if (dbi == cache_tags[cl+1].bi) + cache_tags[cl].handler = (cpuop_func *)popall_check_checksum; + dbi->handler_to_use = (cpuop_func *)popall_check_checksum; + set_dhtu(dbi, dbi->direct_pcc); + dbi->status = BI_NEED_CHECK; + } + remove_from_list(dbi); + add_to_dormant(dbi); + } + } + return; +#endif + flush_icache(-1); +} + +static void catastrophe(void) +{ + abort(); +} + +int failure; + +#define TARGET_M68K 0 +#define TARGET_POWERPC 1 +#define TARGET_X86 2 +#define TARGET_X86_64 3 +#if defined(i386) || defined(__i386__) +#define TARGET_NATIVE TARGET_X86 +#endif +#if defined(powerpc) || defined(__powerpc__) +#define TARGET_NATIVE TARGET_POWERPC +#endif +#if defined(x86_64) || defined(__x86_64__) +#define TARGET_NATIVE TARGET_X86_64 +#endif + +#ifdef ENABLE_MON +static uae_u32 mon_read_byte_jit(uintptr addr) +{ + uae_u8 *m = (uae_u8 *)addr; + return (uintptr)(*m); +} + +static void mon_write_byte_jit(uintptr addr, uae_u32 b) +{ + uae_u8 *m = (uae_u8 *)addr; + *m = b; +} +#endif + +void disasm_block(int target, uint8 * start, size_t length) +{ + if (!JITDebug) + return; + +#if defined(JIT_DEBUG) && defined(ENABLE_MON) + char disasm_str[200]; + sprintf(disasm_str, "%s $%x $%x", + target == TARGET_M68K ? "d68" : + target == TARGET_X86 ? "d86" : + target == TARGET_X86_64 ? "d8664" : + target == TARGET_POWERPC ? 
"d" : "x", + start, start + length - 1); + + uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte; + void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte; + + mon_read_byte = mon_read_byte_jit; + mon_write_byte = mon_write_byte_jit; + + char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL}; + mon(4, arg); + + mon_read_byte = old_mon_read_byte; + mon_write_byte = old_mon_write_byte; +#endif +} + +static void disasm_native_block(uint8 *start, size_t length) +{ + disasm_block(TARGET_NATIVE, start, length); +} + +static void disasm_m68k_block(uint8 *start, size_t length) +{ + disasm_block(TARGET_M68K, start, length); +} + +#ifdef HAVE_GET_WORD_UNSWAPPED +# define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a))) +#else +# define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a))) +#endif + +#if JIT_DEBUG +static uae_u8 *last_regs_pc_p = 0; +static uae_u8 *last_compiled_block_addr = 0; + +void compiler_dumpstate(void) +{ + if (!JITDebug) + return; + + write_log("### Host addresses\n"); + write_log("MEM_BASE : %x\n", MEMBaseDiff); + write_log("PC_P : %p\n", ®s.pc_p); + write_log("SPCFLAGS : %p\n", ®s.spcflags); + write_log("D0-D7 : %p-%p\n", ®s.regs[0], ®s.regs[7]); + write_log("A0-A7 : %p-%p\n", ®s.regs[8], ®s.regs[15]); + write_log("\n"); + + write_log("### M68k processor state\n"); + m68k_dumpstate(0); + write_log("\n"); + + write_log("### Block in Mac address space\n"); + write_log("M68K block : %p\n", + (void *)(uintptr)get_virtual_address(last_regs_pc_p)); + write_log("Native block : %p (%d bytes)\n", + (void *)(uintptr)get_virtual_address(last_compiled_block_addr), + get_blockinfo_addr(last_regs_pc_p)->direct_handler_size); + write_log("\n"); +} +#endif + +static void compile_block(cpu_history* pc_hist, int blocklen) +{ + if (letit && compiled_code) { +#if PROFILE_COMPILE_TIME + compile_count++; + clock_t start_time = clock(); +#endif +#if JIT_DEBUG + bool disasm_block = false; +#endif + + /* OK, here we need to 'compile' a block */ + int i; + 
int r; + int was_comp=0; + uae_u8 liveflags[MAXRUN+1]; +#if USE_CHECKSUM_INFO + bool trace_in_rom = isinrom((uintptr)pc_hist[0].location); + uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location; + uintptr min_pcp=max_pcp; +#else + uintptr max_pcp=(uintptr)pc_hist[0].location; + uintptr min_pcp=max_pcp; +#endif + uae_u32 cl=cacheline(pc_hist[0].location); + void* specflags=(void*)®s.spcflags; + blockinfo* bi=NULL; + blockinfo* bi2; + int extra_len=0; + + redo_current_block=0; + if (current_compile_p>=max_compile_start) + flush_icache_hard(7); + + alloc_blockinfos(); + + bi=get_blockinfo_addr_new(pc_hist[0].location,0); + bi2=get_blockinfo(cl); + + optlev=bi->optlevel; + if (bi->status!=BI_INVALID) { + Dif (bi!=bi2) { + /* I don't think it can happen anymore. Shouldn't, in + any case. So let's make sure... */ + write_log("WOOOWOO count=%d, ol=%d %p %p\n", + bi->count,bi->optlevel,bi->handler_to_use, + cache_tags[cl].handler); + abort(); + } + + Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) { + write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status); + /* What the heck? We are not supposed to be here! 
*/ + abort(); + } + } + if (bi->count==-1) { + optlev++; + while (!optcount[optlev]) + optlev++; + bi->count=optcount[optlev]-1; + } + current_block_pc_p=(uintptr)pc_hist[0].location; + + remove_deps(bi); /* We are about to create new code */ + bi->optlevel=optlev; + bi->pc_p=(uae_u8*)pc_hist[0].location; +#if USE_CHECKSUM_INFO + free_checksum_info_chain(bi->csi); + bi->csi = NULL; +#endif + + liveflags[blocklen]=0x1f; /* All flags needed afterwards */ + i=blocklen; + while (i--) { + uae_u16* currpcp=pc_hist[i].location; + uae_u32 op=DO_GET_OPCODE(currpcp); + +#if USE_CHECKSUM_INFO + trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp); + if (follow_const_jumps && is_const_jump(op)) { + checksum_info *csi = alloc_checksum_info(); + csi->start_p = (uae_u8 *)min_pcp; + csi->length = max_pcp - min_pcp + LONGEST_68K_INST; + csi->next = bi->csi; + bi->csi = csi; + max_pcp = (uintptr)currpcp; + } + min_pcp = (uintptr)currpcp; +#else + if ((uintptr)currpcpmax_pcp) + max_pcp=(uintptr)currpcp; +#endif + + liveflags[i]=((liveflags[i+1]& + (~prop[op].set_flags))| + prop[op].use_flags); + if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0) + liveflags[i]&= ~FLAG_Z; + } + +#if USE_CHECKSUM_INFO + checksum_info *csi = alloc_checksum_info(); + csi->start_p = (uae_u8 *)min_pcp; + csi->length = max_pcp - min_pcp + LONGEST_68K_INST; + csi->next = bi->csi; + bi->csi = csi; +#endif + + bi->needed_flags=liveflags[0]; + + align_target(align_loops); + was_comp=0; + + bi->direct_handler=(cpuop_func *)get_target(); + set_dhtu(bi,bi->direct_handler); + bi->status=BI_COMPILING; + current_block_start_target=(uintptr)get_target(); + + log_startblock(); + + if (bi->count>=0) { /* Need to generate countdown code */ + raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + raw_sub_l_mi((uintptr)&(bi->count),1); + raw_jl((uintptr)popall_recompile_block); + } + if (optlev==0) { /* No need to actually translate */ + /* Execute normally without keeping stats */ + 
raw_mov_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + raw_jmp((uintptr)popall_exec_nostats); + } + else { + reg_alloc_run=0; + next_pc_p=0; + taken_pc_p=0; + branch_cc=0; + + comp_pc_p=(uae_u8*)pc_hist[0].location; + init_comp(); + was_comp=1; + +#ifdef USE_CPU_EMUL_SERVICES + raw_sub_l_mi((uintptr)&emulated_ticks,blocklen); + raw_jcc_b_oponly(NATIVE_CC_GT); + uae_s8 *branchadd=(uae_s8*)get_target(); + emit_byte(0); + raw_call((uintptr)cpu_do_check_ticks); + *branchadd=(uintptr)get_target()-((uintptr)branchadd+1); +#endif + +#if JIT_DEBUG + if (JITDebug) { + raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location); + raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target); + } +#endif + + for (i=0;i1) { + failure=0; + if (!was_comp) { + comp_pc_p=(uae_u8*)pc_hist[i].location; + init_comp(); + } + was_comp=1; + + comptbl[opcode](opcode); + freescratch(); + if (!(liveflags[i+1] & FLAG_CZNV)) { + /* We can forget about flags */ + dont_care_flags(); + } +#if INDIVIDUAL_INST + flush(1); + nop(); + flush(1); + was_comp=0; +#endif + } + + if (failure) { + if (was_comp) { + flush(1); + was_comp=0; + } + raw_mov_l_ri(REG_PAR1,(uae_u32)opcode); +#if USE_NORMAL_CALLING_CONVENTION + raw_push_l_r(REG_PAR1); +#endif + raw_mov_l_mi((uintptr)®s.pc_p, + (uintptr)pc_hist[i].location); + raw_call((uintptr)cputbl[opcode]); +#if PROFILE_UNTRANSLATED_INSNS + // raw_cputbl_count[] is indexed with plain opcode (in m68k order) + raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1); +#endif +#if USE_NORMAL_CALLING_CONVENTION + raw_inc_sp(4); +#endif + + if (i < blocklen - 1) { + uae_s8* branchadd; + + raw_mov_l_rm(0,(uintptr)specflags); + raw_test_l_rr(0,0); + raw_jz_b_oponly(); + branchadd=(uae_s8 *)get_target(); + emit_byte(0); + raw_jmp((uintptr)popall_do_nothing); + *branchadd=(uintptr)get_target()-(uintptr)branchadd-1; + } + } + } +#if 1 /* This isn't completely kosher yet; It really needs to be + be integrated into a general 
inter-block-dependency scheme */ + if (next_pc_p && taken_pc_p && + was_comp && taken_pc_p==current_block_pc_p) { + blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0); + blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0); + uae_u8 x=bi1->needed_flags; + + if (x==0xff || 1) { /* To be on the safe side */ + uae_u16* next=(uae_u16*)next_pc_p; + uae_u32 op=DO_GET_OPCODE(next); + + x=0x1f; + x&=(~prop[op].set_flags); + x|=prop[op].use_flags; + } + + x|=bi2->needed_flags; + if (!(x & FLAG_CZNV)) { + /* We can forget about flags */ + dont_care_flags(); + extra_len+=2; /* The next instruction now is part of this + block */ + } + + } +#endif + log_flush(); + + if (next_pc_p) { /* A branch was registered */ + uintptr t1=next_pc_p; + uintptr t2=taken_pc_p; + int cc=branch_cc; + + uae_u32* branchadd; + uae_u32* tba; + bigstate tmp; + blockinfo* tbi; + + if (taken_pc_penv))) { + mark_callers_recompile(bi); + } + + big_to_small_state(&live,&(bi->env)); +#endif + +#if USE_CHECKSUM_INFO + remove_from_list(bi); + if (trace_in_rom) { + // No need to checksum that block trace on cache invalidation + free_checksum_info_chain(bi->csi); + bi->csi = NULL; + add_to_dormant(bi); + } + else { + calc_checksum(bi,&(bi->c1),&(bi->c2)); + add_to_active(bi); + } +#else + if (next_pc_p+extra_len>=max_pcp && + next_pc_p+extra_lenlen=max_pcp-min_pcp; + bi->min_pcp=min_pcp; + + remove_from_list(bi); + if (isinrom(min_pcp) && isinrom(max_pcp)) { + add_to_dormant(bi); /* No need to checksum it on cache flush. + Please don't start changing ROMs in + flight! 
*/ + } + else { + calc_checksum(bi,&(bi->c1),&(bi->c2)); + add_to_active(bi); + } +#endif + + current_cache_size += get_target() - (uae_u8 *)current_compile_p; + +#if JIT_DEBUG + if (JITDebug) + bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target; + + if (JITDebug && disasm_block) { + uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p); + D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen)); + uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1; + disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size); + D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location)); + disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size); + getchar(); + } +#endif + + log_dump(); + align_target(align_jumps); + + /* This is the non-direct handler */ + bi->handler= + bi->handler_to_use=(cpuop_func *)get_target(); + raw_cmp_l_mi((uintptr)®s.pc_p,(uintptr)pc_hist[0].location); + raw_jnz((uintptr)popall_cache_miss); + comp_pc_p=(uae_u8*)pc_hist[0].location; + + bi->status=BI_FINALIZING; + init_comp(); + match_states(bi); + flush(1); + + raw_jmp((uintptr)bi->direct_handler); + + current_compile_p=get_target(); + raise_in_cl_list(bi); + + /* We will flush soon, anyway, so let's do it now */ + if (current_compile_p>=max_compile_start) + flush_icache_hard(7); + + bi->status=BI_ACTIVE; + if (redo_current_block) + block_need_recompile(bi); + +#if PROFILE_COMPILE_TIME + compile_time += (clock() - start_time); +#endif + } + + /* Account for compilation time */ + cpu_do_check_ticks(); +} + +void do_nothing(void) +{ + /* What did you expect this to do? 
*/ +} + +void exec_nostats(void) +{ + for (;;) { + uae_u32 opcode = GET_OPCODE; +#if FLIGHT_RECORDER + m68k_record_step(m68k_getpc()); +#endif + (*cpufunctbl[opcode])(opcode); + cpu_check_ticks(); + if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) { + return; /* We will deal with the spcflags in the caller */ + } + } +} + +void execute_normal(void) +{ + if (!check_for_cache_miss()) { + cpu_history pc_hist[MAXRUN]; + int blocklen = 0; +#if REAL_ADDRESSING || DIRECT_ADDRESSING + start_pc_p = regs.pc_p; + start_pc = get_virtual_address(regs.pc_p); +#else + start_pc_p = regs.pc_oldp; + start_pc = regs.pc; +#endif + for (;;) { /* Take note: This is the do-it-normal loop */ + pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p; + uae_u32 opcode = GET_OPCODE; +#if FLIGHT_RECORDER + m68k_record_step(m68k_getpc()); +#endif + (*cpufunctbl[opcode])(opcode); + cpu_check_ticks(); + if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) { + compile_block(pc_hist, blocklen); + return; /* We will deal with the spcflags in the caller */ + } + /* No need to check regs.spcflags, because if they were set, + we'd have ended up inside that "if" */ + } + } +} + +typedef void (*compiled_handler)(void); + +static void m68k_do_compile_execute(void) +{ + for (;;) { + ((compiled_handler)(pushall_call_handler))(); + /* Whenever we return from that, we should check spcflags */ + if (SPCFLAGS_TEST(SPCFLAG_ALL)) { + if (m68k_do_specialties ()) + return; + } + } +} + +void m68k_compile_execute (void) +{ + for (;;) { + if (quit_program) + break; + m68k_do_compile_execute(); + } +} diff --git a/jit2/flags_x86.h b/jit2/flags_x86.h new file mode 100644 index 00000000..4247f10a --- /dev/null +++ b/jit2/flags_x86.h @@ -0,0 +1,47 @@ +/* + * compiler/flags_x86.h - Native flags definitions for IA-32 + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2005 + * Gwenole Beauchesne + * + * 
Basilisk II (C) 1997-2008 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef NATIVE_FLAGS_X86_H +#define NATIVE_FLAGS_X86_H + +/* Native integer code conditions */ +enum { + NATIVE_CC_HI = 7, + NATIVE_CC_LS = 6, + NATIVE_CC_CC = 3, + NATIVE_CC_CS = 2, + NATIVE_CC_NE = 5, + NATIVE_CC_EQ = 4, + NATIVE_CC_VC = 11, + NATIVE_CC_VS = 10, + NATIVE_CC_PL = 9, + NATIVE_CC_MI = 8, + NATIVE_CC_GE = 13, + NATIVE_CC_LT = 12, + NATIVE_CC_GT = 15, + NATIVE_CC_LE = 14 +}; + +#endif /* NATIVE_FLAGS_X86_H */ diff --git a/jit2/gencomp.c b/jit2/gencomp.c new file mode 100644 index 00000000..6b47c062 --- /dev/null +++ b/jit2/gencomp.c @@ -0,0 +1,3076 @@ +/* + * compiler/gencomp.c - MC680x0 compilation generator + * + * Based on work Copyright 1995, 1996 Bernd Schmidt + * Changes for UAE-JIT Copyright 2000 Bernd Meyer + * + * Adaptation for Basilisk II and improvements, copyright 2000-2005 + * Gwenole Beauchesne + * + * Basilisk II (C) 1997-2005 Christian Bauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include "sysdeps.h" +#include "readcpu.h" + +#define BOOL_TYPE "int" +#define failure global_failure=1 +#define FAILURE global_failure=1 +#define isjump global_isjump=1 +#define is_const_jump global_iscjump=1; +#define isaddx global_isaddx=1 +#define uses_cmov global_cmov=1 +#define mayfail global_mayfail=1 +#define uses_fpu global_fpu=1 + +int hack_opcode; + +static int global_failure; +static int global_isjump; +static int global_iscjump; +static int global_isaddx; +static int global_cmov; +static int long_opcode; +static int global_mayfail; +static int global_fpu; + +static char endstr[1000]; +static char lines[100000]; +static int comp_index=0; + +static int cond_codes_x86[]={-1,-1,7,6,3,2,5,4,-1,-1,9,8,13,12,15,14}; + +static void comprintf(const char* format, ...) 
+{ + va_list args; + + va_start(args,format); + comp_index+=vsprintf(lines+comp_index,format,args); +} + +static void com_discard(void) +{ + comp_index=0; +} + +static void com_flush(void) +{ + int i; + for (i=0;i 0); + n_braces--; + comprintf ("}"); +} + +static void +finish_braces (void) +{ + while (n_braces > 0) + close_brace (); +} + +static void +pop_braces (int to) +{ + while (n_braces > to) + close_brace (); +} + +static int +bit_size (int size) +{ + switch (size) + { + case sz_byte: + return 8; + case sz_word: + return 16; + case sz_long: + return 32; + default: + abort (); + } + return 0; +} + +static const char * +bit_mask (int size) +{ + switch (size) + { + case sz_byte: + return "0xff"; + case sz_word: + return "0xffff"; + case sz_long: + return "0xffffffff"; + default: + abort (); + } + return 0; +} + +static __inline__ void gen_update_next_handler(void) +{ + return; /* Can anything clever be done here? */ +} + +static void gen_writebyte(char* address, char* source) +{ + comprintf("\twritebyte(%s,%s,scratchie);\n",address,source); +} + +static void gen_writeword(char* address, char* source) +{ + comprintf("\twriteword(%s,%s,scratchie);\n",address,source); +} + +static void gen_writelong(char* address, char* source) +{ + comprintf("\twritelong(%s,%s,scratchie);\n",address,source); +} + +static void gen_readbyte(char* address, char* dest) +{ + comprintf("\treadbyte(%s,%s,scratchie);\n",address,dest); +} + +static void gen_readword(char* address, char* dest) +{ + comprintf("\treadword(%s,%s,scratchie);\n",address,dest); +} + +static void gen_readlong(char* address, char* dest) +{ + comprintf("\treadlong(%s,%s,scratchie);\n",address,dest); +} + + + +static const char * +gen_nextilong (void) +{ + static char buffer[80]; + + sprintf (buffer, "comp_get_ilong((m68k_pc_offset+=4)-4)"); + insn_n_cycles += 4; + + long_opcode=1; + return buffer; +} + +static const char * +gen_nextiword (void) +{ + static char buffer[80]; + + sprintf (buffer, 
"comp_get_iword((m68k_pc_offset+=2)-2)"); + insn_n_cycles+=2; + + long_opcode=1; + return buffer; +} + +static const char * +gen_nextibyte (void) +{ + static char buffer[80]; + + sprintf (buffer, "comp_get_ibyte((m68k_pc_offset+=2)-2)"); + insn_n_cycles += 2; + + long_opcode=1; + return buffer; +} + +static void +swap_opcode (void) +{ + comprintf("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); + comprintf("\topcode = do_byteswap_16(opcode);\n"); + comprintf("#endif\n"); +} + +static void +sync_m68k_pc (void) +{ + comprintf("\t if (m68k_pc_offset>100) sync_m68k_pc();\n"); +} + + +/* getv == 1: fetch data; getv != 0: check for odd address. If movem != 0, + * the calling routine handles Apdi and Aipi modes. + * gb-- movem == 2 means the same thing but for a MOVE16 instruction */ +static void +genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem) +{ + start_brace (); + switch (mode) + { + case Dreg: /* Do we need to check dodgy here? */ + if (movem) + abort (); + if (getv == 1 || getv==2) { + /* We generate the variable even for getv==2, so we can use + it as a destination for MOVE */ + comprintf ("\tint %s=%s;\n",name,reg); + } + return; + + case Areg: + if (movem) + abort (); + if (getv == 1 || getv==2) { + /* see above */ + comprintf ("\tint %s=dodgy?scratchie++:%s+8;\n",name,reg); + if (getv==1) { + comprintf ("\tif (dodgy) \n"); + comprintf ("\t\tmov_l_rr(%s,%s+8);\n",name, reg); + } + } + return; + + case Aind: + comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); + comprintf ("\tif (dodgy) \n"); + comprintf ("\t\tmov_l_rr(%sa,%s+8);\n",name, reg); + break; + case Aipi: + comprintf ("\tint %sa=scratchie++;\n",name,reg); + comprintf ("\tmov_l_rr(%sa,%s+8);\n",name, reg); + break; + case Apdi: + switch (size) + { + case sz_byte: + if (movem) { + comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); + comprintf ("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); + } + else { + start_brace(); + comprintf ("\tint 
%sa=dodgy?scratchie++:%s+8;\n",name,reg); + comprintf("\tlea_l_brr(%s+8,%s+8,(uae_s32)-areg_byteinc[%s]);\n",reg,reg,reg); + comprintf ("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); + } + break; + case sz_word: + if (movem) { + comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); + comprintf ("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); + } + else { + start_brace(); + comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); + comprintf("\tlea_l_brr(%s+8,%s+8,-2);\n",reg,reg); + comprintf ("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); + } + break; + case sz_long: + if (movem) { + comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); + comprintf ("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); + } + else { + start_brace(); + comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg); + comprintf("\tlea_l_brr(%s+8,%s+8,-4);\n",reg,reg); + comprintf ("\tif (dodgy) \n"); + comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); + } + break; + default: + abort (); + } + break; + case Ad16: + comprintf("\tint %sa=scratchie++;\n",name); + comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg); + comprintf("\tlea_l_brr(%sa,%sa,(uae_s32)(uae_s16)%s);\n",name,name,gen_nextiword()); + break; + case Ad8r: + comprintf("\tint %sa=scratchie++;\n",name); + comprintf("\tcalc_disp_ea_020(%s+8,%s,%sa,scratchie);\n", + reg,gen_nextiword(),name); + break; + + case PC16: + comprintf("\tint %sa=scratchie++;\n",name); + comprintf("\tuae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf ("\tuae_s32 PC16off = (uae_s32)(uae_s16)%s;\n", gen_nextiword ()); + comprintf("\tmov_l_ri(%sa,address+PC16off);\n",name); + break; + + case PC8r: + comprintf("\tint pctmp=scratchie++;\n"); + comprintf("\tint %sa=scratchie++;\n",name); + comprintf("\tuae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + start_brace(); + 
comprintf("\tmov_l_ri(pctmp,address);\n"); + + comprintf("\tcalc_disp_ea_020(pctmp,%s,%sa,scratchie);\n", + gen_nextiword(),name); + break; + case absw: + comprintf ("\tint %sa = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%sa,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ()); + break; + case absl: + comprintf ("\tint %sa = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%sa,%s); /* absl */\n", name, gen_nextilong ()); + break; + case imm: + if (getv != 1) + abort (); + switch (size) + { + case sz_byte: + comprintf ("\tint %s = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s8)%s);\n", name, gen_nextibyte ()); + break; + case sz_word: + comprintf ("\tint %s = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ()); + break; + case sz_long: + comprintf ("\tint %s = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%s,%s);\n", name, gen_nextilong ()); + break; + default: + abort (); + } + return; + case imm0: + if (getv != 1) + abort (); + comprintf ("\tint %s = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s8)%s);\n", name, gen_nextibyte ()); + return; + case imm1: + if (getv != 1) + abort (); + comprintf ("\tint %s = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ()); + return; + case imm2: + if (getv != 1) + abort (); + comprintf ("\tint %s = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%s,%s);\n", name, gen_nextilong ()); + return; + case immi: + if (getv != 1) + abort (); + comprintf ("\tint %s = scratchie++;\n",name); + comprintf ("\tmov_l_ri(%s,%s);\n", name, reg); + return; + default: + abort (); + } + + /* We get here for all non-reg non-immediate addressing modes to + * actually fetch the value. 
*/ + if (getv == 1) + { + char astring[80]; + sprintf(astring,"%sa",name); + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + break; + case sz_word: + insn_n_cycles += 2; + break; + case sz_long: + insn_n_cycles += 4; + break; + default: + abort (); + } + start_brace (); + comprintf("\tint %s=scratchie++;\n",name); + switch (size) + { + case sz_byte: + gen_readbyte(astring,name); + break; + case sz_word: + gen_readword(astring,name); + break; + case sz_long: + gen_readlong(astring,name); + break; + default: + abort (); + } + } + + /* We now might have to fix up the register for pre-dec or post-inc + * addressing modes. */ + if (!movem) { + char x[160]; + switch (mode) + { + case Aipi: + switch (size) + { + case sz_byte: + comprintf("\tlea_l_brr(%s+8,%s+8,areg_byteinc[%s]);\n",reg,reg,reg); + break; + case sz_word: + comprintf("\tlea_l_brr(%s+8,%s+8,2);\n",reg,reg,reg); + break; + case sz_long: + comprintf("\tlea_l_brr(%s+8,%s+8,4);\n",reg,reg); + break; + default: + abort (); + } + break; + case Apdi: + break; + default: + break; + } + } +} + +static void +genastore (char *from, amodes mode, char *reg, wordsizes size, char *to) +{ + switch (mode) + { + case Dreg: + switch (size) + { + case sz_byte: + comprintf("\tif(%s!=%s)\n",reg,from); + comprintf ("\t\tmov_b_rr(%s,%s);\n", reg, from); + break; + case sz_word: + comprintf("\tif(%s!=%s)\n",reg,from); + comprintf ("\t\tmov_w_rr(%s,%s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s!=%s)\n",reg,from); + comprintf ("\t\tmov_l_rr(%s,%s);\n", reg, from); + break; + default: + abort (); + } + break; + case Areg: + switch (size) + { + case sz_word: + comprintf("\tif(%s+8!=%s)\n",reg,from); + comprintf ("\t\tmov_w_rr(%s+8,%s);\n", reg, from); + break; + case sz_long: + comprintf("\tif(%s+8!=%s)\n",reg,from); + comprintf ("\t\tmov_l_rr(%s+8,%s);\n", reg, from); + break; + default: + abort (); + } + break; + + case Apdi: + case absw: + case PC16: + case PC8r: + case Ad16: + case Ad8r: + case Aipi: + 
case Aind: + case absl: + { + char astring[80]; + sprintf(astring,"%sa",to); + + switch (size) + { + case sz_byte: + insn_n_cycles += 2; + gen_writebyte(astring,from); + break; + case sz_word: + insn_n_cycles += 2; + gen_writeword(astring,from); + break; + case sz_long: + insn_n_cycles += 4; + gen_writelong(astring,from); + break; + default: + abort (); + } + } + break; + case imm: + case imm0: + case imm1: + case imm2: + case immi: + abort (); + break; + default: + abort (); + } +} + +static void genmov16(uae_u32 opcode, struct instr *curi) +{ + comprintf("\tint src=scratchie++;\n"); + comprintf("\tint dst=scratchie++;\n"); + + if ((opcode & 0xfff8) == 0xf620) { + /* MOVE16 (Ax)+,(Ay)+ */ + comprintf("\tuae_u16 dstreg=((%s)>>12)&0x07;\n", gen_nextiword()); + comprintf("\tmov_l_rr(src,8+srcreg);\n"); + comprintf("\tmov_l_rr(dst,8+dstreg);\n"); + } + else { + /* Other variants */ + genamode (curi->smode, "srcreg", curi->size, "src", 0, 2); + genamode (curi->dmode, "dstreg", curi->size, "dst", 0, 2); + comprintf("\tmov_l_rr(src,srca);\n"); + comprintf("\tmov_l_rr(dst,dsta);\n"); + } + + /* Align on 16-byte boundaries */ + comprintf("\tand_l_ri(src,~15);\n"); + comprintf("\tand_l_ri(dst,~15);\n"); + + if ((opcode & 0xfff8) == 0xf620) { + comprintf("\tif (srcreg != dstreg)\n"); + comprintf("\tadd_l_ri(srcreg+8,16);\n"); + comprintf("\tadd_l_ri(dstreg+8,16);\n"); + } + else if ((opcode & 0xfff8) == 0xf600) + comprintf("\tadd_l_ri(srcreg+8,16);\n"); + else if ((opcode & 0xfff8) == 0xf608) + comprintf("\tadd_l_ri(dstreg+8,16);\n"); + + comprintf("\tint tmp=scratchie;\n"); + comprintf("\tscratchie+=4;\n"); + + comprintf("\tget_n_addr(src,src,scratchie);\n" + "\tget_n_addr(dst,dst,scratchie);\n" + "\tmov_l_rR(tmp+0,src,0);\n" + "\tmov_l_rR(tmp+1,src,4);\n" + "\tmov_l_rR(tmp+2,src,8);\n" + "\tmov_l_rR(tmp+3,src,12);\n" + "\tmov_l_Rr(dst,tmp+0,0);\n" + "\tforget_about(tmp+0);\n" + "\tmov_l_Rr(dst,tmp+1,4);\n" + "\tforget_about(tmp+1);\n" + "\tmov_l_Rr(dst,tmp+2,8);\n" + 
"\tforget_about(tmp+2);\n" + "\tmov_l_Rr(dst,tmp+3,12);\n"); +} + +static void +genmovemel (uae_u16 opcode) +{ + comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); + comprintf ("\tint native=scratchie++;\n"); + comprintf ("\tint i;\n"); + comprintf ("\tsigned char offset=0;\n"); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); + comprintf("\tget_n_addr(srca,native,scratchie);\n"); + + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tmov_l_rR(i,native,offset);\n" + "\t\t\tbswap_32(i);\n" + "\t\t\toffset+=4;\n"); + break; + case sz_word: + comprintf("\t\t\tmov_w_rR(i,native,offset);\n" + "\t\t\tbswap_16(i);\n" + "\t\t\tsign_extend_16_rr(i,i);\n" + "\t\t\toffset+=2;\n"); + break; + default: abort(); + } + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Aipi) { + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,offset);\n"); + } +} + + +static void +genmovemle (uae_u16 opcode) +{ + comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ()); + comprintf ("\tint native=scratchie++;\n"); + comprintf ("\tint i;\n"); + comprintf ("\tint tmp=scratchie++;\n"); + comprintf ("\tsigned char offset=0;\n"); + genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1); + + comprintf("\tget_n_addr(srca,native,scratchie);\n"); + + if (table68k[opcode].dmode!=Apdi) { + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + case sz_long: + comprintf("\t\t\tmov_l_rr(tmp,i);\n" + "\t\t\tbswap_32(tmp);\n" + "\t\t\tmov_l_Rr(native,tmp,offset);\n" + "\t\t\toffset+=4;\n"); + break; + case sz_word: + comprintf("\t\t\tmov_l_rr(tmp,i);\n" + "\t\t\tbswap_16(tmp);\n" + "\t\t\tmov_w_Rr(native,tmp,offset);\n" + "\t\t\toffset+=2;\n"); + break; + default: abort(); + } + } + else { /* Pre-decrement */ + comprintf("\tfor (i=0;i<16;i++) {\n" + "\t\tif ((mask>>i)&1) {\n"); + switch(table68k[opcode].size) { + 
case sz_long: + comprintf("\t\t\toffset-=4;\n" + "\t\t\tmov_l_rr(tmp,15-i);\n" + "\t\t\tbswap_32(tmp);\n" + "\t\t\tmov_l_Rr(native,tmp,offset);\n" + ); + break; + case sz_word: + comprintf("\t\t\toffset-=2;\n" + "\t\t\tmov_l_rr(tmp,15-i);\n" + "\t\t\tbswap_16(tmp);\n" + "\t\t\tmov_w_Rr(native,tmp,offset);\n" + ); + break; + default: abort(); + } + } + + + comprintf("\t\t}\n" + "\t}"); + if (table68k[opcode].dmode == Apdi) { + comprintf("\t\t\tlea_l_brr(8+dstreg,srca,(uae_s32)offset);\n"); + } +} + + +static void +duplicate_carry (void) +{ + comprintf ("\tif (needed_flags&FLAG_X) duplicate_carry();\n"); +} + +typedef enum +{ + flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp, + flag_addx, flag_subx, flag_zn, flag_av, flag_sv, flag_and, flag_or, + flag_eor, flag_mov +} +flagtypes; + + +static void +genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst) +{ + if (noflags) { + switch(type) { + case flag_cmp: + comprintf("\tdont_care_flags();\n"); + comprintf("/* Weird --- CMP with noflags ;-) */\n"); + return; + case flag_add: + case flag_sub: + comprintf("\tdont_care_flags();\n"); + { + char* op; + switch(type) { + case flag_add: op="add"; break; + case flag_sub: op="sub"; break; + default: abort(); + } + switch (size) + { + case sz_byte: + comprintf("\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n",op,dst,src); + break; + } + return; + } + break; + + case flag_and: + comprintf("\tdont_care_flags();\n"); + switch (size) + { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src); + comprintf("\tor_l_ri(scratchie,0xffffff00);\n"); + comprintf("\tand_l(%s,scratchie);\n",dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tand_b(%s,%s);\n",dst,src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + 
comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src); + comprintf("\tor_l_ri(scratchie,0xffff0000);\n"); + comprintf("\tand_l(%s,scratchie);\n",dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tand_w(%s,%s);\n",dst,src); + break; + case sz_long: + comprintf("\tand_l(%s,%s);\n",dst,src); + break; + } + return; + + case flag_mov: + comprintf("\tdont_care_flags();\n"); + switch (size) + { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src); + comprintf("\tand_l_ri(%s,0xffffff00);\n",dst); + comprintf("\tor_l(%s,scratchie);\n",dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tmov_b_rr(%s,%s);\n",dst,src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src); + comprintf("\tand_l_ri(%s,0xffff0000);\n",dst); + comprintf("\tor_l(%s,scratchie);\n",dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\tmov_w_rr(%s,%s);\n",dst,src); + break; + case sz_long: + comprintf("\tmov_l_rr(%s,%s);\n",dst,src); + break; + } + return; + + case flag_or: + case flag_eor: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + char* op; + switch(type) { + case flag_or: op="or"; break; + case flag_eor: op="xor"; break; + default: abort(); + } + switch (size) + { + case sz_byte: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src); + comprintf("\t%s_l(%s,scratchie);\n",op,dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("if (kill_rodent(dst)) {\n"); + comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src); + comprintf("\t%s_l(%s,scratchie);\n",op,dst); + comprintf("\tforget_about(scratchie);\n"); + comprintf("\t} else \n" + "\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n",op,dst,src); + 
break; + } + close_brace(); + return; + } + + + case flag_addx: + case flag_subx: + comprintf("\tdont_care_flags();\n"); + { + char* op; + switch(type) { + case flag_addx: op="adc"; break; + case flag_subx: op="sbb"; break; + default: abort(); + } + comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ + switch (size) + { + case sz_byte: + comprintf("\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\t%s_l(%s,%s);\n",op,dst,src); + break; + } + return; + } + break; + default: return; + } + } + + /* Need the flags, but possibly not all of them */ + switch (type) + { + case flag_logical_noclobber: + failure; + + case flag_and: + case flag_or: + case flag_eor: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + char* op; + switch(type) { + case flag_and: op="and"; break; + case flag_or: op="or"; break; + case flag_eor: op="xor"; break; + default: abort(); + } + switch (size) + { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n",op,dst,src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + } + + case flag_mov: + comprintf("\tdont_care_flags();\n"); + start_brace(); + { + switch (size) + { + case sz_byte: + comprintf("\tif (%s!=%s) {\n",src,dst); + comprintf("\tmov_b_ri(%s,0);\n" + "\tstart_needflags();\n",dst); + comprintf("\tor_b(%s,%s);\n",dst,src); + comprintf("\t} else {\n"); + comprintf("\tmov_b_rr(%s,%s);\n",dst,src); + comprintf("\ttest_b_rr(%s,%s);\n",dst,dst); + comprintf("\t}\n"); + break; + case sz_word: + comprintf("\tif (%s!=%s) {\n",src,dst); + comprintf("\tmov_w_ri(%s,0);\n" + "\tstart_needflags();\n",dst); + comprintf("\tor_w(%s,%s);\n",dst,src); + comprintf("\t} else 
{\n"); + comprintf("\tmov_w_rr(%s,%s);\n",dst,src); + comprintf("\ttest_w_rr(%s,%s);\n",dst,dst); + comprintf("\t}\n"); + break; + case sz_long: + comprintf("\tif (%s!=%s) {\n",src,dst); + comprintf("\tmov_l_ri(%s,0);\n" + "\tstart_needflags();\n",dst); + comprintf("\tor_l(%s,%s);\n",dst,src); + comprintf("\t} else {\n"); + comprintf("\tmov_l_rr(%s,%s);\n",dst,src); + comprintf("\ttest_l_rr(%s,%s);\n",dst,dst); + comprintf("\t}\n"); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + } + + case flag_logical: + comprintf("\tdont_care_flags();\n"); + start_brace(); + switch (size) + { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\ttest_b_rr(%s,%s);\n",value,value); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\ttest_w_rr(%s,%s);\n",value,value); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\ttest_l_rr(%s,%s);\n",value,value); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + close_brace(); + return; + + + case flag_add: + case flag_sub: + case flag_cmp: + comprintf("\tdont_care_flags();\n"); + { + char* op; + switch(type) { + case flag_add: op="add"; break; + case flag_sub: op="sub"; break; + case flag_cmp: op="cmp"; break; + default: abort(); + } + switch (size) + { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n",op,dst,src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tend_needflags();\n"); + if (type!=flag_cmp) { + duplicate_carry(); + } + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + + return; + } + + case flag_addx: + case flag_subx: + uses_cmov; + comprintf("\tdont_care_flags();\n"); + { + char* op; + switch(type) { + case flag_addx: op="adc"; break; + case 
flag_subx: op="sbb"; break; + default: abort(); + } + start_brace(); + comprintf("\tint zero=scratchie++;\n" + "\tint one=scratchie++;\n" + "\tif (needed_flags&FLAG_Z) {\n" + "\tmov_l_ri(zero,0);\n" + "\tmov_l_ri(one,-1);\n" + "\tmake_flags_live();\n" + "\tcmov_l_rr(zero,one,5);\n" + "\t}\n"); + comprintf("\trestore_carry();\n"); /* Reload the X flag into C */ + switch (size) + { + case sz_byte: + comprintf("\tstart_needflags();\n" + "\t%s_b(%s,%s);\n",op,dst,src); + break; + case sz_word: + comprintf("\tstart_needflags();\n" + "\t%s_w(%s,%s);\n",op,dst,src); + break; + case sz_long: + comprintf("\tstart_needflags();\n" + "\t%s_l(%s,%s);\n",op,dst,src); + break; + } + comprintf("\tlive_flags();\n"); + comprintf("\tif (needed_flags&FLAG_Z) {\n" + "\tcmov_l_rr(zero,one,5);\n" + "\tset_zero(zero, one);\n" /* No longer need one */ + "\tlive_flags();\n" + "\t}\n"); + comprintf("\tend_needflags();\n"); + duplicate_carry(); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + return; + } + default: + failure; + break; + } +} + +static void +force_range_for_rox (const char *var, wordsizes size) +{ + /* Could do a modulo operation here... which one is faster? 
*/ + switch (size) + { + case sz_long: + comprintf ("\tif (%s >= 33) %s -= 33;\n", var, var); + break; + case sz_word: + comprintf ("\tif (%s >= 34) %s -= 34;\n", var, var); + comprintf ("\tif (%s >= 17) %s -= 17;\n", var, var); + break; + case sz_byte: + comprintf ("\tif (%s >= 36) %s -= 36;\n", var, var); + comprintf ("\tif (%s >= 18) %s -= 18;\n", var, var); + comprintf ("\tif (%s >= 9) %s -= 9;\n", var, var); + break; + } +} + +static const char * +cmask (wordsizes size) +{ + switch (size) + { + case sz_byte: + return "0x80"; + case sz_word: + return "0x8000"; + case sz_long: + return "0x80000000"; + default: + abort (); + } +} + +static int +source_is_imm1_8 (struct instr *i) +{ + return i->stype == 3; +} + +static int /* returns zero for success, non-zero for failure */ +gen_opcode (unsigned long int opcode) +{ + struct instr *curi = table68k + opcode; + char* ssize=NULL; + + insn_n_cycles = 2; + global_failure=0; + long_opcode=0; + global_isjump=0; + global_iscjump=0; + global_isaddx=0; + global_cmov=0; + global_fpu=0; + global_mayfail=0; + hack_opcode=opcode; + endstr[0]=0; + + start_brace (); + comprintf("\tuae_u8 scratchie=S1;\n"); + switch (curi->plev) + { + case 0: /* not privileged */ + break; + case 1: /* unprivileged only on 68000 */ + if (cpu_level == 0) + break; + if (next_cpu_level < 0) + next_cpu_level = 0; + + /* fall through */ + case 2: /* priviledged */ + failure; /* Easy ones first */ + break; + case 3: /* privileged if size == word */ + if (curi->size == sz_byte) + break; + failure; + break; + } + switch (curi->size) { + case sz_byte: ssize="b"; break; + case sz_word: ssize="w"; break; + case sz_long: ssize="l"; break; + default: abort(); + } + + switch (curi->mnemo) + { + case i_OR: + case i_AND: + case i_EOR: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + switch(curi->mnemo) { + case i_OR: genflags (flag_or, curi->size, "", "src", "dst"); break; + case i_AND: 
genflags (flag_and, curi->size, "", "src", "dst"); break; + case i_EOR: genflags (flag_eor, curi->size, "", "src", "dst"); break; + } + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + + case i_ORSR: + case i_EORSR: + failure; + isjump; + break; + case i_ANDSR: + failure; + isjump; + break; + case i_SUB: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags (flag_sub, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + case i_SUBA: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break; + case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break; + case sz_long: comprintf("\ttmp=src;\n"); break; + default: abort(); + } + comprintf("\tsub_l(dst,tmp);\n"); + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + case i_SUBX: + isaddx; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags (flag_subx, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + case i_SBCD: + failure; + /* I don't think so! 
*/ + break; + case i_ADD: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + genflags (flag_add, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + case i_ADDA: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break; + case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break; + case sz_long: comprintf("\ttmp=src;\n"); break; + default: abort(); + } + comprintf("\tadd_l(dst,tmp);\n"); + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + case i_ADDX: + isaddx; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + genflags (flag_addx, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + case i_ABCD: + failure; + /* No BCD maths for me.... */ + break; + case i_NEG: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace (); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags (flag_sub, curi->size, "", "src", "dst"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + break; + case i_NEGX: + isaddx; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace (); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags (flag_subx, curi->size, "", "src", "dst"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + break; + + case i_NBCD: + failure; + /* Nope! 
*/ + break; + case i_CLR: + genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace(); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0);\n"); + genflags (flag_logical, curi->size, "dst", "", ""); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + break; + case i_NOT: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace (); + comprintf("\tint dst=scratchie++;\n"); + comprintf("\tmov_l_ri(dst,0xffffffff);\n"); + genflags (flag_eor, curi->size, "", "src", "dst"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + break; + case i_TST: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genflags (flag_logical, curi->size, "src", "", ""); + break; + case i_BCHG: + case i_BCLR: + case i_BSET: + case i_BTST: +/* failure; /* NEW: from "Ipswitch Town" release */ + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint s=scratchie++;\n" + "\tint tmp=scratchie++;\n" + "\tmov_l_rr(s,src);\n"); + if (curi->size == sz_byte) + comprintf("\tand_l_ri(s,7);\n"); + else + comprintf("\tand_l_ri(s,31);\n"); + + { + char* op; + int need_write=1; + + switch(curi->mnemo) { + case i_BCHG: op="btc"; break; + case i_BCLR: op="btr"; break; + case i_BSET: op="bts"; break; + case i_BTST: op="bt"; need_write=0; break; + default: abort(); + } + comprintf("\t%s_l_rr(dst,s);\n" /* Answer now in C */ + "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */ + "\tmake_flags_live();\n" /* Get the flags back */ + "\tdont_care_flags();\n",op); + if (!noflags) { + comprintf("\tstart_needflags();\n" + "\tset_zero(s,tmp);\n" + "\tlive_flags();\n" + "\tend_needflags();\n"); + } + if (need_write) + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + } + break; + + case i_CMPM: + case i_CMP: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, 
"dst", 1, 0); + start_brace (); + genflags (flag_cmp, curi->size, "", "src", "dst"); + break; + case i_CMPA: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0); + start_brace(); + comprintf("\tint tmps=scratchie++;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tsign_extend_8_rr(tmps,src);\n"); break; + case sz_word: comprintf("\tsign_extend_16_rr(tmps,src);\n"); break; + case sz_long: comprintf("tmps=src;\n"); break; + default: abort(); + } + genflags (flag_cmp, sz_long, "", "tmps", "dst"); + break; + /* The next two are coded a little unconventional, but they are doing + * weird things... */ + case i_MVPRM: + isjump; + failure; + break; + case i_MVPMR: + isjump; + failure; + break; + case i_MOVE: + switch(curi->dmode) { + case Dreg: + case Areg: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genflags (flag_mov, curi->size, "", "src", "dst"); + genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); + break; + default: /* It goes to memory, not a register */ + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genflags (flag_logical, curi->size, "src", "", ""); + genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); + break; + } + break; + case i_MOVEA: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + + start_brace(); + comprintf("\tint tmps=scratchie++;\n"); + switch(curi->size) { + case sz_word: comprintf("\tsign_extend_16_rr(dst,src);\n"); break; + case sz_long: comprintf("\tmov_l_rr(dst,src);\n"); break; + default: abort(); + } + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + + case i_MVSR2: + isjump; + failure; + break; + case i_MV2SR: + isjump; + failure; + break; + case i_SWAP: + genamode (curi->smode, "srcreg", sz_long, "src", 1, 
0); + comprintf("\tdont_care_flags();\n"); + comprintf("\trol_l_ri(src,16);\n"); + genflags (flag_logical, sz_long, "src", "", ""); + genastore ("src", curi->smode, "srcreg", sz_long, "src"); + break; + case i_EXG: + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tmov_l_rr(tmp,src);\n"); + genastore ("dst", curi->smode, "srcreg", curi->size, "src"); + genastore ("tmp", curi->dmode, "dstreg", curi->size, "dst"); + break; + case i_EXT: + genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + comprintf("\tdont_care_flags();\n"); + start_brace (); + switch (curi->size) + { + case sz_byte: + comprintf ("\tint dst = src;\n" + "\tsign_extend_8_rr(src,src);\n"); + break; + case sz_word: + comprintf ("\tint dst = scratchie++;\n" + "\tsign_extend_8_rr(dst,src);\n"); + break; + case sz_long: + comprintf ("\tint dst = src;\n" + "\tsign_extend_16_rr(src,src);\n"); + break; + default: + abort (); + } + genflags (flag_logical, + curi->size == sz_word ? sz_word : sz_long, "dst", "", ""); + genastore ("dst", curi->smode, "srcreg", + curi->size == sz_word ? 
sz_word : sz_long, "src"); + break; + case i_MVMEL: + genmovemel (opcode); + break; + case i_MVMLE: + genmovemle (opcode); + break; + case i_TRAP: + isjump; + failure; + break; + case i_MVR2USP: + isjump; + failure; + break; + case i_MVUSP2R: + isjump; + failure; + break; + case i_RESET: + isjump; + failure; + break; + case i_NOP: + break; + case i_STOP: + isjump; + failure; + break; + case i_RTE: + isjump; + failure; + break; + case i_RTD: +/* failure; /* NEW: from "Ipswitch Town" release */ + genamode (curi->smode, "srcreg", curi->size, "offs", 1, 0); + /* offs is constant */ + comprintf("\tadd_l_ri(offs,4);\n"); + start_brace(); + comprintf("\tint newad=scratchie++;\n" + "\treadlong(15,newad,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc,newad);\n" + "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n" + "\tadd_l(15,offs);\n"); + gen_update_next_handler(); + isjump; + break; + case i_LINK: +/* failure; /* NEW: from "Ipswitch Town" release */ + genamode (curi->smode, "srcreg", sz_long, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + comprintf("\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,src,scratchie);\n" + "\tmov_l_rr(src,15);\n"); + if (curi->size==sz_word) + comprintf("\tsign_extend_16_rr(offs,offs);\n"); + comprintf("\tadd_l(15,offs);\n"); + genastore ("src", curi->smode, "srcreg", sz_long, "src"); + break; + case i_UNLK: +/* failure; /* NEW: from "Ipswitch Town" release */ + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + comprintf("\tmov_l_rr(15,src);\n" + "\treadlong(15,src,scratchie);\n" + "\tadd_l_ri(15,4);\n"); + genastore ("src", curi->smode, "srcreg", curi->size, "src"); + break; + case i_RTS: + comprintf("\tint newad=scratchie++;\n" + "\treadlong(15,newad,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc,newad);\n" + "\tget_n_addr_jmp(newad,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n" + 
"\tlea_l_brr(15,15,4);\n"); + gen_update_next_handler(); + isjump; + break; + case i_TRAPV: + isjump; + failure; + break; + case i_RTR: + isjump; + failure; + break; + case i_JSR: + isjump; + genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + start_brace(); + comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,ret,scratchie);\n"); + comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + case i_JMP: + isjump; + genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n" + "\tget_n_addr_jmp(srca,PC_P,scratchie);\n" + "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n" + "\tm68k_pc_offset=0;\n"); + gen_update_next_handler(); + break; + case i_BSR: + is_const_jump; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + start_brace(); + comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n"); + comprintf("\tint ret=scratchie++;\n" + "\tmov_l_ri(ret,retadd);\n" + "\tsub_l_ri(15,4);\n" + "\twritelong_clobber(15,ret,scratchie);\n"); + comprintf("\tadd_l_ri(src,m68k_pc_offset_thisinst+2);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + comprintf("\tadd_l(PC_P,src);\n"); + + comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n"); + break; + case i_Bcc: + comprintf("\tuae_u32 v,v1,v2;\n"); + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + /* That source is an immediate, so we can clobber it with abandon */ + switch(curi->size) { + case sz_byte: comprintf("\tsign_extend_8_rr(src,src);\n"); break; + case sz_word: comprintf("\tsign_extend_16_rr(src,src);\n"); break; + case sz_long: break; + } + comprintf("\tsub_l_ri(src,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + 
/* Leave the following as "add" --- it will allow it to be optimized + away due to src being a constant ;-) */ + comprintf("\tadd_l_ri(src,(uintptr)comp_pc_p);\n"); + comprintf("\tmov_l_ri(PC_P,(uintptr)comp_pc_p);\n"); + /* Now they are both constant. Might as well fold in m68k_pc_offset */ + comprintf("\tadd_l_ri(src,m68k_pc_offset);\n"); + comprintf("\tadd_l_ri(PC_P,m68k_pc_offset);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + + if (curi->cc>=2) { + comprintf("\tv1=get_const(PC_P);\n" + "\tv2=get_const(src);\n" + "\tregister_branch(v1,v2,%d);\n", + cond_codes_x86[curi->cc]); + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + isjump; + } + else { + is_const_jump; + } + + switch(curi->cc) { + case 0: /* Unconditional jump */ + comprintf("\tmov_l_rr(PC_P,src);\n"); + comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n"); + break; + case 1: break; /* This is silly! */ + case 8: failure; break; /* Work out details! FIXME */ + case 9: failure; break; /* Not critical, though! */ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + break; + default: abort(); + } + break; + case i_LEA: + genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0); + genastore ("srca", curi->dmode, "dstreg", curi->size, "dst"); + break; + case i_PEA: + if (table68k[opcode].smode==Areg || + table68k[opcode].smode==Aind || + table68k[opcode].smode==Aipi || + table68k[opcode].smode==Apdi || + table68k[opcode].smode==Ad16 || + table68k[opcode].smode==Ad8r) + comprintf("if (srcreg==7) dodgy=1;\n"); + + genamode (curi->smode, "srcreg", curi->size, "src", 0, 0); + genamode (Apdi, "7", sz_long, "dst", 2, 0); + genastore ("srca", Apdi, "7", sz_long, "dst"); + break; + case i_DBcc: + isjump; + uses_cmov; + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0); + + /* That offs is an 
immediate, so we can clobber it with abandon */ + switch(curi->size) { + case sz_word: comprintf("\tsign_extend_16_rr(offs,offs);\n"); break; + default: abort(); /* Seems this only comes in word flavour */ + } + comprintf("\tsub_l_ri(offs,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n"); + comprintf("\tadd_l_ri(offs,(uintptr)comp_pc_p);\n"); /* New PC, + once the + offset_68k is + * also added */ + /* Let's fold in the m68k_pc_offset at this point */ + comprintf("\tadd_l_ri(offs,m68k_pc_offset);\n"); + comprintf("\tadd_l_ri(PC_P,m68k_pc_offset);\n"); + comprintf("\tm68k_pc_offset=0;\n"); + + start_brace(); + comprintf("\tint nsrc=scratchie++;\n"); + + if (curi->cc>=2) { + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + } + + if (curi->size!=sz_word) + abort(); + + + switch(curi->cc) { + case 0: /* This is an elaborate nop? */ + break; + case 1: + comprintf("\tstart_needflags();\n"); + comprintf("\tsub_w_ri(src,1);\n"); + comprintf("\t end_needflags();\n"); + start_brace(); + comprintf("\tuae_u32 v2,v;\n" + "\tuae_u32 v1=get_const(PC_P);\n"); + comprintf("\tv2=get_const(offs);\n" + "\tregister_branch(v1,v2,3);\n"); + break; + + case 8: failure; break; /* Work out details! FIXME */ + case 9: failure; break; /* Not critical, though! */ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\tmov_l_rr(nsrc,src);\n"); + comprintf("\tlea_l_brr(scratchie,src,(uae_s32)-1);\n" + "\tmov_w_rr(src,scratchie);\n"); + comprintf("\tcmov_l_rr(offs,PC_P,%d);\n", + cond_codes_x86[curi->cc]); + comprintf("\tcmov_l_rr(src,nsrc,%d);\n", + cond_codes_x86[curi->cc]); + /* OK, now for cc=true, we have src==nsrc and offs==PC_P, + so whether we move them around doesn't matter. 
However, + if cc=false, we have offs==jump_pc, and src==nsrc-1 */ + + comprintf("\t start_needflags();\n"); + comprintf("\ttest_w_rr(nsrc,nsrc);\n"); + comprintf("\t end_needflags();\n"); + comprintf("\tcmov_l_rr(PC_P,offs,5);\n"); + break; + default: abort(); + } + genastore ("src", curi->smode, "srcreg", curi->size, "src"); + gen_update_next_handler(); + break; + + case i_Scc: +/* failure; /* NEW: from "Ipswitch Town" release */ + genamode (curi->smode, "srcreg", curi->size, "src", 2, 0); + start_brace (); + comprintf ("\tint val = scratchie++;\n"); + + /* We set val to 0 if we really should use 255, and to 1 for real 0 */ + switch(curi->cc) { + case 0: /* Unconditional set */ + comprintf("\tmov_l_ri(val,0);\n"); + break; + case 1: + /* Unconditional not-set */ + comprintf("\tmov_l_ri(val,1);\n"); + break; + case 8: failure; break; /* Work out details! FIXME */ + case 9: failure; break; /* Not critical, though! */ + + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + comprintf("\tmake_flags_live();\n"); /* Load the flags */ + /* All condition codes can be inverted by changing the LSB */ + comprintf("\tsetcc(val,%d);\n", + cond_codes_x86[curi->cc]^1); break; + default: abort(); + } + comprintf("\tsub_b_ri(val,1);\n"); + genastore ("val", curi->smode, "srcreg", curi->size, "src"); + break; + case i_DIVU: + isjump; + failure; + break; + case i_DIVS: + isjump; + failure; + break; + case i_MULU: +/* failure; /* NEW: from "Ipswitch Town" release */ + comprintf("\tdont_care_flags();\n"); + genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + /* To do 16x16 unsigned multiplication, we actually use + 32x32 signed, and zero-extend the registers first. 
+ That solves the problem of MUL needing dedicated registers + on the x86 */ + comprintf("\tzero_extend_16_rr(scratchie,src);\n" + "\tzero_extend_16_rr(dst,dst);\n" + "\timul_32_32(dst,scratchie);\n"); + genflags (flag_logical, sz_long, "dst", "", ""); + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + case i_MULS: +/* failure; /* NEW: from "Ipswitch Town" release */ + comprintf("\tdont_care_flags();\n"); + genamode (curi->smode, "srcreg", sz_word, "src", 1, 0); + genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0); + comprintf("\tsign_extend_16_rr(scratchie,src);\n" + "\tsign_extend_16_rr(dst,dst);\n" + "\timul_32_32(dst,scratchie);\n"); + genflags (flag_logical, sz_long, "dst", "", ""); + genastore ("dst", curi->dmode, "dstreg", sz_long, "dst"); + break; + case i_CHK: + isjump; + failure; + break; + + case i_CHK2: + isjump; + failure; + break; + + case i_ASR: + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode!=immi) { +/* failure; /* UNTESTED: NEW: from "Ipswitch Town" release */ + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint width;\n" + "\tint cdata=scratchie++;\n" + "\tint sdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(sdata,data);\n" + "\tmov_l_rr(cdata,data);\n" + "\tmov_l_rr(tmpcnt,cnt);\n"); + switch (curi->size) { + case sz_byte: comprintf("\tshra_b_ri(sdata,7);\n"); break; + case sz_word: comprintf("\tshra_w_ri(sdata,15);\n"); break; + case sz_long: comprintf("\tshra_l_ri(sdata,31);\n"); break; + default: abort(); + } + /* sdata is now the MSB propagated to all bits for the + register of specified size */ + comprintf("\tand_l_ri(tmpcnt,63);\n"); + switch(curi->size) { + case 
sz_byte: comprintf("\tshra_b_rr(data,tmpcnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshra_w_rr(data,tmpcnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshra_l_rr(data,tmpcnt);\n" + "\thighmask=0x20;\n"); + break; + } + comprintf("\ttest_l_ri(tmpcnt,highmask);\n"); + switch (curi->size) { + case sz_byte: comprintf("\tcmov_b_rr(data,sdata,NATIVE_CC_NE);\n"); break; + case sz_word: comprintf("\tcmov_w_rr(data,sdata,NATIVE_CC_NE);\n"); break; + case sz_long: comprintf("\tcmov_l_rr(data,sdata,NATIVE_CC_NE);\n"); break; + } + + /* Result of shift is now in data. Now we need to determine + the carry by shifting cdata one less */ + /* NOTE: carry bit is cleared if shift count is zero */ + comprintf("\tmov_l_ri(scratchie,0);\n" + "\ttest_l_rr(tmpcnt,tmpcnt);\n" + "\tcmov_l_rr(sdata,scratchie,NATIVE_CC_EQ);\n" + "\tforget_about(scratchie);\n"); + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(cdata,tmpcnt);\n");break; + case sz_word: comprintf("\tshra_w_rr(cdata,tmpcnt);\n");break; + case sz_long: comprintf("\tshra_l_rr(cdata,tmpcnt);\n");break; + default: abort(); + } + /* If the shift count was higher than the width, we need + to pick up the sign from original data (sdata) */ + /* NOTE: for shift count of zero, the following holds + true and cdata contains 0 so that carry bit is cleared */ + comprintf("\ttest_l_ri(tmpcnt,highmask);\n" + "\tforget_about(tmpcnt);\n" + "\tcmov_l_rr(cdata,sdata,NATIVE_CC_NE);\n"); + + /* And create the flags (preserve X flag if shift count is zero) */ + comprintf("\ttest_l_ri(cnt,63);\n" + "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n"); + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + 
comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint width;\n" + "\tint highshift=scratchie++;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(data,cnt);\n" + "\thighmask=0x38;\n" + "\twidth=8;\n"); + break; + case sz_word: comprintf("\tshra_w_rr(data,cnt);\n" + "\thighmask=0x30;\n" + "\twidth=16;\n"); + break; + case sz_long: comprintf("\tshra_l_rr(data,cnt);\n" + "\thighmask=0x20;\n" + "\twidth=32;\n"); + break; + default: abort(); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(highshift,0);\n" + "mov_l_ri(scratchie,width/2);\n" + "cmov_l_rr(highshift,scratchie,5);\n"); + /* The x86 masks out bits, so we now make sure that things + really get shifted as much as planned */ + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break; + case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break; + case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break; + default: abort(); + } + /* And again */ + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break; + case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break; + case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break; + default: abort(); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + } + else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshra_b_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + case sz_word: comprintf("\tshra_w_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + case sz_long: comprintf("\tshra_l_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + default: abort(); 
+ } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + break; + + case i_ASL: +/* failure; /* NEW: from "Ipswitch Town" release */ + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + /* Except for the handling of the V flag, this is identical to + LSL. The handling of V is, uhm, unpleasant, so if it's needed, + let the normal emulation handle it. 
Shoulders of giants kinda + thing ;-) */ + comprintf("if (needed_flags & FLAG_V) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode!=immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,5);\n"); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: abort(); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,4);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: abort(); + } + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(cdata,tmpcnt);\n");break; + case sz_word: comprintf("\tshll_w_rr(cdata,tmpcnt);\n");break; + case sz_long: comprintf("\tshll_l_rr(cdata,tmpcnt);\n");break; + default: abort(); + } + comprintf("test_l_ri(tmpcnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(cdata,scratchie,5);\n"); + /* And create the flags */ + comprintf("\tstart_needflags();\n"); + + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,7);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,15);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); + comprintf("\t bt_l_ri(cdata,31);\n"); break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: abort(); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,4);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: abort(); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + } + else { + 
start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n" + "\tbp=8-srcreg;\n"); break; + case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n" + "\tbp=16-srcreg;\n"); break; + case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n" + "\tbp=32-srcreg;\n"); break; + default: abort(); + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + break; + + case i_LSR: +/* failure; /* NEW: from "Ipswitch Town" release */ + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode!=immi) { + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,NATIVE_CC_NE);\n"); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch(curi->size) { + case sz_byte: comprintf("\tshrl_b_rr(data,tmpcnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: 
comprintf("\tshrl_w_rr(data,tmpcnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshrl_l_rr(data,tmpcnt);\n" + "\thighmask=0x20;\n"); + break; + default: abort(); + } + comprintf("\ttest_l_ri(tmpcnt,highmask);\n" + "\rmov_l_ri(scratchie,0);\n"); + if (curi->size == sz_long) + comprintf("\tcmov_l_rr(data,scratchie,NATIVE_CC_NE);\n"); + else { + comprintf("\tcmov_l_rr(scratchie,data,NATIVE_CC_EQ);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + default: abort(); + } + } + /* Result of shift is now in data. Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + comprintf("\tshrl_l_rr(cdata,tmpcnt);\n"); + comprintf("\ttest_l_ri(tmpcnt,highmask);\n"); + comprintf("\tforget_about(tmpcnt);\n"); + if (curi->size != sz_long) /* scratchie is still live for LSR.L */ + comprintf("\tmov_l_ri(scratchie,0);\n"); + comprintf("\tcmov_l_rr(cdata,scratchie,NATIVE_CC_NE);\n"); + comprintf("\tforget_about(scratchie);\n"); + /* And create the flags (preserve X flag if shift count is zero) */ + comprintf("\ttest_l_ri(cnt,63);\n" + "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n"); + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshrl_b_rr(data,cnt);\n" + 
"\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshrl_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshrl_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: abort(); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,4);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: abort(); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + } + else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshrl_b_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + case sz_word: comprintf("\tshrl_w_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + case sz_long: comprintf("\tshrl_l_ri(data,srcreg);\n" + "\tbp=srcreg-1;\n"); break; + default: abort(); + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + break; + + case i_LSL: + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode 
(curi->dmode, "dstreg", curi->size, "data", 1, 0); + if (curi->smode!=immi) { +/* failure; /* UNTESTED: NEW: from "Ipswitch Town" release */ + if (!noflags) { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n" + "\tint cdata=scratchie++;\n" + "\tint tmpcnt=scratchie++;\n"); + comprintf("\tmov_l_rr(tmpcnt,cnt);\n" + "\tand_l_ri(tmpcnt,63);\n" + "\tmov_l_ri(cdata,0);\n" + "\tcmov_l_rr(cdata,data,NATIVE_CC_NE);\n"); + /* cdata is now either data (for shift count!=0) or + 0 (for shift count==0) */ + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(data,tmpcnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshll_w_rr(data,tmpcnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshll_l_rr(data,tmpcnt);\n" + "\thighmask=0x20;\n"); + break; + default: abort(); + } + comprintf("\ttest_l_ri(tmpcnt,highmask);\n" + "\tmov_l_ri(scratchie,0);\n"); + if (curi->size == sz_long) + comprintf("\tcmov_l_rr(data,scratchie,NATIVE_CC_NE);\n"); + else { + comprintf("\tcmov_l_rr(scratchie,data,NATIVE_CC_EQ);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break; + default: abort(); + } + } + /* Result of shift is now in data. 
Now we need to determine + the carry by shifting cdata one less */ + comprintf("\tsub_l_ri(tmpcnt,1);\n"); + comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); + comprintf("\ttest_l_ri(tmpcnt,highmask);\n"); + comprintf("\tforget_about(tmpcnt);\n"); + if (curi->size != sz_long) /* scratchie is still live for LSL.L */ + comprintf("\tmov_l_ri(scratchie,0);\n"); + comprintf("\tcmov_l_rr(cdata,scratchie,NATIVE_CC_NE);\n"); + comprintf("\tforget_about(scratchie);\n"); + /* And create the flags (preserve X flag if shift count is zero) */ + switch (curi->size) { + case sz_byte: comprintf("\tshrl_l_ri(cdata,7);\n"); break; + case sz_word: comprintf("\tshrl_l_ri(cdata,15);\n"); break; + case sz_long: comprintf("\tshrl_l_ri(cdata,31);\n"); break; + } + comprintf("\ttest_l_ri(cnt,63);\n" + "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n"); + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(cdata,0);\n"); + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + else { + uses_cmov; + start_brace(); + comprintf("\tint highmask;\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n" + "\thighmask=0x38;\n"); + break; + case sz_word: comprintf("\tshll_w_rr(data,cnt);\n" + "\thighmask=0x30;\n"); + break; + case sz_long: comprintf("\tshll_l_rr(data,cnt);\n" + "\thighmask=0x20;\n"); + break; + default: abort(); + } + comprintf("test_l_ri(cnt,highmask);\n" + "mov_l_ri(scratchie,0);\n" + "cmov_l_rr(scratchie,data,4);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break; + case sz_word: 
comprintf("\tmov_w_rr(data,scratchie);\n");break; + case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break; + default: abort(); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + } + else { + start_brace(); + comprintf("\tint tmp=scratchie++;\n" + "\tint bp;\n" + "\tmov_l_rr(tmp,data);\n"); + switch(curi->size) { + case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n" + "\tbp=8-srcreg;\n"); break; + case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n" + "\tbp=16-srcreg;\n"); break; + case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n" + "\tbp=32-srcreg;\n"); break; + default: abort(); + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + comprintf("\t duplicate_carry();\n"); + comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + } + break; + + case i_ROL: + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace (); + + switch(curi->size) { + case sz_long: comprintf("\t rol_l_rr(data,cnt);\n"); break; + case sz_word: comprintf("\t rol_w_rr(data,cnt);\n"); break; + case sz_byte: comprintf("\t rol_b_rr(data,cnt);\n"); break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: 
comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + comprintf("\t bt_l_ri(data,0x00);\n"); /* Set C */ + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + break; + + case i_ROR: + mayfail; + if (curi->smode==Dreg) { + comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n" + " FAIL(1);\n" + " return;\n" + "} \n"); + start_brace(); + } + comprintf("\tdont_care_flags();\n"); + genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0); + genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0); + start_brace (); + + switch(curi->size) { + case sz_long: comprintf("\t ror_l_rr(data,cnt);\n"); break; + case sz_word: comprintf("\t ror_w_rr(data,cnt);\n"); break; + case sz_byte: comprintf("\t ror_b_rr(data,cnt);\n"); break; + } + + if (!noflags) { + comprintf("\tstart_needflags();\n"); + comprintf("\tif (needed_flags & FLAG_ZNV)\n"); + switch(curi->size) { + case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break; + case sz_word: comprintf("\t test_w_rr(data,data);\n"); break; + case sz_long: comprintf("\t test_l_rr(data,data);\n"); break; + } + switch(curi->size) { + case sz_byte: comprintf("\t bt_l_ri(data,0x07);\n"); break; + case sz_word: comprintf("\t bt_l_ri(data,0x0f);\n"); break; + case sz_long: comprintf("\t bt_l_ri(data,0x1f);\n"); break; + } + comprintf("\t live_flags();\n"); + comprintf("\t end_needflags();\n"); + } + genastore ("data", curi->dmode, "dstreg", curi->size, "data"); + break; + + case i_ROXL: + failure; + break; + case i_ROXR: + failure; + break; + case i_ASRW: + failure; + break; + case i_ASLW: + failure; + break; + case i_LSRW: + failure; + break; + case i_LSLW: + failure; + break; + case i_ROLW: + failure; + break; + case i_RORW: + failure; + break; + case i_ROXLW: + failure; + break; + case i_ROXRW: + failure; + break; + 
case i_MOVEC2: + isjump; + failure; + break; + case i_MOVE2C: + isjump; + failure; + break; + case i_CAS: + failure; + break; + case i_CAS2: + failure; + break; + case i_MOVES: /* ignore DFC and SFC because we have no MMU */ + isjump; + failure; + break; + case i_BKPT: /* only needed for hardware emulators */ + isjump; + failure; + break; + case i_CALLM: /* not present in 68030 */ + isjump; + failure; + break; + case i_RTM: /* not present in 68030 */ + isjump; + failure; + break; + case i_TRAPcc: + isjump; + failure; + break; + case i_DIVL: + isjump; + failure; + break; + case i_MULL: +/* failure; /* NEW: from "Ipswitch Town" release */ + if (!noflags) { + failure; + break; + } + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + comprintf("\tint r2=(extra>>12)&7;\n" + "\tint tmp=scratchie++;\n"); + + genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0); + /* The two operands are in dst and r2 */ + comprintf("\tif (extra&0x0400) {\n" /* Need full 64 bit result */ + "\tint r3=(extra&7);\n" + "\tmov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\tif (extra&0x0800) { \n" /* signed */ + "\t\timul_64_32(r2,r3);\n" + "\t} else { \n" + "\t\tmul_64_32(r2,r3);\n" + "\t} \n"); + /* The result is in r2/tmp, with r2 holding the lower 32 bits */ + comprintf("\t} else {\n"); /* Only want 32 bit result */ + /* operands in dst and r2, result foes into r2 */ + /* shouldn't matter whether it's signed or unsigned?!? 
*/ + comprintf("\timul_32_32(r2,dst);\n" + "\t}\n"); + break; + + case i_BFTST: + case i_BFEXTU: + case i_BFCHG: + case i_BFEXTS: + case i_BFCLR: + case i_BFFFO: + case i_BFSET: + case i_BFINS: + failure; + break; + case i_PACK: + failure; + break; + case i_UNPK: + failure; + break; + case i_TAS: + failure; + break; + case i_FPP: + uses_fpu; +#ifdef USE_JIT_FPU + mayfail; + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + swap_opcode(); + comprintf("\tcomp_fpp_opp(opcode,extra);\n"); +#else + failure; +#endif + break; + case i_FBcc: + uses_fpu; +#ifdef USE_JIT_FPU + isjump; + uses_cmov; + mayfail; + swap_opcode(); + comprintf("\tcomp_fbcc_opp(opcode);\n"); +#else + isjump; + failure; +#endif + break; + case i_FDBcc: + uses_fpu; + isjump; + failure; + break; + case i_FScc: + uses_fpu; +#ifdef USE_JIT_FPU + mayfail; + uses_cmov; + comprintf("\tuae_u16 extra=%s;\n",gen_nextiword()); + swap_opcode(); + comprintf("\tcomp_fscc_opp(opcode,extra);\n"); +#else + failure; +#endif + break; + case i_FTRAPcc: + uses_fpu; + isjump; + failure; + break; + case i_FSAVE: + uses_fpu; + failure; + break; + case i_FRESTORE: + uses_fpu; + failure; + break; + + case i_CINVL: + case i_CINVP: + case i_CINVA: + isjump; /* Not really, but it's probably a good idea to stop + translating at this point */ + failure; + comprintf ("\tflush_icache();\n"); /* Differentiate a bit more? 
*/ + break; + case i_CPUSHL: + case i_CPUSHP: + case i_CPUSHA: + isjump; /* Not really, but it's probably a good idea to stop + translating at this point */ + failure; + break; + case i_MOVE16: + genmov16(opcode, curi); + break; + + case i_EMULOP_RETURN: + isjump; + failure; + break; + + case i_EMULOP: + failure; + break; + + case i_MMUOP: + isjump; + failure; + break; + default: + abort (); + break; + } + comprintf("%s",endstr); + finish_braces (); + sync_m68k_pc (); + if (global_mayfail) + comprintf("\tif (failure) m68k_pc_offset=m68k_pc_offset_thisinst;\n"); + return global_failure; +} + +static void +generate_includes (FILE * f) +{ + fprintf (f, "#include \"sysdeps.h\"\n"); + fprintf (f, "#include \"m68k.h\"\n"); + fprintf (f, "#include \"memory.h\"\n"); + fprintf (f, "#include \"readcpu.h\"\n"); + fprintf (f, "#include \"newcpu.h\"\n"); + fprintf (f, "#include \"comptbl.h\"\n"); +} + +static int postfix; + +static void +generate_one_opcode (int rp, int noflags) +{ + uae_u16 smsk, dmsk; + const long int opcode = opcode_map[rp]; + const char *opcode_str; + int aborted=0; + int have_srcreg=0; + int have_dstreg=0; + + if (table68k[opcode].mnemo == i_ILLG + || table68k[opcode].clev > cpu_level) + return; + + if (table68k[opcode].handler != -1) + return; + + switch (table68k[opcode].stype) + { + case 0: + smsk = 7; + break; + case 1: + smsk = 255; + break; + case 2: + smsk = 15; + break; + case 3: + smsk = 7; + break; + case 4: + smsk = 7; + break; + case 5: + smsk = 63; + break; + case 6: + smsk = 255; + break; + case 7: + smsk = 3; + break; + default: + abort (); + } + dmsk = 7; + + next_cpu_level = -1; + if (table68k[opcode].suse + && table68k[opcode].smode != imm && table68k[opcode].smode != imm0 + && table68k[opcode].smode != imm1 && table68k[opcode].smode != imm2 + && table68k[opcode].smode != absw && table68k[opcode].smode != absl + && table68k[opcode].smode != PC8r && table68k[opcode].smode != PC16) + { + have_srcreg=1; + if (table68k[opcode].spos == -1) + { 
+ if (((int) table68k[opcode].sreg) >= 128) + comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].sreg); + else + comprintf ("\tuae_s32 srcreg = %d;\n", (int) table68k[opcode].sreg); + } + else + { + char source[100]; + int pos = table68k[opcode].spos; + + comprintf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); + + if (pos < 8 && (smsk >> (8 - pos)) != 0) + sprintf (source, "(((opcode >> %d) | (opcode << %d)) & %d)", + pos ^ 8, 8 - pos, dmsk); + else if (pos != 8) + sprintf (source, "((opcode >> %d) & %d)", pos ^ 8, smsk); + else + sprintf (source, "(opcode & %d)", smsk); + + if (table68k[opcode].stype == 3) + comprintf ("\tuae_u32 srcreg = imm8_table[%s];\n", source); + else if (table68k[opcode].stype == 1) + comprintf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source); + else + comprintf ("\tuae_u32 srcreg = %s;\n", source); + + comprintf ("#else\n"); + + if (pos) + sprintf (source, "((opcode >> %d) & %d)", pos, smsk); + else + sprintf (source, "(opcode & %d)", smsk); + + if (table68k[opcode].stype == 3) + comprintf ("\tuae_s32 srcreg = imm8_table[%s];\n", source); + else if (table68k[opcode].stype == 1) + comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%s;\n", source); + else + comprintf ("\tuae_s32 srcreg = %s;\n", source); + + comprintf ("#endif\n"); + } + } + if (table68k[opcode].duse + /* Yes, the dmode can be imm, in case of LINK or DBcc */ + && table68k[opcode].dmode != imm && table68k[opcode].dmode != imm0 + && table68k[opcode].dmode != imm1 && table68k[opcode].dmode != imm2 + && table68k[opcode].dmode != absw && table68k[opcode].dmode != absl) + { + have_dstreg=1; + if (table68k[opcode].dpos == -1) + { + if (((int) table68k[opcode].dreg) >= 128) + comprintf ("\tuae_s32 dstreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].dreg); + else + comprintf ("\tuae_s32 dstreg = %d;\n", (int) table68k[opcode].dreg); + } + else + { + int pos = table68k[opcode].dpos; + + comprintf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n"); + + if (pos < 8 && (dmsk 
>> (8 - pos)) != 0) + comprintf ("\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n", + pos ^ 8, 8 - pos, dmsk); + else if (pos != 8) + comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", + pos ^ 8, dmsk); + else + comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + + comprintf ("#else\n"); + + if (pos) + comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", + pos, dmsk); + else + comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk); + + comprintf ("#endif\n"); + } + } + + if (have_srcreg && have_dstreg && + (table68k[opcode].dmode==Areg || + table68k[opcode].dmode==Aind || + table68k[opcode].dmode==Aipi || + table68k[opcode].dmode==Apdi || + table68k[opcode].dmode==Ad16 || + table68k[opcode].dmode==Ad8r) && + (table68k[opcode].smode==Areg || + table68k[opcode].smode==Aind || + table68k[opcode].smode==Aipi || + table68k[opcode].smode==Apdi || + table68k[opcode].smode==Ad16 || + table68k[opcode].smode==Ad8r) + ) { + comprintf("\tuae_u32 dodgy=(srcreg==(uae_s32)dstreg);\n"); + } + else { + comprintf("\tuae_u32 dodgy=0;\n"); + } + comprintf("\tuae_u32 m68k_pc_offset_thisinst=m68k_pc_offset;\n"); + comprintf("\tm68k_pc_offset+=2;\n"); + + opcode_str = get_instruction_string (opcode); + + aborted=gen_opcode (opcode); + { + int flags=0; + if (global_isjump) flags|=1; + if (long_opcode) flags|=2; + if (global_cmov) flags|=4; + if (global_isaddx) flags|=8; + if (global_iscjump) flags|=16; + if (global_fpu) flags|=32; + + comprintf ("}\n"); + + if (aborted) { + fprintf (stblfile, "{ NULL, 0x%08x, %ld }, /* %s */\n", flags, opcode, opcode_str); + com_discard(); + } + else { + if (noflags) { + fprintf (stblfile, "{ op_%lx_%d_comp_nf, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, opcode_str); + fprintf (headerfile, "extern compop_func op_%lx_%d_comp_nf;\n", opcode, postfix); + printf ("void REGPARAM2 op_%lx_%d_comp_nf(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str); + } + else { + fprintf (stblfile, "{ op_%lx_%d_comp_ff, 
0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, opcode_str); + fprintf (headerfile, "extern compop_func op_%lx_%d_comp_ff;\n", opcode, postfix); + printf ("void REGPARAM2 op_%lx_%d_comp_ff(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str); + } + com_flush(); + } + } + opcode_next_clev[rp] = next_cpu_level; + opcode_last_postfix[rp] = postfix; +} + +static void +generate_func (int noflags) +{ + int i, j, rp; + + using_prefetch = 0; + using_exception_3 = 0; + for (i = 0; i < 1; i++) /* We only do one level! */ + { + cpu_level = 4 - i; + postfix = i; + + if (noflags) + fprintf (stblfile, "struct comptbl op_smalltbl_%d_comp_nf[] = {\n", postfix); + else + fprintf (stblfile, "struct comptbl op_smalltbl_%d_comp_ff[] = {\n", postfix); + + + /* sam: this is for people with low memory (eg. me :)) */ + !printf ("\n" + "#if !defined(PART_1) && !defined(PART_2) && " + "!defined(PART_3) && !defined(PART_4) && " + "!defined(PART_5) && !defined(PART_6) && " + "!defined(PART_7) && !defined(PART_8)" + "\n" + "#define PART_1 1\n" + "#define PART_2 1\n" + "#define PART_3 1\n" + "#define PART_4 1\n" + "#define PART_5 1\n" + "#define PART_6 1\n" + "#define PART_7 1\n" + "#define PART_8 1\n" + "#endif\n\n"); + + rp = 0; + for (j = 1; j <= 8; ++j) + { + int k = (j * nr_cpuop_funcs) / 8; + printf ("#ifdef PART_%d\n", j); + for (; rp < k; rp++) + generate_one_opcode (rp,noflags); + printf ("#endif\n\n"); + } + + fprintf (stblfile, "{ 0, 0,65536 }};\n"); + } + +} + +int +main (int argc, char **argv) +{ + read_table68k (); + do_merges (); + + opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + opcode_next_clev = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + counts = (unsigned long *) malloc (65536 * sizeof (unsigned long)); + read_counts (); + + /* It would be a lot nicer to put all in one file (we'd also get rid of + * cputbl.h that way), but cpuopti can't cope. 
That could be fixed, but + * I don't dare to touch the 68k version. */ + + headerfile = fopen ("comptbl.h", "wb"); + stblfile = fopen ("compstbl.cpp", "wb"); + freopen ("compemu.cpp", "wb", stdout); + + generate_includes (stdout); + generate_includes (stblfile); + + printf("#include \"compiler/compemu.h\"\n"); + + noflags=0; + generate_func (noflags); + + free(opcode_map); + free(opcode_last_postfix); + free(opcode_next_clev); + free(counts); + + opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + opcode_next_clev = (int *) malloc (sizeof (int) * nr_cpuop_funcs); + counts = (unsigned long *) malloc (65536 * sizeof (unsigned long)); + read_counts (); + noflags=1; + generate_func (noflags); + + free(opcode_map); + free(opcode_last_postfix); + free(opcode_next_clev); + free(counts); + + free (table68k); + fclose (stblfile); + fclose (headerfile); + fflush (stdout); + return 0; +} diff --git a/jit2/test_codegen_x86.cpp b/jit2/test_codegen_x86.cpp new file mode 100644 index 00000000..236a2d5e --- /dev/null +++ b/jit2/test_codegen_x86.cpp @@ -0,0 +1,2254 @@ +/******************** -*- mode: C; tab-width: 8 -*- ******************** + * + * Dumb and Brute Force Run-time assembler verifier for IA-32 and AMD64 + * + ***********************************************************************/ + + +/*********************************************************************** + * + * Copyright 2004-2008 Gwenole Beauchesne + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ***********************************************************************/ + +/* + * STATUS: 26M variations covering unary register based operations, + * reg/reg operations, imm/reg operations. + * + * TODO: + * - Rewrite to use internal BFD/opcodes format instead of string compares + * - Add reg/mem, imm/mem variations + */ + +#define _BSD_SOURCE 1 +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> + +#include "sysdeps.h" + +static int verbose = 2; + +#define TEST_INST_ALU 1 +#define TEST_INST_FPU 1 +#define TEST_INST_MMX 1 +#define TEST_INST_SSE 1 +#if TEST_INST_ALU +#define TEST_INST_ALU_REG 1 +#define TEST_INST_ALU_REG_REG 1 +#define TEST_INST_ALU_CNT_REG 1 +#define TEST_INST_ALU_IMM_REG 1 +#define TEST_INST_ALU_MEM_REG 1 +#endif +#if TEST_INST_FPU +#define TEST_INST_FPU_UNARY 1 +#define TEST_INST_FPU_REG 1 +#define TEST_INST_FPU_MEM 1 +#endif +#if TEST_INST_MMX +#define TEST_INST_MMX_REG_REG 1 +#define TEST_INST_MMX_IMM_REG 1 +#define TEST_INST_MMX_MEM_REG 1 +#endif +#if TEST_INST_SSE +#define TEST_INST_SSE_REG 1 +#define TEST_INST_SSE_REG_REG 1 +#define TEST_INST_SSE_MEM_REG 1 +#endif + +#undef abort +#define abort() do { \ + fprintf(stderr, "ABORT: %s, line %d\n", __FILE__, __LINE__); \ + (abort)(); \ +} while (0) + +#define X86_TARGET_64BIT 1 +#define X86_FLAT_REGISTERS 0 +#define X86_OPTIMIZE_ALU 1 +#define X86_OPTIMIZE_ROTSHI 1 +#define X86_RIP_RELATIVE_ADDR 0 +#include "compiler/codegen_x86.h" + +#if X86_TARGET_64BIT +#define X86_MAX_ALU_REGS 16 +#define X86_MAX_SSE_REGS 16 +#else +#define X86_MAX_ALU_REGS 8 +#define X86_MAX_SSE_REGS 8 +#endif +#define X86_MAX_FPU_REGS 8 +#define X86_MAX_MMX_REGS 8 + +#define VALID_REG(r, b, n) (((unsigned)((r) - X86_##b)) < (n)) +#if 
X86_TARGET_64BIT +#define VALID_REG8(r) (VALID_REG(r, AL, 16) || VALID_REG(r, AH, 4)) +#define VALID_REG64(r) VALID_REG(r, RAX, X86_MAX_ALU_REGS) +#else +#define VALID_REG8(r) (VALID_REG(r, AL, 4) || VALID_REG(r, AH, 4)) +#define VALID_REG64(r) (0) +#endif +#define VALID_REG16(r) VALID_REG(r, AX, X86_MAX_ALU_REGS) +#define VALID_REG32(r) VALID_REG(r, EAX, X86_MAX_ALU_REGS) + +#define x86_emit_byte(B) emit_byte(B) +#define x86_emit_word(W) emit_word(W) +#define x86_emit_long(L) emit_long(L) +#define x86_emit_quad(Q) emit_quad(Q) +#define x86_get_target() get_target() +#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) + +static void jit_fail(const char *msg, const char *file, int line, const char *function) +{ + fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n", + function, file, line, msg); + abort(); +} + +static uint8 *target; + +static inline void emit_byte(uint8 x) +{ + *target++ = x; +} + +static inline void emit_word(uint16 x) +{ + *((uint16 *)target) = x; + target += 2; +} + +static inline void emit_long(uint32 x) +{ + *((uint32 *)target) = x; + target += 4; +} + +static inline void emit_quad(uint64 x) +{ + *((uint64 *)target) = x; + target += 8; +} + +static inline void set_target(uint8 *t) +{ + target = t; +} + +static inline uint8 *get_target(void) +{ + return target; +} + +static uint32 mon_read_byte(uintptr addr) +{ + uint8 *m = (uint8 *)addr; + return (uint32)(*m); +} + +extern "C" { +#include "disass/dis-asm.h" + +int buffer_read_memory(bfd_vma from, bfd_byte *to, unsigned int length, struct disassemble_info *info) +{ + while (length--) + *to++ = mon_read_byte(from++); + return 0; +} + +void perror_memory(int status, bfd_vma memaddr, struct disassemble_info *info) +{ + info->fprintf_func(info->stream, "Unknown error %d\n", status); +} + +void generic_print_address(bfd_vma addr, struct disassemble_info *info) +{ + if (addr >= UVAL64(0x100000000)) + info->fprintf_func(info->stream, "$%08x%08x", 
(uint32)(addr >> 32), (uint32)addr); + else + info->fprintf_func(info->stream, "$%08x", (uint32)addr); +} + +int generic_symbol_at_address(bfd_vma addr, struct disassemble_info *info) +{ + return 0; +} +} + +struct SFILE { + char *buffer; + char *current; +}; + +static int mon_sprintf(SFILE *f, const char *format, ...) +{ + int n; + va_list args; + va_start(args, format); + vsprintf(f->current, format, args); + f->current += n = strlen(f->current); + va_end(args); + return n; +} + +static int disass_x86(char *buf, uintptr adr) +{ + disassemble_info info; + SFILE sfile; + sfile.buffer = buf; + sfile.current = buf; + INIT_DISASSEMBLE_INFO(info, (FILE *)&sfile, (fprintf_ftype)mon_sprintf); + info.mach = X86_TARGET_64BIT ? bfd_mach_x86_64 : bfd_mach_i386_i386; + info.disassembler_options = "suffix"; + return print_insn_i386(adr, &info); +} + +enum { + op_disp, + op_reg, + op_base, + op_index, + op_scale, + op_imm, +}; +struct operand_t { + int32 disp; + int8 reg; + int8 base; + int8 index; + int8 scale; + int64 imm; + + void clear() { + disp = imm = 0; + reg = base = index = -1; + scale = 1; + } + + void fill(int optype, int value) { + switch (optype) { + case op_disp: disp = value; break; + case op_reg: reg = value; break; + case op_base: base = value; break; + case op_index: index = value; break; + case op_scale: scale = value; break; + case op_imm: imm = value; break; + default: abort(); + } + } +}; + +#define MAX_INSNS 1024 +#define MAX_INSN_LENGTH 16 +#define MAX_INSN_OPERANDS 3 + +struct insn_t { + char name[16]; + int n_operands; + operand_t operands[MAX_INSN_OPERANDS]; + + void clear() { + memset(name, 0, sizeof(name)); + n_operands = 0; + for (int i = 0; i < MAX_INSN_OPERANDS; i++) + operands[i].clear(); + } + + void pretty_print() { + printf("%s, %d operands\n", name, n_operands); + for (int i = 0; i < n_operands; i++) { + operand_t *op = &operands[i]; + if (op->reg != -1) + printf(" reg r%d\n", op->reg); + else { + printf(" mem 0x%08x(", op->disp); + if 
(op->base != -1) + printf("r%d", op->base); + printf(","); + if (op->index != -1) + printf("r%d", op->index); + printf(","); + if (op->base != -1 || op->index != -1) + printf("%d", op->scale); + printf(")\n"); + } + } + } +}; + +static inline char *find_blanks(char *p) +{ + while (*p && !isspace(*p)) + ++p; + return p; +} + +static inline char *skip_blanks(char *p) +{ + while (*p && isspace(*p)) + ++p; + return p; +} + +static int parse_reg(operand_t *op, int optype, char *buf) +{ + int reg = X86_NOREG; + int len = 0; + char *p = buf; + switch (p[0]) { + case 'a': case 'A': + len = 2; + switch (p[1]) { + case 'l': case 'L': reg = X86_AL; break; + case 'h': case 'H': reg = X86_AH; break; + case 'x': case 'X': reg = X86_AX; break; + } + break; + case 'b': case 'B': + len = 2; + switch (p[1]) { + case 'l': case 'L': reg = X86_BL; break; + case 'h': case 'H': reg = X86_BH; break; + case 'x': case 'X': reg = X86_BX; break; + case 'p': case 'P': + switch (p[2]) { +#if X86_TARGET_64BIT + case 'l': case 'L': reg = X86_BPL, ++len; break; +#endif + default: reg = X86_BP; break; + } + break; + } + break; + case 'c': case 'C': + len = 2; + switch (p[1]) { + case 'l': case 'L': reg = X86_CL; break; + case 'h': case 'H': reg = X86_CH; break; + case 'x': case 'X': reg = X86_CX; break; + } + break; + case 'd': case 'D': + len = 2; + switch (p[1]) { + case 'l': case 'L': reg = X86_DL; break; + case 'h': case 'H': reg = X86_DH; break; + case 'x': case 'X': reg = X86_DX; break; + case 'i': case 'I': + switch (p[2]) { +#if X86_TARGET_64BIT + case 'l': case 'L': reg = X86_DIL; ++len; break; +#endif + default: reg = X86_DI; break; + } + break; + } + break; + case 's': case 'S': + len = 2; + switch (p[2]) { +#if X86_TARGET_64BIT + case 'l': case 'L': + ++len; + switch (p[1]) { + case 'p': case 'P': reg = X86_SPL; break; + case 'i': case 'I': reg = X86_SIL; break; + } + break; +#endif + case '(': + if ((p[1] == 't' || p[1] == 'T') && isdigit(p[3]) && p[4] == ')') + len += 3, reg = X86_ST0 
+ (p[3] - '0'); + break; + default: + switch (p[1]) { + case 't': case 'T': reg = X86_ST0; break; + case 'p': case 'P': reg = X86_SP; break; + case 'i': case 'I': reg = X86_SI; break; + } + break; + } + break; + case 'e': case 'E': + len = 3; + switch (p[2]) { + case 'x': case 'X': + switch (p[1]) { + case 'a': case 'A': reg = X86_EAX; break; + case 'b': case 'B': reg = X86_EBX; break; + case 'c': case 'C': reg = X86_ECX; break; + case 'd': case 'D': reg = X86_EDX; break; + } + break; + case 'i': case 'I': + switch (p[1]) { + case 's': case 'S': reg = X86_ESI; break; + case 'd': case 'D': reg = X86_EDI; break; + } + break; + case 'p': case 'P': + switch (p[1]) { + case 'b': case 'B': reg = X86_EBP; break; + case 's': case 'S': reg = X86_ESP; break; + } + break; + } + break; +#if X86_TARGET_64BIT + case 'r': case 'R': + len = 3; + switch (p[2]) { + case 'x': case 'X': + switch (p[1]) { + case 'a': case 'A': reg = X86_RAX; break; + case 'b': case 'B': reg = X86_RBX; break; + case 'c': case 'C': reg = X86_RCX; break; + case 'd': case 'D': reg = X86_RDX; break; + } + break; + case 'i': case 'I': + switch (p[1]) { + case 's': case 'S': reg = X86_RSI; break; + case 'd': case 'D': reg = X86_RDI; break; + } + break; + case 'p': case 'P': + switch (p[1]) { + case 'b': case 'B': reg = X86_RBP; break; + case 's': case 'S': reg = X86_RSP; break; + } + break; + case 'b': case 'B': + switch (p[1]) { + case '8': reg = X86_R8B; break; + case '9': reg = X86_R9B; break; + } + break; + case 'w': case 'W': + switch (p[1]) { + case '8': reg = X86_R8W; break; + case '9': reg = X86_R9W; break; + } + break; + case 'd': case 'D': + switch (p[1]) { + case '8': reg = X86_R8D; break; + case '9': reg = X86_R9D; break; + } + break; + case '0': case '1': case '2': case '3': case '4': case '5': + if (p[1] == '1') { + const int r = p[2] - '0'; + switch (p[3]) { + case 'b': case 'B': reg = X86_R10B + r, ++len; break; + case 'w': case 'W': reg = X86_R10W + r, ++len; break; + case 'd': case 'D': reg 
= X86_R10D + r, ++len; break; + default: reg = X86_R10 + r; break; + } + } + break; + default: + switch (p[1]) { + case '8': reg = X86_R8, len = 2; break; + case '9': reg = X86_R9, len = 2; break; + } + break; + } + break; +#endif + case 'm': case 'M': + if ((p[1] == 'm' || p[1] == 'M') && isdigit(p[2])) + reg = X86_MM0 + (p[2] - '0'), len = 3; + break; + case 'x': case 'X': + if ((p[1] == 'm' || p[1] == 'M') && (p[2] == 'm' || p[2] == 'M')) { +#if X86_TARGET_64BIT + if (p[3] == '1' && isdigit(p[4])) + reg = X86_XMM10 + (p[4] - '0'), len = 5; + else +#endif + if (isdigit(p[3])) + reg = X86_XMM0 + (p[3] - '0'), len = 4; + } + break; + } + + if (len > 0 && reg != X86_NOREG) { + op->fill(optype, reg); + return len; + } + + return X86_NOREG; +} + +/* + * Parse an unsigned integer literal at nptr using the given base (0 lets + * strtoul/strtoull pick the base from the literal's prefix) and store the + * end of the parsed text in *endptr. Aborts if the conversion sets errno. + */ +static unsigned long parse_imm(char *nptr, char **endptr, int base = 0) +{ + errno = 0; +#if X86_TARGET_64BIT + if (sizeof(unsigned long) != 8) { + unsigned long long val = strtoull(nptr, endptr, base); + if (errno == 0) + return val; + abort(); + } +#endif + unsigned long val = strtoul(nptr, endptr, base); + if (errno == 0) + return val; + abort(); + return 0; +} + +static int parse_mem(operand_t *op, char *buf) +{ + char *p = buf; + + if (strncmp(buf, "0x", 2) == 0) + op->disp = parse_imm(buf, &p, 16); + + if (*p == '(') { + p++; + + if (*p == '%') { + p++; + + int n = parse_reg(op, op_base, p); + if (n <= 0) + return -3; + p += n; + } + + if (*p == ',') { + p++; + + if (*p == '%') { + int n = parse_reg(op, op_index, ++p); + if (n <= 0) + return -4; + p += n; + + if (*p != ',') + return -5; + p++; + + goto do_parse_scale; + } + else if (isdigit(*p)) { + do_parse_scale: + long val = strtol(p, &p, 10); + if (val == 0 && errno == EINVAL) + abort(); + op->scale = val; + } + } + + if (*p != ')') + return -6; + p++; + } + + return p - buf; +} + +/* + * Split one line of disassembler text in buf into a mnemonic and operands, + * filling *ii. A leading "rex64" and/or "rep" prefix word is skipped. + * Operands are recognised by their first character: '%' register, + * '0' or '(' memory reference, '$' immediate; ',' starts the next operand. + * Aborts on text it cannot parse or on more than MAX_INSN_OPERANDS operands. + */ +static void parse_insn(insn_t *ii, char *buf) +{ + char *p = buf; + ii->clear(); + +#if 0 + printf("BUF: %s\n", buf); +#endif + + if (strncmp(p, "rex64", 5) == 0) { + char *q = find_blanks(p); 
+ if (verbose > 1) { + char prefix[16]; + memset(prefix, 0, sizeof(prefix)); + memcpy(prefix, p, q - p); + fprintf(stderr, "Instruction '%s', skip REX prefix '%s'\n", buf, prefix); + } + p = skip_blanks(q); + } + + if (strncmp(p, "rep", 3) == 0) { + char *q = find_blanks(p); + if (verbose > 1) { + char prefix[16]; + memset(prefix, 0, sizeof(prefix)); + memcpy(prefix, p, q - p); + fprintf(stderr, "Instruction '%s', skip REP prefix '%s'\n", buf, prefix); + } + p = skip_blanks(q); + } + + /* Copy the mnemonic: stop at end of string as well as at a blank, and never write past ii->name (clear() zeroed it, so it stays NUL-terminated). */ + for (size_t i = 0; *p && !isspace((unsigned char)*p); p++) { + if (i < sizeof(ii->name) - 1) + ii->name[i++] = *p; + } + + while (*p && isspace((unsigned char)*p)) + p++; + if (*p == '\0') + return; + + int n_operands = 0; + int optype = op_reg; + bool done = false; + while (!done) { + int n; + switch (*p) { + case '%': + n = parse_reg(&ii->operands[n_operands], optype, ++p); + if (n <= 0) { + fprintf(stderr, "parse_reg(%s) error %d\n", p, n); + abort(); + } + p += n; + break; + case '0': case '(': + n = parse_mem(&ii->operands[n_operands], p); + if (n <= 0) { + fprintf(stderr, "parse_mem(%s) error %d\n", p, n); + abort(); + } + p += n; + break; + case '$': { + ii->operands[n_operands].imm = parse_imm(++p, &p, 0); + break; + } + case '*': + p++; + break; + case ',': + /* operands[] has MAX_INSN_OPERANDS slots; refuse to index past it */ + if (++n_operands >= MAX_INSN_OPERANDS) { + fprintf(stderr, "too many operands> %s\n", p); + abort(); + } + p++; + break; + case ' ': case '\t': + p++; + break; + case '\0': + done = true; + break; + default: + fprintf(stderr, "parse error> %s\n", p); + abort(); + } + } + ii->n_operands = n_operands + 1; +} + +static unsigned long n_tests, n_failures; +static unsigned long n_all_tests, n_all_failures; + +static bool check_unary(insn_t *ii, const char *name) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 0) { + fprintf(stderr, "ERROR: instruction expected 0 operand, got %d\n", ii->n_operands); + return false; + } + + return true; +} + +static bool check_reg(insn_t *ii, const char *name, int r) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, 
"ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 1) { + fprintf(stderr, "ERROR: instruction expected 1 operand, got %d\n", ii->n_operands); + return false; + } + + int reg = ii->operands[0].reg; + + if (reg != r) { + fprintf(stderr, "ERROR: instruction expected r%d as source, got ", r); + if (reg == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "r%d\n", reg); + return false; + } + + return true; +} + +static bool check_reg_reg(insn_t *ii, const char *name, int s, int d) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 2) { + fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands); + return false; + } + + int srcreg = ii->operands[0].reg; + int dstreg = ii->operands[1].reg; + + if (srcreg != s) { + fprintf(stderr, "ERROR: instruction expected r%d as source, got ", s); + if (srcreg == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "r%d\n", srcreg); + return false; + } + + if (dstreg != d) { + fprintf(stderr, "ERROR: instruction expected r%d as destination, got ", d); + if (dstreg == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "r%d\n", dstreg); + return false; + } + + return true; +} + +static bool check_imm_reg(insn_t *ii, const char *name, uint32 v, int d, int mode = -1) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 2) { + fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands); + return false; + } + + uint32 imm = ii->operands[0].imm; + int dstreg = ii->operands[1].reg; + + if (mode == -1) { + char suffix = name[strlen(name) - 1]; + switch (suffix) { + case 'b': mode = 1; break; + case 'w': mode = 2; break; + case 'l': mode = 4; break; + 
case 'q': mode = 8; break; + } + } + switch (mode) { + case 1: v &= 0xff; break; + case 2: v &= 0xffff; break; + } + + if (imm != v) { + fprintf(stderr, "ERROR: instruction expected 0x%08x as immediate, got ", v); + if (imm == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "0x%08x\n", imm); + return false; + } + + if (dstreg != d) { + fprintf(stderr, "ERROR: instruction expected r%d as destination, got ", d); + if (dstreg == -1) + fprintf(stderr, "nothing\n"); + else + fprintf(stderr, "%d\n", dstreg); + return false; + } + + return true; +} + +static bool do_check_mem(insn_t *ii, uint32 D, int B, int I, int S, int Mpos) +{ + operand_t *mem = &ii->operands[Mpos]; + uint32 d = mem->disp; + int b = mem->base; + int i = mem->index; + int s = mem->scale; + + if (d != D) { + fprintf(stderr, "ERROR: instruction expected 0x%08x as displacement, got 0x%08x\n", D, d); + return false; + } + + if (b != B) { + fprintf(stderr, "ERROR: instruction expected r%d as base, got r%d\n", B, b); + return false; + } + + if (i != I) { + fprintf(stderr, "ERROR: instruction expected r%d as index, got r%d\n", I, i); + return false; + } + + if (s != S) { + fprintf(stderr, "ERROR: instruction expected %d as scale factor, got %d\n", S, s); + return false; + } + + return true; +} + +static bool check_mem(insn_t *ii, const char *name, uint32 D, int B, int I, int S) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 1) { + fprintf(stderr, "ERROR: instruction expected 1 operand, got %d\n", ii->n_operands); + return false; + } + + return do_check_mem(ii, D, B, I, S, 0); +} + +static bool check_mem_reg(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R, int Rpos = 1) +{ + if (strcasecmp(ii->name, name) != 0) { + fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name); + return false; + } + + if (ii->n_operands != 2) 
{ + fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands); + return false; + } + + if (!do_check_mem(ii, D, B, I, S, Rpos ^ 1)) + return false; + + int r = ii->operands[Rpos].reg; + + if (r != R) { + fprintf(stderr, "ERROR: instruction expected r%d as reg operand, got r%d\n", R, r); + return false; + } + + return true; +} + +static inline bool check_reg_mem(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R) +{ + return check_mem_reg(ii, name, D, B, I, S, R, 0); +} + +static void show_instruction(const char *buffer, const uint8 *bytes) +{ + if (verbose > 1) { + if (1) { + for (int j = 0; j < MAX_INSN_LENGTH; j++) + fprintf(stderr, "%02x ", bytes[j]); + fprintf(stderr, "| "); + } + fprintf(stderr, "%s\n", buffer); + } +} + +static void show_status(unsigned long n_tests) +{ +#if 1 + const unsigned long N_STEPS = 100000; + static const char cursors[] = { '-', '\\', '|', '/' }; + if ((n_tests % N_STEPS) == 0) { + printf(" %c (%d)\r", cursors[(n_tests/N_STEPS)%sizeof(cursors)], n_tests); + fflush(stdout); + } +#else + const unsigned long N_STEPS = 1000000; + if ((n_tests % N_STEPS) == 0) + printf(" ... 
%d\n", n_tests); +#endif +} + +int main(void) +{ + static char buffer[1024]; + static uint8 block[MAX_INSNS * MAX_INSN_LENGTH]; + static char *insns[MAX_INSNS]; + static int modes[MAX_INSNS]; + n_all_tests = n_all_failures = 0; + +#if TEST_INST_ALU_REG + printf("Testing reg forms\n"); + n_tests = n_failures = 0; + for (int r = 0; r < X86_MAX_ALU_REGS; r++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##r(r); \ +} while (0) +#define GEN64(INSN, GENOP) do { \ + if (X86_TARGET_64BIT) \ + GEN(INSN, GENOP); \ +} while (0) +#define GENA(INSN, GENOP) do { \ + if (VALID_REG8(r)) \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN64(INSN "q", GENOP##Q); \ +} while (0) + GENA("not", NOT); + GENA("neg", NEG); + GENA("mul", MUL); + GENA("imul", IMUL); + GENA("div", DIV); + GENA("idiv", IDIV); + GENA("dec", DEC); + GENA("inc", INC); + if (X86_TARGET_64BIT) { + GEN("callq", CALLs); + GEN("jmpq", JMPs); + GEN("pushq", PUSHQ); + GEN("popq", POPQ); + } + else { + GEN("calll", CALLs); + GEN("jmpl", JMPs); + GEN("pushl", PUSHL); + GEN("popl", POPL); + } + GEN("bswap", BSWAPL); // FIXME: disass bug? no suffix + GEN64("bswap", BSWAPQ); // FIXME: disass bug? 
no suffix + if (VALID_REG8(r)) { + GEN("seto", SETO); + GEN("setno", SETNO); + GEN("setb", SETB); + GEN("setae", SETAE); + GEN("sete", SETE); + GEN("setne", SETNE); + GEN("setbe", SETBE); + GEN("seta", SETA); + GEN("sets", SETS); + GEN("setns", SETNS); + GEN("setp", SETP); + GEN("setnp", SETNP); + GEN("setl", SETL); + GEN("setge", SETGE); + GEN("setle", SETLE); + GEN("setg", SETG); + } +#undef GENA +#undef GEN64 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_reg(&ii, insns[i], r)) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_ALU_REG_REG + printf("Testing reg,reg forms\n"); + n_tests = n_failures = 0; + for (int s = 0; s < X86_MAX_ALU_REGS; s++) { + for (int d = 0; d < X86_MAX_ALU_REGS; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##rr(s, d); \ +} while (0) +#define GEN64(INSN, GENOP) do { \ + if (X86_TARGET_64BIT) \ + GEN(INSN, GENOP); \ +} while (0) +#define GEN1(INSN, GENOP, OP) do { \ + insns[i++] = INSN; \ + GENOP##rr(OP, s, d); \ +} while (0) +#define GENA(INSN, GENOP) do { \ + if (VALID_REG8(s) && VALID_REG8(d)) \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN64(INSN "q", GENOP##Q); \ +} while (0) + GENA("adc", ADC); + GENA("add", ADD); + GENA("and", AND); + GENA("cmp", CMP); + GENA("or", OR); + GENA("sbb", SBB); + GENA("sub", SUB); + GENA("xor", XOR); + GENA("mov", MOV); + GEN("btw", BTW); + GEN("btl", BTL); + GEN64("btq", BTQ); + GEN("btcw", BTCW); + GEN("btcl", BTCL); + GEN64("btcq", BTCQ); + GEN("btrw", BTRW); + GEN("btrl", BTRL); + GEN64("btrq", 
BTRQ); + GEN("btsw", BTSW); + GEN("btsl", BTSL); + GEN64("btsq", BTSQ); + GEN("imulw", IMULW); + GEN("imull", IMULL); + GEN64("imulq", IMULQ); + GEN1("cmove", CMOVW, X86_CC_Z); + GEN1("cmove", CMOVL, X86_CC_Z); + if (X86_TARGET_64BIT) + GEN1("cmove", CMOVQ, X86_CC_Z); + GENA("test", TEST); + GENA("cmpxchg", CMPXCHG); + GENA("xadd", XADD); + GENA("xchg", XCHG); + GEN("bsfw", BSFW); + GEN("bsfl", BSFL); + GEN64("bsfq", BSFQ); + GEN("bsrw", BSRW); + GEN("bsrl", BSRL); + GEN64("bsrq", BSRQ); + if (VALID_REG8(s)) { + GEN("movsbw", MOVSBW); + GEN("movsbl", MOVSBL); + GEN64("movsbq", MOVSBQ); + GEN("movzbw", MOVZBW); + GEN("movzbl", MOVZBL); + GEN64("movzbq", MOVZBQ); + } + GEN("movswl", MOVSWL); + GEN64("movswq", MOVSWQ); + GEN("movzwl", MOVZWL); + GEN64("movzwq", MOVZWQ); + GEN64("movslq", MOVSLQ); +#undef GENA +#undef GEN1 +#undef GEN64 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_reg_reg(&ii, insns[i], s, d)) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_ALU_CNT_REG + printf("Testing cl,reg forms\n"); + n_tests = n_failures = 0; + for (int d = 0; d < X86_MAX_ALU_REGS; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##rr(X86_CL, d); \ +} while (0) +#define GEN64(INSN, GENOP) do { \ + if (X86_TARGET_64BIT) \ + GEN(INSN, GENOP); \ +} while (0) +#define GENA(INSN, GENOP) do { \ + if (VALID_REG8(d)) \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN64(INSN "q", GENOP##Q); \ +} while (0) + GENA("rol", ROL); + GENA("ror", ROR); + GENA("rcl", RCL); + GENA("rcr", RCR); + 
GENA("shl", SHL); + GENA("shr", SHR); + GENA("sar", SAR); +#undef GENA +#undef GEN64 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_reg_reg(&ii, insns[i], X86_CL, d)) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + + static const uint32 imm_table[] = { + 0x00000000, 0x00000001, 0x00000002, 0x00000004, + 0x00000008, 0x00000010, 0x00000020, 0x00000040, + 0x00000080, 0x000000fe, 0x000000ff, 0x00000100, + 0x00000101, 0x00000102, 0xfffffffe, 0xffffffff, + 0x00000000, 0x10000000, 0x20000000, 0x30000000, + 0x40000000, 0x50000000, 0x60000000, 0x70000000, + 0x80000000, 0x90000000, 0xa0000000, 0xb0000000, + 0xc0000000, 0xd0000000, 0xe0000000, 0xf0000000, + 0xfffffffd, 0xfffffffe, 0xffffffff, 0x00000001, + 0x00000002, 0x00000003, 0x11111111, 0x22222222, + 0x33333333, 0x44444444, 0x55555555, 0x66666666, + 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, + 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, + }; + const int n_imm_tab_count = sizeof(imm_table)/sizeof(imm_table[0]); + +#if TEST_INST_ALU_IMM_REG + printf("Testing imm,reg forms\n"); + n_tests = n_failures = 0; + for (int j = 0; j < n_imm_tab_count; j++) { + const uint32 value = imm_table[j]; + for (int d = 0; d < X86_MAX_ALU_REGS; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i] = INSN; \ + modes[i] = -1; \ + i++; GENOP##ir(value, d); \ + } while (0) +#define GEN64(INSN, GENOP) do { \ + if (X86_TARGET_64BIT) \ + GEN(INSN, GENOP); \ + } while (0) +#define GENM(INSN, GENOP, MODE) do { \ + insns[i] = INSN; \ + modes[i] = MODE; \ + i++; GENOP##ir(value, d); \ + } while (0) +#define GENM64(INSN, GENOP, 
MODE) do { \ + if (X86_TARGET_64BIT) \ + GENM(INSN, GENOP, MODE); \ + } while (0) +#define GENA(INSN, GENOP) do { \ + if (VALID_REG8(d)) \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN64(INSN "q", GENOP##Q); \ + } while (0) +#define GENAM(INSN, GENOP, MODE) do { \ + if (VALID_REG8(d)) \ + GENM(INSN "b", GENOP##B, MODE); \ + GENM(INSN "w", GENOP##W, MODE); \ + GENM(INSN "l", GENOP##L, MODE); \ + GENM64(INSN "q", GENOP##Q, MODE); \ + } while (0) + GENA("adc", ADC); + GENA("add", ADD); + GENA("and", AND); + GENA("cmp", CMP); + GENA("or", OR); + GENA("sbb", SBB); + GENA("sub", SUB); + GENA("xor", XOR); + GENA("mov", MOV); + GENM("btw", BTW, 1); + GENM("btl", BTL, 1); + GENM64("btq", BTQ, 1); + GENM("btcw", BTCW, 1); + GENM("btcl", BTCL, 1); + GENM64("btcq", BTCQ, 1); + GENM("btrw", BTRW, 1); + GENM("btrl", BTRL, 1); + GENM64("btrq", BTRQ, 1); + GENM("btsw", BTSW, 1); + GENM("btsl", BTSL, 1); + GENM64("btsq", BTSQ, 1); + if (value != 1) { + GENAM("rol", ROL, 1); + GENAM("ror", ROR, 1); + GENAM("rcl", RCL, 1); + GENAM("rcr", RCR, 1); + GENAM("shl", SHL, 1); + GENAM("shr", SHR, 1); + GENAM("sar", SAR, 1); + } + GENA("test", TEST); +#undef GENAM +#undef GENA +#undef GENM64 +#undef GENM +#undef GEN64 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + + static const uint32 off_table[] = { + 0x00000000, + 0x00000001, + 0x00000040, + 0x00000080, + 0x000000ff, + 0x00000100, + 0xfffffffe, + 0xffffffff, + }; + const int off_table_count = sizeof(off_table) / sizeof(off_table[0]); + +#if 
TEST_INST_ALU_MEM_REG + printf("Testing mem,reg forms\n"); + n_tests = n_failures = 0; + for (int d = 0; d < off_table_count; d++) { + const uint32 D = off_table[d]; + for (int B = -1; B < X86_MAX_ALU_REGS; B++) { + for (int I = -1; I < X86_MAX_ALU_REGS; I++) { + if (I == X86_RSP) + continue; + for (int S = 1; S < 16; S *= 2) { + if (I == -1 && S > 1) + continue; + for (int r = 0; r < X86_MAX_ALU_REGS; r++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##mr(D, B, I, S, r); \ + } while (0) +#define GEN64(INSN, GENOP) do { \ + if (X86_TARGET_64BIT) \ + GEN(INSN, GENOP); \ + } while (0) +#define GENA(INSN, GENOP) do { \ + if (VALID_REG8(r)) \ + GEN(INSN "b", GENOP##B); \ + GEN(INSN "w", GENOP##W); \ + GEN(INSN "l", GENOP##L); \ + GEN64(INSN "q", GENOP##Q); \ + } while (0) + GENA("adc", ADC); + GENA("add", ADD); + GENA("and", AND); + GENA("cmp", CMP); + GENA("or", OR); + GENA("sbb", SBB); + GENA("sub", SUB); + GENA("xor", XOR); + GENA("mov", MOV); + GEN("imulw", IMULW); + GEN("imull", IMULL); + GEN64("imulq", IMULQ); + GEN("bsfw", BSFW); + GEN("bsfl", BSFL); + GEN64("bsfq", BSFQ); + GEN("bsrw", BSRW); + GEN("bsrl", BSRL); + GEN64("bsrq", BSRQ); + GEN("movsbw", MOVSBW); + GEN("movsbl", MOVSBL); + GEN64("movsbq", MOVSBQ); + GEN("movzbw", MOVZBW); + GEN("movzbl", MOVZBL); + GEN64("movzbq", MOVZBQ); + GEN("movswl", MOVSWL); + GEN64("movswq", MOVSWQ); + GEN("movzwl", MOVZWL); + GEN64("movzwq", MOVZWQ); + GEN64("movslq", MOVSLQ); +#undef GENA +#undef GEN64 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_mem_reg(&ii, insns[i], D, B, I, S, r)) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + show_status(n_tests); + } + if (i != last_insn) + abort(); + } + } + } + } + } + printf(" done %ld/%ld\n", n_tests - 
n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_FPU_UNARY + printf("Testing FPU unary forms\n"); + n_tests = n_failures = 0; + { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP(); \ +} while (0) + GEN("f2xm1", F2XM1); + GEN("fabs", FABS); + GEN("fchs", FCHS); + GEN("fcompp", FCOMPP); + GEN("fcos", FCOS); + GEN("fdecstp", FDECSTP); + GEN("fincstp", FINCSTP); + GEN("fld1", FLD1); + GEN("fldl2t", FLDL2T); + GEN("fldl2e", FLDL2E); + GEN("fldpi", FLDPI); + GEN("fldlg2", FLDLG2); + GEN("fldln2", FLDLN2); + GEN("fldz", FLDZ); + GEN("fnop", FNOP); + GEN("fpatan", FPATAN); + GEN("fprem", FPREM); + GEN("fprem1", FPREM1); + GEN("fptan", FPTAN); + GEN("frndint", FRNDINT); + GEN("fscale", FSCALE); + GEN("fsin", FSIN); + GEN("fsincos", FSINCOS); + GEN("fsqrt", FSQRT); + GEN("ftst", FTST); + GEN("fucompp", FUCOMPP); + GEN("fxam", FXAM); + GEN("fxtract", FXTRACT); + GEN("fyl2x", FYL2X); + GEN("fyl2xp1", FYL2XP1); +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_unary(&ii, insns[i])) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_FPU_REG + printf("Testing FPU reg forms\n"); + n_tests = n_failures = 0; + for (int r = 0; r < X86_MAX_FPU_REGS; r++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GENr(INSN, GENOP) do { \ + insns[i] = INSN; \ + modes[i] = 0; \ + i++, GENOP##r(r); \ +} while (0) +#define GENr0(INSN, GENOP) do { \ + insns[i] = INSN; \ + modes[i] = 1; \ + i++, GENOP##r0(r); \ +} while (0) +#define GEN0r(INSN, GENOP) do { \ + insns[i] = INSN; \ + 
modes[i] = 2; \ + i++, GENOP##0r(r); \ +} while (0) + GENr("fcom", FCOM); + GENr("fcomp", FCOMP); + GENr("ffree", FFREE); + GENr("fxch", FXCH); + GENr("fst", FST); + GENr("fstp", FSTP); + GENr("fucom", FUCOM); + GENr("fucomp", FUCOMP); + GENr0("fadd", FADD); + GENr0("fcmovb", FCMOVB); + GENr0("fcmove", FCMOVE); + GENr0("fcmovbe", FCMOVBE); + GENr0("fcmovu", FCMOVU); + GENr0("fcmovnb", FCMOVNB); + GENr0("fcmovne", FCMOVNE); + GENr0("fcmovnbe", FCMOVNBE); + GENr0("fcmovnu", FCMOVNU); + GENr0("fcomi", FCOMI); + GENr0("fcomip", FCOMIP); + GENr0("fucomi", FUCOMI); + GENr0("fucomip", FUCOMIP); + GENr0("fdiv", FDIV); + GENr0("fdivr", FDIVR); + GENr0("fmul", FMUL); + GENr0("fsub", FSUB); + GENr0("fsubr", FSUBR); +#undef GEN0r +#undef GENr0 +#undef GENr + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + switch (modes[i]) { + case 0: + if (!check_reg(&ii, insns[i], r)) { + show_instruction(buffer, p); + n_failures++; + } + break; + case 1: + if (!check_reg_reg(&ii, insns[i], r, 0)) { + show_instruction(buffer, p); + n_failures++; + } + break; + case 2: + if (!check_reg_reg(&ii, insns[i], 0, r)) { + show_instruction(buffer, p); + n_failures++; + } + break; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_FPU_MEM + printf("Testing FPU mem forms\n"); + n_tests = n_failures = 0; + for (int d = 0; d < off_table_count; d++) { + const uint32 D = off_table[d]; + for (int B = -1; B < X86_MAX_ALU_REGS; B++) { + for (int I = -1; I < X86_MAX_ALU_REGS; I++) { + if (I == X86_RSP) + continue; + for (int S = 1; S < 16; S *= 2) { + if (I == -1 && S > 1) + continue; + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##m(D, 
B, I, S); \ +} while (0) + GEN("fadds", FADDS); + GEN("faddl", FADDD); + GEN("fiadd", FIADDW); + GEN("fiaddl", FIADDL); + GEN("fbld", FBLD); + GEN("fbstp", FBSTP); + GEN("fcoms", FCOMS); + GEN("fcoml", FCOMD); + GEN("fcomps", FCOMPS); + GEN("fcompl", FCOMPD); + GEN("fdivs", FDIVS); + GEN("fdivl", FDIVD); + GEN("fidiv", FIDIVW); + GEN("fidivl", FIDIVL); + GEN("fdivrs", FDIVRS); + GEN("fdivrl", FDIVRD); + GEN("fidivr", FIDIVRW); + GEN("fidivrl", FIDIVRL); + GEN("ficom", FICOMW); + GEN("ficoml", FICOML); + GEN("ficomp", FICOMPW); + GEN("ficompl", FICOMPL); + GEN("fild", FILDW); + GEN("fildl", FILDL); + GEN("fildll", FILDQ); + GEN("fist", FISTW); + GEN("fistl", FISTL); + GEN("fistp", FISTPW); + GEN("fistpl", FISTPL); + GEN("fistpll", FISTPQ); + GEN("fisttp", FISTTPW); + GEN("fisttpl", FISTTPL); + GEN("fisttpll", FISTTPQ); + GEN("flds", FLDS); + GEN("fldl", FLDD); + GEN("fldt", FLDT); + GEN("fmuls", FMULS); + GEN("fmull", FMULD); + GEN("fimul", FIMULW); + GEN("fimull", FIMULL); + GEN("fsts", FSTS); + GEN("fstl", FSTD); + GEN("fstps", FSTPS); + GEN("fstpl", FSTPD); + GEN("fstpt", FSTPT); + GEN("fsubs", FSUBS); + GEN("fsubl", FSUBD); + GEN("fisub", FISUBW); + GEN("fisubl", FISUBL); + GEN("fsubrs", FSUBRS); + GEN("fsubrl", FSUBRD); + GEN("fisubr", FISUBRW); + GEN("fisubrl", FISUBRL); +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_mem(&ii, insns[i], D, B, I, S)) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + show_status(n_tests); + } + if (i != last_insn) + abort(); + } + } + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_MMX_REG_REG + printf("Testing MMX reg,reg forms\n"); + n_tests = n_failures = 0; + for (int s = 0; s < X86_MAX_MMX_REGS; s++) { + for (int d = 0; d < 
X86_MAX_MMX_REGS; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + MMX_##GENOP##rr(s, d); \ +} while (0) +#define GEN64(INSN, GENOP) do { \ + if (X86_TARGET_64BIT) \ + GEN(INSN, GENOP); \ +} while (0) + GEN("movq", MOVQ); + GEN("packsswb", PACKSSWB); + GEN("packssdw", PACKSSDW); + GEN("packuswb", PACKUSWB); + GEN("paddb", PADDB); + GEN("paddw", PADDW); + GEN("paddd", PADDD); + GEN("paddq", PADDQ); + GEN("paddsb", PADDSB); + GEN("paddsw", PADDSW); + GEN("paddusb", PADDUSB); + GEN("paddusw", PADDUSW); + GEN("pand", PAND); + GEN("pandn", PANDN); + GEN("pavgb", PAVGB); + GEN("pavgw", PAVGW); + GEN("pcmpeqb", PCMPEQB); + GEN("pcmpeqw", PCMPEQW); + GEN("pcmpeqd", PCMPEQD); + GEN("pcmpgtb", PCMPGTB); + GEN("pcmpgtw", PCMPGTW); + GEN("pcmpgtd", PCMPGTD); + GEN("pmaddwd", PMADDWD); + GEN("pmaxsw", PMAXSW); + GEN("pmaxub", PMAXUB); + GEN("pminsw", PMINSW); + GEN("pminub", PMINUB); + GEN("pmulhuw", PMULHUW); + GEN("pmulhw", PMULHW); + GEN("pmullw", PMULLW); + GEN("pmuludq", PMULUDQ); + GEN("por", POR); + GEN("psadbw", PSADBW); + GEN("psllw", PSLLW); + GEN("pslld", PSLLD); + GEN("psllq", PSLLQ); + GEN("psraw", PSRAW); + GEN("psrad", PSRAD); + GEN("psrlw", PSRLW); + GEN("psrld", PSRLD); + GEN("psrlq", PSRLQ); + GEN("psubb", PSUBB); + GEN("psubw", PSUBW); + GEN("psubd", PSUBD); + GEN("psubq", PSUBQ); + GEN("psubsb", PSUBSB); + GEN("psubsw", PSUBSW); + GEN("psubusb", PSUBUSB); + GEN("psubusw", PSUBUSW); + GEN("punpckhbw", PUNPCKHBW); + GEN("punpckhwd", PUNPCKHWD); + GEN("punpckhdq", PUNPCKHDQ); + GEN("punpcklbw", PUNPCKLBW); + GEN("punpcklwd", PUNPCKLWD); + GEN("punpckldq", PUNPCKLDQ); + GEN("pxor", PXOR); + GEN("pabsb", PABSB); + GEN("pabsw", PABSW); + GEN("pabsd", PABSD); + GEN("phaddw", PHADDW); + GEN("phaddd", PHADDD); + GEN("phaddsw", PHADDSW); + GEN("phsubw", PHSUBW); + GEN("phsubd", PHSUBD); + GEN("phsubsw", PHSUBSW); + GEN("pmaddubsw", PMADDUBSW); + GEN("pmulhrsw", PMULHRSW); + GEN("pshufb", 
PSHUFB); + GEN("psignb", PSIGNB); + GEN("psignw", PSIGNW); + GEN("psignd", PSIGND); +#undef GEN64 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_reg_reg(&ii, insns[i], s, d)) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + + static const uint8 imm8_table[] = { + 0x00, 0x01, 0x02, 0x03, + 0x06, 0x07, 0x08, 0x09, + 0x0e, 0x0f, 0x10, 0x11, + 0x1e, 0x1f, 0x20, 0x21, + 0xfc, 0xfd, 0xfe, 0xff, + }; + const int n_imm8_tab_count = sizeof(imm8_table)/sizeof(imm8_table[0]); + +#if TEST_INST_MMX_IMM_REG + printf("Testing imm,reg forms\n"); + n_tests = n_failures = 0; + for (int j = 0; j < n_imm8_tab_count; j++) { + const uint8 value = imm8_table[j]; + for (int d = 0; d < X86_MAX_MMX_REGS; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i] = INSN; \ + modes[i] = 1; \ + i++; MMX_##GENOP##ir(value, d); \ +} while (0) + GEN("psllw", PSLLW); + GEN("pslld", PSLLD); + GEN("psllq", PSLLQ); + GEN("psraw", PSRAW); + GEN("psrad", PSRAD); + GEN("psrlw", PSRLW); + GEN("psrld", PSRLD); + GEN("psrlq", PSRLQ); +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_MMX_MEM_REG + printf("Testing MMX mem,reg 
forms\n"); + n_tests = n_failures = 0; + for (int d = 0; d < off_table_count; d++) { + const uint32 D = off_table[d]; + for (int B = -1; B < X86_MAX_ALU_REGS; B++) { + for (int I = -1; I < X86_MAX_ALU_REGS; I++) { + if (I == X86_RSP) + continue; + for (int S = 1; S < 16; S *= 2) { + if (I == -1 && S > 1) + continue; + for (int r = 0; r < X86_MAX_MMX_REGS; r++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define _GENrm(INSN, GENOP) do { \ + insns[i] = INSN; \ + modes[i] = 0; \ + i++; MMX_##GENOP##rm(r, D, B, I, S); \ +} while (0) +#define _GENmr(INSN, GENOP) do { \ + insns[i] = INSN; \ + modes[i] = 1; \ + i++; MMX_##GENOP##mr(D, B, I, S, r); \ +} while (0) +#define GEN(INSN, GENOP) do { \ + _GENmr(INSN, GENOP); \ +} while (0) + _GENmr("movd", MOVD); + _GENrm("movd", MOVD); + _GENmr("movq", MOVQ); + _GENrm("movq", MOVQ); + GEN("packsswb", PACKSSWB); + GEN("packssdw", PACKSSDW); + GEN("packuswb", PACKUSWB); + GEN("paddb", PADDB); + GEN("paddw", PADDW); + GEN("paddd", PADDD); + GEN("paddq", PADDQ); + GEN("paddsb", PADDSB); + GEN("paddsw", PADDSW); + GEN("paddusb", PADDUSB); + GEN("paddusw", PADDUSW); + GEN("pand", PAND); + GEN("pandn", PANDN); + GEN("pavgb", PAVGB); + GEN("pavgw", PAVGW); + GEN("pcmpeqb", PCMPEQB); + GEN("pcmpeqw", PCMPEQW); + GEN("pcmpeqd", PCMPEQD); + GEN("pcmpgtb", PCMPGTB); + GEN("pcmpgtw", PCMPGTW); + GEN("pcmpgtd", PCMPGTD); + GEN("pmaddwd", PMADDWD); + GEN("pmaxsw", PMAXSW); + GEN("pmaxub", PMAXUB); + GEN("pminsw", PMINSW); + GEN("pminub", PMINUB); + GEN("pmulhuw", PMULHUW); + GEN("pmulhw", PMULHW); + GEN("pmullw", PMULLW); + GEN("pmuludq", PMULUDQ); + GEN("por", POR); + GEN("psadbw", PSADBW); + GEN("psllw", PSLLW); + GEN("pslld", PSLLD); + GEN("psllq", PSLLQ); + GEN("psraw", PSRAW); + GEN("psrad", PSRAD); + GEN("psrlw", PSRLW); + GEN("psrld", PSRLD); + GEN("psrlq", PSRLQ); + GEN("psubb", PSUBB); + GEN("psubw", PSUBW); + GEN("psubd", PSUBD); + GEN("psubq", PSUBQ); + GEN("psubsb", PSUBSB); + GEN("psubsw", PSUBSW); + 
GEN("psubusb", PSUBUSB); + GEN("psubusw", PSUBUSW); + GEN("punpckhbw", PUNPCKHBW); + GEN("punpckhwd", PUNPCKHWD); + GEN("punpckhdq", PUNPCKHDQ); + GEN("punpcklbw", PUNPCKLBW); + GEN("punpcklwd", PUNPCKLWD); + GEN("punpckldq", PUNPCKLDQ); + GEN("pxor", PXOR); + GEN("pabsb", PABSB); + GEN("pabsw", PABSW); + GEN("pabsd", PABSD); + GEN("phaddw", PHADDW); + GEN("phaddd", PHADDD); + GEN("phaddsw", PHADDSW); + GEN("phsubw", PHSUBW); + GEN("phsubd", PHSUBD); + GEN("phsubsw", PHSUBSW); + GEN("pmaddubsw", PMADDUBSW); + GEN("pmulhrsw", PMULHRSW); + GEN("pshufb", PSHUFB); + GEN("psignb", PSIGNB); + GEN("psignw", PSIGNW); + GEN("psignd", PSIGND); +#undef GEN +#undef _GENmr +#undef _GENrm + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_mem_reg(&ii, insns[i], D, B, I, S, r, modes[i])) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + show_status(n_tests); + } + if (i != last_insn) + abort(); + } + } + } + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_SSE_REG_REG + printf("Testing SSE reg,reg forms\n"); + n_tests = n_failures = 0; + for (int s = 0; s < X86_MAX_SSE_REGS; s++) { + for (int d = 0; d < X86_MAX_SSE_REGS; d++) { + set_target(block); + uint8 *b = get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##rr(s, d); \ +} while (0) +#define GEN64(INSN, GENOP) do { \ + if (X86_TARGET_64BIT) \ + GEN(INSN, GENOP); \ +} while (0) +#define GEN1(INSN, GENOP) do { \ + GEN(INSN "s", GENOP##S); \ + GEN(INSN "d", GENOP##D); \ +} while (0) +#define GENA(INSN, GENOP) do { \ + GEN1(INSN "s", GENOP##S); \ + GEN1(INSN "p", GENOP##P); \ +} while (0) +#define GENI(INSN, GENOP, IMM) do { \ + insns[i++] = INSN; \ + GENOP##rr(IMM, s, d); \ +} while (0) +#define GENI1(INSN, GENOP, 
IMM) do { \ + GENI(INSN "s", GENOP##S, IMM); \ + GENI(INSN "d", GENOP##D, IMM); \ +} while (0) +#define GENIA(INSN, GENOP, IMM) do { \ + GENI1(INSN "s", GENOP##S, IMM); \ + GENI1(INSN "p", GENOP##P, IMM); \ +} while (0) + GEN1("andp", ANDP); + GEN1("andnp", ANDNP); + GEN1("orp", ORP); + GEN1("xorp", XORP); + GENA("add", ADD); + GENA("sub", SUB); + GENA("mul", MUL); + GENA("div", DIV); + GEN1("comis", COMIS); + GEN1("ucomis", UCOMIS); + GENA("min", MIN); + GENA("max", MAX); + GEN("rcpss", RCPSS); + GEN("rcpps", RCPPS); + GEN("rsqrtss", RSQRTSS); + GEN("rsqrtps", RSQRTPS); + GENA("sqrt", SQRT); + GENIA("cmpeq", CMP, X86_SSE_CC_EQ); + GENIA("cmplt", CMP, X86_SSE_CC_LT); + GENIA("cmple", CMP, X86_SSE_CC_LE); + GENIA("cmpunord", CMP, X86_SSE_CC_U); + GENIA("cmpneq", CMP, X86_SSE_CC_NEQ); + GENIA("cmpnlt", CMP, X86_SSE_CC_NLT); + GENIA("cmpnle", CMP, X86_SSE_CC_NLE); + GENIA("cmpord", CMP, X86_SSE_CC_O); + GEN1("movap", MOVAP); + GEN("movdqa", MOVDQA); + GEN("movdqu", MOVDQU); + GEN("movd", MOVDXD); + GEN64("movd", MOVQXD); // FIXME: disass bug? "movq" expected + GEN("movd", MOVDXS); + GEN64("movd", MOVQXS); // FIXME: disass bug? 
"movq" expected + GEN("cvtdq2pd", CVTDQ2PD); + GEN("cvtdq2ps", CVTDQ2PS); + GEN("cvtpd2dq", CVTPD2DQ); + GEN("cvtpd2ps", CVTPD2PS); + GEN("cvtps2dq", CVTPS2DQ); + GEN("cvtps2pd", CVTPS2PD); + GEN("cvtsd2si", CVTSD2SIL); + GEN64("cvtsd2siq", CVTSD2SIQ); + GEN("cvtsd2ss", CVTSD2SS); + GEN("cvtsi2sd", CVTSI2SDL); + GEN64("cvtsi2sdq", CVTSI2SDQ); + GEN("cvtsi2ss", CVTSI2SSL); + GEN64("cvtsi2ssq", CVTSI2SSQ); + GEN("cvtss2sd", CVTSS2SD); + GEN("cvtss2si", CVTSS2SIL); + GEN64("cvtss2siq", CVTSS2SIQ); + GEN("cvttpd2dq", CVTTPD2DQ); + GEN("cvttps2dq", CVTTPS2DQ); + GEN("cvttsd2si", CVTTSD2SIL); + GEN64("cvttsd2siq", CVTTSD2SIQ); + GEN("cvttss2si", CVTTSS2SIL); + GEN64("cvttss2siq", CVTTSS2SIQ); + if (s < 8) { + // MMX source register + GEN("cvtpi2pd", CVTPI2PD); + GEN("cvtpi2ps", CVTPI2PS); + } + if (d < 8) { + // MMX dest register + GEN("cvtpd2pi", CVTPD2PI); + GEN("cvtps2pi", CVTPS2PI); + GEN("cvttpd2pi", CVTTPD2PI); + GEN("cvttps2pi", CVTTPS2PI); + } +#undef GENIA +#undef GENI1 +#undef GENI +#undef GENA +#undef GEN1 +#undef GEN64 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_reg_reg(&ii, insns[i], s, d)) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + } + if (i != last_insn) + abort(); + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + +#if TEST_INST_SSE_MEM_REG + printf("Testing SSE mem,reg forms\n"); + n_tests = n_failures = 0; + for (int d = 0; d < off_table_count; d++) { + const uint32 D = off_table[d]; + for (int B = -1; B < X86_MAX_ALU_REGS; B++) { + for (int I = -1; I < X86_MAX_ALU_REGS; I++) { + if (I == X86_RSP) + continue; + for (int S = 1; S < 16; S *= 2) { + if (I == -1 && S > 1) + continue; + for (int r = 0; r < X86_MAX_SSE_REGS; r++) { + set_target(block); + uint8 *b = 
get_target(); + int i = 0; +#define GEN(INSN, GENOP) do { \ + insns[i++] = INSN; \ + GENOP##mr(D, B, I, S, r); \ +} while (0) +#define GEN64(INSN, GENOP) do { \ + if (X86_TARGET_64BIT) \ + GEN(INSN, GENOP); \ +} while (0) +#define GEN1(INSN, GENOP) do { \ + GEN(INSN "s", GENOP##S); \ + GEN(INSN "d", GENOP##D); \ +} while (0) +#define GENA(INSN, GENOP) do { \ + GEN1(INSN "s", GENOP##S); \ + GEN1(INSN "p", GENOP##P); \ +} while (0) +#define GENI(INSN, GENOP, IMM) do { \ + insns[i++] = INSN; \ + GENOP##mr(IMM, D, B, I, S, r); \ +} while (0) +#define GENI1(INSN, GENOP, IMM) do { \ + GENI(INSN "s", GENOP##S, IMM); \ + GENI(INSN "d", GENOP##D, IMM); \ +} while (0) +#define GENIA(INSN, GENOP, IMM) do { \ + GENI1(INSN "s", GENOP##S, IMM); \ + GENI1(INSN "p", GENOP##P, IMM); \ +} while (0) + GEN1("andp", ANDP); + GEN1("andnp", ANDNP); + GEN1("orp", ORP); + GEN1("xorp", XORP); + GENA("add", ADD); + GENA("sub", SUB); + GENA("mul", MUL); + GENA("div", DIV); + GEN1("comis", COMIS); + GEN1("ucomis", UCOMIS); + GENA("min", MIN); + GENA("max", MAX); + GEN("rcpss", RCPSS); + GEN("rcpps", RCPPS); + GEN("rsqrtss", RSQRTSS); + GEN("rsqrtps", RSQRTPS); + GENA("sqrt", SQRT); + GENIA("cmpeq", CMP, X86_SSE_CC_EQ); + GENIA("cmplt", CMP, X86_SSE_CC_LT); + GENIA("cmple", CMP, X86_SSE_CC_LE); + GENIA("cmpunord", CMP, X86_SSE_CC_U); + GENIA("cmpneq", CMP, X86_SSE_CC_NEQ); + GENIA("cmpnlt", CMP, X86_SSE_CC_NLT); + GENIA("cmpnle", CMP, X86_SSE_CC_NLE); + GENIA("cmpord", CMP, X86_SSE_CC_O); + GEN1("movap", MOVAP); + GEN("movdqa", MOVDQA); + GEN("movdqu", MOVDQU); +#if 0 + // FIXME: extraneous REX bits generated + GEN("movd", MOVDXD); + GEN64("movd", MOVQXD); // FIXME: disass bug? 
"movq" expected +#endif + GEN("cvtdq2pd", CVTDQ2PD); + GEN("cvtdq2ps", CVTDQ2PS); + GEN("cvtpd2dq", CVTPD2DQ); + GEN("cvtpd2ps", CVTPD2PS); + GEN("cvtps2dq", CVTPS2DQ); + GEN("cvtps2pd", CVTPS2PD); + GEN("cvtsd2si", CVTSD2SIL); + GEN64("cvtsd2siq", CVTSD2SIQ); + GEN("cvtsd2ss", CVTSD2SS); + GEN("cvtsi2sd", CVTSI2SDL); + GEN64("cvtsi2sdq", CVTSI2SDQ); + GEN("cvtsi2ss", CVTSI2SSL); + GEN64("cvtsi2ssq", CVTSI2SSQ); + GEN("cvtss2sd", CVTSS2SD); + GEN("cvtss2si", CVTSS2SIL); + GEN64("cvtss2siq", CVTSS2SIQ); + GEN("cvttpd2dq", CVTTPD2DQ); + GEN("cvttps2dq", CVTTPS2DQ); + GEN("cvttsd2si", CVTTSD2SIL); + GEN64("cvttsd2siq", CVTTSD2SIQ); + GEN("cvttss2si", CVTTSS2SIL); + GEN64("cvttss2siq", CVTTSS2SIQ); + if (r < 8) { + // MMX dest register + GEN("cvtpd2pi", CVTPD2PI); + GEN("cvtps2pi", CVTPS2PI); + GEN("cvttpd2pi", CVTTPD2PI); + GEN("cvttps2pi", CVTTPS2PI); + } +#undef GENIA +#undef GENI1 +#undef GENI +#undef GENA +#undef GEN1 +#undef GEN64 +#undef GEN + int last_insn = i; + uint8 *e = get_target(); + + uint8 *p = b; + i = 0; + while (p < e) { + int n = disass_x86(buffer, (uintptr)p); + insn_t ii; + parse_insn(&ii, buffer); + + if (!check_mem_reg(&ii, insns[i], D, B, I, S, r)) { + show_instruction(buffer, p); + n_failures++; + } + + p += n; + i += 1; + n_tests++; + show_status(n_tests); + } + if (i != last_insn) + abort(); + } + } + } + } + } + printf(" done %ld/%ld\n", n_tests - n_failures, n_tests); + n_all_tests += n_tests; + n_all_failures += n_failures; +#endif + + printf("\n"); + printf("All %ld tests run, %ld failures\n", n_all_tests, n_all_failures); +} diff --git a/od-win32/avioutput.c b/od-win32/avioutput.c index 61209bb5..a8d64e69 100644 --- a/od-win32/avioutput.c +++ b/od-win32/avioutput.c @@ -255,7 +255,7 @@ static int AVIOutput_AllocateAudio (void) // set the source format memset (&wfxSrc, 0, sizeof (wfxSrc)); wfxSrc.Format.wFormatTag = WAVE_FORMAT_PCM; - wfxSrc.Format.nChannels = get_audio_nativechannels () ? 
get_audio_nativechannels () : 2; + wfxSrc.Format.nChannels = get_audio_nativechannels (currprefs.sound_stereo) ? get_audio_nativechannels (currprefs.sound_stereo) : 2; wfxSrc.Format.nSamplesPerSec = workprefs.sound_freq ? workprefs.sound_freq : 44100; wfxSrc.Format.nBlockAlign = wfxSrc.Format.nChannels * 16 / 8; wfxSrc.Format.nAvgBytesPerSec = wfxSrc.Format.nBlockAlign * wfxSrc.Format.nSamplesPerSec; @@ -370,6 +370,7 @@ static int AVIOutput_GetAudioCodecName (WAVEFORMATEX *wft, TCHAR *name, int len) int AVIOutput_GetAudioCodec (TCHAR *name, int len) { + AVIOutput_Initialize (); if (AVIOutput_AudioAllocated ()) return AVIOutput_GetAudioCodecName (pwfxDst, name, len); if (!AVIOutput_AllocateAudio ()) @@ -384,6 +385,7 @@ int AVIOutput_GetAudioCodec (TCHAR *name, int len) int AVIOutput_ChooseAudioCodec (HWND hwnd, TCHAR *s, int len) { + AVIOutput_Initialize (); AVIOutput_End(); if (!AVIOutput_AllocateAudio ()) return 0; @@ -456,6 +458,7 @@ static int AVIOutput_AllocateVideo (void) avioutput_height = WIN32GFX_GetHeight (); avioutput_bits = WIN32GFX_GetDepth (0); + AVIOutput_Initialize (); AVIOutput_ReleaseVideo (); if (!avioutput_width || !avioutput_height || !avioutput_bits) { avioutput_width = workprefs.gfx_size.width; @@ -549,6 +552,8 @@ static int AVIOutput_GetVideoCodecName (COMPVARS *pcv, TCHAR *name, int len) int AVIOutput_GetVideoCodec (TCHAR *name, int len) { + AVIOutput_Initialize (); + if (AVIOutput_VideoAllocated ()) return AVIOutput_GetVideoCodecName (pcompvars, name, len); if (!AVIOutput_AllocateVideo ()) @@ -564,6 +569,8 @@ int AVIOutput_GetVideoCodec (TCHAR *name, int len) int AVIOutput_ChooseVideoCodec (HWND hwnd, TCHAR *s, int len) { + AVIOutput_Initialize (); + AVIOutput_End (); if (!AVIOutput_AllocateVideo ()) return 0; @@ -925,7 +932,7 @@ static void writewavheader (uae_u32 size) uae_u16 tw; uae_u32 tl; int bits = 16; - int channels = get_audio_nativechannels (); + int channels = get_audio_nativechannels (currprefs.sound_stereo); fseek (wavfile, 0, 
SEEK_SET); fwrite ("RIFF", 1, 4, wavfile); @@ -1024,6 +1031,8 @@ void AVIOutput_Begin (void) TCHAR *ext1, *ext2; struct avientry *ae = NULL; + AVIOutput_Initialize (); + avientryindex = -1; if (avioutput_enabled) { if (!avioutput_requested) @@ -1216,6 +1225,9 @@ void AVIOutput_Release (void) void AVIOutput_Initialize (void) { + if (avioutput_init) + return; + InitializeCriticalSection (&AVIOutput_CriticalSection); cs_allocated = 1; @@ -1223,11 +1235,8 @@ void AVIOutput_Initialize (void) if (!pcompvars) return; pcompvars->cbSize = sizeof (COMPVARS); - - if (!avioutput_init) { - AVIFileInit (); - avioutput_init = 1; - } + AVIFileInit (); + avioutput_init = 1; } diff --git a/od-win32/blkdev_win32_ioctl.c b/od-win32/blkdev_win32_ioctl.c index 3c335b79..1a73c9e0 100644 --- a/od-win32/blkdev_win32_ioctl.c +++ b/od-win32/blkdev_win32_ioctl.c @@ -62,13 +62,14 @@ static void reseterrormode (int unitnum) static void close_device (int unitnum); static int open_device (int unitnum); -static void mcierr (TCHAR *str, DWORD err) +static int mcierr (TCHAR *str, DWORD err) { TCHAR es[1000]; if (err == MMSYSERR_NOERROR) - return; + return MMSYSERR_NOERROR; if (mciGetErrorString (err, es, sizeof es)) write_log (L"MCIErr: %s: %d = '%s'\n", str, err, es); + return err; } static int win32_error (int unitnum, const TCHAR *format,...) 
@@ -209,7 +210,7 @@ static int open_mci (int unitnum) mciOpen.lpstrElementName = elname; mciOpen.lpstrAlias = alname; flags = MCI_OPEN_ELEMENT | MCI_OPEN_SHAREABLE | MCI_OPEN_ALIAS | MCI_OPEN_TYPE | MCI_OPEN_TYPE_ID | MCI_WAIT; - err = mciSendCommand (0, MCI_OPEN, flags, (DWORD)(LPVOID)&mciOpen); + err = mciSendCommand (0, MCI_OPEN, flags, (DWORD_PTR)(LPVOID)&mciOpen); ciw->mciid = mciOpen.wDeviceID; if (err != MMSYSERR_NOERROR) { if (closed) @@ -291,7 +292,7 @@ static int ioctl_command_play (int unitnum, uae_u32 start, uae_u32 end, int scan { struct dev_info_ioctl *ciw = &ciw32[unitnum]; - open_mci(unitnum); + open_mci (unitnum); if (ciw->mciid > 0) { @@ -381,19 +382,19 @@ static uae_u8 *ioctl_command_qcode (int unitnum) memset (buf, 0, sizeof buf); memset (&mciStatusParms, 0, sizeof mciStatusParms); mciStatusParms.dwItem = MCI_STATUS_MODE; - err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD)(LPVOID)&mciStatusParms); + err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD_PTR)&mciStatusParms); if (err != MMSYSERR_NOERROR) return 0; mode = mciStatusParms.dwReturn; mciStatusParms.dwItem = MCI_STATUS_CURRENT_TRACK; - err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD)(LPVOID)&mciStatusParms); + err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD_PTR)&mciStatusParms); if (err != MMSYSERR_NOERROR) return 0; trk = mciStatusParms.dwReturn - 1; if (trk < 0) trk = 0; mciStatusParms.dwItem = MCI_STATUS_POSITION; - err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD)(LPVOID)&mciStatusParms); + err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD_PTR)&mciStatusParms); if (err != MMSYSERR_NOERROR) return 0; pos = (((mciStatusParms.dwReturn >> 16) & 0xff) << 0) | (((mciStatusParms.dwReturn >> 8) & 0xff) << 8) | (((mciStatusParms.dwReturn >> 0) & 0xff) << 16); @@ -679,8 +680,9 @@ static int ismedia (int 
unitnum) DWORD err; MCI_STATUS_PARMS mciStatusParms; + memset (&mciStatusParms, 0, sizeof mciStatusParms); mciStatusParms.dwItem = MCI_STATUS_MEDIA_PRESENT; - err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD)(LPVOID)&mciStatusParms); + err = mciSendCommand (ciw->mciid, MCI_STATUS, MCI_STATUS_ITEM | MCI_WAIT, (DWORD_PTR)&mciStatusParms); if (err != MMSYSERR_NOERROR) return 0; if (mciStatusParms.dwReturn) @@ -688,9 +690,11 @@ static int ismedia (int unitnum) return 0; } else { + struct device_info di; memset (&di, 0, sizeof di); return fetch_geometry (unitnum, &di); + } } diff --git a/od-win32/direct3d.c b/od-win32/direct3d.c index d4d655e1..b428cfe4 100644 --- a/od-win32/direct3d.c +++ b/od-win32/direct3d.c @@ -2,7 +2,7 @@ #include "sysconfig.h" #include "sysdeps.h" -#if defined (OPENGL) && defined (GFXFILTER) +#if defined (D3D) && defined (GFXFILTER) #include "options.h" #include "xwin.h" @@ -12,6 +12,7 @@ #include "win32.h" #include "win32gfx.h" #include "gfxfilter.h" +#include "statusline.h" #include #include @@ -25,17 +26,18 @@ static int tex_pow2, tex_square, tex_dynamic; static int psEnabled, psActive, psPreProcess; static int tformat; -static int d3d_enabled, d3d_ex, scanlines_ok; +static int d3d_enabled, d3d_ex; static LPDIRECT3D9 d3d; static LPDIRECT3D9EX d3dex; static D3DPRESENT_PARAMETERS dpp; static LPDIRECT3DDEVICE9 d3ddev; static LPDIRECT3DDEVICE9EX d3ddevex; static D3DSURFACE_DESC dsdbb; -static LPDIRECT3DTEXTURE9 texture, sltexture; +static LPDIRECT3DTEXTURE9 texture, sltexture, ledtexture; static LPDIRECT3DTEXTURE9 lpWorkTexture1, lpWorkTexture2; static LPDIRECT3DVOLUMETEXTURE9 lpHq2xLookupTexture; static IDirect3DVertexBuffer9 *vertexBuffer; +static ID3DXSprite *sprite; static HWND d3dhwnd; static D3DXMATRIX m_matProj; @@ -45,6 +47,7 @@ static D3DXMATRIX m_matPreProj; static D3DXMATRIX m_matPreView; static D3DXMATRIX m_matPreWorld; +static int ledwidth, ledheight; static int twidth, theight, max_texture_w, 
max_texture_h; static int tin_w, tin_h, window_h, window_w; static int t_depth; @@ -662,7 +665,6 @@ static LPDIRECT3DTEXTURE9 createtext (int *ww, int *hh, D3DFORMAT format) return t; } - static int createtexture (int w, int h) { HRESULT hr; @@ -704,19 +706,56 @@ static int createtexture (int w, int h) return 1; } +static void updateleds (void) +{ + D3DLOCKED_RECT locked; + HRESULT hr; + static rc[256], gc[256], bc[256], a[256]; + static int done; + int i, y; + + if (!done) { + for (i = 0; i < 256; i++) { + rc[i] = i << 16; + gc[i] = i << 8; + bc[i] = i << 0; + a[i] = i << 24; + } + done = 1; + } + hr = IDirect3DTexture9_LockRect (ledtexture, 0, &locked, NULL, D3DLOCK_DISCARD); + if (FAILED (hr)) { + write_log (L"SL IDirect3DTexture9_LockRect failed: %s\n", D3D_ErrorString (hr)); + return; + } + for (y = 0; y < TD_TOTAL_HEIGHT; y++) { + uae_u8 *buf = (uae_u8*)locked.pBits + y * locked.Pitch; + draw_status_line_single (buf, 32 / 8, y, ledwidth, rc, gc, bc, a); + } + IDirect3DTexture9_UnlockRect (ledtexture, 0); +} + +static int createledtexture (void) +{ + ledwidth = window_w; + ledheight = TD_TOTAL_HEIGHT; + ledtexture = createtext (&ledwidth, &ledheight, D3DFMT_A8R8G8B8); + if (!ledtexture) + return 0; + return 1; +} + static int createsltexture (void) { UINT ww = required_sl_texture_w; UINT hh = required_sl_texture_h; - sltexture = createtext (&ww, &hh, D3DFMT_A4R4G4B4); + sltexture = createtext (&ww, &hh, t_depth < 32 ? 
D3DFMT_A4R4G4B4 : D3DFMT_A8R8G8B8); if (!sltexture) return 0; required_sl_texture_w = ww; required_sl_texture_h = hh; write_log (L"D3D: SL %d*%d texture allocated\n", ww, hh); - - scanlines_ok = 1; return 1; } @@ -745,6 +784,7 @@ static void setupscenescaled (void) hr = IDirect3DDevice9_SetSamplerState (d3ddev, 0, D3DSAMP_MINFILTER, v); hr = IDirect3DDevice9_SetSamplerState (d3ddev, 0, D3DSAMP_MAGFILTER, v); hr = IDirect3DDevice9_SetSamplerState (d3ddev, 0, D3DSAMP_MIPFILTER, D3DTEXF_NONE); + hr = IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_ALPHABLENDENABLE, FALSE); } static void setupscenecoordssl (void) @@ -889,11 +929,13 @@ static void createscanlines (int force) int l1, l2; int x, y, yy; uae_u8 *sld, *p; + int bpp; - if (!scanlines_ok) + if (!sltexture) return; if (osl1 == currprefs.gfx_filter_scanlines && osl3 == currprefs.gfx_filter_scanlinelevel && osl2 == currprefs.gfx_filter_scanlineratio && !force) return; + bpp = t_depth < 32 ? 2 : 4; osl1 = currprefs.gfx_filter_scanlines; osl3 = currprefs.gfx_filter_scanlinelevel; osl2 = currprefs.gfx_filter_scanlineratio; @@ -913,27 +955,29 @@ static void createscanlines (int force) } sld = (uae_u8*)locked.pBits; for (y = 0; y < required_sl_texture_h; y++) - memset (sld + y * locked.Pitch, 0, required_sl_texture_w * 2); + memset (sld + y * locked.Pitch, 0, required_sl_texture_w * bpp); for (y = 1; y < required_sl_texture_h; y += l1 + l2) { for (yy = 0; yy < l2 && y + yy < required_sl_texture_h; yy++) { for (x = 0; x < required_sl_texture_w; x++) { - /* 16-bit, A4R4G4B4 */ uae_u8 sll = sl42; - p = &sld[(y + yy) * locked.Pitch + (x * 2)]; - p[1] = (sl4 << 4) | (sll << 0); - p[0] = (sll << 4) | (sll << 0); + p = &sld[(y + yy) * locked.Pitch + (x * bpp)]; + if (bpp < 4) { + /* 16-bit, A4R4G4B4 */ + p[1] = (sl4 << 4) | (sll << 0); + p[0] = (sll << 4) | (sll << 0); + } else { + /* 32-bit, A8R8G8B8 */ + uae_u8 sll4 = sl42 | (sl42 << 4); + uae_u8 sll2 = sll | (sll << 4); + p[0] = sll4; + p[1] = sll2; + p[2] = sll2; + p[3] = 
sll2; + } } } } IDirect3DTexture9_UnlockRect (sltexture, 0); - if (scanlines_ok) { - /* enable alpha blending for scanlines */ - IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_ALPHABLENDENABLE, TRUE); - IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_SRCBLEND, D3DBLEND_SRCALPHA); - IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA); - } else { - IDirect3DDevice9_SetRenderState (d3ddev, D3DRS_ALPHABLENDENABLE, FALSE); - } } @@ -943,6 +987,14 @@ static void invalidatedeviceobjects (void) IDirect3DTexture9_Release (texture); texture = NULL; } + if (sprite) { + sprite->lpVtbl->Release (sprite); + sprite = NULL; + } + if (ledtexture) { + IDirect3DTexture9_Release (ledtexture); + ledtexture = NULL; + } if (sltexture) { IDirect3DTexture9_Release (sltexture); sltexture = NULL; @@ -1000,6 +1052,7 @@ static int restoredeviceobjects (void) return 0; if (currprefs.gfx_filter_scanlines > 0) createsltexture (); + createledtexture (); vbsize = sizeof (struct TLVERTEX) * 4; if (psPreProcess) @@ -1043,6 +1096,7 @@ void D3D_free (void) psPreProcess = 0; psActive = 0; resetcount = 0; + changed_prefs.leds_on_screen = currprefs.leds_on_screen = currprefs.leds_on_screen & ~STATUSLINE_TARGET; } const TCHAR *D3D_init (HWND ahwnd, int w_w, int w_h, int t_w, int t_h, int depth) @@ -1059,7 +1113,6 @@ const TCHAR *D3D_init (HWND ahwnd, int w_w, int w_h, int t_w, int t_h, int depth D3D_free (); D3D_canshaders (); d3d_enabled = 0; - scanlines_ok = 0; if (currprefs.gfx_filter != UAE_FILTER_DIRECT3D) { _tcscpy (errmsg, L"D3D: not enabled"); return errmsg; @@ -1238,6 +1291,13 @@ const TCHAR *D3D_init (HWND ahwnd, int w_w, int w_h, int t_w, int t_h, int depth return errmsg; } + changed_prefs.leds_on_screen = currprefs.leds_on_screen = currprefs.leds_on_screen | STATUSLINE_TARGET; + + hr = D3DXCreateSprite (d3ddev, &sprite); + if (FAILED (hr)) { + write_log (L"LED D3DXSprite filaed: %s\n", D3D_ErrorString (hr)); + } + createscanlines (1); d3d_enabled = 1; return 0; @@ 
-1360,13 +1420,22 @@ static void D3D_render22 (int clear) hr = IDirect3DDevice9_SetTexture (d3ddev, 0, (IDirect3DBaseTexture9*)texture); hr = IDirect3DDevice9_DrawPrimitive (d3ddev, D3DPT_TRIANGLESTRIP, 0, 2); - if (scanlines_ok) { - setupscenecoordssl (); - settransformsl (); - hr = IDirect3DDevice9_SetTexture (d3ddev, 0, (IDirect3DBaseTexture9*)sltexture); - hr = IDirect3DDevice9_DrawPrimitive (d3ddev, D3DPT_TRIANGLESTRIP, 0, 2); - } + } + if (sprite && (sltexture || ledtexture)) { + D3DXVECTOR3 v; + sprite->lpVtbl->Begin (sprite, D3DXSPRITE_ALPHABLEND); + if (sltexture) { + v.x = v.y = v.z = 0; + sprite->lpVtbl->Draw (sprite, sltexture, NULL, NULL, &v, 0xffffffff); + } + if (ledtexture) { + v.x = 0; + v.y = window_h - TD_TOTAL_HEIGHT; + v.z = 0; + sprite->lpVtbl->Draw (sprite, ledtexture, NULL, NULL, &v, 0xffffffff); + } + sprite->lpVtbl->End (sprite); } hr = IDirect3DDevice9_EndScene (d3ddev); @@ -1393,6 +1462,9 @@ void D3D_unlocktexture (void) HRESULT hr; RECT r; + if (currprefs.leds_on_screen & STATUSLINE_CHIPSET) + updateleds (); + hr = IDirect3DTexture9_UnlockRect (texture, 0); r.left = 0; r.right = window_w; r.top = 0; r.bottom = window_h; diff --git a/od-win32/dxwrap.c b/od-win32/dxwrap.c index 064fa897..a67c9523 100644 --- a/od-win32/dxwrap.c +++ b/od-win32/dxwrap.c @@ -136,6 +136,7 @@ void clearsurface (LPDIRECTDRAWSURFACE7 surf) int locksurface (LPDIRECTDRAWSURFACE7 surf, LPDDSURFACEDESC2 desc) { + static int cnt = 50; HRESULT ddrval; desc->dwSize = sizeof (*desc); while (FAILED (ddrval = IDirectDrawSurface7_Lock (surf, NULL, desc, DDLOCK_SURFACEMEMORYPTR | DDLOCK_WAIT, NULL))) { @@ -144,7 +145,10 @@ int locksurface (LPDIRECTDRAWSURFACE7 surf, LPDDSURFACEDESC2 desc) if (FAILED (ddrval)) return 0; } else if (ddrval != DDERR_SURFACEBUSY) { - write_log (L"locksurface: %s\n", DXError (ddrval)); + if (cnt > 0) { + cnt--; + write_log (L"locksurface %d: %s\n", cnt, DXError (ddrval)); + } return 0; } } diff --git a/od-win32/hardfile_win32.c 
b/od-win32/hardfile_win32.c index 07ab2597..a4d368be 100644 --- a/od-win32/hardfile_win32.c +++ b/od-win32/hardfile_win32.c @@ -1,5 +1,5 @@ #define WIN32_LEAN_AND_MEAN -#define _WIN32_WINNT 0x500 +#define _WIN32_WINNT 0x600 #include "sysconfig.h" #include "sysdeps.h" @@ -100,8 +100,93 @@ static void rdbdump (HANDLE *h, uae_u64 offset, uae_u8 *buf, int blocksize) cnt++; } +static int getsignfromhandle (HANDLE h, DWORD *sign, DWORD *pstyle) +{ + int ok; + DWORD written, outsize; + DRIVE_LAYOUT_INFORMATION_EX *dli; + + ok = 0; + outsize = sizeof (DRIVE_LAYOUT_INFORMATION_EX) + sizeof (PARTITION_INFORMATION_EX) * 32; + dli = xmalloc (outsize); + if (DeviceIoControl (h, IOCTL_DISK_GET_DRIVE_LAYOUT_EX, NULL, 0, dli, outsize, &written, NULL)) { + *sign = dli->Mbr.Signature; + *pstyle = dli->PartitionStyle; + ok = 1; + } else if (DeviceIoControl (h, IOCTL_DISK_GET_DRIVE_LAYOUT, NULL, 0, dli, outsize, &written, NULL)) { + DRIVE_LAYOUT_INFORMATION *dli2 = (DRIVE_LAYOUT_INFORMATION*)dli; + *sign = dli2->Signature; + *pstyle = PARTITION_STYLE_MBR; + ok = 1; + } + xfree (dli); + return ok; +} + +static int ismounted (HANDLE hd) +{ + HANDLE h; + TCHAR volname[MAX_DPATH]; + int mounted; + DWORD sign, pstyle; + + if (!getsignfromhandle (hd, &sign, &pstyle)) + return 0; + if (pstyle == PARTITION_STYLE_GPT) + return 1; + if (pstyle == PARTITION_STYLE_RAW) + return 0; + mounted = 0; + h = FindFirstVolume (volname, sizeof volname / sizeof (TCHAR)); + while (h && !mounted) { + HANDLE d; + if (volname[_tcslen (volname) - 1] == '\\') + volname[_tcslen (volname) - 1] = 0; + d = CreateFile (volname, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (d != INVALID_HANDLE_VALUE) { + DWORD isntfs, outsize, written; + isntfs = 0; + if (DeviceIoControl (d, FSCTL_IS_VOLUME_MOUNTED, NULL, 0, NULL, 0, &written, NULL)) { + VOLUME_DISK_EXTENTS *vde; + NTFS_VOLUME_DATA_BUFFER ntfs; + if (DeviceIoControl (d, FSCTL_GET_NTFS_VOLUME_DATA, NULL, 0, 
&ntfs, sizeof ntfs, &written, NULL)) { + isntfs = 1; + } + outsize = sizeof (VOLUME_DISK_EXTENTS) + sizeof (DISK_EXTENT) * 32; + vde = xmalloc (outsize); + if (DeviceIoControl (d, IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS, NULL, 0, vde, outsize, &written, NULL)) { + int i; + for (i = 0; i < vde->NumberOfDiskExtents; i++) { + TCHAR pdrv[MAX_DPATH]; + HANDLE ph; + _stprintf (pdrv, L"\\\\.\\PhysicalDrive%d", vde->Extents[i].DiskNumber); + ph = CreateFile (pdrv, 0, FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (ph != INVALID_HANDLE_VALUE) { + DWORD sign2; + if (getsignfromhandle (ph, &sign2, &pstyle)) { + if (sign == sign2 && pstyle == PARTITION_STYLE_MBR) + mounted = isntfs ? -1 : 1; + } + CloseHandle (ph); + } + } + } + } + CloseHandle (d); + } else { + write_log (L"'%s': %d\n", volname, GetLastError ()); + } + if (!FindNextVolume (h, volname, sizeof volname / sizeof (TCHAR))) + break; + } + FindVolumeClose (h); + return mounted; +} + #define CA "Commodore\0Amiga\0" -static int safetycheck (HANDLE *h, uae_u64 offset, uae_u8 *buf, int blocksize) +static int safetycheck (HANDLE *h, const TCHAR *name, uae_u64 offset, uae_u8 *buf, int blocksize) { int i, j, blocks = 63, empty = 1; DWORD outlen, high; @@ -125,7 +210,7 @@ static int safetycheck (HANDLE *h, uae_u64 offset, uae_u8 *buf, int blocksize) write_log (L"hd accepted (adide rdb detected at block %d)\n", j); return -3; } - if (!memcmp (buf, "RDSK", 4)) { + if (!memcmp (buf, "RDSK", 4) || !memcmp (buf, "DRKS", 4)) { if (do_rdbdump) rdbdump (h, offset, buf, blocksize); write_log (L"hd accepted (rdb detected at block %d)\n", j); @@ -145,7 +230,42 @@ static int safetycheck (HANDLE *h, uae_u64 offset, uae_u8 *buf, int blocksize) offset += blocksize; } if (!empty) { - write_log (L"hd ignored, not empty and no RDB detected\n"); + int mounted; + if (regexiststree (NULL, L"DangerousDrives")) { + UAEREG *fkey = regcreatetree (NULL, L"DangerousDrives"); + int match = 0; + if (fkey) { + 
int idx = 0; + DWORD size, size2; + TCHAR tmp2[MAX_DPATH], tmp[MAX_DPATH]; + for (;;) { + size = sizeof (tmp) / sizeof (TCHAR); + size2 = sizeof (tmp2) / sizeof (TCHAR); + if (!regenumstr (fkey, idx, tmp, &size, tmp2, &size2)) + break; + if (!_tcscmp (tmp, name)) + match = 1; + idx++; + } + regclosetree (fkey); + } + if (match) { + write_log (L"hd accepted, enabled in registry!\n"); + return -7; + } + } + mounted = ismounted (h); + if (!mounted) { + write_log (L"hd accepted, not empty and not mounted in Windows\n"); + return -8; + } + if (mounted < 0) { + write_log (L"hd ignored, NTFS partitions\n"); + return 0; + } + if (harddrive_dangerous == 0x1234dead) + return -6; + write_log (L"hd ignored, not empty and no RDB detected or Windows mounted\n"); return 0; } write_log (L"hd accepted (empty)\n"); @@ -172,6 +292,37 @@ int isharddrive (const TCHAR *name) static TCHAR *hdz[] = { L"hdz", L"zip", L"rar", L"7z", NULL }; +#if 0 +static void getserial (HANDLE h) +{ + DWORD outsize, written; + DISK_GEOMETRY_EX *out; + VOLUME_DISK_EXTENTS *vde; + + DWORD serial, mcl, fsflags; + if (GetVolumeInformationByHandleW (h, NULL, 0, &serial, &mcl, &fsflags, NULL, 0)) { + } + + outsize = sizeof (DISK_GEOMETRY_EX) + 10 * (sizeof (DISK_DETECTION_INFO) + sizeof (DISK_PARTITION_INFO)); + out = xmalloc (outsize); + if (DeviceIoControl (h, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX, NULL, 0, out, outsize, &written, NULL)) { + DISK_DETECTION_INFO *ddi = DiskGeometryGetDetect (out); + DISK_PARTITION_INFO *dpi = DiskGeometryGetPartition (out); + write_log (L""); + } + xfree (out); + + + outsize = sizeof (VOLUME_DISK_EXTENTS) + sizeof (DISK_EXTENT) * 10; + vde = xmalloc (outsize); + if (DeviceIoControl (h, IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS, NULL, 0, vde, outsize, &written, NULL)) { + if (vde->NumberOfDiskExtents > 0) + write_log(L"%d\n", vde->Extents[0].DiskNumber); + } + xfree (vde); +} +#endif + int hdf_open_target (struct hardfiledata *hfd, const TCHAR *pname) { HANDLE h = INVALID_HANDLE_VALUE; @@ 
-220,9 +371,26 @@ int hdf_open_target (struct hardfiledata *hfd, const TCHAR *pname) hfd->physsize = hfd->virtsize = udi->size; hfd->blocksize = udi->bytespersector; if (hfd->offset == 0 && !hfd->drive_empty) { - int sf = safetycheck (hfd->handle, 0, hfd->cache, hfd->blocksize); + int sf = safetycheck (hfd->handle, udi->device_path, 0, hfd->cache, hfd->blocksize); if (sf > 0) goto end; + if (sf == 0 && !hfd->readonly && harddrive_dangerous != 0x1234dead) { + write_log (L"'%s' forced read-only, safetycheck enabled\n", udi->device_path); + hfd->dangerous = 1; + // clear GENERIC_WRITE + CloseHandle (h); + h = CreateFile (udi->device_path, + GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, flags, NULL); + hfd->handle = h; + if (h == INVALID_HANDLE_VALUE) + goto end; + if (!DeviceIoControl(h, FSCTL_ALLOW_EXTENDED_DASD_IO, NULL, 0, NULL, 0, &r, NULL)) + write_log (L"WARNING: '%s' FSCTL_ALLOW_EXTENDED_DASD_IO returned %d\n", name, GetLastError ()); + } + +#if 0 if (sf == 0 && hfd->warned >= 0) { if (harddrive_dangerous != 0x1234dead) { if (!hfd->warned) @@ -237,6 +405,7 @@ int hdf_open_target (struct hardfiledata *hfd, const TCHAR *pname) } } else { hfd->warned = -1; +#endif } hfd->handle_valid = HDF_HANDLE_WIN32; hfd->emptyname = my_strdup (name); @@ -329,6 +498,7 @@ void hdf_close_target (struct hardfiledata *hfd) hfd->cache = 0; hfd->cache_valid = 0; hfd->drive_empty = 0; + hfd->dangerous = 0; } int hdf_dup_target (struct hardfiledata *dhfd, const struct hardfiledata *shfd) @@ -337,7 +507,7 @@ int hdf_dup_target (struct hardfiledata *dhfd, const struct hardfiledata *shfd) return 0; if (shfd->handle_valid == HDF_HANDLE_WIN32) { HANDLE duphandle; - if (!DuplicateHandle (GetCurrentProcess(), shfd->handle, GetCurrentProcess() , &duphandle, 0, FALSE, DUPLICATE_SAME_ACCESS)) + if (!DuplicateHandle (GetCurrentProcess (), shfd->handle, GetCurrentProcess () , &duphandle, 0, FALSE, DUPLICATE_SAME_ACCESS)) return 0; dhfd->handle = duphandle; 
dhfd->handle_valid = HDF_HANDLE_WIN32; @@ -617,8 +787,11 @@ int hdf_read_target (struct hardfiledata *hfd, void *buffer, uae_u64 offset, int static int hdf_write_2 (struct hardfiledata *hfd, void *buffer, uae_u64 offset, int len) { DWORD outlen = 0; + if (hfd->readonly) return 0; + if (hfd->dangerous) + return 0; hfd->cache_valid = 0; hdf_seek (hfd, offset); poscheck (hfd, len); @@ -642,6 +815,8 @@ int hdf_write_target (struct hardfiledata *hfd, void *buffer, uae_u64 offset, in while (len > 0) { int maxlen = len > CACHE_SIZE ? CACHE_SIZE : len; int ret = hdf_write_2(hfd, p, offset, maxlen); + if (ret < 0) + return ret; got += ret; if (ret != maxlen) return got; @@ -680,7 +855,7 @@ static void generatestorageproperty (struct uae_driveinfo *udi, int ignoreduplic { _tcscpy (udi->vendor_id, L"UAE"); _tcscpy (udi->product_id, L"DISK"); - _tcscpy (udi->product_rev, L"1.0"); + _tcscpy (udi->product_rev, L"1.1"); _stprintf (udi->device_name, L"%s", udi->device_path); udi->removablemedia = 1; } @@ -790,7 +965,7 @@ static BOOL GetDevicePropertyFromName(const TCHAR *DevicePath, DWORD Index, DWOR write_log (L"opening device '%s'\n", udi->device_path); hDevice = CreateFile( udi->device_path, // device interface name - GENERIC_READ | GENERIC_WRITE, // dwDesiredAccess + GENERIC_READ, // dwDesiredAccess FILE_SHARE_READ | FILE_SHARE_WRITE, // dwShareMode NULL, // lpSecurityAttributes OPEN_EXISTING, // dwCreationDistribution @@ -944,8 +1119,8 @@ static BOOL GetDevicePropertyFromName(const TCHAR *DevicePath, DWORD Index, DWOR continue; } nonzeropart++; - if (pi->PartitionType != 0x76) { - write_log (L"type not 0x76\n"); + if (pi->PartitionType != 0x76 && pi->PartitionType != 0x30) { + write_log (L"type not 0x76 or 0x30\n"); continue; } memmove (udi, udi2, sizeof (*udi)); @@ -953,7 +1128,7 @@ static BOOL GetDevicePropertyFromName(const TCHAR *DevicePath, DWORD Index, DWOR udi->offset = pi->StartingOffset.QuadPart; udi->size = pi->PartitionLength.QuadPart; write_log (L"used\n"); - if 
(safetycheck (hDevice, udi->offset, buffer, dg.BytesPerSector) <= 0) { + if (safetycheck (hDevice, udi->device_path, udi->offset, buffer, dg.BytesPerSector) <= 0) { _stprintf (udi->device_name, L"HD_P#%d_%s", pi->PartitionNumber, orgname); udi++; (*index2)++; @@ -972,7 +1147,7 @@ static BOOL GetDevicePropertyFromName(const TCHAR *DevicePath, DWORD Index, DWOR write_log (L"no MBR partition table detected, checking for RDB\n"); } - udi->dangerous = safetycheck (hDevice, 0, buffer, dg.BytesPerSector); + udi->dangerous = safetycheck (hDevice, udi->device_path, 0, buffer, dg.BytesPerSector); if (udi->dangerous > 0) goto end; amipartfound: @@ -1108,10 +1283,11 @@ end: return ret; } - #endif + + static int num_drives; static int hdf_init2 (int force) @@ -1132,6 +1308,7 @@ static int hdf_init2 (int force) #ifdef WINDDK buffer = VirtualAlloc (NULL, 65536, MEM_COMMIT, PAGE_READWRITE); if (buffer) { + errormode = SetErrorMode (SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); memset (uae_drives, 0, sizeof (uae_drives)); num_drives = 0; hIntDevInfo = SetupDiGetClassDevs (&GUID_DEVINTERFACE_DISK, NULL, NULL, DIGCF_PRESENT | DIGCF_INTERFACEDEVICE); @@ -1143,16 +1320,15 @@ static int hdf_init2 (int force) index++; num_drives = index2; } - SetupDiDestroyDeviceInfoList(hIntDevInfo); + SetupDiDestroyDeviceInfoList (hIntDevInfo); } - errormode = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); - dwDriveMask = GetLogicalDrives(); + dwDriveMask = GetLogicalDrives (); for(drive = 'A'; drive <= 'Z'; drive++) { if((dwDriveMask & 1) && (drive >= 'C' || usefloppydrives)) { TCHAR tmp1[20], tmp2[20]; DWORD drivetype; _stprintf (tmp1, L"%c:\\", drive); - drivetype = GetDriveType(tmp1); + drivetype = GetDriveType (tmp1); if (drivetype != DRIVE_REMOTE) { _stprintf (tmp2, L"\\\\.\\%c:", drive); GetDevicePropertyFromName (tmp2, index, &index2, buffer, 1); @@ -1161,7 +1337,7 @@ static int hdf_init2 (int force) } dwDriveMask >>= 1; } - SetErrorMode(errormode); + SetErrorMode 
(errormode); #if 0 hIntDevInfo = SetupDiGetClassDevs (&GUID_DEVCLASS_MTD, NULL, NULL, DIGCF_PRESENT); if (hIntDevInfo != INVALID_HANDLE_VALUE) { @@ -1176,7 +1352,7 @@ static int hdf_init2 (int force) VirtualFree (buffer, 0, MEM_RELEASE); } num_drives = index2; - write_log (L"Drive scan result: %d Amiga formatted drives detected\n", num_drives); + write_log (L"Drive scan result: %d drives detected\n", num_drives); #endif return num_drives; } @@ -1191,7 +1367,7 @@ int hdf_getnumharddrives (void) return num_drives; } -TCHAR *hdf_getnameharddrive (int index, int flags, int *sectorsize) +TCHAR *hdf_getnameharddrive (int index, int flags, int *sectorsize, int *dangerousdrive) { static TCHAR name[512]; TCHAR tmp[32]; @@ -1200,28 +1376,47 @@ TCHAR *hdf_getnameharddrive (int index, int flags, int *sectorsize) TCHAR *dang = L"?"; TCHAR *rw = L"RW"; + if (dangerousdrive) + *dangerousdrive = 0; switch (uae_drives[index].dangerous) { + case -6: + dang = L"[MBR]"; + break; + case -7: + dang = L"[!]"; + break; + case -8: + dang = L"[UNK]"; + break; case -9: - dang = L"Empty"; + dang = L"[EMPTY]"; break; case -3: - dang = L"CPRM"; + dang = L"(CPRM)"; break; case -2: - dang = L"SRAM"; + dang = L"(SRAM)"; break; case -1: - dang = L"RDB"; + dang = L"(RDB)"; break; case 0: - dang = L"NON-EMPTY"; + dang = L"[OS]"; + if (dangerousdrive) + *dangerousdrive |= 1; break; } - if (nomedia) - dang = L"NO MEDIA"; - if (uae_drives[index].readonly) + if (nomedia) { + dang = L"[NO MEDIA]"; + if (dangerousdrive) + *dangerousdrive &= ~1; + } + if (uae_drives[index].readonly) { rw = L"RO"; + if (dangerousdrive && !nomedia) + *dangerousdrive |= 2; + } if (sectorsize) *sectorsize = uae_drives[index].bytespersector; @@ -1385,7 +1580,7 @@ int win32_hardfile_media_change (const TCHAR *drvname, int inserted) static int progressdialogreturn; static int progressdialogactive; -static INT_PTR CALLBACK ProgressDialogProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam) +static INT_PTR CALLBACK 
ProgressDialogProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam) { switch(msg) { @@ -1417,7 +1612,7 @@ extern HMODULE hUIDLL; extern HINSTANCE hInst; #define COPY_CACHE_SIZE 1024*1024 -int harddrive_to_hdf(HWND hDlg, struct uae_prefs *p, int idx) +int harddrive_to_hdf (HWND hDlg, struct uae_prefs *p, int idx) { HANDLE h = INVALID_HANDLE_VALUE, hdst = INVALID_HANDLE_VALUE; void *cache = NULL; @@ -1448,22 +1643,22 @@ int harddrive_to_hdf(HWND hDlg, struct uae_prefs *p, int idx) if (hdst == INVALID_HANDLE_VALUE) goto err; li.QuadPart = size; - ret = SetFilePointer(hdst, li.LowPart, &li.HighPart, FILE_BEGIN); - if (ret == INVALID_FILE_SIZE && GetLastError() != NO_ERROR) + ret = SetFilePointer (hdst, li.LowPart, &li.HighPart, FILE_BEGIN); + if (ret == INVALID_FILE_SIZE && GetLastError () != NO_ERROR) goto err; - if (!SetEndOfFile(hdst)) + if (!SetEndOfFile (hdst)) goto err; li.QuadPart = 0; - SetFilePointer(hdst, 0, &li.HighPart, FILE_BEGIN); + SetFilePointer (hdst, 0, &li.HighPart, FILE_BEGIN); li.QuadPart = 0; - SetFilePointer(h, 0, &li.HighPart, FILE_BEGIN); + SetFilePointer (h, 0, &li.HighPart, FILE_BEGIN); progressdialogreturn = -1; progressdialogactive = 1; hwnd = CreateDialog (hUIDLL ? 
hUIDLL : hInst, MAKEINTRESOURCE (IDD_PROGRESSBAR), hDlg, ProgressDialogProc); if (hwnd == NULL) goto err; - hwndprogress = GetDlgItem(hwnd, IDC_PROGRESSBAR); - hwndprogresstxt = GetDlgItem(hwnd, IDC_PROGRESSBAR_TEXT); + hwndprogress = GetDlgItem (hwnd, IDC_PROGRESSBAR); + hwndprogresstxt = GetDlgItem (hwnd, IDC_PROGRESSBAR_TEXT); ShowWindow (hwnd, SW_SHOW); pct = 0; cnt = 1000; @@ -1473,9 +1668,9 @@ int harddrive_to_hdf(HWND hDlg, struct uae_prefs *p, int idx) if (progressdialogreturn >= 0) break; if (cnt > 0) { - SendMessage(hwndprogress, PBM_SETPOS, (WPARAM)pct, 0); + SendMessage (hwndprogress, PBM_SETPOS, (WPARAM)pct, 0); _stprintf (tmp, L"%dM / %dM (%d%%)", (int)(written >> 20), (int)(size >> 20), pct); - SendMessage(hwndprogresstxt, WM_SETTEXT, 0, (LPARAM)tmp); + SendMessage (hwndprogresstxt, WM_SETTEXT, 0, (LPARAM)tmp); while (PeekMessage (&msg, 0, 0, 0, PM_REMOVE)) { TranslateMessage (&msg); DispatchMessage (&msg); @@ -1485,7 +1680,7 @@ int harddrive_to_hdf(HWND hDlg, struct uae_prefs *p, int idx) got = gotdst = 0; li.QuadPart = sizecnt; if (SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN) == INVALID_SET_FILE_POINTER) { - DWORD err = GetLastError(); + DWORD err = GetLastError (); if (err != NO_ERROR) { progressdialogreturn = 3; break; @@ -1494,7 +1689,7 @@ int harddrive_to_hdf(HWND hDlg, struct uae_prefs *p, int idx) get = COPY_CACHE_SIZE; if (sizecnt + get > size) get = size - sizecnt; - if (!ReadFile(h, cache, get, &got, NULL)) { + if (!ReadFile (h, cache, get, &got, NULL)) { progressdialogreturn = 4; break; } @@ -1505,7 +1700,7 @@ int harddrive_to_hdf(HWND hDlg, struct uae_prefs *p, int idx) if (got > 0) { if (written + got > size) got = size - written; - if (!WriteFile(hdst, cache, got, &gotdst, NULL)) { + if (!WriteFile (hdst, cache, got, &gotdst, NULL)) { progressdialogreturn = 5; break; } @@ -1546,10 +1741,10 @@ err: ok: if (h != INVALID_HANDLE_VALUE) - CloseHandle(h); + CloseHandle (h); if (cache) - VirtualFree(cache, 0, MEM_RELEASE); + 
VirtualFree (cache, 0, MEM_RELEASE); if (hdst != INVALID_HANDLE_VALUE) - CloseHandle(hdst); + CloseHandle (hdst); return retcode; } diff --git a/od-win32/mman.c b/od-win32/mman.c index bb17c1c7..7d609c82 100644 --- a/od-win32/mman.c +++ b/od-win32/mman.c @@ -15,6 +15,7 @@ #if defined(NATMEM_OFFSET) #define BARRIER 32 +#define MAXZ3MEM 0x7F000000 static struct shmid_ds shmids[MAX_SHMID]; static int memwatchok = 0; @@ -226,15 +227,15 @@ void preinit_shm (void) } size64 = total64; if (maxmem < 0) - size64 = 0x7f000000; + size64 = MAXZ3MEM; else if (maxmem > 0) size64 = maxmem * 1024 * 1024; if (os_64bit) { - if (size64 > 0x7f000000) - size64 = 0x7f000000; + if (size64 > MAXZ3MEM) + size64 = MAXZ3MEM; } else { - if (size64 > 0x7f000000) - size64 = 0x7f000000; + if (size64 > MAXZ3MEM) + size64 = MAXZ3MEM; } if (size64 < 8 * 1024 * 1024) size64 = 8 * 1024 * 1024; diff --git a/od-win32/parser.c b/od-win32/parser.c index 5a50e454..b9189bdf 100644 --- a/od-win32/parser.c +++ b/od-win32/parser.c @@ -659,7 +659,7 @@ static void *uaeser_trap_thread (void *arg) int cnt, actual; DWORD evtmask; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 1); sd->threadactive = 1; uae_sem_post (&sd->sync_sem); startwce(sd, &evtmask); @@ -846,7 +846,7 @@ int openser (TCHAR *sername) NULL); if (hCom == INVALID_HANDLE_VALUE) { write_log (L"SERIAL: failed to open '%s' err=%d\n", sername, GetLastError()); - closeser(); + closeser (); return 0; } @@ -1155,8 +1155,8 @@ void hsyncstuff(void) //extern int warned_JIT_0xF10000; //warned_JIT_0xF10000 = 0; if (flashscreen > 0) { - DX_Fill (0, 0, -1, 30, 0x000000); - DX_Invalidate (0, 0, -1, 30); + //DX_Fill (0, 0, -1, 30, 0x000000); can't do anymore + //DX_Invalidate (0, 0, -1, 30); flashscreen--; if (flashscreen == 0) picasso_refresh (); @@ -1175,7 +1175,7 @@ void hsyncstuff(void) #endif } -static int enumserialports_2(void) +static int enumserialports_2 (int cnt) { // Create a device information set that will be the container for // the 
device interfaces. @@ -1185,7 +1185,6 @@ static int enumserialports_2(void) SP_DEVICE_INTERFACE_DATA ifcData; DWORD dwDetDataSize = sizeof (SP_DEVICE_INTERFACE_DETAIL_DATA) + 256; DWORD ii; - int cnt = 0; hDevInfo = SetupDiGetClassDevs (&GUID_CLASS_COMPORT, NULL, NULL, DIGCF_PRESENT | DIGCF_DEVICEINTERFACE); if(hDevInfo == INVALID_HANDLE_VALUE) @@ -1249,7 +1248,7 @@ end: return cnt; } -int enumserialports(void) +int enumserialports (void) { int cnt, i, j; TCHAR name[256]; @@ -1257,7 +1256,15 @@ int enumserialports(void) TCHAR devname[1000]; write_log (L"Serial port enumeration..\n"); - cnt = enumserialports_2 (); + + comports[0].dev = my_strdup (L"ENET:H"); + comports[0].cfgname = my_strdup (comports[0].dev); + comports[0].name = my_strdup (L"NET (host)"); + comports[1].dev = my_strdup (L"ENET:L"); + comports[1].cfgname = my_strdup (comports[1].dev); + comports[1].name = my_strdup (L"NET (client)"); + + cnt = enumserialports_2 (2); for (i = 0; i < 10; i++) { _stprintf(name, L"COM%d", i); if (!QueryDosDevice (name, devname, sizeof devname)) @@ -1277,17 +1284,11 @@ int enumserialports(void) cnt++; } } - if (isIPC (COMPIPENAME)) { - comports[j].dev = xmalloc (100); - _stprintf (comports[cnt].dev, L"\\\\.\\pipe\\%s", COMPIPENAME); - comports[j].cfgname = my_strdup (COMPIPENAME); - comports[j].name = my_strdup (COMPIPENAME); - } write_log (L"Serial port enumeration end\n"); return cnt; } -void sernametodev(TCHAR *sername) +void sernametodev (TCHAR *sername) { int i; @@ -1300,7 +1301,7 @@ void sernametodev(TCHAR *sername) sername[0] = 0; } -void serdevtoname(TCHAR *sername) +void serdevtoname (TCHAR *sername) { int i; for (i = 0; i < MAX_SERIAL_PORTS && comports[i].name; i++) { diff --git a/od-win32/picasso96_win.c b/od-win32/picasso96_win.c index d7b7b782..d211bf6d 100644 --- a/od-win32/picasso96_win.c +++ b/od-win32/picasso96_win.c @@ -34,6 +34,8 @@ #define P96DX 0 #define WINCURSOR 1 +static int multithreaded = 0; + #include "sysconfig.h" #include "sysdeps.h" @@ 
-138,6 +140,9 @@ static uaecptr boardinfo; static int interrupt_enabled; int p96vblank; +static uae_sem_t sem; +static int thread_alive; + static uaecptr uaegfx_resname, uaegfx_resid, uaegfx_init, @@ -682,6 +687,14 @@ static int isvsync (void) return currprefs.gfx_pfullscreen && currprefs.gfx_pvsync; } +static void flushpixels_do (void) +{ + if (multithreaded) + uae_sem_post (&sem); + else + flushpixels (); +} + void picasso_handle_vsync (void) { static int vsynccnt; @@ -711,15 +724,20 @@ void picasso_handle_vsync (void) return; framecnt++; - mouseupdate (); + if (!multithreaded) + mouseupdate (); if (thisisvsync) { - if (doskip () && p96skipmode == 0) { - ; + if (multithreaded) { + uae_sem_post (&sem); } else { - flushpixels (); + if (doskip () && p96skipmode == 0) { + ; + } else { + flushpixels_do (); + } + gfx_unlock_picasso (); } - gfx_unlock_picasso (); } } @@ -938,7 +956,7 @@ void picasso_refresh (void) width = picasso96_state.Width; height = picasso96_state.Height; } - flushpixels (); + flushpixels_do (); } else { write_log (L"ERROR - picasso_refresh() can't refresh!\n"); } @@ -4036,7 +4054,7 @@ static void statusline (uae_u8 *dst) yy = 0; for (y = dst_height - TD_TOTAL_HEIGHT; y < dst_height; y++) { uae_u8 *buf = dst + y * pitch; - draw_status_line_single (buf, picasso_vidinfo.pixbytes, yy, picasso96_state.Width, p96rc, p96gc, p96bc); + draw_status_line_single (buf, picasso_vidinfo.pixbytes, yy, picasso96_state.Width, p96rc, p96gc, p96bc, NULL); yy++; } } @@ -4561,6 +4579,25 @@ static void initvblankirq (TrapContext *ctx, uaecptr base) CallLib (ctx, get_long (4), -168); /* AddIntServer */ } +static void *picasso_copy (void *data) +{ + thread_alive = 1; + while (thread_alive) { + uae_sem_wait (&sem); + if (!thread_alive) + break; + if (!picasso_on) + continue; + if (dx_islost ()) + continue; + mouseupdate (); + flushpixels (); + } + thread_alive = -1; + return NULL; +} + + static uaecptr uaegfx_card_install (TrapContext *ctx, uae_u32 extrasize) { uae_u32 
functable, datatable, a2; @@ -4568,9 +4605,9 @@ static uaecptr uaegfx_card_install (TrapContext *ctx, uae_u32 extrasize) uaecptr findcardfunc, initcardfunc; uaecptr exec = get_long (4); - uaegfx_resid = ds (L"UAE Graphics Card 3.2"); + uaegfx_resid = ds (L"UAE Graphics Card 3.3"); uaegfx_vblankname = ds (L"UAE Graphics Card VBLANK"); - uaegfx_vblankname = ds (L"UAE Graphics Card PORTS"); + uaegfx_portsname = ds (L"UAE Graphics Card PORTS"); /* Open */ openfunc = here (); @@ -4626,6 +4663,11 @@ static uaecptr uaegfx_card_install (TrapContext *ctx, uae_u32 extrasize) if (currprefs.win32_rtgvblankrate >= -1) initvblankirq (ctx, uaegfx_base); + if (multithreaded && thread_alive == 0) { + uae_sem_init (&sem, FALSE, FALSE); + uae_start_thread (L"rtg_copy", picasso_copy, NULL, NULL); + } + write_log (L"uaegfx.card %d.%d init @%08X\n", UAEGFX_VERSION, UAEGFX_REVISION, uaegfx_base); return uaegfx_base; } diff --git a/od-win32/picasso96_win.h b/od-win32/picasso96_win.h index 308c8431..a49cca06 100644 --- a/od-win32/picasso96_win.h +++ b/od-win32/picasso96_win.h @@ -548,10 +548,7 @@ extern int uaegfx_card_found; extern struct picasso96_state_struct picasso96_state; extern uae_u16 picasso96_pixel_format; -extern int DX_InvertRect (int X, int Y, int Width, int Height); extern void DX_SetPalette (int start, int count); -extern void DX_Invalidate (int, int, int, int); -extern int DX_Flip (void); extern void picasso_enablescreen (int on); extern void picasso_refresh (void); extern void picasso_handle_vsync (void); diff --git a/od-win32/posixemu.c b/od-win32/posixemu.c index ba9e8dea..fd2730e4 100644 --- a/od-win32/posixemu.c +++ b/od-win32/posixemu.c @@ -280,7 +280,6 @@ int uae_start_thread (TCHAR *name, void *(*f)(void *), void *arg, uae_thread_id thp->arg = arg; hThread = (HANDLE)_beginthreadex (NULL, 0, thread_init, thp, 0, &foo); if (hThread) { - SetThreadPriority (hThread, THREAD_PRIORITY_ABOVE_NORMAL); if (name) write_log (L"Thread '%s' started (%d)\n", name, hThread); } 
else { @@ -301,7 +300,15 @@ int uae_start_thread_fast (void *(*f)(void *), void *arg, uae_thread_id *tid) DWORD_PTR cpu_affinity = 1, cpu_paffinity = 1; -void uae_set_thread_priority (int pri) +void uae_set_thread_priority (uae_thread_id *tid, int pri) { + int pri2 = GetThreadPriority (NULL); + + pri2 += pri; + if (pri2 > 1) + pri2 = 1; + if (pri2 < -1) + pri2 = -1; + SetThreadPriority (tid ? *tid : NULL, pri2); } diff --git a/od-win32/resources/resource b/od-win32/resources/resource index f6d63703..64b879a9 100644 --- a/od-win32/resources/resource +++ b/od-win32/resources/resource @@ -25,6 +25,7 @@ #define IDS_FRONTEND 19 #define IDS_CHIPSET2 20 #define IDS_GAMEPORTS 21 +#define IDS_RTG 22 #define IDS_EXTTEXT 100 #define IDS_EXTACTUAL 101 #define IDS_SOUND 102 @@ -317,6 +318,8 @@ #define IDI_FILE 349 #define IDS_AUTOSCALE_RESIZE 350 #define IDS_PRINTER_ASCII 351 +#define IDD_DIALOG2 351 +#define IDD_RTG 351 #define IDS_PRINTER_EPSON 352 #define IDS_PRINTER_POSTSCRIPT_DETECTION 353 #define IDS_PRINTER_POSTSCRIPT_EMULATION 354 @@ -1042,7 +1045,7 @@ #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NO_MFC 1 #define _APS_3D_CONTROLS 1 -#define _APS_NEXT_RESOURCE_VALUE 350 +#define _APS_NEXT_RESOURCE_VALUE 352 #define _APS_NEXT_COMMAND_VALUE 40045 #define _APS_NEXT_CONTROL_VALUE 1790 #define _APS_NEXT_SYMED_VALUE 101 diff --git a/od-win32/resources/winuae.rc b/od-win32/resources/winuae.rc index 9c91e886..15a93713 100644 --- a/od-win32/resources/winuae.rc +++ b/od-win32/resources/winuae.rc @@ -152,7 +152,7 @@ BEGIN RTEXT "Fullscreen:",IDC_STATIC,17,19,40,15,SS_CENTERIMAGE END -IDD_MEMORY DIALOGEX 0, 0, 300, 239 +IDD_MEMORY DIALOGEX 0, 0, 300, 158 STYLE DS_LOCALEDIT | DS_SETFONT | DS_3DLOOK | DS_CONTROL | WS_CHILD EXSTYLE WS_EX_CONTEXTHELP FONT 8, "MS Sans Serif", 0, 0, 0x1 @@ -170,31 +170,13 @@ BEGIN RTEXT "Z3 Fast:",IDC_Z3TEXT,139,51,30,10,SS_CENTERIMAGE CONTROL "Slider1",IDC_Z3FASTMEM,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,179,47,60,20 EDITTEXT 
IDC_Z3FASTRAM,243,50,34,12,ES_CENTER | ES_READONLY - RTEXT "Memory: [] Graphics card memory. Required for RTG (Picasso96) emulation.",IDC_GFXCARDTEXT,25,98,53,10,SS_NOTIFY | SS_CENTERIMAGE - CONTROL "Slider1",IDC_P96MEM,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,88,94,60,20 - EDITTEXT IDC_P96RAM,152,97,34,12,ES_CENTER | ES_READONLY - GROUPBOX "A3000/A4000 Advanced Memory Settings",-1,13,179,275,57 - RTEXT "Motherboard Fast RAM:",-1,39,194,129,10,SS_CENTERIMAGE - CONTROL "",IDC_MBMEM1,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,180,190,59,20 - EDITTEXT IDC_MBRAM1,243,193,34,12,ES_CENTER | ES_READONLY - RTEXT "Prosessor Slot Fast RAM:",-1,39,217,129,10,SS_CENTERIMAGE - CONTROL "",IDC_MBMEM2,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,180,213,59,20 - EDITTEXT IDC_MBRAM2,243,216,34,12,ES_CENTER | ES_READONLY - GROUPBOX "RTG Graphics Card Settings",-1,14,81,275,95 - CONTROL "Scale if smaller than display size setting",IDC_RTG_SCALE, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,128,162,10 - CONTROL "Match host and RTG color depth if possible",IDC_RTG_MATCH_DEPTH, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,116,163,10 - COMBOBOX IDC_RTG_8BIT,211,91,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP - COMBOBOX IDC_RTG_16BIT,211,106,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP - COMBOBOX IDC_RTG_24BIT,211,121,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP - COMBOBOX IDC_RTG_32BIT,211,136,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP - CONTROL "Always scale in windowed mode",IDC_RTG_SCALE_ALLOW, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,140,162,10 - COMBOBOX IDC_RTG_SCALE_ASPECTRATIO,211,155,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP - RTEXT "Aspect ratio:",-1,153,156,52,10,SS_CENTERIMAGE - RTEXT "Refresh rate:",-1,22,157,51,10,SS_CENTERIMAGE - COMBOBOX IDC_RTG_VBLANKRATE,77,155,68,150,CBS_DROPDOWN | CBS_HASSTRINGS | WS_VSCROLL | 
WS_TABSTOP + GROUPBOX "A3000/A4000 Advanced Memory Settings",-1,14,84,274,65 + RTEXT "Motherboard Fast RAM:",-1,40,100,129,10,SS_CENTERIMAGE + CONTROL "",IDC_MBMEM1,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,181,96,59,20 + EDITTEXT IDC_MBRAM1,244,99,34,12,ES_CENTER | ES_READONLY + RTEXT "Prosessor Slot Fast RAM:",-1,40,123,129,10,SS_CENTERIMAGE + CONTROL "",IDC_MBMEM2,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,181,119,59,20 + EDITTEXT IDC_MBRAM2,244,122,34,12,ES_CENTER | ES_READONLY END IDD_CPU DIALOGEX 0, 0, 300, 226 @@ -818,12 +800,12 @@ BEGIN GROUPBOX "",IDC_PANEL_FRAME,112,4,303,247,NOT WS_VISIBLE CONTROL "",IDC_PANELTREE,"SysTreeView32",TVS_HASLINES | TVS_SHOWSELALWAYS | TVS_NOSCROLL | WS_BORDER | WS_HSCROLL | WS_TABSTOP,5,5,101,248,WS_EX_CLIENTEDGE GROUPBOX "",IDC_PANEL_FRAME_OUTER,110,2,307,251 - PUSHBUTTON "Reset",IDC_RESETAMIGA,6,259,47,14 - PUSHBUTTON "Quit",IDC_QUITEMU,57,259,47,14 + PUSHBUTTON "Reset",IDC_RESETAMIGA,5,259,47,14 + PUSHBUTTON "Quit",IDC_QUITEMU,55,259,47,14 DEFPUSHBUTTON "OK",IDOK,260,259,50,14 PUSHBUTTON "Cancel",IDCANCEL,313,259,50,14 PUSHBUTTON "Help",IDHELP,366,259,50,14,WS_DISABLED - PUSHBUTTON "Restart",IDC_RESTARTEMU,109,259,47,14,NOT WS_VISIBLE + PUSHBUTTON "Restart",IDC_RESTARTEMU,106,259,47,14,NOT WS_VISIBLE END IDD_PATHS DIALOGEX 0, 0, 300, 243 @@ -968,6 +950,30 @@ BEGIN CTEXT "Enter address",IDC_DBG_ADDRINPUTTXT,20,1,100,10,SS_CENTERIMAGE | WS_TABSTOP END +IDD_RTG DIALOGEX 0, 0, 300, 135 +STYLE DS_LOCALEDIT | DS_SETFONT | DS_3DLOOK | DS_CONTROL | WS_CHILD +FONT 8, "MS Sans Serif", 0, 0, 0x1 +BEGIN + RTEXT "Memory: [] Graphics card memory. 
Required for RTG (Picasso96) emulation.",IDC_GFXCARDTEXT,25,44,53,10,SS_NOTIFY | SS_CENTERIMAGE + CONTROL "",IDC_P96MEM,"msctls_trackbar32",TBS_AUTOTICKS | TBS_TOP | WS_TABSTOP,88,40,60,20 + EDITTEXT IDC_P96RAM,152,43,34,12,ES_CENTER | ES_READONLY + GROUPBOX "RTG Graphics Card Settings",-1,14,12,275,113 + CONTROL "Scale if smaller than display size setting",IDC_RTG_SCALE, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,74,162,10 + CONTROL "Match host and RTG color depth if possible",IDC_RTG_MATCH_DEPTH, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,62,163,10 + COMBOBOX IDC_RTG_8BIT,211,29,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_RTG_16BIT,211,44,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_RTG_24BIT,211,59,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP + COMBOBOX IDC_RTG_32BIT,211,74,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP + CONTROL "Always scale in windowed mode",IDC_RTG_SCALE_ALLOW, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,25,86,162,10 + COMBOBOX IDC_RTG_SCALE_ASPECTRATIO,211,101,68,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP + RTEXT "Aspect ratio:",-1,153,102,52,10,SS_CENTERIMAGE + RTEXT "Refresh rate:",-1,22,103,51,10,SS_CENTERIMAGE + COMBOBOX IDC_RTG_VBLANKRATE,77,101,68,150,CBS_DROPDOWN | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP +END + ///////////////////////////////////////////////////////////////////////////// // @@ -975,8 +981,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,6,1,0 - PRODUCTVERSION 1,6,1,0 + FILEVERSION 1,6,2,0 + PRODUCTVERSION 1,6,2,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -992,12 +998,12 @@ BEGIN BLOCK "040904b0" BEGIN VALUE "FileDescription", "WinUAE" - VALUE "FileVersion", "1.6.1" + VALUE "FileVersion", "1.6.2" VALUE "InternalName", "WinUAE" VALUE "LegalCopyright", "© 1996-2009 under the GNU Public License (GPL)" VALUE "OriginalFilename", "WinUAE.exe" VALUE "ProductName", "WinUAE" - 
VALUE "ProductVersion", "1.6.1" + VALUE "ProductVersion", "1.6.2" END END BLOCK "VarFileInfo" @@ -1156,6 +1162,7 @@ BEGIN IDS_FRONTEND "Frontend" IDS_CHIPSET2 "Adv. Chipset" IDS_GAMEPORTS "Game ports" + IDS_RTG "Display Card" END STRINGTABLE @@ -1393,8 +1400,6 @@ STRINGTABLE BEGIN IDS_ROM_AVAILABLE "available" IDS_ROM_UNAVAILABLE "unavailable" - IDS_HARDDRIVESAFETYWARNING1 - "Warning: The drive safety check is active. Selected drive is not empty and non-RDB partitioned." IDS_NUMSG_KS68EC020 "The selected system ROM requires a 68020 with 24-bit addressing or higher CPU." IDS_ROMSCANNOROMS "No supported system ROMs detected." IDS_NUMSG_KICKREP "You need to have a floppy disk (image file) in DF0: to use the system ROM replacement." @@ -1404,8 +1409,6 @@ BEGIN IDS_HDCLONE_FAIL "Hard drive image file creation failed.\nError code %d:%d." IDS_NUMSG_KS68030 "The selected system ROM requires a 68030 CPU." IDS_NUMSG_EXPROMNEED "One of the following expansion boot ROMs is required:\n\n%s\n\nCheck the System ROM path in the Paths panel and click Rescan ROMs." - IDS_HARDDRIVESAFETYWARNING2 - "Warning: The drive safety check has been disabled, and non-empty and non-RDB partitioned hard disk(s) were detected." IDS_SB_FAVORITENAME "Enter name..." IDS_SB_CUSTOMEVENT "Enter custom event string.." END diff --git a/od-win32/resources/winuae_minimal.rc b/od-win32/resources/winuae_minimal.rc index 76037934..34f7d197 100644 --- a/od-win32/resources/winuae_minimal.rc +++ b/od-win32/resources/winuae_minimal.rc @@ -12,14 +12,47 @@ #undef APSTUDIO_READONLY_SYMBOLS ///////////////////////////////////////////////////////////////////////////// -// English (U.S.) 
resources +// English (Neutral) resources #if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) #ifdef _WIN32 -LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +LANGUAGE LANG_ENGLISH, SUBLANG_NEUTRAL #pragma code_page(1252) #endif //_WIN32 +///////////////////////////////////////////////////////////////////////////// +// +// Icon +// + +// Icon with lowest ID value placed first to ensure application icon +// remains consistent on all systems. +IDI_APPICON ICON "winuae.ico" +IDI_FLOPPY ICON "35floppy.ico" +IDI_ABOUT ICON "amigainfo.ico" +IDI_HARDDISK ICON "Drive.ico" +IDI_CPU ICON "cpu.ico" +IDI_GAMEPORTS ICON "joystick.ico" +IDI_IOPORTS ICON "joystick.ico" +IDI_INPUT ICON "joystick.ico" +IDI_MISC1 ICON "misc.ico" +IDI_MISC2 ICON "misc.ico" +IDI_MOVE_UP ICON "move_up.ico" +IDI_MOVE_DOWN ICON "move_dow.ico" +IDI_AVIOUTPUT ICON "avioutput.ico" +IDI_DISK ICON "Drive.ico" +IDI_FOLDER ICON "folder.ico" +IDI_SOUND ICON "sound.ico" +IDI_DISPLAY ICON "screen.ico" +IDI_ROOT ICON "root.ico" +IDI_MEMORY ICON "chip.ico" +IDI_QUICKSTART ICON "quickstart.ico" +IDI_PATHS ICON "paths.ico" +IDI_DISKIMAGE ICON "diskimage.ico" +IDI_PORTS ICON "port.ico" +IDI_CONFIGFILE ICON "configfile.ico" +IDI_FILE ICON "file.ico" + ///////////////////////////////////////////////////////////////////////////// // // Accelerator @@ -115,8 +148,8 @@ BEGIN RTEXT "Resolution:",IDC_STATIC,27,152,59,15,SS_CENTERIMAGE COMBOBOX IDC_LORES,100,152,102,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP CONTROL "Remove interlace artifacts",IDC_FLICKERFIXER,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,19,139,99,10 - RTEXT "Windowed:",IDC_STATIC,18,46,38,15,SS_CENTERIMAGE - RTEXT "Fullscreen:",IDC_STATIC,17,19,38,15,SS_CENTERIMAGE + RTEXT "Windowed:",IDC_STATIC,17,46,40,15,SS_CENTERIMAGE + RTEXT "Fullscreen:",IDC_STATIC,17,19,40,15,SS_CENTERIMAGE END IDD_MEMORY DIALOGEX 0, 0, 300, 239 @@ -936,44 +969,14 @@ BEGIN END -///////////////////////////////////////////////////////////////////////////// -// -// Icon -// - 
-// Icon with lowest ID value placed first to ensure application icon -// remains consistent on all systems. -IDI_APPICON ICON "winuae.ico" -IDI_FLOPPY ICON "35floppy.ico" -IDI_ABOUT ICON "amigainfo.ico" -IDI_HARDDISK ICON "Drive.ico" -IDI_CPU ICON "cpu.ico" -IDI_GAMEPORTS ICON "joystick.ico" -IDI_IOPORTS ICON "joystick.ico" -IDI_INPUT ICON "joystick.ico" -IDI_MISC1 ICON "misc.ico" -IDI_MISC2 ICON "misc.ico" -IDI_MOVE_UP ICON "move_up.ico" -IDI_MOVE_DOWN ICON "move_dow.ico" -IDI_AVIOUTPUT ICON "avioutput.ico" -IDI_DISK ICON "Drive.ico" -IDI_CONFIGFILE ICON "file.ico" -IDI_FOLDER ICON "folder.ico" -IDI_SOUND ICON "sound.ico" -IDI_DISPLAY ICON "screen.ico" -IDI_ROOT ICON "root.ico" -IDI_MEMORY ICON "chip.ico" -IDI_QUICKSTART ICON "quickstart.ico" -IDI_PATHS ICON "paths.ico" - ///////////////////////////////////////////////////////////////////////////// // // Version // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,6,0,0 - PRODUCTVERSION 1,6,0,0 + FILEVERSION 1,6,1,0 + PRODUCTVERSION 1,6,1,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -989,12 +992,12 @@ BEGIN BLOCK "040904b0" BEGIN VALUE "FileDescription", "WinUAE" - VALUE "FileVersion", "1.6.0" + VALUE "FileVersion", "1.6.1" VALUE "InternalName", "WinUAE" VALUE "LegalCopyright", "© 1996-2009 under the GNU Public License (GPL)" VALUE "OriginalFilename", "WinUAE.exe" VALUE "ProductName", "WinUAE" - VALUE "ProductVersion", "1.6.0" + VALUE "ProductVersion", "1.6.1" END END BLOCK "VarFileInfo" @@ -1085,6 +1088,7 @@ BEGIN END END + ///////////////////////////////////////////////////////////////////////////// // // DESIGNINFO @@ -1234,6 +1238,7 @@ BEGIN IDS_SOUND_51 "5.1 Channels" IDS_AUTOMATIC "Automatic" IDS_ALL "All" + IDS_INPUTTOGGLE "Toggle" END STRINGTABLE @@ -1439,7 +1444,7 @@ BEGIN IDS_RES_SUPERHIRES "SuperHires" END -#endif // English (U.S.) 
resources +#endif // English (Neutral) resources ///////////////////////////////////////////////////////////////////////////// diff --git a/od-win32/rp.c b/od-win32/rp.c index 69d7a787..2e3bab9a 100644 --- a/od-win32/rp.c +++ b/od-win32/rp.c @@ -156,7 +156,7 @@ static int port_insert2 (int num, const TCHAR *name) } } trimws (tmp2); - return inputdevice_joyport_config (&changed_prefs, tmp2, num, type); + return inputdevice_joyport_config (&changed_prefs, tmp2, num, 0, type); } static int port_insert (int num, const TCHAR *name) @@ -166,7 +166,7 @@ static int port_insert (int num, const TCHAR *name) if (num < 0 || num >= MAX_JPORTS) return FALSE; if (_tcslen (name) == 0) { - inputdevice_joyport_config (&changed_prefs, L"none", num, 0); + inputdevice_joyport_config (&changed_prefs, L"none", num, 0, 0); return TRUE; } if (_tcslen (name) >= sizeof (tmp1) / sizeof (TCHAR) - 1) @@ -353,6 +353,8 @@ static void get_screenmode (struct RPScreenMode *sm, struct uae_prefs *p) } if (full > 1) m |= RP_SCREENMODE_FULLWINDOW; + if (p->gfx_filter_scanlines || p->gfx_linedbl == 2) + m |= RP_SCREENMODE_SCANLINES; sm->dwScreenMode = m; if (log_rp) @@ -484,6 +486,13 @@ static void set_screenmode (struct RPScreenMode *sm, struct uae_prefs *p) p->gfx_xcenter_size = sm->lClipWidth; p->gfx_ycenter_size = sm->lClipHeight; + if (sm->dwScreenMode & RP_SCREENMODE_SCANLINES) { + if (p->gfx_linedbl > 0) { + p->gfx_linedbl = 2; + p->gfx_filter_scanlines = 0; + } + } + updatewinfsmode (p); hwndset = 0; } @@ -689,7 +698,7 @@ static void sendfeatures (void) feat = RP_FEATURE_POWERLED | RP_FEATURE_SCREEN1X | RP_FEATURE_FULLSCREEN; feat |= RP_FEATURE_PAUSE | RP_FEATURE_TURBO | RP_FEATURE_VOLUME | RP_FEATURE_SCREENCAPTURE; - feat |= RP_FEATURE_STATE; + feat |= RP_FEATURE_STATE | RP_FEATURE_SCANLINES; if (!WIN32GFX_IsPicassoScreen ()) feat |= RP_FEATURE_SCREEN2X | RP_FEATURE_SCREEN4X; RPSendMessagex (RPIPCGM_FEATURES, feat, 0, NULL, 0, &guestinfo, NULL); diff --git a/od-win32/screenshot.c 
b/od-win32/screenshot.c index e16406be..7afd5385 100644 --- a/od-win32/screenshot.c +++ b/od-win32/screenshot.c @@ -134,7 +134,7 @@ int screenshot_prepare (void) bi.bmiHeader.biClrImportant = 0; // Reserve memory for bitmap bits - if (!(lpvBits = malloc (bi.bmiHeader.biSizeImage))) + if (!(lpvBits = xmalloc (bi.bmiHeader.biSizeImage))) goto oops; // out of memory // Have GetDIBits convert offscreen_bitmap to a DIB (device-independent bitmap): diff --git a/od-win32/sounddep/sound.c b/od-win32/sounddep/sound.c index d0cc982d..96bb9cf0 100644 --- a/od-win32/sounddep/sound.c +++ b/od-win32/sounddep/sound.c @@ -81,6 +81,7 @@ struct sound_dp PaStream *pastream; HANDLE paevent; int opacounter; + int pablocking; }; #define ADJUST_SIZE 30 @@ -138,7 +139,7 @@ void update_sound (int freq, int longframe) lines += 1.0; if (have_sound) { - scaled_sample_evtime_orig = 227.0 * (lines + maxvpos) * freq * CYCLE_UNIT / (float)sdp->obtainedfreq; + scaled_sample_evtime_orig = maxhpos * (lines + maxvpos) * freq * CYCLE_UNIT / (float)sdp->obtainedfreq; scaled_sample_evtime = scaled_sample_evtime_orig; } } @@ -389,11 +390,18 @@ static DWORD fillsupportedmodes (struct sound_data *sd, int freq, struct dsaudio static void finish_sound_buffer_pa (struct sound_data *sd, uae_u16 *sndbuffer) { struct sound_dp *s = sd->data; - while (s->opacounter == s->pacounter && s->pastream && !sd->paused) - WaitForSingleObject (s->paevent, 10); - ResetEvent (s->paevent); - s->opacounter = s->pacounter; - memcpy (s->pasoundbuffer[s->patoggle], sndbuffer, sd->sndbufsize); + if (s->pablocking) { + if (s->paframesperbuffer != sd->sndbufsize / (sd->channels * 2)) { + write_log (L"sound buffer size mistmatch %d <> %d\n", s->paframesperbuffer, sd->sndbufsize / (sd->channels * 2)); + } else { + Pa_WriteStream (s->pastream, sndbuffer, s->paframesperbuffer); + } + } else { + while (s->opacounter == s->pacounter && s->pastream && !sd->paused) + WaitForSingleObject (s->paevent, 10); + s->opacounter = s->pacounter; + 
memcpy (s->pasoundbuffer[s->patoggle], sndbuffer, sd->sndbufsize); + } } static int _cdecl portAudioCallback (const void *inputBuffer, void *outputBuffer, @@ -406,7 +414,7 @@ static int _cdecl portAudioCallback (const void *inputBuffer, void *outputBuffer struct sound_dp *s = sd->data; if (framesPerBuffer != sd->sndbufsize / (sd->channels * 2)) { - write_log (L"%d <> %d\n", framesPerBuffer, sd->sndbufsize / (sd->channels * 2)); + write_log (L"sound buffer size mistmatch %d <> %d\n", framesPerBuffer, sd->sndbufsize / (sd->channels * 2)); } else { memcpy (outputBuffer, s->pasoundbuffer[s->patoggle], sd->sndbufsize); } @@ -448,21 +456,22 @@ static int open_audio_pa (struct sound_data *sd, int index) PaError err; TCHAR *name; TCHAR *errtxt; + int defaultrate = 0; size = sd->sndbufsize; s->paframesperbuffer = size; - sd->sndbufsize = size * ch * 2; sd->devicetype = SOUND_DEVICE_PA; memset (&p, 0, sizeof p); di = Pa_GetDeviceInfo (dev); - p.channelCount = ch; - p.device = dev; - p.hostApiSpecificStreamInfo = NULL; - p.sampleFormat = paInt16; - p.suggestedLatency = di->defaultLowOutputLatency; - p.hostApiSpecificStreamInfo = NULL; for (;;) { int err2; + p.channelCount = ch; + p.device = dev; + p.hostApiSpecificStreamInfo = NULL; + p.sampleFormat = paInt16; + p.suggestedLatency = di->defaultLowOutputLatency; + p.hostApiSpecificStreamInfo = NULL; + err = Pa_IsFormatSupported (NULL, &p, freq); if (err == paFormatIsSupported) break; @@ -470,36 +479,44 @@ static int open_audio_pa (struct sound_data *sd, int index) errtxt = au (Pa_GetErrorText (err)); write_log (L"PASOUND: sound format not supported, ch=%d, rate=%d. 
%s\n", freq, ch, errtxt); xfree (errtxt); - if (freq < 48000) { - freq = 48000; - err = Pa_IsFormatSupported (NULL, &p, freq); - if (err == paFormatIsSupported) { - sd->freq = freq; - break; + if (err == paInvalidChannelCount) { + if (ch > 2) { + ch = sd->channels = 2; + continue; } + goto end; + } + if (freq < 44000 && err == paInvalidSampleRate) { + freq = 44000; + sd->freq = freq; + continue; + } + if (freq < 48000 && err == paInvalidSampleRate) { + freq = 48000; + sd->freq = freq; + continue; } - if (freq != di->defaultSampleRate) { + if (freq != di->defaultSampleRate && err == paInvalidSampleRate && !defaultrate) { freq = di->defaultSampleRate; - err = Pa_IsFormatSupported (NULL, &p, freq); - if (err == paFormatIsSupported) { - sd->freq = freq; - break; - } + sd->freq = freq; + defaultrate = 1; + continue; } - if (err2 != err) { + goto end; + } + sd->sndbufsize = size * ch * 2; +// s->pablocking = 1; +// err = Pa_OpenStream (&s->pastream, NULL, &p, freq, s->paframesperbuffer, paNoFlag, NULL, NULL); +// if (err != paNoError) { + s->pablocking = 0; + err = Pa_OpenStream (&s->pastream, NULL, &p, freq, s->paframesperbuffer, paNoFlag, portAudioCallback, sd); + if (err != paNoError) { errtxt = au (Pa_GetErrorText (err)); - write_log (L"PASOUND: sound format not supported, ch=%d, rate=%d. 
%s\n", freq, ch, errtxt); + write_log (L"PASOUND: Pa_OpenStream() error %d (%s)\n", err, errtxt); xfree (errtxt); + goto end; } - goto end; - } - err = Pa_OpenStream (&s->pastream, NULL, &p, freq, s->paframesperbuffer, paNoFlag, portAudioCallback, sd); - if (err != paNoError) { - errtxt = au (Pa_GetErrorText (err)); - write_log (L"PASOUND: Pa_OpenStream() error %d (%s)\n", err, errtxt); - xfree (errtxt); - goto end; - } +// } s->paevent = CreateEvent (NULL, FALSE, FALSE, NULL); for (i = 0; i < 2; i++) s->pasoundbuffer[i] = xcalloc (sd->sndbufsize, 1); @@ -626,6 +643,7 @@ static int open_audio_ds (struct sound_data *sd, int index) if (s->max_sndbufsize * 2 > s->dsoundbuf) s->max_sndbufsize = s->dsoundbuf / 2; + sd->samplesize = sd->channels * 2; recalc_offsets (sd); @@ -656,12 +674,12 @@ static int open_audio_ds (struct sound_data *sd, int index) int maxfreq = DSCaps.dwMaxSecondarySampleRate; if (minfreq > freq && freq < 22050) { freq = minfreq; - changed_prefs.sound_freq = currprefs.sound_freq = freq; + sd->freq = freq; write_log (L"DSSOUND: minimum supported frequency: %d\n", minfreq); } if (maxfreq < freq && freq > 44100) { freq = maxfreq; - changed_prefs.sound_freq = currprefs.sound_freq = freq; + sd->freq = freq; write_log (L"DSSOUND: maximum supported frequency: %d\n", maxfreq); } } @@ -749,7 +767,6 @@ int open_sound_device (struct sound_data *sd, int index, int bufsize, int freq, sd->sndbufsize = bufsize; sd->freq = freq; sd->channels = channels; - sd->samplesize = channels * 2; sd->paused = 1; if (sound_devices[index].type == SOUND_DEVICE_AL) ret = open_audio_al (sd, index); @@ -757,6 +774,7 @@ int open_sound_device (struct sound_data *sd, int index, int bufsize, int freq, ret = open_audio_ds (sd, index); else if (sound_devices[index].type == SOUND_DEVICE_PA) ret = open_audio_pa (sd, index); + sd->samplesize = sd->channels * 2; return ret; } void close_sound_device (struct sound_data *sd) @@ -795,7 +813,7 @@ void resume_sound_device (struct sound_data *sd) 
static int open_sound (void) { - int ret = 0, num; + int ret = 0, num, ch; int size = currprefs.sound_maxbsiz; if (!currprefs.produce_sound) @@ -814,15 +832,19 @@ static int open_sound (void) num = enumerate_sound_devices (); if (currprefs.win32_soundcard >= num) currprefs.win32_soundcard = changed_prefs.win32_soundcard = 0; - ret = open_sound_device (sdp, currprefs.win32_soundcard, size, currprefs.sound_freq, get_audio_nativechannels ()); + ch = get_audio_nativechannels (currprefs.sound_stereo); + ret = open_sound_device (sdp, currprefs.win32_soundcard, size, currprefs.sound_freq, ch); if (!ret) return 0; + currprefs.sound_freq = changed_prefs.sound_freq = sdp->freq; + if (ch != sdp->channels) + currprefs.sound_stereo = changed_prefs.sound_stereo = get_audio_stereomode (sdp->channels); set_volume (currprefs.sound_volume, sdp->mute); - if (get_audio_amigachannels () == 4) + if (get_audio_amigachannels (currprefs.sound_stereo) == 4) sample_handler = sample16ss_handler; else - sample_handler = get_audio_ismono () ? sample16_handler : sample16s_handler; + sample_handler = get_audio_ismono (currprefs.sound_stereo) ? 
sample16_handler : sample16s_handler; sdp->obtainedfreq = currprefs.sound_freq; @@ -1395,9 +1417,9 @@ void finish_sound_buffer (void) if (currprefs.turbo_emulation) return; if (currprefs.sound_stereo_swap_paula) { - if (get_audio_nativechannels () == 2 || get_audio_nativechannels () == 4) + if (get_audio_nativechannels (currprefs.sound_stereo) == 2 || get_audio_nativechannels (currprefs.sound_stereo) == 4) channelswap ((uae_s16*)paula_sndbuffer, sdp->sndbufsize / 2); - else if (get_audio_nativechannels () == 6) + else if (get_audio_nativechannels (currprefs.sound_stereo) == 6) channelswap6 ((uae_s16*)paula_sndbuffer, sdp->sndbufsize / 2); } #ifdef DRIVESOUND @@ -1532,6 +1554,12 @@ static void PortAudioEnumerate (struct sound_device *sds) TCHAR tmp[MAX_DPATH], *s1, *s2; num = Pa_GetDeviceCount (); + if (num < 0) { + TCHAR *errtxt = au (Pa_GetErrorText (num)); + write_log (L"PA: Pa_GetDeviceCount() failed: %08x (%s)\n", num, errtxt); + xfree (errtxt); + return; + } for (j = 0; j < num; j++) { const PaDeviceInfo *di; const PaHostApiInfo *hai; diff --git a/od-win32/sounddep/sound.h b/od-win32/sounddep/sound.h index 520832de..bddbcfea 100644 --- a/od-win32/sounddep/sound.h +++ b/od-win32/sounddep/sound.h @@ -51,8 +51,13 @@ int get_offset_sound_device (struct sound_data *sd); int blocking_sound_device (struct sound_data *sd); -STATIC_INLINE void check_sound_buffers (void) +STATIC_INLINE void check_sound_buffers (int outputsample, int doublesample) { + static uae_u16 *paula_sndbufpt_prev; + uae_u16 *start; + int len; + + start = paula_sndbufpt; if (currprefs.sound_stereo == SND_4CH_CLONEDSTEREO) { ((uae_u16*)paula_sndbufpt)[0] = ((uae_u16*)paula_sndbufpt)[-2]; ((uae_u16*)paula_sndbufpt)[1] = ((uae_u16*)paula_sndbufpt)[-1]; @@ -67,10 +72,30 @@ STATIC_INLINE void check_sound_buffers (void) p[1] = sum >> 3; paula_sndbufpt = (uae_u16 *)(((uae_u8 *)paula_sndbufpt) + 4 * 2); } + if (outputsample == 0) { + paula_sndbufpt_prev = start; + return; + } + len = paula_sndbufpt - 
start; + if (outputsample < 0) { + int i; + uae_s16 *p1 = (uae_s16*)paula_sndbufpt_prev; + uae_s16 *p2 = (uae_s16*)start; + for (i = 0; i < len; i++) + p1[i] = (p1[i] + p2[i]) / 2; + paula_sndbufpt -= len; + } if ((uae_u8*)paula_sndbufpt - (uae_u8*)paula_sndbuffer >= paula_sndbufsize) { finish_sound_buffer (); paula_sndbufpt = paula_sndbuffer; } + if (doublesample) { + memcpy (paula_sndbufpt, start, len * 2); + if ((uae_u8*)paula_sndbufpt - (uae_u8*)paula_sndbuffer >= paula_sndbufsize) { + finish_sound_buffer (); + paula_sndbufpt = paula_sndbuffer; + } + } } STATIC_INLINE void clear_sound_buffers (void) diff --git a/od-win32/sysconfig.h b/od-win32/sysconfig.h index d66949cd..d9e1db11 100644 --- a/od-win32/sysconfig.h +++ b/od-win32/sysconfig.h @@ -33,7 +33,7 @@ #define CD32 /* CD32 emulation */ #define CDTV /* CDTV emulation */ #define D3D /* D3D display filter support */ -#define OPENGL /* OpenGL display filter support */ +//#define OPENGL /* OpenGL display filter support */ #define PARALLEL_PORT /* parallel port emulation */ #define PARALLEL_DIRECT /* direct parallel port emulation */ #define SERIAL_PORT /* serial port emulation */ diff --git a/od-win32/threaddep/thread.h b/od-win32/threaddep/thread.h index 6a2b248e..6d786b31 100644 --- a/od-win32/threaddep/thread.h +++ b/od-win32/threaddep/thread.h @@ -10,7 +10,7 @@ extern void uae_sem_init (uae_sem_t*, int manual_reset, int initial_state); extern int uae_start_thread (TCHAR *name, void *(*f)(void *), void *arg, uae_thread_id *thread); extern int uae_start_thread_fast (void *(*f)(void *), void *arg, uae_thread_id *thread); extern void uae_end_thread (uae_thread_id *thread); -extern void uae_set_thread_priority (int); +extern void uae_set_thread_priority (uae_thread_id *, int); #include "commpipe.h" diff --git a/od-win32/win32.c b/od-win32/win32.c index 71a4de40..ed52b64f 100644 --- a/od-win32/win32.c +++ b/od-win32/win32.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "resource" @@ -517,6 
+518,7 @@ static void releasecapture (void) void setmouseactive (int active) { + //write_log (L"setmouseactive %d->%d\n", mouseactive, active); if (active == 0) releasecapture (); if (mouseactive == active && active >= 0) @@ -574,6 +576,8 @@ void setmouseactive (int active) if (rp_isactive () && isfullscreen () == 0) donotfocus = 0; #endif + if (isfullscreen () > 0) + donotfocus = 0; if (donotfocus) { focus = 0; mouseactive = 0; @@ -769,7 +773,7 @@ static LRESULT CALLBACK AmigaWindowProc (HWND hWnd, UINT message, WPARAM wParam, { case WM_SETFOCUS: - winuae_active (hWnd, minimized); + winuae_active (hWnd, minimized); minimized = 0; dx_check (); return 0; @@ -794,7 +798,7 @@ static LRESULT CALLBACK AmigaWindowProc (HWND hWnd, UINT message, WPARAM wParam, case WM_KEYDOWN: if (dinput_wmkey ((uae_u32)lParam)) - gui_display (-1); + inputdevice_add_inputcode (AKS_ENTERGUI, 1); return 0; case WM_LBUTTONUP: @@ -803,8 +807,8 @@ static LRESULT CALLBACK AmigaWindowProc (HWND hWnd, UINT message, WPARAM wParam, return 0; case WM_LBUTTONDOWN: case WM_LBUTTONDBLCLK: - if (!mouseactive && isfullscreen() <= 0 && !gui_active && (!mousehack_alive () || currprefs.input_tablet != TABLET_MOUSEHACK)) { - setmouseactive (message == WM_LBUTTONDBLCLK ? 2 : 1); + if (!mouseactive && !gui_active && (!mousehack_alive () || currprefs.input_tablet != TABLET_MOUSEHACK || isfullscreen () > 0)) { + setmouseactive ((message == WM_LBUTTONDBLCLK || isfullscreen() > 0) ? 
2 : 1); } else if (dinput_winmouse () >= 0 && isfocus ()) { setmousebuttonstate (dinput_winmouse (), 0, 1); } @@ -971,7 +975,7 @@ static LRESULT CALLBACK AmigaWindowProc (HWND hWnd, UINT message, WPARAM wParam, mx -= mouseposx; my -= mouseposy; - //write_log(L"%d %d %d %d\n", mx, my, mouseposx, mouseposy); + //write_log (L"%d %d %d %d %d %d %d\n", wm, mouseactive, focus, mx, my, mouseposx, mouseposy); if (recapture && isfullscreen () <= 0) { setmouseactive (1); return 0; @@ -997,6 +1001,7 @@ static LRESULT CALLBACK AmigaWindowProc (HWND hWnd, UINT message, WPARAM wParam, int myy = (amigawin_rect.bottom - amigawin_rect.top) / 2; mx = mx - mxx; my = my - myy; + //write_log (L"%d:%dx%d\n", dinput_winmouse(), mx, my); setmousestate (dinput_winmouse (), 0, mx, 0); setmousestate (dinput_winmouse (), 1, my, 0); } @@ -1184,7 +1189,7 @@ static LRESULT CALLBACK AmigaWindowProc (HWND hWnd, UINT message, WPARAM wParam, } } else if (num == 4) { if (nm->code == NM_CLICK) - gui_display (-1); + inputdevice_add_inputcode (AKS_ENTERGUI, 1); else uae_reset (0); } @@ -1199,13 +1204,33 @@ static LRESULT CALLBACK AmigaWindowProc (HWND hWnd, UINT message, WPARAM wParam, if ((HWND) wParam == hwndNextViewer) hwndNextViewer = (HWND) lParam; else if (hwndNextViewer != NULL) - SendMessage(hwndNextViewer, message, wParam, lParam); + SendMessage (hwndNextViewer, message, wParam, lParam); return 0; case WM_DRAWCLIPBOARD: clipboard_changed (hWnd); - SendMessage(hwndNextViewer, message, wParam, lParam); + SendMessage (hwndNextViewer, message, wParam, lParam); return 0; + case WM_WTSSESSION_CHANGE: + { + static int wasactive; + switch (wParam) + { + case WTS_CONSOLE_CONNECT: + case WTS_SESSION_UNLOCK: + if (wasactive) + winuae_active (hWnd, 0); + wasactive = 0; + break; + case WTS_CONSOLE_DISCONNECT: + case WTS_SESSION_LOCK: + wasactive = mouseactive; + winuae_inactive (hWnd, 0); + break; + } + } + + case WT_PROXIMITY: { send_tablet_proximity (LOWORD (lParam) ? 
1 : 0); @@ -1309,6 +1334,7 @@ static LRESULT CALLBACK MainWindowProc (HWND hWnd, UINT message, WPARAM wParam, case WM_NOTIFY: case WM_ENABLE: case WT_PACKET: + case WM_WTSSESSION_CHANGE: return AmigaWindowProc (hWnd, message, wParam, lParam); case WM_DISPLAYCHANGE: @@ -1476,6 +1502,19 @@ static LRESULT CALLBACK HiddenWindowProc (HWND hWnd, UINT message, WPARAM wParam return DefWindowProc (hWnd, message, wParam, lParam); } +int handle_msgpump (void) +{ + int got = 0; + MSG msg; + + while (PeekMessage (&msg, 0, 0, 0, PM_REMOVE)) { + got = 1; + TranslateMessage (&msg); + DispatchMessage (&msg); + } + return got; +} + void handle_events (void) { MSG msg; @@ -3817,6 +3856,7 @@ static int PASCAL WinMain2 (HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR TCHAR **argv = NULL, **argv2 = NULL, **argv3; int argc, i; +#if 0 #ifdef _DEBUG { int tmp = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG); @@ -3830,7 +3870,7 @@ static int PASCAL WinMain2 (HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR _CrtSetDbgFlag(tmp); } #endif - +#endif if (!osdetect ()) return 0; if (!dxdetect ()) @@ -3893,9 +3933,6 @@ static int PASCAL WinMain2 (HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR else default_freq = 60; } -#ifdef AVIOUTPUT - AVIOutput_Initialize (); -#endif WIN32_HandleRegistryStuff (); WIN32_InitLang (); WIN32_InitHtmlHelp (); @@ -4182,10 +4219,11 @@ typedef ULONG (CALLBACK *SHCHANGENOTIFYREGISTER) const SHChangeNotifyEntry *pshcne); typedef BOOL (CALLBACK *SHCHANGENOTIFYDEREGISTER)(ULONG ulID); -void addnotifications (HWND hwnd, int remove) +void addnotifications (HWND hwnd, int remove, int isgui) { static ULONG ret; static HDEVNOTIFY hdn; + static int wtson; LPITEMIDLIST ppidl; SHCHANGENOTIFYREGISTER pSHChangeNotifyRegister; SHCHANGENOTIFYDEREGISTER pSHChangeNotifyDeregister; @@ -4202,6 +4240,9 @@ void addnotifications (HWND hwnd, int remove) if (hdn) UnregisterDeviceNotification (hdn); hdn = 0; + if (os_winxp && wtson && !isgui) + WTSUnRegisterSessionNotification (hwnd); + 
wtson = 0; } else { DEV_BROADCAST_DEVICEINTERFACE NotificationFilter = { 0 }; if(pSHChangeNotifyRegister && SHGetSpecialFolderLocation (hwnd, CSIDL_DESKTOP, &ppidl) == NOERROR) { @@ -4216,6 +4257,8 @@ void addnotifications (HWND hwnd, int remove) NotificationFilter.dbcc_devicetype = DBT_DEVTYP_DEVICEINTERFACE; NotificationFilter.dbcc_classguid = GUID_DEVINTERFACE_HID; hdn = RegisterDeviceNotification (hwnd, &NotificationFilter, DEVICE_NOTIFY_WINDOW_HANDLE); + if (os_winxp && !isgui) + wtson = WTSRegisterSessionNotification (hwnd, NOTIFY_FOR_THIS_SESSION); } } diff --git a/od-win32/win32.h b/od-win32/win32.h index f453d188..a2da725f 100644 --- a/od-win32/win32.h +++ b/od-win32/win32.h @@ -15,10 +15,10 @@ #define GETBDM(x) (((x) - ((x / 10000) * 10000)) / 100) #define GETBDD(x) ((x) % 100) -#define WINUAEPUBLICBETA 0 +#define WINUAEPUBLICBETA 1 -#define WINUAEBETA L"" -#define WINUAEDATE MAKEBD(2009, 6, 18) +#define WINUAEBETA L"Beta 0" +#define WINUAEDATE MAKEBD(2009, 7, 9) #define WINUAEEXTRA L"" #define WINUAEREV L"" @@ -119,7 +119,7 @@ extern int close_tablet (void*); extern void send_tablet (int x, int y, int z, int pres, uae_u32 buttons, int flags, int ax, int ay, int az, int rx, int ry, int rz, RECT *r); extern void send_tablet_proximity (int); -void addnotifications (HWND hwnd, int remove); +void addnotifications (HWND hwnd, int remove, int isgui); int win32_hardfile_media_change (const TCHAR *drvname, int inserted); extern int CheckRM (TCHAR *DriveName); void systray (HWND hwnd, int remove); diff --git a/od-win32/win32_filesys.c b/od-win32/win32_filesys.c index dc688fb9..1725d5b0 100644 --- a/od-win32/win32_filesys.c +++ b/od-win32/win32_filesys.c @@ -7,8 +7,8 @@ int CheckRM (TCHAR *DriveName) DWORD dwHold; BOOL result = FALSE; - _stprintf(filename, L"%s.", DriveName); - dwHold = GetFileAttributes(filename); + _stprintf (filename, L"%s.", DriveName); + dwHold = GetFileAttributes (filename); if(dwHold != 0xFFFFFFFF) result = TRUE; return result; @@ -90,28 
+90,73 @@ int target_get_volume_name (struct uaedev_mount_info *mtinf, const TCHAR *volume return result; } -static void filesys_addexternals(void) +static int getidfromhandle (HANDLE h) +{ + int drvnum = -1; + DWORD written, outsize; + VOLUME_DISK_EXTENTS *vde; + + outsize = sizeof (VOLUME_DISK_EXTENTS) + sizeof (DISK_EXTENT) * 32; + vde = xmalloc (outsize); + if (DeviceIoControl (h, IOCTL_VOLUME_GET_VOLUME_DISK_EXTENTS, NULL, 0, vde, outsize, &written, NULL)) { + if (vde->NumberOfDiskExtents > 0) + drvnum = vde->Extents[0].DiskNumber; + } + xfree (vde); + return drvnum; +} + +static int hfdcheck (TCHAR drive) +{ + HANDLE h; + TCHAR tmp[16]; + int disknum, i; + + _stprintf (tmp, L"\\\\.\\%c:", drive); + h = CreateFile (tmp, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (h == INVALID_HANDLE_VALUE) + return 0; + disknum = getidfromhandle (h); + CloseHandle (h); + for (i = 0; i < MAX_FILESYSTEM_UNITS; i++) { + struct hardfiledata *hfd = get_hardfile_data (i); + int reopen = 0; + if (!hfd || !(hfd->flags & HFD_FLAGS_REALDRIVE) || !hfd->handle_valid) + continue; + if (getidfromhandle (hfd->handle) == disknum) + return 1; + } + return 0; +} + +static void filesys_addexternals (void) { int drive, drivetype; UINT errormode; TCHAR volumename[MAX_DPATH] = L""; - TCHAR volumepath[6]; + TCHAR volumepath[16]; DWORD dwDriveMask; int drvnum = 0; - errormode = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); - dwDriveMask = GetLogicalDrives(); + errormode = SetErrorMode (SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); + dwDriveMask = GetLogicalDrives (); dwDriveMask >>= 2; // Skip A and B drives... 
for(drive = 'C'; drive <= 'Z'; ++drive) { - _stprintf(volumepath, L"%c:\\", drive); + _stprintf (volumepath, L"%c:\\", drive); /* Is this drive-letter valid (it used to check for media in drive) */ if(dwDriveMask & 1) { - TCHAR devname[100]; + TCHAR devname[MAX_DPATH]; BOOL inserted = CheckRM (volumepath); /* Is there a disk inserted? */ int nok = FALSE; int rw = 1; drivetype = GetDriveType (volumepath); + if (inserted && drivetype != DRIVE_NO_ROOT_DIR && drivetype != DRIVE_UNKNOWN) { + if (hfdcheck (drive)) { + write_log (L"Drive %c:\\ ignored, was configured as a harddrive\n", drive); + continue; + } + } devname[0] = 0; for (;;) { if (drivetype == DRIVE_CDROM && currprefs.win32_automount_cddrives) { @@ -150,5 +195,5 @@ static void filesys_addexternals(void) } /* if drivemask */ dwDriveMask >>= 1; } - SetErrorMode(errormode); + SetErrorMode (errormode); } diff --git a/od-win32/win32_scale2x.c b/od-win32/win32_scale2x.c index 35ebda62..e573de40 100644 --- a/od-win32/win32_scale2x.c +++ b/od-win32/win32_scale2x.c @@ -23,7 +23,9 @@ struct uae_filter uaefilters[] = { UAE_FILTER_DIRECT3D, 0, 1, L"Direct3D", L"direct3d", 1, 0, 0, 0, 0 }, +#ifdef OPENGL { UAE_FILTER_OPENGL, 0, 1, L"OpenGL (unsupported)", L"opengl", 1, 0, 0, 0, 0 }, +#endif { UAE_FILTER_SCALE2X, 0, 2, L"Scale2X", L"scale2x", 0, 0, UAE_FILTER_MODE_16_16 | UAE_FILTER_MODE_32_32, 0, 0, 0 }, @@ -402,18 +404,22 @@ static void statusline (void) DDSURFACEDESC2 desc; RECT sr, dr; int y; + int lx, ly, sx; if (!(currprefs.leds_on_screen & STATUSLINE_CHIPSET) || !tempsurf) return; - SetRect (&sr, 0, 0, dst_width, TD_TOTAL_HEIGHT); - SetRect (&dr, 0, dst_height - TD_TOTAL_HEIGHT, dst_width, dst_height); + lx = dst_width; + ly = dst_height; + sx = lx; + if (sx > dst_width) + sx = dst_width; + SetRect (&sr, 0, 0, sx, TD_TOTAL_HEIGHT); + SetRect (&dr, lx - sx, ly - TD_TOTAL_HEIGHT, lx, ly); DirectDraw_BlitRect (tempsurf, &sr, NULL, &dr); if (locksurface (tempsurf, &desc)) { - int yy = 0; - for (y = dst_height - 
TD_TOTAL_HEIGHT; y < dst_height; y++) { - uae_u8 *buf = (uae_u8*)desc.lpSurface + yy * desc.lPitch; - draw_status_line_single (buf, dst_depth / 8, yy, dst_width, rc, gc, bc); - yy++; + for (y = 0; y < TD_TOTAL_HEIGHT; y++) { + uae_u8 *buf = (uae_u8*)desc.lpSurface + y * desc.lPitch; + draw_status_line_single (buf, dst_depth / 8, y, sx, rc, gc, bc, NULL); } unlocksurface (tempsurf); DirectDraw_BlitRect (NULL, &dr, tempsurf, &sr); diff --git a/od-win32/win32_uaenet.c b/od-win32/win32_uaenet.c index c3b7da14..2e6486d3 100644 --- a/od-win32/win32_uaenet.c +++ b/od-win32/win32_uaenet.c @@ -73,7 +73,7 @@ static void *uaenet_trap_thread (void *arg) int readactive, writeactive; DWORD actual; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 2); sd->threadactive = 1; uae_sem_post (&sd->sync_sem); readactive = 0; @@ -144,7 +144,7 @@ static void *uaenet_trap_threadr (void *arg) struct pcap_pkthdr *header; const u_char *pkt_data; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 1); sd->threadactiver = 1; uae_sem_post (&sd->sync_semr); while (sd->threadactiver == 1) { @@ -169,7 +169,7 @@ static void *uaenet_trap_threadw (void *arg) { struct uaenetdatawin32 *sd = arg; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 1); sd->threadactivew = 1; uae_sem_post (&sd->sync_semw); while (sd->threadactivew == 1) { diff --git a/od-win32/win32gfx.c b/od-win32/win32gfx.c index 8c69a6fe..7b9229fa 100644 --- a/od-win32/win32gfx.c +++ b/od-win32/win32gfx.c @@ -965,7 +965,7 @@ static void RTGleds (void) yy = 0; for (sy = dst_height - dxdata.statusheight; sy < dst_height; sy++) { uae_u8 *buf = (uae_u8*)desc.lpSurface + yy * desc.lPitch; - draw_status_line_single (buf, currentmode->current_depth / 8, yy, dst_width, p96rc, p96gc, p96bc); + draw_status_line_single (buf, currentmode->current_depth / 8, yy, dst_width, p96rc, p96gc, p96bc, NULL); yy++; } unlocksurface (dxdata.statussurface); @@ -1015,7 +1015,7 @@ static void close_hwnds (void) hStatusWnd = 
0; } if (hAmigaWnd) { - addnotifications (hAmigaWnd, TRUE); + addnotifications (hAmigaWnd, TRUE, FALSE); #ifdef OPENGL OGL_free (); #endif @@ -1062,10 +1062,12 @@ static void updatemodes (void) } flags &= ~DM_DDRAW; } +#if defined (OPENGL) if (usedfilter->type == UAE_FILTER_OPENGL) { flags |= DM_OPENGL; flags &= ~DM_DDRAW; } +#endif } } #endif @@ -1162,12 +1164,16 @@ static int open_windows (int full) int ret, i; inputdevice_unacquire (); - reset_sound(); + reset_sound (); in_sizemove = 0; updatewinfsmode (&currprefs); +#ifdef D3D D3D_free (); +#endif +#ifdef OPENGL OGL_free (); +#endif if (!DirectDraw_Start (displayGUID)) return 0; write_log (L"DirectDraw GUID=%s\n", outGUID (displayGUID)); @@ -1264,12 +1270,14 @@ int check_prefs_changed_gfx (void) if (changed_prefs.gfx_afullscreen == 1) { if (currprefs.gfx_filter == UAE_FILTER_DIRECT3D && changed_prefs.gfx_filter != UAE_FILTER_DIRECT3D) display_change_requested = 1; +#ifdef OPENGL if (currprefs.gfx_filter == UAE_FILTER_OPENGL && changed_prefs.gfx_filter != UAE_FILTER_OPENGL) display_change_requested = 1; - if (changed_prefs.gfx_filter == UAE_FILTER_DIRECT3D && currprefs.gfx_filter != UAE_FILTER_DIRECT3D) - display_change_requested = 1; if (changed_prefs.gfx_filter == UAE_FILTER_OPENGL && currprefs.gfx_filter != UAE_FILTER_OPENGL) display_change_requested = 1; +#endif + if (changed_prefs.gfx_filter == UAE_FILTER_DIRECT3D && currprefs.gfx_filter != UAE_FILTER_DIRECT3D) + display_change_requested = 1; } if (display_change_requested) { @@ -1747,7 +1755,11 @@ void gfx_set_picasso_state (int on) update_gfxparams (); clearscreen (); if (currprefs.gfx_afullscreen != currprefs.gfx_pfullscreen || - (currprefs.gfx_afullscreen == 1 && (currprefs.gfx_filter == UAE_FILTER_DIRECT3D || currprefs.gfx_filter == UAE_FILTER_OPENGL))) { + (currprefs.gfx_afullscreen == 1 && (currprefs.gfx_filter == UAE_FILTER_DIRECT3D +#ifdef OPENGL + || currprefs.gfx_filter == UAE_FILTER_OPENGL +#endif + ))) { mode = 1; } else { mode = 
modeswitchneeded (&wc); @@ -2227,7 +2239,7 @@ static int create_windows_2 (void) GetWindowRect (hAmigaWnd, &amigawin_rect); if (dxfs || d3dfs) SetCursorPos (x + w / 2, y + h / 2); - addnotifications (hAmigaWnd, FALSE); + addnotifications (hAmigaWnd, FALSE, FALSE); if (hMainWnd != hAmigaWnd) { if (!currprefs.headless) ShowWindow (hMainWnd, firstwindow ? SW_SHOWDEFAULT : SW_SHOWNORMAL); diff --git a/od-win32/win32gfx.h b/od-win32/win32gfx.h index dadd01ac..a7598635 100644 --- a/od-win32/win32gfx.h +++ b/od-win32/win32gfx.h @@ -19,6 +19,7 @@ int WIN32GFX_ClearPalette (void); int WIN32GFX_SetPalette (void); void WIN32GFX_WindowMove (void); void WIN32GFX_WindowSize (void);; +void DX_Invalidate (int x, int y, int width, int height); RGBFTYPE WIN32GFX_FigurePixelFormats (RGBFTYPE colortype); int WIN32GFX_AdjustScreenmode (struct MultiDisplay *md, uae_u32 *pwidth, uae_u32 *pheight, uae_u32 *ppixbits); diff --git a/od-win32/win32gui.c b/od-win32/win32gui.c index 134729dd..d3ad1a3f 100644 --- a/od-win32/win32gui.c +++ b/od-win32/win32gui.c @@ -137,7 +137,7 @@ static int C_PAGES; static int LOADSAVE_ID = -1, MEMORY_ID = -1, KICKSTART_ID = -1, CPU_ID = -1, DISPLAY_ID = -1, HW3D_ID = -1, CHIPSET_ID = -1, CHIPSET2_ID = -1, SOUND_ID = -1, FLOPPY_ID = -1, DISK_ID = -1, HARDDISK_ID = -1, IOPORTS_ID = -1, GAMEPORTS_ID = -1, INPUT_ID = -1, MISC1_ID = -1, MISC2_ID = -1, AVIOUTPUT_ID = -1, - PATHS_ID = -1, QUICKSTART_ID = -1, ABOUT_ID = -1, FRONTEND_ID = -1; + PATHS_ID = -1, QUICKSTART_ID = -1, ABOUT_ID = -1, RTG_ID = -1, FRONTEND_ID = -1; static HWND pages[MAX_C_PAGES]; #define MAX_IMAGETOOLTIPS 10 static HWND guiDlg, panelDlg, ToolTipHWND; @@ -1924,7 +1924,7 @@ static BOOL CreateHardFile (HWND hDlg, UINT hfsizem, TCHAR *dostype, TCHAR *newp if (dynamic) { result = vhd_create (init_path, hfsize); } else { - SetCursor (LoadCursor(NULL, IDC_WAIT)); + SetCursor (LoadCursor (NULL, IDC_WAIT)); if ((hf = CreateFile (init_path, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 
NULL)) != INVALID_HANDLE_VALUE) { if (sparse) { DWORD ret; @@ -5786,7 +5786,7 @@ static void updatez3 (uae_u32 *size1p, uae_u32 *size2p) *size2p = s2; } -static INT_PTR CALLBACK MemoryDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam) +static INT_PTR CALLBACK RTGDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam) { int v; TCHAR tmp[100]; @@ -5795,9 +5795,9 @@ static INT_PTR CALLBACK MemoryDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARA switch (msg) { case WM_INITDIALOG: - pages[MEMORY_ID] = hDlg; - currentpage = MEMORY_ID; - WIN32GUI_LoadUIString(IDS_ALL, tmp, sizeof tmp / sizeof (TCHAR)); + pages[RTG_ID] = hDlg; + currentpage = RTG_ID; + WIN32GUI_LoadUIString(IDS_ALL, tmp, sizeof tmp / sizeof (TCHAR)); SendDlgItemMessage (hDlg, IDC_RTG_8BIT, CB_RESETCONTENT, 0, 0); SendDlgItemMessage (hDlg, IDC_RTG_8BIT, CB_ADDSTRING, 0, (LPARAM)L"(8bit)"); SendDlgItemMessage (hDlg, IDC_RTG_8BIT, CB_ADDSTRING, 0, (LPARAM)L"8-bit (*)"); @@ -5822,13 +5822,7 @@ static INT_PTR CALLBACK MemoryDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARA SendDlgItemMessage (hDlg, IDC_RTG_32BIT, CB_ADDSTRING, 0, (LPARAM)L"A8B8G8R8"); SendDlgItemMessage (hDlg, IDC_RTG_32BIT, CB_ADDSTRING, 0, (LPARAM)L"R8G8B8A8"); SendDlgItemMessage (hDlg, IDC_RTG_32BIT, CB_ADDSTRING, 0, (LPARAM)L"B8G8R8A8 (*)"); - SendDlgItemMessage (hDlg, IDC_CHIPMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_CHIP_MEM, MAX_CHIP_MEM)); - SendDlgItemMessage (hDlg, IDC_FASTMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_FAST_MEM, MAX_FAST_MEM)); - SendDlgItemMessage (hDlg, IDC_SLOWMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_SLOW_MEM, MAX_SLOW_MEM)); - SendDlgItemMessage (hDlg, IDC_Z3FASTMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_Z3_MEM, MAX_Z3_MEM)); SendDlgItemMessage (hDlg, IDC_P96MEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_P96_MEM, MAX_P96_MEM)); - SendDlgItemMessage (hDlg, IDC_MBMEM1, TBM_SETRANGE, TRUE, MAKELONG (MIN_MB_MEM, MAX_MB_MEM)); - SendDlgItemMessage (hDlg, IDC_MBMEM2, TBM_SETRANGE, TRUE, MAKELONG (MIN_MB_MEM, MAX_MB_MEM)); 
SendDlgItemMessage (hDlg, IDC_RTG_SCALE_ASPECTRATIO, CB_RESETCONTENT, 0, 0); WIN32GUI_LoadUIString (IDS_DISABLED, tmp, sizeof tmp / sizeof (TCHAR)); SendDlgItemMessage (hDlg, IDC_RTG_SCALE_ASPECTRATIO, CB_ADDSTRING, 0, (LPARAM)tmp); @@ -5857,14 +5851,7 @@ static INT_PTR CALLBACK MemoryDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARA break; case WM_HSCROLL: - workprefs.chipmem_size = memsizes[msi_chip[SendMessage (GetDlgItem (hDlg, IDC_CHIPMEM), TBM_GETPOS, 0, 0)]]; - workprefs.bogomem_size = memsizes[msi_bogo[SendMessage (GetDlgItem (hDlg, IDC_SLOWMEM), TBM_GETPOS, 0, 0)]]; - workprefs.fastmem_size = memsizes[msi_fast[SendMessage (GetDlgItem (hDlg, IDC_FASTMEM), TBM_GETPOS, 0, 0)]]; - workprefs.z3fastmem_size = memsizes[msi_z3fast[SendMessage (GetDlgItem (hDlg, IDC_Z3FASTMEM), TBM_GETPOS, 0, 0)]]; - updatez3 (&workprefs.z3fastmem_size, &workprefs.z3fastmem2_size); workprefs.gfxmem_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_P96MEM), TBM_GETPOS, 0, 0)]]; - workprefs.mbresmem_low_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_MBMEM1), TBM_GETPOS, 0, 0)]]; - workprefs.mbresmem_high_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_MBMEM2), TBM_GETPOS, 0, 0)]]; fix_values_memorydlg (); values_to_memorydlg (hDlg); enable_for_memorydlg (hDlg); @@ -6001,6 +5988,63 @@ static INT_PTR CALLBACK MemoryDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARA return FALSE; } + +static INT_PTR CALLBACK MemoryDlgProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam) +{ + int v; + static int recursive = 0; + + switch (msg) + { + case WM_INITDIALOG: + pages[MEMORY_ID] = hDlg; + currentpage = MEMORY_ID; + SendDlgItemMessage (hDlg, IDC_CHIPMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_CHIP_MEM, MAX_CHIP_MEM)); + SendDlgItemMessage (hDlg, IDC_FASTMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_FAST_MEM, MAX_FAST_MEM)); + SendDlgItemMessage (hDlg, IDC_SLOWMEM, TBM_SETRANGE, TRUE, MAKELONG (MIN_SLOW_MEM, MAX_SLOW_MEM)); + SendDlgItemMessage (hDlg, IDC_Z3FASTMEM, 
TBM_SETRANGE, TRUE, MAKELONG (MIN_Z3_MEM, MAX_Z3_MEM)); + SendDlgItemMessage (hDlg, IDC_MBMEM1, TBM_SETRANGE, TRUE, MAKELONG (MIN_MB_MEM, MAX_MB_MEM)); + SendDlgItemMessage (hDlg, IDC_MBMEM2, TBM_SETRANGE, TRUE, MAKELONG (MIN_MB_MEM, MAX_MB_MEM)); + + case WM_USER: + recursive++; + fix_values_memorydlg (); + values_to_memorydlg (hDlg); + enable_for_memorydlg (hDlg); + recursive--; + break; + + case WM_HSCROLL: + workprefs.chipmem_size = memsizes[msi_chip[SendMessage (GetDlgItem (hDlg, IDC_CHIPMEM), TBM_GETPOS, 0, 0)]]; + workprefs.bogomem_size = memsizes[msi_bogo[SendMessage (GetDlgItem (hDlg, IDC_SLOWMEM), TBM_GETPOS, 0, 0)]]; + workprefs.fastmem_size = memsizes[msi_fast[SendMessage (GetDlgItem (hDlg, IDC_FASTMEM), TBM_GETPOS, 0, 0)]]; + workprefs.z3fastmem_size = memsizes[msi_z3fast[SendMessage (GetDlgItem (hDlg, IDC_Z3FASTMEM), TBM_GETPOS, 0, 0)]]; + updatez3 (&workprefs.z3fastmem_size, &workprefs.z3fastmem2_size); + workprefs.mbresmem_low_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_MBMEM1), TBM_GETPOS, 0, 0)]]; + workprefs.mbresmem_high_size = memsizes[msi_gfx[SendMessage (GetDlgItem (hDlg, IDC_MBMEM2), TBM_GETPOS, 0, 0)]]; + fix_values_memorydlg (); + values_to_memorydlg (hDlg); + enable_for_memorydlg (hDlg); + break; + + case WM_COMMAND: + { + if (recursive > 0) + break; + recursive++; + switch (LOWORD (wParam)) + { + ; + } + if (HIWORD (wParam) == CBN_SELENDOK || HIWORD (wParam) == CBN_KILLFOCUS || HIWORD (wParam) == CBN_EDITCHANGE) { + } + recursive--; + } + break; + } + return FALSE; +} + static void addromfiles (UAEREG *fkey, HWND hDlg, DWORD d, TCHAR *path, int type) { int idx; @@ -7601,7 +7645,7 @@ static void hardfile_testrdb (HWND hDlg, struct hfdlg_vals *hdf) hdf->controller = HD_CONTROLLER_PCMCIA_SRAM; break; } - if (!memcmp (tmp, "RDSK\0\0\0", 7) || (tmp[0] == 0x53 && tmp[1] == 0x10 && tmp[2] == 0x9b && tmp[3] == 0x13 && tmp[4] == 0 && tmp[5] == 0)) { + if (!memcmp (tmp, "RDSK\0\0\0", 7) || !memcmp (tmp, "DRKS\0\0", 6) || (tmp[0] == 
0x53 && tmp[1] == 0x10 && tmp[2] == 0x9b && tmp[3] == 0x13 && tmp[4] == 0 && tmp[5] == 0)) { // RDSK or ADIDE "encoded" RDSK hdf->sectors = 0; hdf->surfaces = 0; @@ -7848,8 +7892,8 @@ static INT_PTR CALLBACK HarddriveSettingsProc (HWND hDlg, UINT msg, WPARAM wPara ew (hDlg, IDC_HDF_CONTROLLER, FALSE); index = -1; for (i = 0; i < hdf_getnumharddrives (); i++) { - SendDlgItemMessage (hDlg, IDC_HARDDRIVE, CB_ADDSTRING, 0, (LPARAM)hdf_getnameharddrive (i, 1, NULL)); - if (!_tcscmp (current_hfdlg.filename, hdf_getnameharddrive (i, 0, NULL))) + SendDlgItemMessage (hDlg, IDC_HARDDRIVE, CB_ADDSTRING, 0, (LPARAM)hdf_getnameharddrive (i, 1, NULL, NULL)); + if (!_tcscmp (current_hfdlg.filename, hdf_getnameharddrive (i, 0, NULL, NULL))) index = i; } if (index >= 0) { @@ -7867,12 +7911,17 @@ static INT_PTR CALLBACK HarddriveSettingsProc (HWND hDlg, UINT msg, WPARAM wPara if (oposn != posn && posn != CB_ERR) { oposn = posn; if (posn >= 0) { + int dang = 1; + hdf_getnameharddrive (posn, 1, NULL, &dang); ew (hDlg, IDC_HARDDRIVE_IMAGE, TRUE); ew (hDlg, IDOK, TRUE); - ew (hDlg, IDC_HDF_RW, TRUE); + ew (hDlg, IDC_HDF_RW, !dang); + if (dang) + current_hfdlg.rw = FALSE; ew (hDlg, IDC_HDF_CONTROLLER, TRUE); hardfile_testrdb (hDlg, &current_hfdlg); SendDlgItemMessage (hDlg, IDC_HDF_CONTROLLER, CB_SETCURSEL, current_hfdlg.controller, 0); + CheckDlgButton(hDlg, IDC_HDF_RW, current_hfdlg.rw); } } if (HIWORD (wParam) == BN_CLICKED) { @@ -7890,7 +7939,7 @@ static INT_PTR CALLBACK HarddriveSettingsProc (HWND hDlg, UINT msg, WPARAM wPara } } if (posn != CB_ERR) - _tcscpy (current_hfdlg.filename, hdf_getnameharddrive ((int)posn, 0, &current_hfdlg.blocksize)); + _tcscpy (current_hfdlg.filename, hdf_getnameharddrive ((int)posn, 0, &current_hfdlg.blocksize, NULL)); current_hfdlg.rw = IsDlgButtonChecked (hDlg, IDC_HDF_RW); posn = SendDlgItemMessage (hDlg, IDC_HDF_CONTROLLER, CB_GETCURSEL, 0, 0); if (posn != CB_ERR) @@ -10393,6 +10442,7 @@ static void values_to_hw3ddlg (HWND hDlg) while (uaefilters[i].name) { switch
(uaefilters[i].type) { +#if 0 #ifndef D3D case UAE_FILTER_DIRECT3D: nofilter = 1; @@ -10402,6 +10452,7 @@ static void values_to_hw3ddlg (HWND hDlg) case UAE_FILTER_OPENGL: nofilter = 1; break; +#endif #endif default: nofilter = 0; @@ -11595,6 +11646,7 @@ static void createTreeView (HWND hDlg, int currentpage) CN (MEMORY_ID); CN (FLOPPY_ID); CN (HARDDISK_ID); + CN (RTG_ID); p = CreateFolderNode (TVhDlg, IDS_TREEVIEW_HOST, root, LOADSAVE_ID, CONFIG_TYPE_HOST); CN (DISPLAY_ID); @@ -11876,7 +11928,7 @@ static INT_PTR CALLBACK DialogProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM l doit = 1; } else if (pBHdr && pBHdr->dbch_devicetype == DBT_DEVTYP_DEVICEINTERFACE) { DEV_BROADCAST_DEVICEINTERFACE *dbd = (DEV_BROADCAST_DEVICEINTERFACE*)lParam; - write_log (L"%s: %s\n", wParam == DBT_DEVICEREMOVECOMPLETE ? "Removed" : "Inserted", + write_log (L"%s: %s\n", wParam == DBT_DEVICEREMOVECOMPLETE ? L"Removed" : L"Inserted", dbd->dbcc_name); if (wParam == DBT_DEVICEREMOVECOMPLETE) doit = 1; @@ -11894,7 +11946,7 @@ static INT_PTR CALLBACK DialogProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM l PostQuitMessage (0); return TRUE; case WM_CLOSE: - addnotifications (hDlg, 1); + addnotifications (hDlg, TRUE, TRUE); DestroyWindow(hDlg); if (dialogreturn < 0) { dialogreturn = 0; @@ -11917,7 +11969,7 @@ static INT_PTR CALLBACK DialogProc (HWND hDlg, UINT msg, WPARAM wParam, LPARAM l centerWindow (hDlg); createTreeView (hDlg, currentpage); updatePanel (hDlg, currentpage); - addnotifications (hDlg, 0); + addnotifications (hDlg, FALSE, TRUE); return TRUE; case WM_DROPFILES: if (dragdrop (hDlg, (HDROP)wParam, (gui_active || full_property_sheet) ? 
&workprefs : &changed_prefs, currentpage)) @@ -12177,6 +12229,7 @@ static int GetSettings (int all_options, HWND hwnd) panelresource = getresource (IDD_PANEL); LOADSAVE_ID = init_page (IDD_LOADSAVE, IDI_FILE, IDS_LOADSAVE, LoadSaveDlgProc, NULL, L"gui/configurations.htm"); MEMORY_ID = init_page (IDD_MEMORY, IDI_MEMORY, IDS_MEMORY, MemoryDlgProc, NULL, L"gui/ram.htm"); + RTG_ID = init_page (IDD_RTG, IDI_DISPLAY, IDS_RTG, RTGDlgProc, NULL, L"gui/rtg.htm"); KICKSTART_ID = init_page (IDD_KICKSTART, IDI_MEMORY, IDS_KICKSTART, KickstartDlgProc, NULL, L"gui/rom.htm"); CPU_ID = init_page (IDD_CPU, IDI_CPU, IDS_CPU, CPUDlgProc, NULL, L"gui/cpu.htm"); DISPLAY_ID = init_page (IDD_DISPLAY, IDI_DISPLAY, IDS_DISPLAY, DisplayDlgProc, NULL, L"gui/display.htm"); diff --git a/od-win32/winuae_msvc/winuae_msvc.vcproj b/od-win32/winuae_msvc/winuae_msvc.vcproj index 2ff518f6..54970a00 100644 --- a/od-win32/winuae_msvc/winuae_msvc.vcproj +++ b/od-win32/winuae_msvc/winuae_msvc.vcproj @@ -69,7 +69,7 @@ Detect64BitPortabilityProblems="false" DebugInformationFormat="4" CallingConvention="2" - CompileAs="1" + CompileAs="0" DisableSpecificWarnings="4996" EnablePREfast="false" /> @@ -87,19 +87,19 @@ @@ -285,7 +285,7 @@ WarningLevel="3" SuppressStartupBanner="true" CallingConvention="2" - CompileAs="1" + CompileAs="0" DisableSpecificWarnings="4996" ForcedIncludeFiles="" /> @@ -302,19 +302,20 @@ /> - - @@ -1449,11 +1446,11 @@ > " + and all keyboard layouts with keyboard layout A (old bug) +- portaudio fallback code added, unsupported samplerate = try 44000, + 48000 and default reported before disabling sound. Same with number + of channels, unsupported channels = fall back to stereo +- real harddrive safetycheck modified, now all drives can be mounted as + long as drive does not have any Windows mounted partitions. Any drive + with one or more Windows mounted partitions are only available in read + only mode. 
Command line parameter is now only needed if you want to + add drives that have Windows mounted FAT partitions. Drives with NTFS + partition(s) are never (there is a complex way..) allowed in + read-write mode. +- SCSI (RDB hdf emulation) write commands return proper write protected + sense status if hdf is read-only +- CD32 CD controller emulation improved, CDXL animations should run more + smoothly now, previously emulation couldn't load more than a few sectors + before cd driver decided to read some previously loaded sectors again +- added seek delays to CD32 emulation (CD32 drive has really slow seeks) +- CD32 Lotus Trilogy's Lotus 3 finally loads (stupid loader overwriting + already loaded data if CD DMA sequence is not identical to real + hardware) +- CD32 CD audio moved to separate thread, no more emulator pausing when + CD audio starts or track changes (possibly introduces other issues..) +- distortion in sound was possible with short square wave samples (160) +- final 16-bit sound samples always had lowest bit (or two lowest bits) + zero, now lowest bit(s) are duplicated from real least significant + bit(s) +- A590/A2091 SCSI ROM 7.0 was not accepted (debugging code..) +- added new RTG configuration panel, more space for future options, also + old RTG setting panel was not really in correct place anymore +- RDB filesystem loader still had fse_PatchFlags hack instead of + handling it 100% correctly (no functional change, at least with any + popular filesystem) +- D3D 2D sprites (ID3DXSprite) used in D3D filter onscreen leds, correct + positioning, no scaling and free transparency. Scanline texture also + converted to 2D sprite, now works correctly with D3D filters (future + plan: convert all DirectDraw code to D3D 2D sprites) +- accept also partition type 0x30 (another Amithlon like RDB drive + inside real PC partition) +- OpenGL filter removed, totally obsolete now. Gone forever unless + someone updates it.
(includes correct positioning and scaling) + +1.6.1 + Beta 5: (1.6.1 RC) - JIT indirect/direct state is remembered when JIT is switched off and diff --git a/od-win32/writelog.c b/od-win32/writelog.c index 65d0390d..7a631441 100644 --- a/od-win32/writelog.c +++ b/od-win32/writelog.c @@ -169,7 +169,7 @@ void close_console (void) consoleopen = 0; } -static void writeconsole (const TCHAR *buffer) +static void writeconsole_2 (const TCHAR *buffer) { DWORD temp; @@ -186,6 +186,25 @@ static void writeconsole (const TCHAR *buffer) } } +static void writeconsole (const TCHAR *buffer) +{ + if (_tcslen (buffer) > 256) { + TCHAR *p = my_strdup (buffer); + TCHAR *p2 = p; + while (_tcslen (p) > 256) { + TCHAR tmp = p[256]; + p[256] = 0; + writeconsole_2 (p); + p[256] = tmp; + p += 256; + } + writeconsole_2 (p); + xfree (p2); + } else { + writeconsole_2 (buffer); + } +} + static void flushconsole (void) { if (consoleopen > 0) { diff --git a/sana2.c b/sana2.c index 5292470f..f0e6db6b 100644 --- a/sana2.c +++ b/sana2.c @@ -1341,7 +1341,7 @@ static void *dev_thread (void *devs) { struct devstruct *dev = (struct devstruct*)devs; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 1); dev->thread_running = 1; uae_sem_post (&dev->sync_sem); for (;;) { diff --git a/savestate.c b/savestate.c index 3cfe5f69..9ae7ffae 100644 --- a/savestate.c +++ b/savestate.c @@ -44,6 +44,8 @@ * */ +#define OPEN_LOG 0 + #include "sysconfig.h" #include "sysdeps.h" @@ -373,7 +375,11 @@ void restore_ram (size_t filepos, uae_u8 *memory) static uae_u8 *restore_log (uae_u8 *src) { - //write_log (src); +#if OPEN_LOG > 0 + TCHAR *s = utf8u (src); + write_log (L"%s\n", s); + xfree (s); +#endif src += strlen (src) + 1; return src; } diff --git a/scsiemul.c b/scsiemul.c index 0a288c0c..aa94ca5a 100644 --- a/scsiemul.c +++ b/scsiemul.c @@ -668,7 +668,7 @@ static void *dev_thread (void *devs) { struct devstruct *dev = (struct devstruct*)devs; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 
1); dev->thread_running = 1; uae_sem_post (&dev->sync_sem); for (;;) { diff --git a/traps.c b/traps.c index 13ab9bb9..2f77e961 100644 --- a/traps.c +++ b/traps.c @@ -73,7 +73,7 @@ struct Trap /* Defined traps */ static struct Trap traps[MAX_TRAPS]; -static unsigned int trap_count; +static unsigned int trap_count = 1; static const int trace_traps = 0; @@ -229,7 +229,7 @@ static void *trap_thread (void *arg) { ExtendedTrapContext *context = (ExtendedTrapContext *) arg; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 1); /* Wait until main thread is ready to switch to the * this trap context. */ diff --git a/uaeserial.c b/uaeserial.c index d9d6dbb8..8bbebeb8 100644 --- a/uaeserial.c +++ b/uaeserial.c @@ -588,7 +588,7 @@ static void *dev_thread (void *devs) { struct devstruct *dev = (struct devstruct*)devs; - uae_set_thread_priority (2); + uae_set_thread_priority (NULL, 1); dev->thread_running = 1; uae_sem_post (&dev->sync_sem); for (;;) { diff --git a/zfile.c b/zfile.c index c1a7882f..f3bd2cc6 100644 --- a/zfile.c +++ b/zfile.c @@ -151,8 +151,12 @@ int zfile_gettype (struct zfile *z) return ZFILE_DISKIMAGE; if (!memcmp (buf, "RDSK", 4)) return ZFILE_HDFRDB; - if (!memcmp (buf, "DOS", 3)) - return ZFILE_HDF; + if (!memcmp (buf, "DOS", 3)) { + if (z->size < 4 * 1024 * 1024) + return ZFILE_DISKIMAGE; + else + return ZFILE_HDF; + } if (ext != NULL) { if (strcasecmp (ext, L"hdf") == 0) return ZFILE_HDF; @@ -930,6 +934,7 @@ static struct zfile *zfile_fopen_2 (const TCHAR *name, const TCHAR *mode, int ma } l->zfdmask = mask; } else { + struct _stat64 st; l = zfile_create (NULL); l->mode = my_strdup (mode); l->name = my_strdup (name); @@ -944,6 +949,8 @@ static struct zfile *zfile_fopen_2 (const TCHAR *name, const TCHAR *mode, int ma zfile_fclose (l); return 0; } + if (stat (l->name, &st) != -1) + l->size = st.st_size; l->f = f; } return l;