+++ /dev/null
-/*
- * compiler/codegen_x86.cpp - IA-32 code generator
- *
- * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
- *
- * Adaptation for Basilisk II and improvements, copyright 2000-2005
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* This should eventually end up in machdep/, but for now, x86 is the
- only target, and it's easier this way... */
-
-#include "flags_x86.h"
-
-/*************************************************************************
- * Some basic information about the the target CPU *
- *************************************************************************/
-
-#define EAX_INDEX 0
-#define ECX_INDEX 1
-#define EDX_INDEX 2
-#define EBX_INDEX 3
-#define ESP_INDEX 4
-#define EBP_INDEX 5
-#define ESI_INDEX 6
-#define EDI_INDEX 7
-#if defined(__x86_64__)
-#define R8_INDEX 8
-#define R9_INDEX 9
-#define R10_INDEX 10
-#define R11_INDEX 11
-#define R12_INDEX 12
-#define R13_INDEX 13
-#define R14_INDEX 14
-#define R15_INDEX 15
-#endif
-/* XXX this has to match X86_Reg8H_Base + 4 */
-#define AH_INDEX (0x10+4+EAX_INDEX)
-#define CH_INDEX (0x10+4+ECX_INDEX)
-#define DH_INDEX (0x10+4+EDX_INDEX)
-#define BH_INDEX (0x10+4+EBX_INDEX)
-
-/* The register in which subroutines return an integer return value */
-#define REG_RESULT EAX_INDEX
-
-/* The registers subroutines take their first and second argument in */
-#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
-/* Handle the _fastcall parameters of ECX and EDX */
-#define REG_PAR1 ECX_INDEX
-#define REG_PAR2 EDX_INDEX
-#elif defined(__x86_64__)
-#define REG_PAR1 EDI_INDEX
-#define REG_PAR2 ESI_INDEX
-#else
-#define REG_PAR1 EAX_INDEX
-#define REG_PAR2 EDX_INDEX
-#endif
-
-#define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
-#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
-#define REG_PC_TMP EAX_INDEX
-#else
-#define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
-#endif
-
-#define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
- -1 if any reg will do */
-#define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
-#define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
-
-#define STACK_ALIGN 16
-#define STACK_OFFSET sizeof(void *)
-
-uae_s8 always_used[]={4,-1};
-#if defined(__x86_64__)
-uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
-uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
-#else
-uae_s8 can_byte[]={0,1,2,3,-1};
-uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
-#endif
-
-#if USE_OPTIMIZED_CALLS
-/* Make sure interpretive core does not use cpuopti */
-uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
-#error FIXME: code not ready
-#else
-/* cpuopti mutate instruction handlers to assume registers are saved
- by the caller */
-uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
-#endif
-
-/* This *should* be the same as call_saved. But:
- - We might not really know which registers are saved, and which aren't,
- so we need to preserve some, but don't want to rely on everyone else
- also saving those registers
- - Special registers (such like the stack pointer) should not be "preserved"
- by pushing, even though they are "saved" across function calls
-*/
-#if defined(__x86_64__)
-/* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
-/* preserve r11 because it's generally used to hold pointers to functions */
-static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
-#else
-/* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
-static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
-#endif
-
-/* Whether classes of instructions do or don't clobber the native flags */
-#define CLOBBER_MOV
-#define CLOBBER_LEA
-#define CLOBBER_CMOV
-#define CLOBBER_POP
-#define CLOBBER_PUSH
-#define CLOBBER_SUB clobber_flags()
-#define CLOBBER_SBB clobber_flags()
-#define CLOBBER_CMP clobber_flags()
-#define CLOBBER_ADD clobber_flags()
-#define CLOBBER_ADC clobber_flags()
-#define CLOBBER_AND clobber_flags()
-#define CLOBBER_OR clobber_flags()
-#define CLOBBER_XOR clobber_flags()
-
-#define CLOBBER_ROL clobber_flags()
-#define CLOBBER_ROR clobber_flags()
-#define CLOBBER_SHLL clobber_flags()
-#define CLOBBER_SHRL clobber_flags()
-#define CLOBBER_SHRA clobber_flags()
-#define CLOBBER_TEST clobber_flags()
-#define CLOBBER_CL16
-#define CLOBBER_CL8
-#define CLOBBER_SE32
-#define CLOBBER_SE16
-#define CLOBBER_SE8
-#define CLOBBER_ZE32
-#define CLOBBER_ZE16
-#define CLOBBER_ZE8
-#define CLOBBER_SW16 clobber_flags()
-#define CLOBBER_SW32
-#define CLOBBER_SETCC
-#define CLOBBER_MUL clobber_flags()
-#define CLOBBER_BT clobber_flags()
-#define CLOBBER_BSF clobber_flags()
-
-/* The older code generator is now deprecated. */
-#define USE_NEW_RTASM 1
-
-#if USE_NEW_RTASM
-
-#if defined(__x86_64__)
-#define X86_TARGET_64BIT 1
-/* The address override prefix causes a 5 cycles penalty on Intel Core
- processors. Another solution would be to decompose the load in an LEA,
- MOV (to zero-extend), MOV (from memory): is it better? */
-#define ADDR32 x86_emit_byte(0x67),
-#else
-#define ADDR32 /**/
-#endif
-#define X86_FLAT_REGISTERS 0
-#define X86_OPTIMIZE_ALU 1
-#define X86_OPTIMIZE_ROTSHI 1
-#include "codegen_x86.h"
-
-#define x86_emit_byte(B) emit_byte(B)
-#define x86_emit_word(W) emit_word(W)
-#define x86_emit_long(L) emit_long(L)
-#define x86_emit_quad(Q) emit_quad(Q)
-#define x86_get_target() get_target()
-#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
-
-static void jit_fail(const char *msg, const char *file, int line, const char *function)
-{
- fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
- function, file, line, msg);
- abort();
-}
-
-LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
-{
-#if defined(__x86_64__)
- PUSHQr(r);
-#else
- PUSHLr(r);
-#endif
-}
-LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
-
-LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
-{
-#if defined(__x86_64__)
- POPQr(r);
-#else
- POPLr(r);
-#endif
-}
-LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
-
-LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
-{
-#if defined(__x86_64__)
- POPQm(d, X86_NOREG, X86_NOREG, 1);
-#else
- POPLm(d, X86_NOREG, X86_NOREG, 1);
-#endif
-}
-LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
-
-LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
-{
- BTLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
-{
- BTLrr(b, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
-{
- BTCLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
-{
- BTCLrr(b, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
-{
- BTRLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
-{
- BTRLrr(b, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
-{
- BTSLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
-{
- BTSLrr(b, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
-{
- SUBWir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
-
-LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
-{
- MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
-{
- MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
-{
- MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
-{
- MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
-{
- ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
-{
- ROLBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
-{
- ROLWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
-{
- ROLLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
-{
- ROLLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
-{
- ROLWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
-{
- ROLBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
-{
- SHLLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
-{
- SHLWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
-{
- SHLBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
-{
- RORBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
-{
- RORWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
-{
- ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
-}
-LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
-{
- RORLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
-{
- RORLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
-{
- RORWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
-{
- RORBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
-{
- SHRLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
-{
- SHRWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
-{
- SHRBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
-{
- SARLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
-{
- SARWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
-{
- SARBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
-{
- SHLLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
-{
- SHLWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
-{
- SHLBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
-{
- SHRLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
-{
- SHRWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
-{
- SHRBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
-{
- SARLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
-{
- SARWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
-{
- SARBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
-{
- SAHF();
-}
-LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
-
-LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
-{
- CPUID();
-}
-LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
-
-LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
-{
- LAHF();
-}
-LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
-
-LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
-{
- SETCCir(cc, d);
-}
-LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
-
-LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
-{
- SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
-
-LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
-{
- /* replacement using branch and mov */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cc^1, 0);
- MOVBrr(s, d);
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
-}
-LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
-
-LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
-{
- if (have_cmov)
- CMOVWrr(cc, s, d);
- else { /* replacement using branch and mov */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cc^1, 0);
- MOVWrr(s, d);
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
- }
-}
-LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
-
-LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
-{
- if (have_cmov)
- CMOVLrr(cc, s, d);
- else { /* replacement using branch and mov */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cc^1, 0);
- MOVLrr(s, d);
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
- }
-}
-LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
-
-LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
-{
- BSFLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
-{
- MOVSLQrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
-{
- MOVSWLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
-
-LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
-{
- MOVSBLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
-{
- MOVZWLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
-
-LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
-{
- MOVZBLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
-{
- IMULLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
-{
- if (d!=MUL_NREG1 || s!=MUL_NREG2) {
- write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
- abort();
- }
- IMULLr(s);
-}
-LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
-
-LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
-{
- if (d!=MUL_NREG1 || s!=MUL_NREG2) {
- write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
- abort();
- }
- MULLr(s);
-}
-LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
-
-LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
-{
- abort(); /* %^$&%^$%#^ x86! */
-}
-LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
-{
- MOVBrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
-{
- MOVWrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
-
-LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVLmr(0, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVWmr(0, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVBmr(0, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-{
- ADDR32 MOVLrm(s, 0, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-{
- ADDR32 MOVWrm(s, 0, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-{
- ADDR32 MOVBrm(s, 0, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-{
- ADDR32 MOVLrm(s, base, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-{
- ADDR32 MOVWrm(s, base, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-{
- ADDR32 MOVBrm(s, base, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-
-LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVLmr(base, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVWmr(base, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVBmr(base, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-{
- ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
-}
-LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
-{
- if (have_cmov)
- ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
- else { /* replacement using branch and mov */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cond^1, 0);
- ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
- }
-}
-LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
-
-LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
-{
- if (have_cmov)
- CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
- else { /* replacement using branch and mov */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cond^1, 0);
- MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
- }
-}
-LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
-
-LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
-{
- ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
-{
- ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
-{
- ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
-{
- ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
-{
- ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
-{
- ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
-{
- ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
-{
- ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
-{
- ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
-{
- ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
-{
- ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
-{
- ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
-
-LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
-{
- LEALmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-{
- LEALmr(offset, s, index, factor, d);
-}
-LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-
-LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-{
- LEALmr(0, s, index, factor, d);
-}
-LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-
-LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
-{
- LEALmr(0, X86_NOREG, index, factor, d);
-}
-LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
-{
- ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
-{
- ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
-{
- ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
-
-LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
-{
- BSWAPLr(r);
-}
-LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
-
-LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
-{
- ROLWir(8, r);
-}
-LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
-
-LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
-{
- MOVLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
-{
- MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
-{
- MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
-
-LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
-{
- MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
-{
- MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
-
-LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
-{
- MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
-{
- MOVLir(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
-{
- MOVWir(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
-{
- MOVBir(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
-
-LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
-{
- ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
-{
- ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
-{
- ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
-{
- ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
-{
- TESTLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
-{
- TESTLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
-{
- TESTWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
-{
- TESTBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
-{
- XORLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
-{
- ANDLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
-{
- ANDWir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
-{
- ANDLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
-{
- ANDWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
-{
- ANDBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
-{
- ORLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
-{
- ORLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
-{
- ORWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
-{
- ORBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
-{
- ADCLrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
-{
- ADCWrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
-{
- ADCBrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
-{
- ADDLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
-{
- ADDWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
-{
- ADDBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
-{
- SUBLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
-{
- SUBBir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
-{
- ADDLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
-{
- ADDWir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
-{
- ADDBir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
-{
- SBBLrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
-{
- SBBWrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
-{
- SBBBrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
-{
- SUBLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
-{
- SUBWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
-{
- SUBBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
-{
- CMPLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
-{
- CMPLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
-{
- CMPWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
-
-LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
-{
- CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
-{
- CMPBir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
-{
- CMPBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
-
-LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
-{
- ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
-}
-LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
-{
- XORLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
-{
- XORWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
-{
- XORBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
-{
- SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
-
-LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-{
- CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-{
- XCHGLrr(r2, r1);
-}
-LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-
-LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
-{
- XCHGBrr(r2, r1);
-}
-LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
-
-LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
-{
- PUSHF();
-}
-LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
-
-LOWFUNC(WRITE,READ,0,raw_popfl,(void))
-{
- POPF();
-}
-LENDFUNC(WRITE,READ,0,raw_popfl,(void))
-
-/* Generate floating-point instructions */
-static inline void x86_fadd_m(MEMR s)
-{
- FADDLm(s,X86_NOREG,X86_NOREG,1);
-}
-
-#else
-
-const bool optimize_accum = true; /* gates the short accumulator-only opcode forms (tested together with isaccum()) */
-const bool optimize_imm8 = true; /* gates the one-byte immediate/displacement forms (tested together with isbyte()) */
-const bool optimize_shift_once = true; /* gates the shift/rotate-by-1 opcode forms that carry no count byte */
-
-/*************************************************************************
- * Actual encoding of the instructions on the target CPU *
- *************************************************************************/
-
-/* Non-zero when r is the accumulator register index (EAX_INDEX). */
-static __inline__ int isaccum(int r)
-{
-    const int accum = EAX_INDEX;
-
-    return (accum == r);
-}
-
-/* Non-zero when x lies in the signed 8-bit range -128..127. */
-static __inline__ int isbyte(uae_s32 x)
-{
-    if (x < -128)
-        return 0;
-    return (x <= 127);
-}
-
-/* Non-zero when x lies in the signed 16-bit range -32768..32767. */
-static __inline__ int isword(uae_s32 x)
-{
-    return !(x < -32768 || x > 32767);
-}
-
-/* PUSH r32: a single opcode byte, 0x50 plus the register number. */
-LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
-{
-    const int opcode = 0x50+r;
-
-    emit_byte(opcode);
-}
-LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
-
-/* POP r32: a single opcode byte, 0x58 plus the register number. */
-LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
-{
-    const int opcode = 0x58+r;
-
-    emit_byte(opcode);
-}
-LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
-
-/* POP m32 to an absolute address: opcode 0x8f, ModRM 0x05 (disp32 only),
- * followed by the 32-bit address d. */
-LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
-{
-    const int opcode = 0x8f;
-    const int modrm_abs = 0x05;
-
-    emit_byte(opcode);
-    emit_byte(modrm_abs);
-    emit_long(d);
-}
-LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
-
-/* BT r32, imm8: bytes 0x0f 0xba, ModRM 0xe0+r, then the bit number i. */
-LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
-{
-    const int modrm = 0xe0+r;
-
-    emit_byte(0x0f);
-    emit_byte(0xba);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
-
-/* BT r32, r32: bytes 0x0f 0xa3, ModRM with b in the reg field and r in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
-{
-    const int modrm = 0xc0+8*b+r;
-
-    emit_byte(0x0f);
-    emit_byte(0xa3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
-
-/* BTC r32, imm8: bytes 0x0f 0xba, ModRM 0xf8+r, then the bit number i. */
-LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
-{
-    const int modrm = 0xf8+r;
-
-    emit_byte(0x0f);
-    emit_byte(0xba);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
-
-/* BTC r32, r32: bytes 0x0f 0xbb, ModRM with b in the reg field and r in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
-{
-    const int modrm = 0xc0+8*b+r;
-
-    emit_byte(0x0f);
-    emit_byte(0xbb);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
-
-
-/* BTR r32, imm8: bytes 0x0f 0xba, ModRM 0xf0+r, then the bit number i. */
-LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
-{
-    const int modrm = 0xf0+r;
-
-    emit_byte(0x0f);
-    emit_byte(0xba);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
-
-/* BTR r32, r32: bytes 0x0f 0xb3, ModRM with b in the reg field and r in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
-{
-    const int modrm = 0xc0+8*b+r;
-
-    emit_byte(0x0f);
-    emit_byte(0xb3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
-
-/* BTS r32, imm8: bytes 0x0f 0xba, ModRM 0xe8+r, then the bit number i. */
-LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
-{
-    const int modrm = 0xe8+r;
-
-    emit_byte(0x0f);
-    emit_byte(0xba);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
-
-/* BTS r32, r32: bytes 0x0f 0xab, ModRM with b in the reg field and r in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
-{
-    const int modrm = 0xc0+8*b+r;
-
-    emit_byte(0x0f);
-    emit_byte(0xab);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
-
-/* SUB r16, imm: 0x66 prefix, then either the one-byte-immediate form
- * (0x83, ModRM 0xe8+d, imm8) or a 16-bit immediate using the short
- * accumulator opcode 0x2d or the general 0x81 / ModRM 0xe8+d form. */
-LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
-{
-    const int modrm = 0xe8+d;
-
-    emit_byte(0x66);
-    if (!isbyte(i)) {
-        if (optimize_accum && isaccum(d)) {
-            emit_byte(0x2d);
-        }
-        else {
-            emit_byte(0x81);
-            emit_byte(modrm);
-        }
-        emit_word(i);
-    }
-    else {
-        emit_byte(0x83);
-        emit_byte(modrm);
-        emit_byte(i);
-    }
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
-
-
-/* MOV r32, [abs32]: opcode 0x8b, ModRM 0x05 with d in the reg field,
- * followed by the 32-bit address s. */
-LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
-{
-    const int modrm = 0x05+8*d;
-
-    emit_byte(0x8b);
-    emit_byte(modrm);
-    emit_long(s);
-}
-LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
-
-/* MOV dword [abs32], imm32: opcode 0xc7, ModRM 0x05, address d, immediate s. */
-LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
-{
-    const int opcode = 0xc7;
-    const int modrm_abs = 0x05;
-
-    emit_byte(opcode);
-    emit_byte(modrm_abs);
-    emit_long(d);
-    emit_long(s);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
-
-/* MOV word [abs32], imm16: 0x66 prefix, opcode 0xc7, ModRM 0x05,
- * 32-bit address d, 16-bit immediate s. */
-LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
-{
-    const int opcode = 0xc7;
-    const int modrm_abs = 0x05;
-
-    emit_byte(0x66);
-    emit_byte(opcode);
-    emit_byte(modrm_abs);
-    emit_long(d);
-    emit_word(s);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
-
-/* MOV byte [abs32], imm8: opcode 0xc6, ModRM 0x05, 32-bit address d,
- * 8-bit immediate s. */
-LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
-{
-    const int opcode = 0xc6;
-    const int modrm_abs = 0x05;
-
-    emit_byte(opcode);
-    emit_byte(modrm_abs);
-    emit_long(d);
-    emit_byte(s);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
-
-/* ROL byte [abs32], count: opcode 0xd0 (rotate by one, no count byte) when
- * the by-one form is enabled and i == 1, otherwise opcode 0xc0 followed by
- * the count.  ModRM is 0x05 with the 32-bit address d in both cases. */
-LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0x05);
-    emit_long(d);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
-
-/* ROL r8, count: opcode 0xd0 (by one) or 0xc0 + count byte, ModRM 0xc0+r. */
-LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xc0+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
-
-/* ROL r16, imm8: 0x66 prefix, opcode 0xc1, ModRM 0xc0+r, count byte i.
- * The count byte is always emitted, including for i == 1. */
-LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
-{
-    const int modrm = 0xc0+r;
-
-    emit_byte(0x66);
-    emit_byte(0xc1);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
-
-/* ROL r32, count: opcode 0xd1 (by one) or 0xc1 + count byte, ModRM 0xc0+r. */
-LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xc0+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
-
-/* ROL r32, CL: opcode 0xd3, ModRM 0xc0+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
-{
-    const int modrm = 0xc0+d;
-
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
-
-/* ROL r16, CL: 0x66 prefix, opcode 0xd3, ModRM 0xc0+d.  The count register
- * r is not encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
-{
-    const int modrm = 0xc0+d;
-
-    emit_byte(0x66);
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
-
-/* ROL r8, CL: opcode 0xd2, ModRM 0xc0+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
-{
-    const int modrm = 0xc0+d;
-
-    emit_byte(0xd2);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
-
-/* SHL r32, CL: opcode 0xd3, ModRM 0xe0+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
-{
-    const int modrm = 0xe0+d;
-
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
-
-/* SHL r16, CL: 0x66 prefix, opcode 0xd3, ModRM 0xe0+d.  The count register
- * r is not encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
-{
-    const int modrm = 0xe0+d;
-
-    emit_byte(0x66);
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
-
-/* SHL r8, CL: opcode 0xd2, ModRM 0xe0+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
-{
-    const int modrm = 0xe0+d;
-
-    emit_byte(0xd2);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
-
-/* ROR r8, count: opcode 0xd0 (by one) or 0xc0 + count byte, ModRM 0xc8+r. */
-LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xc8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
-
-/* ROR r16, imm8: 0x66 prefix, opcode 0xc1, ModRM 0xc8+r, count byte i.
- * The count byte is always emitted, including for i == 1. */
-LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
-{
-    const int modrm = 0xc8+r;
-
-    emit_byte(0x66);
-    emit_byte(0xc1);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
-
-// gb-- used for making an fpcr value in compemu_fpp.cpp
-/* OR r32, [abs32]: opcode 0x0b, ModRM 0x05 with d in the reg field,
- * followed by the 32-bit address s. */
-LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
-{
-    const int modrm = 0x05+8*d;
-
-    emit_byte(0x0b);
-    emit_byte(modrm);
-    emit_long(s);
-}
-LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
-
-/* ROR r32, count: opcode 0xd1 (by one) or 0xc1 + count byte, ModRM 0xc8+r. */
-LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xc8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
-
-/* ROR r32, CL: opcode 0xd3, ModRM 0xc8+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
-{
-    const int modrm = 0xc8+d;
-
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
-
-/* ROR r16, CL: 0x66 prefix, opcode 0xd3, ModRM 0xc8+d.  The count register
- * r is not encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
-{
-    const int modrm = 0xc8+d;
-
-    emit_byte(0x66);
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
-
-/* ROR r8, CL: opcode 0xd2, ModRM 0xc8+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
-{
-    const int modrm = 0xc8+d;
-
-    emit_byte(0xd2);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
-
-/* SHR r32, CL: opcode 0xd3, ModRM 0xe8+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
-{
-    const int modrm = 0xe8+d;
-
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
-
-/* SHR r16, CL: 0x66 prefix, opcode 0xd3, ModRM 0xe8+d.  The count register
- * r is not encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
-{
-    const int modrm = 0xe8+d;
-
-    emit_byte(0x66);
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
-
-/* SHR r8, CL: opcode 0xd2, ModRM 0xe8+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
-{
-    const int modrm = 0xe8+d;
-
-    emit_byte(0xd2);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
-
-/* SAR r32, CL: opcode 0xd3, ModRM 0xf8+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
-{
-    const int modrm = 0xf8+d;
-
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
-
-/* SAR r16, CL: 0x66 prefix, opcode 0xd3, ModRM 0xf8+d.  The count register
- * r is not encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
-{
-    const int modrm = 0xf8+d;
-
-    emit_byte(0x66);
-    emit_byte(0xd3);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
-
-/* SAR r8, CL: opcode 0xd2, ModRM 0xf8+d.  The count register r is not
- * encoded in the emitted bytes. */
-LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
-{
-    const int modrm = 0xf8+d;
-
-    emit_byte(0xd2);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
-
-/* SHL r32, count: opcode 0xd1 (by one) or 0xc1 + count byte, ModRM 0xe0+r. */
-LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xe0+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
-
-/* SHL r16, imm8: 0x66 prefix, opcode 0xc1, ModRM 0xe0+r, count byte i.
- * The count byte is always emitted, including for i == 1. */
-LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
-{
-    const int modrm = 0xe0+r;
-
-    emit_byte(0x66);
-    emit_byte(0xc1);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
-
-/* SHL r8, count: opcode 0xd0 (by one) or 0xc0 + count byte, ModRM 0xe0+r. */
-LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xe0+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
-
-/* SHR r32, count: opcode 0xd1 (by one) or 0xc1 + count byte, ModRM 0xe8+r. */
-LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xe8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
-
-/* SHR r16, imm8: 0x66 prefix, opcode 0xc1, ModRM 0xe8+r, count byte i.
- * The count byte is always emitted, including for i == 1. */
-LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
-{
-    const int modrm = 0xe8+r;
-
-    emit_byte(0x66);
-    emit_byte(0xc1);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
-
-/* SHR r8, count: opcode 0xd0 (by one) or 0xc0 + count byte, ModRM 0xe8+r. */
-LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xe8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
-
-/* SAR r32, count: opcode 0xd1 (by one) or 0xc1 + count byte, ModRM 0xf8+r. */
-LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xf8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
-
-/* SAR r16, imm8: 0x66 prefix, opcode 0xc1, ModRM 0xf8+r, count byte i.
- * The count byte is always emitted, including for i == 1. */
-LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
-{
-    const int modrm = 0xf8+r;
-
-    emit_byte(0x66);
-    emit_byte(0xc1);
-    emit_byte(modrm);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
-
-/* SAR r8, count: opcode 0xd0 (by one) or 0xc0 + count byte, ModRM 0xf8+r. */
-LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
-{
-    const bool once = optimize_shift_once && (i == 1);
-
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xf8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
-
-/* SAHF: single opcode byte 0x9e.  dummy_ah is not encoded. */
-LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
-{
-    const int opcode = 0x9e;
-
-    emit_byte(opcode);
-}
-LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
-
-/* CPUID: bytes 0x0f 0xa2.  dummy_eax is not encoded. */
-LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
-{
-    const int escape = 0x0f;
-    const int opcode = 0xa2;
-
-    emit_byte(escape);
-    emit_byte(opcode);
-}
-LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
-
-/* LAHF: single opcode byte 0x9f.  dummy_ah is not encoded. */
-LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
-{
-    const int opcode = 0x9f;
-
-    emit_byte(opcode);
-}
-LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
-
-/* SETcc r8: bytes 0x0f, 0x90+cc, ModRM 0xc0+d. */
-LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
-{
-    const int opcode = 0x90+cc;
-    const int modrm = 0xc0+d;
-
-    emit_byte(0x0f);
-    emit_byte(opcode);
-    emit_byte(modrm);
-}
-LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
-
-/* SETcc byte [abs32]: bytes 0x0f, 0x90+cc, ModRM 0x05, 32-bit address d. */
-LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
-{
-    const int opcode = 0x90+cc;
-
-    emit_byte(0x0f);
-    emit_byte(opcode);
-    emit_byte(0x05);
-    emit_long(d);
-}
-LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
-
-/* Conditional byte move d <- s when condition cc holds.
- *
- * Always emitted as a short conditional jump over a plain MOV r/m8, r8:
- *   Jcc(inverted) +2 ; MOV d, s
- * The inverted condition is cc with its lowest bit flipped.  The jump
- * displacement is relative to the end of the 2-byte jump, so it must equal
- * the size of the MOV that follows (opcode 0x88 + ModRM = 2 bytes); a larger
- * value would land inside the next instruction. */
-LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
-{
-    /* replacement using branch and mov */
-    int uncc=(cc^1);
-    emit_byte(0x70+uncc);
-    emit_byte(2); /* skip next 2 bytes if not cc=true */
-    emit_byte(0x88);
-    emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
-
-/* Conditional 16-bit move d <- s when condition cc holds.
- * With have_cmov: 0x66, 0x0f, 0x40+cc, ModRM (d in reg, s in r/m).
- * Otherwise: a short jump on the inverted condition over a 3-byte
- * 0x66-prefixed MOV r/m16, r16. */
-LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
-{
-    if (!have_cmov) { /* replacement using branch and mov */
-        const int uncc = (cc^1);
-
-        emit_byte(0x70+uncc);
-        emit_byte(3); /* skip the 3-byte mov below if not cc=true */
-        emit_byte(0x66);
-        emit_byte(0x89);
-        emit_byte(0xc0+8*s+d);
-    }
-    else {
-        emit_byte(0x66);
-        emit_byte(0x0f);
-        emit_byte(0x40+cc);
-        emit_byte(0xc0+8*d+s);
-    }
-}
-LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
-
-/* Conditional 32-bit move d <- s when condition cc holds.
- * With have_cmov: 0x0f, 0x40+cc, ModRM (d in reg, s in r/m).
- * Otherwise: a short jump on the inverted condition over a 2-byte
- * MOV r/m32, r32. */
-LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
-{
-    if (!have_cmov) { /* replacement using branch and mov */
-        const int uncc = (cc^1);
-
-        emit_byte(0x70+uncc);
-        emit_byte(2); /* skip the 2-byte mov below if not cc=true */
-        emit_byte(0x89);
-        emit_byte(0xc0+8*s+d);
-    }
-    else {
-        emit_byte(0x0f);
-        emit_byte(0x40+cc);
-        emit_byte(0xc0+8*d+s);
-    }
-}
-LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
-
-/* BSF r32, r32: bytes 0x0f 0xbc, ModRM with d in the reg field, s in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
-{
-    const int modrm = 0xc0+8*d+s;
-
-    emit_byte(0x0f);
-    emit_byte(0xbc);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
-
-/* MOVSX r32, r16: bytes 0x0f 0xbf, ModRM with d in the reg field, s in r/m. */
-LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
-{
-    const int modrm = 0xc0+8*d+s;
-
-    emit_byte(0x0f);
-    emit_byte(0xbf);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
-
-/* MOVSX r32, r8: bytes 0x0f 0xbe, ModRM with d in the reg field, s in r/m. */
-LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
-{
-    const int modrm = 0xc0+8*d+s;
-
-    emit_byte(0x0f);
-    emit_byte(0xbe);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
-
-/* MOVZX r32, r16: bytes 0x0f 0xb7, ModRM with d in the reg field, s in r/m. */
-LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
-{
-    const int modrm = 0xc0+8*d+s;
-
-    emit_byte(0x0f);
-    emit_byte(0xb7);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
-
-/* MOVZX r32, r8: bytes 0x0f 0xb6, ModRM with d in the reg field, s in r/m. */
-LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
-{
-    const int modrm = 0xc0+8*d+s;
-
-    emit_byte(0x0f);
-    emit_byte(0xb6);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
-
-/* IMUL r32, r32: bytes 0x0f 0xaf, ModRM with d in the reg field, s in r/m. */
-LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
-{
-    const int modrm = 0xc0+8*d+s;
-
-    emit_byte(0x0f);
-    emit_byte(0xaf);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
-
-/* One-operand IMUL (bytes 0xf7 0xea).  The operands are fixed by the
- * encoding, so the call aborts unless d is MUL_NREG1 and s is MUL_NREG2. */
-LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
-{
-    if (!(d==MUL_NREG1 && s==MUL_NREG2))
-        abort();
-
-    emit_byte(0xf7);
-    emit_byte(0xea);
-}
-LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
-
-/* One-operand MUL (bytes 0xf7 0xe2).  The operands are fixed by the
- * encoding, so the call reports the registers and aborts unless d is
- * MUL_NREG1 and s is MUL_NREG2. */
-LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
-{
-    const bool regs_ok = (d==MUL_NREG1) && (s==MUL_NREG2);
-
-    if (!regs_ok) {
-        printf("Bad register in MUL: d=%d, s=%d\n",d,s);
-        abort();
-    }
-    emit_byte(0xf7);
-    emit_byte(0xe2);
-}
-LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
-
-/* Not supported: the function calls abort() before emitting anything.
- * The 0x0f 0xaf encoding that follows the abort() is kept as written. */
-LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
-{
-    const int modrm = 0xc0+8*d+s;
-
-    abort(); /* %^$&%^$%#^ x86! */
-    emit_byte(0x0f);
-    emit_byte(0xaf);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
-
-/* MOV r/m8, r8 (register form): opcode 0x88, ModRM with s in the reg
- * field and d in r/m. */
-LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x88);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
-
-/* MOV r/m16, r16 (register form): 0x66 prefix, opcode 0x89, ModRM with s
- * in the reg field and d in r/m. */
-LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x66);
-    emit_byte(0x89);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
-
-/* MOV r32, [baser + index*factor]: opcode 0x8b, ModRM selecting a SIB byte,
- * then the SIB byte.  factor must be 1, 2, 4 or 8 (anything else aborts).
- * When baser is register 5 the ModRM gains 0x40 and a zero 8-bit
- * displacement byte is appended. */
-LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-{
-    int fi;
-    int isebp = 0;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    if (baser == 5)
-        isebp = 0x40;
-
-    emit_byte(0x8b);
-    emit_byte(0x04+8*d+isebp);
-    emit_byte(baser+8*index+0x40*fi);
-    if (isebp != 0)
-        emit_byte(0x00);
-}
-LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-
-/* MOV r16, [baser + index*factor]: 0x66 prefix, opcode 0x8b, ModRM
- * selecting a SIB byte, then the SIB byte.  factor must be 1, 2, 4 or 8
- * (anything else aborts).  When baser is register 5 the ModRM gains 0x40
- * and a zero 8-bit displacement byte is appended. */
-LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-{
-    int fi;
-    int isebp = 0;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    if (baser == 5)
-        isebp = 0x40;
-
-    emit_byte(0x66);
-    emit_byte(0x8b);
-    emit_byte(0x04+8*d+isebp);
-    emit_byte(baser+8*index+0x40*fi);
-    if (isebp != 0)
-        emit_byte(0x00);
-}
-LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-
-/* MOV r8, [baser + index*factor]: opcode 0x8a, ModRM selecting a SIB byte,
- * then the SIB byte.  factor must be 1, 2, 4 or 8 (anything else aborts).
- * When baser is register 5 the ModRM gains 0x40 and a zero 8-bit
- * displacement byte is appended. */
-LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-{
-    int fi;
-    int isebp = 0;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    if (baser == 5)
-        isebp = 0x40;
-
-    emit_byte(0x8a);
-    emit_byte(0x04+8*d+isebp);
-    emit_byte(baser+8*index+0x40*fi);
-    if (isebp != 0)
-        emit_byte(0x00);
-}
-LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-
-/* MOV [baser + index*factor], r32: opcode 0x89, ModRM selecting a SIB byte
- * with s in the reg field, then the SIB byte.  factor must be 1, 2, 4 or 8
- * (anything else aborts).  When baser is register 5 the ModRM gains 0x40
- * and a zero 8-bit displacement byte is appended. */
-LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-{
-    int fi;
-    int isebp = 0;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    if (baser == 5)
-        isebp = 0x40;
-
-    emit_byte(0x89);
-    emit_byte(0x04+8*s+isebp);
-    emit_byte(baser+8*index+0x40*fi);
-    if (isebp != 0)
-        emit_byte(0x00);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-
-/* MOV [baser + index*factor], r16: 0x66 prefix, opcode 0x89, ModRM
- * selecting a SIB byte with s in the reg field, then the SIB byte.  factor
- * must be 1, 2, 4 or 8 (anything else aborts).  When baser is register 5
- * the ModRM gains 0x40 and a zero 8-bit displacement byte is appended. */
-LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-{
-    int fi;
-    int isebp = 0;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    if (baser == 5)
-        isebp = 0x40;
-
-    emit_byte(0x66);
-    emit_byte(0x89);
-    emit_byte(0x04+8*s+isebp);
-    emit_byte(baser+8*index+0x40*fi);
-    if (isebp != 0)
-        emit_byte(0x00);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-
-/* MOV [baser + index*factor], r8: opcode 0x88, ModRM selecting a SIB byte
- * with s in the reg field, then the SIB byte.  factor must be 1, 2, 4 or 8
- * (anything else aborts).  When baser is register 5 the ModRM gains 0x40
- * and a zero 8-bit displacement byte is appended. */
-LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-{
-    int fi;
-    int isebp = 0;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    if (baser == 5)
-        isebp = 0x40;
-
-    emit_byte(0x88);
-    emit_byte(0x04+8*s+isebp);
-    emit_byte(baser+8*index+0x40*fi);
-    if (isebp != 0)
-        emit_byte(0x00);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-
-/* MOV [base + baser + index*factor], r32: opcode 0x89, ModRM 0x84 with s in
- * the reg field (SIB + 32-bit displacement), the SIB byte, then base as a
- * 32-bit displacement.  factor must be 1, 2, 4 or 8 (anything else aborts). */
-LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-{
-    int fi;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    emit_byte(0x89);
-    emit_byte(0x84+8*s);
-    emit_byte(baser+8*index+0x40*fi);
-    emit_long(base);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-
-/* MOV [base + baser + index*factor], r16: 0x66 prefix, opcode 0x89, ModRM
- * 0x84 with s in the reg field (SIB + 32-bit displacement), the SIB byte,
- * then base as a 32-bit displacement.  factor must be 1, 2, 4 or 8
- * (anything else aborts). */
-LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-{
-    int fi;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    emit_byte(0x66);
-    emit_byte(0x89);
-    emit_byte(0x84+8*s);
-    emit_byte(baser+8*index+0x40*fi);
-    emit_long(base);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-
-/* MOV [base + baser + index*factor], r8: opcode 0x88, ModRM 0x84 with s in
- * the reg field (SIB + 32-bit displacement), the SIB byte, then base as a
- * 32-bit displacement.  factor must be 1, 2, 4 or 8 (anything else aborts). */
-LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-{
-    int fi;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    emit_byte(0x88);
-    emit_byte(0x84+8*s);
-    emit_byte(baser+8*index+0x40*fi);
-    emit_long(base);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-
-/* MOV r32, [base + baser + index*factor]: opcode 0x8b, ModRM 0x84 with d in
- * the reg field (SIB + 32-bit displacement), the SIB byte, then base as a
- * 32-bit displacement.  factor must be 1, 2, 4 or 8 (anything else aborts). */
-LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-{
-    int fi;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    emit_byte(0x8b);
-    emit_byte(0x84+8*d);
-    emit_byte(baser+8*index+0x40*fi);
-    emit_long(base);
-}
-LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-
-/* MOV r16, [base + baser + index*factor]: 0x66 prefix, opcode 0x8b, ModRM
- * 0x84 with d in the reg field (SIB + 32-bit displacement), the SIB byte,
- * then base as a 32-bit displacement.  factor must be 1, 2, 4 or 8
- * (anything else aborts). */
-LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-{
-    int fi;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    emit_byte(0x66);
-    emit_byte(0x8b);
-    emit_byte(0x84+8*d);
-    emit_byte(baser+8*index+0x40*fi);
-    emit_long(base);
-}
-LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-
-/* MOV r8, [base + baser + index*factor]: opcode 0x8a, ModRM 0x84 with d in
- * the reg field (SIB + 32-bit displacement), the SIB byte, then base as a
- * 32-bit displacement.  factor must be 1, 2, 4 or 8 (anything else aborts). */
-LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-{
-    int fi;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    emit_byte(0x8a);
-    emit_byte(0x84+8*d);
-    emit_byte(baser+8*index+0x40*fi);
-    emit_long(base);
-}
-LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-
-/* MOV r32, [base + index*factor] with no base register: opcode 0x8b, ModRM
- * 0x04 with d in the reg field, SIB with base field 5 (displacement only),
- * then base as a 32-bit displacement.  factor must be 1, 2, 4 or 8; any
- * other value is reported on stderr and aborts. */
-LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-{
-    int fi;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else {
-        fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
-        abort();
-    }
-
-    emit_byte(0x8b);
-    emit_byte(0x04+8*d);
-    emit_byte(0x05+8*index+64*fi);
-    emit_long(base);
-}
-LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-
-/* Conditional 32-bit load d <- [base + index*factor] when cond holds.
- *
- * factor must be 1, 2, 4 or 8; any other value is reported on stderr and
- * aborts.  With have_cmov the load is a CMOVcc (0x0f, 0x40+cond); otherwise
- * it is a short jump on the inverted condition over a 7-byte MOV
- * (opcode + ModRM + SIB + 32-bit displacement).  Both forms use ModRM 0x04
- * with d in the reg field and a SIB byte with base field 5 (displacement
- * only). */
-LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
-{
-    int fi;
-    switch(factor) {
-    case 1: fi=0; break;
-    case 2: fi=1; break;
-    case 4: fi=2; break;
-    case 8: fi=3; break;
-    default:
-        /* Name this function in the diagnostic, not its unconditional sibling. */
-        fprintf(stderr,"Bad factor %d in cmov_l_rm_indexed!\n",factor);
-        abort();
-    }
-    if (have_cmov) {
-        emit_byte(0x0f);
-        emit_byte(0x40+cond);
-        emit_byte(0x04+8*d);
-        emit_byte(0x05+8*index+64*fi);
-        emit_long(base);
-    }
-    else { /* replacement using branch and mov */
-        int uncc=(cond^1);
-        emit_byte(0x70+uncc);
-        emit_byte(7); /* skip next 7 bytes if not cc=true */
-        emit_byte(0x8b);
-        emit_byte(0x04+8*d);
-        emit_byte(0x05+8*index+64*fi);
-        emit_long(base);
-    }
-}
-LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
-
-/* Conditional 32-bit load d <- [mem] when cond holds.
- * With have_cmov: 0x0f, 0x40+cond, ModRM 0x05 with d in the reg field,
- * 32-bit address.  Otherwise: a short jump on the inverted condition over
- * a 6-byte MOV r32, [abs32]. */
-LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
-{
-    const int modrm = 0x05+8*d;
-
-    if (!have_cmov) { /* replacement using branch and mov */
-        const int uncc = (cond^1);
-
-        emit_byte(0x70+uncc);
-        emit_byte(6); /* skip the 6-byte mov below if not cc=true */
-        emit_byte(0x8b);
-        emit_byte(modrm);
-        emit_long(mem);
-    }
-    else {
-        emit_byte(0x0f);
-        emit_byte(0x40+cond);
-        emit_byte(modrm);
-        emit_long(mem);
-    }
-}
-LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
-
-/* MOV r32, [s + disp8]: opcode 0x8b, ModRM 0x40 with d in the reg field
- * and s in r/m, then offset as one byte.  The Dif() check aborts when
- * offset does not satisfy isbyte(). */
-LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
-{
-    const int modrm = 0x40+8*d+s;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0x8b);
-    emit_byte(modrm);
-    emit_byte(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
-
-/* MOV r16, [s + disp8]: 0x66 prefix, opcode 0x8b, ModRM 0x40 with d in the
- * reg field and s in r/m, then offset as one byte.  The Dif() check aborts
- * when offset does not satisfy isbyte(). */
-LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
-{
-    const int modrm = 0x40+8*d+s;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0x66);
-    emit_byte(0x8b);
-    emit_byte(modrm);
-    emit_byte(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
-
-/* MOV r8, [s + disp8]: opcode 0x8a, ModRM 0x40 with d in the reg field and
- * s in r/m, then offset as one byte.  The Dif() check aborts when offset
- * does not satisfy isbyte(). */
-LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
-{
-    const int modrm = 0x40+8*d+s;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0x8a);
-    emit_byte(modrm);
-    emit_byte(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
-
-/* MOV r32, [s + disp32]: opcode 0x8b, ModRM 0x80 with d in the reg field
- * and s in r/m, then offset as a 32-bit displacement. */
-LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
-{
-    const int modrm = 0x80+8*d+s;
-
-    emit_byte(0x8b);
-    emit_byte(modrm);
-    emit_long(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
-
-/* MOV r16, [s + disp32]: 0x66 prefix, opcode 0x8b, ModRM 0x80 with d in
- * the reg field and s in r/m, then offset as a 32-bit displacement. */
-LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
-{
-    const int modrm = 0x80+8*d+s;
-
-    emit_byte(0x66);
-    emit_byte(0x8b);
-    emit_byte(modrm);
-    emit_long(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
-
-/* MOV r8, [s + disp32]: opcode 0x8a, ModRM 0x80 with d in the reg field
- * and s in r/m, then offset as a 32-bit displacement. */
-LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
-{
-    const int modrm = 0x80+8*d+s;
-
-    emit_byte(0x8a);
-    emit_byte(modrm);
-    emit_long(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
-
-/* MOV dword [d + disp8], imm32: opcode 0xc7, ModRM 0x40+d, offset as one
- * byte, then the 32-bit immediate i.  The Dif() check aborts when offset
- * does not satisfy isbyte(). */
-LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
-{
-    const int modrm = 0x40+d;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0xc7);
-    emit_byte(modrm);
-    emit_byte(offset);
-    emit_long(i);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
-
-/* MOV word [d + disp8], imm16: 0x66 prefix, opcode 0xc7, ModRM 0x40+d,
- * offset as one byte, then the 16-bit immediate i.  The Dif() check aborts
- * when offset does not satisfy isbyte(). */
-LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
-{
-    const int modrm = 0x40+d;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0x66);
-    emit_byte(0xc7);
-    emit_byte(modrm);
-    emit_byte(offset);
-    emit_word(i);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
-
-/* MOV byte [d + disp8], imm8: opcode 0xc6, ModRM 0x40+d, offset as one
- * byte, then the 8-bit immediate i.  The Dif() check aborts when offset
- * does not satisfy isbyte(). */
-LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
-{
-    const int modrm = 0x40+d;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0xc6);
-    emit_byte(modrm);
-    emit_byte(offset);
-    emit_byte(i);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
-
-/* MOV [d + disp8], r32: opcode 0x89, ModRM 0x40 with s in the reg field and
- * d in r/m, then offset as one byte.  The Dif() check aborts when offset
- * does not satisfy isbyte(). */
-LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
-{
-    const int modrm = 0x40+8*s+d;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0x89);
-    emit_byte(modrm);
-    emit_byte(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
-
-/* MOV [d + disp8], r16: 0x66 prefix, opcode 0x89, ModRM 0x40 with s in the
- * reg field and d in r/m, then offset as one byte.  The Dif() check aborts
- * when offset does not satisfy isbyte(). */
-LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
-{
-    const int modrm = 0x40+8*s+d;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0x66);
-    emit_byte(0x89);
-    emit_byte(modrm);
-    emit_byte(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
-
-/* MOV [d + disp8], r8: opcode 0x88, ModRM 0x40 with s in the reg field and
- * d in r/m, then offset as one byte.  The Dif() check aborts when offset
- * does not satisfy isbyte(). */
-LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
-{
-    const int modrm = 0x40+8*s+d;
-
-    Dif(!isbyte(offset)) abort();
-    emit_byte(0x88);
-    emit_byte(modrm);
-    emit_byte(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
-
-/* LEA r32, [s + offset]: opcode 0x8d with d in the reg field and s in r/m.
- * Uses the 8-bit displacement form (ModRM 0x40) when enabled and offset
- * satisfies isbyte(), otherwise the 32-bit form (ModRM 0x80). */
-LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
-{
-    const bool short_disp = optimize_imm8 && isbyte(offset);
-
-    emit_byte(0x8d);
-    emit_byte((short_disp ? 0x40 : 0x80)+8*d+s);
-    if (short_disp)
-        emit_byte(offset);
-    else
-        emit_long(offset);
-}
-LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
-
-/* LEA r32, [s + index*factor + offset]: opcode 0x8d, ModRM selecting a SIB
- * byte with d in the reg field, the SIB byte, then the displacement.
- * factor must be 1, 2, 4 or 8 (anything else aborts).  The 8-bit
- * displacement form (ModRM 0x44) is used when enabled and offset satisfies
- * isbyte(), otherwise the 32-bit form (ModRM 0x84). */
-LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-{
-    int fi;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    const bool short_disp = optimize_imm8 && isbyte(offset);
-
-    emit_byte(0x8d);
-    emit_byte((short_disp ? 0x44 : 0x84)+8*d);
-    emit_byte(0x40*fi+8*index+s);
-    if (short_disp)
-        emit_byte(offset);
-    else
-        emit_long(offset);
-}
-LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-
-/* LEA r32, [s + index*factor]: opcode 0x8d, ModRM selecting a SIB byte with
- * d in the reg field, then the SIB byte.  factor must be 1, 2, 4 or 8
- * (anything else aborts).  When s is register 5 the ModRM gains 0x40 and a
- * zero 8-bit displacement byte is appended. */
-LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-{
-    int fi;
-    int isebp = 0;
-
-    if (s == 5)
-        isebp = 0x40;
-
-    if (factor == 1)      fi = 0;
-    else if (factor == 2) fi = 1;
-    else if (factor == 4) fi = 2;
-    else if (factor == 8) fi = 3;
-    else                  abort();
-
-    emit_byte(0x8d);
-    emit_byte(0x04+8*d+isebp);
-    emit_byte(0x40*fi+8*index+s);
-    if (isebp != 0)
-        emit_byte(0);
-}
-LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-
-/* MOV [d + offset], r32: opcode 0x89 with s in the reg field and d in r/m.
- * Uses the 8-bit displacement form (ModRM 0x40) when enabled and offset
- * satisfies isbyte(), otherwise the 32-bit form (ModRM 0x80). */
-LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
-{
-    const bool short_disp = optimize_imm8 && isbyte(offset);
-
-    emit_byte(0x89);
-    emit_byte((short_disp ? 0x40 : 0x80)+8*s+d);
-    if (short_disp)
-        emit_byte(offset);
-    else
-        emit_long(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
-
-/* MOV [d + disp32], r16: 0x66 prefix, opcode 0x89, ModRM 0x80 with s in the
- * reg field and d in r/m, then offset as a 32-bit displacement.  The 32-bit
- * displacement form is used regardless of the size of offset. */
-LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
-{
-    const int modrm = 0x80+8*s+d;
-
-    emit_byte(0x66);
-    emit_byte(0x89);
-    emit_byte(modrm);
-    emit_long(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
-
-/* MOV [d + offset], r8: opcode 0x88 with s in the reg field and d in r/m.
- * Uses the 8-bit displacement form (ModRM 0x40) when enabled and offset
- * satisfies isbyte(), otherwise the 32-bit form (ModRM 0x80). */
-LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
-{
-    const bool short_disp = optimize_imm8 && isbyte(offset);
-
-    emit_byte(0x88);
-    emit_byte((short_disp ? 0x40 : 0x80)+8*s+d);
-    if (short_disp)
-        emit_byte(offset);
-    else
-        emit_long(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
-
-/* BSWAP r32: bytes 0x0f, 0xc8+r. */
-LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
-{
-    const int opcode = 0xc8+r;
-
-    emit_byte(0x0f);
-    emit_byte(opcode);
-}
-LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
-
-/* 16-bit byte swap, emitted as ROL r16, 8: 0x66 prefix, opcode 0xc1,
- * ModRM 0xc0+r, count byte 0x08. */
-LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
-{
-    const int modrm = 0xc0+r;
-    const int rotate_count = 0x08;
-
-    emit_byte(0x66);
-    emit_byte(0xc1);
-    emit_byte(modrm);
-    emit_byte(rotate_count);
-}
-LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
-
-/* MOV r/m32, r32 (register form): opcode 0x89, ModRM with s in the reg
- * field and d in r/m. */
-LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x89);
-    emit_byte(modrm);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
-
-/* MOV [abs32], r32: opcode 0x89, ModRM 0x05 with s in the reg field,
- * followed by the 32-bit address d. */
-LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
-{
-    const int modrm = 0x05+8*s;
-
-    emit_byte(0x89);
-    emit_byte(modrm);
-    emit_long(d);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
-
-/* MOV [abs32], r16: 0x66 prefix, opcode 0x89, ModRM 0x05 with s in the reg
- * field, followed by the 32-bit address d. */
-LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
-{
-    const int modrm = 0x05+8*s;
-
-    emit_byte(0x66);
-    emit_byte(0x89);
-    emit_byte(modrm);
-    emit_long(d);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
-
-/* MOV r16, [abs32]: 0x66 prefix, opcode 0x8b, ModRM 0x05 with d in the reg
- * field, followed by the 32-bit address s. */
-LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
-{
-    const int modrm = 0x05+8*d;
-
-    emit_byte(0x66);
-    emit_byte(0x8b);
-    emit_byte(modrm);
-    emit_long(s);
-}
-LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
-
-/* MOV [abs32], r8: opcode 0x88, ModRM 0x05 with the low four bits of s in
- * the reg field, followed by the 32-bit address d. */
-LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
-{
-    /* XXX masking handles the %ah case (defined as 0x10+4) and others */
-    const int modrm = 0x05+8*(s&0xf);
-
-    emit_byte(0x88);
-    emit_byte(modrm);
-    emit_long(d);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
-
-/* MOV r8, [abs32]: opcode 0x8a, ModRM 0x05 with d in the reg field,
- * followed by the 32-bit address s. */
-LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
-{
-    const int modrm = 0x05+8*d;
-
-    emit_byte(0x8a);
-    emit_byte(modrm);
-    emit_long(s);
-}
-LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
-
-/* MOV r32, imm32: opcode 0xb8+d followed by the 32-bit immediate s. */
-LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
-{
-    const int opcode = 0xb8+d;
-
-    emit_byte(opcode);
-    emit_long(s);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
-
-/* MOV r16, imm16: 0x66 prefix, opcode 0xb8+d, then the 16-bit immediate s. */
-LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
-{
-    const int opcode = 0xb8+d;
-
-    emit_byte(0x66);
-    emit_byte(opcode);
-    emit_word(s);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
-
-/* MOV r8, imm8: opcode 0xb0+d followed by the 8-bit immediate s. */
-LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
-{
-    const int opcode = 0xb0+d;
-
-    emit_byte(opcode);
-    emit_byte(s);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
-
-/* ADC dword [abs32], imm32: opcode 0x81, ModRM 0x15, 32-bit address d,
- * 32-bit immediate s. */
-LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
-{
-    const int opcode = 0x81;
-    const int modrm_adc_abs = 0x15;
-
-    emit_byte(opcode);
-    emit_byte(modrm_adc_abs);
-    emit_long(d);
-    emit_long(s);
-}
-LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
-
-/* ADD dword [abs32], imm: ModRM 0x05 and 32-bit address d, using opcode
- * 0x83 with a one-byte immediate when enabled and s satisfies isbyte(),
- * otherwise opcode 0x81 with a 32-bit immediate. */
-LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
-{
-    const bool short_imm = optimize_imm8 && isbyte(s);
-
-    emit_byte(short_imm ? 0x83 : 0x81);
-    emit_byte(0x05);
-    emit_long(d);
-    if (short_imm)
-        emit_byte(s);
-    else
-        emit_long(s);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
-
-/* ADD word [abs32], imm16: 0x66 prefix, opcode 0x81, ModRM 0x05, 32-bit
- * address d, 16-bit immediate s. */
-LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
-{
-    const int opcode = 0x81;
-    const int modrm_abs = 0x05;
-
-    emit_byte(0x66);
-    emit_byte(opcode);
-    emit_byte(modrm_abs);
-    emit_long(d);
-    emit_word(s);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
-
-/* ADD byte [abs32], imm8: opcode 0x80, ModRM 0x05, 32-bit address d,
- * 8-bit immediate s. */
-LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
-{
-    const int opcode = 0x80;
-    const int modrm_abs = 0x05;
-
-    emit_byte(opcode);
-    emit_byte(modrm_abs);
-    emit_long(d);
-    emit_byte(s);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
-
-/* TEST r32, imm32: the short accumulator opcode 0xa9 when enabled and d is
- * the accumulator, otherwise opcode 0xf7 with ModRM 0xc0+d; the 32-bit
- * immediate i follows in both cases. */
-LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
-{
-    if (!(optimize_accum && isaccum(d))) {
-        emit_byte(0xf7);
-        emit_byte(0xc0+d);
-    }
-    else {
-        emit_byte(0xa9);
-    }
-    emit_long(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
-
-/* TEST r/m32, r32 (register form): opcode 0x85, ModRM with s in the reg
- * field and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x85);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
-
-/* TEST r/m16, r16 (register form): 0x66 prefix, opcode 0x85, ModRM with s
- * in the reg field and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x66);
-    emit_byte(0x85);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
-
-/* TEST r/m8, r8 (register form): opcode 0x84, ModRM with s in the reg
- * field and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x84);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
-
-/* XOR r32, imm32: opcode 0x81, ModRM 0xf0+d, then the 32-bit immediate i.
- * The 32-bit immediate form is used regardless of the size of i. */
-LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
-{
-    const int modrm = 0xf0+d;
-
-    emit_byte(0x81);
-    emit_byte(modrm);
-    emit_long(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
-
-/* AND r32, imm: the one-byte-immediate form (0x83, ModRM 0xe0+d, imm8) when
- * enabled and i satisfies isbyte(); otherwise a 32-bit immediate using the
- * short accumulator opcode 0x25 or the general 0x81 / ModRM 0xe0+d form. */
-LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
-{
-    const int modrm = 0xe0+d;
-
-    if (!(optimize_imm8 && isbyte(i))) {
-        if (optimize_accum && isaccum(d)) {
-            emit_byte(0x25);
-        }
-        else {
-            emit_byte(0x81);
-            emit_byte(modrm);
-        }
-        emit_long(i);
-    }
-    else {
-        emit_byte(0x83);
-        emit_byte(modrm);
-        emit_byte(i);
-    }
-}
-LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
-
-/* AND r16, imm: 0x66 prefix, then the one-byte-immediate form (0x83, ModRM
- * 0xe0+d, imm8) when enabled and i satisfies isbyte(); otherwise a 16-bit
- * immediate using the short accumulator opcode 0x25 or the general
- * 0x81 / ModRM 0xe0+d form. */
-LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
-{
-    const int modrm = 0xe0+d;
-
-    emit_byte(0x66);
-    if (!(optimize_imm8 && isbyte(i))) {
-        if (optimize_accum && isaccum(d)) {
-            emit_byte(0x25);
-        }
-        else {
-            emit_byte(0x81);
-            emit_byte(modrm);
-        }
-        emit_word(i);
-    }
-    else {
-        emit_byte(0x83);
-        emit_byte(modrm);
-        emit_byte(i);
-    }
-}
-LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
-
-/* AND r/m32, r32 (register form): opcode 0x21, ModRM with s in the reg
- * field and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x21);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
-
-/* AND r/m16, r16 (register form): 0x66 prefix, opcode 0x21, ModRM with s
- * in the reg field and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x66);
-    emit_byte(0x21);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
-
-/* AND r/m8, r8 (register form): opcode 0x20, ModRM with s in the reg field
- * and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x20);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
-
-/* OR r32, imm: the one-byte-immediate form (0x83, ModRM 0xc8+d, imm8) when
- * enabled and i satisfies isbyte(); otherwise a 32-bit immediate using the
- * short accumulator opcode 0x0d or the general 0x81 / ModRM 0xc8+d form. */
-LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
-{
-    const int modrm = 0xc8+d;
-
-    if (!(optimize_imm8 && isbyte(i))) {
-        if (optimize_accum && isaccum(d)) {
-            emit_byte(0x0d);
-        }
-        else {
-            emit_byte(0x81);
-            emit_byte(modrm);
-        }
-        emit_long(i);
-    }
-    else {
-        emit_byte(0x83);
-        emit_byte(modrm);
-        emit_byte(i);
-    }
-}
-LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
-
-/* OR r/m32, r32 (register form): opcode 0x09, ModRM with s in the reg field
- * and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x09);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
-
-/* OR r/m16, r16 (register form): 0x66 prefix, opcode 0x09, ModRM with s in
- * the reg field and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x66);
-    emit_byte(0x09);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
-
-/* OR r/m8, r8 (register form): opcode 0x08, ModRM with s in the reg field
- * and d in r/m. */
-LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x08);
-    emit_byte(modrm);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
-
-/* ADC r/m32, r32 (register form): opcode 0x11, ModRM with s in the reg
- * field and d in r/m. */
-LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x11);
-    emit_byte(modrm);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
-
-/* ADC r/m16, r16 (register form): 0x66 prefix, opcode 0x11, ModRM with s
- * in the reg field and d in r/m. */
-LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x66);
-    emit_byte(0x11);
-    emit_byte(modrm);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
-
-/* ADC r/m8, r8 (register form): opcode 0x10, ModRM with s in the reg field
- * and d in r/m. */
-LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
-{
-    const int modrm = 0xc0+8*s+d;
-
-    emit_byte(0x10);
-    emit_byte(modrm);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
-
-/* Emit a 32-bit register-to-register ADD (d += s): opcode 0x01, then the
-   operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
-{
- emit_byte(0x01);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
-
-/* Emit a 16-bit register-to-register ADD (d += s): 0x66 prefix, opcode
-   0x01, then the operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x01);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
-
-/* Emit an 8-bit register-to-register ADD (d += s): opcode 0x00, then the
-   operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
-{
- emit_byte(0x00);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
-
-/* Emit a 32-bit SUB of immediate i from register d. The byte-immediate
-   form (0x83) is used whenever isbyte(i) holds; note that, unlike the
-   AND/OR/CMP variants, this does not consult optimize_imm8. Otherwise
-   the long immediate is written after 0x2d (accumulator) or 0x81. */
-LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
-{
-    if (!isbyte(i)) {
-        /* long-sized immediate */
-        if (!(optimize_accum && isaccum(d))) {
-            emit_byte(0x81);
-            emit_byte(0xe8+d);
-        }
-        else
-            emit_byte(0x2d);
-        emit_long(i);
-    }
-    else {
-        /* byte-sized immediate */
-        emit_byte(0x83);
-        emit_byte(0xe8+d);
-        emit_byte(i);
-    }
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
-
-/* Emit an 8-bit SUB of immediate i from register d: the one-byte opcode
-   0x2c when d is the accumulator (and optimize_accum is on), otherwise
-   0x80 plus the operand byte 0xe8+d. The immediate byte follows. */
-LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
-{
-    if (!(optimize_accum && isaccum(d))) {
-        emit_byte(0x80);
-        emit_byte(0xe8+d);
-    }
-    else
-        emit_byte(0x2c);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
-
-/* Emit a 32-bit ADD of immediate i to register d. The byte-immediate
-   form (0x83) is used whenever isbyte(i) holds (optimize_imm8 is not
-   consulted here); otherwise the long immediate follows 0x05
-   (accumulator) or 0x81 plus the operand byte. */
-LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
-{
-    if (!isbyte(i)) {
-        /* long-sized immediate */
-        if (!(optimize_accum && isaccum(d))) {
-            emit_byte(0x81);
-            emit_byte(0xc0+d);
-        }
-        else
-            emit_byte(0x05);
-        emit_long(i);
-    }
-    else {
-        /* byte-sized immediate */
-        emit_byte(0x83);
-        emit_byte(0xc0+d);
-        emit_byte(i);
-    }
-}
-LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
-
-/* Emit a 16-bit ADD of immediate i to register d. The 0x66 prefix comes
-   first; the immediate is a single byte (0x83) when isbyte(i) holds,
-   else a word after 0x05 (accumulator) or 0x81 plus the operand byte. */
-LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
-{
-    emit_byte(0x66);
-    if (!isbyte(i)) {
-        /* word-sized immediate */
-        if (!(optimize_accum && isaccum(d))) {
-            emit_byte(0x81);
-            emit_byte(0xc0+d);
-        }
-        else
-            emit_byte(0x05);
-        emit_word(i);
-    }
-    else {
-        /* byte-sized immediate */
-        emit_byte(0x83);
-        emit_byte(0xc0+d);
-        emit_byte(i);
-    }
-}
-LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
-
-/* Emit an 8-bit ADD of immediate i to register d: the one-byte opcode
-   0x04 when d is the accumulator (and optimize_accum is on), otherwise
-   0x80 plus the operand byte 0xc0+d. The immediate byte follows. */
-LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
-{
-    if (!(optimize_accum && isaccum(d))) {
-        emit_byte(0x80);
-        emit_byte(0xc0+d);
-    }
-    else
-        emit_byte(0x04);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
-
-/* Emit a 32-bit register-to-register subtract-with-borrow (SBB d, s):
-   opcode 0x19, then the operand byte 0xc0+8*s+d. */
-LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
-{
- emit_byte(0x19);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
-
-/* Emit a 16-bit register-to-register subtract-with-borrow (SBB d, s):
-   0x66 prefix, opcode 0x19, then the operand byte 0xc0+8*s+d. */
-LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x19);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
-
-/* Emit an 8-bit register-to-register subtract-with-borrow (SBB d, s):
-   opcode 0x18, then the operand byte 0xc0+8*s+d. */
-LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
-{
- emit_byte(0x18);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
-
-/* Emit a 32-bit register-to-register SUB (d -= s): opcode 0x29, then the
-   operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
-{
- emit_byte(0x29);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
-
-/* Emit a 16-bit register-to-register SUB (d -= s): 0x66 prefix, opcode
-   0x29, then the operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x29);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
-
-/* Emit an 8-bit register-to-register SUB (d -= s): opcode 0x28, then the
-   operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
-{
- emit_byte(0x28);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
-
-/* Emit a 32-bit register-to-register CMP of d against s: opcode 0x39,
-   then the operand byte 0xc0+8*s+d. Both operands are declared
-   read-only (R4). */
-LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
-{
- emit_byte(0x39);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
-
-/* Emit a 32-bit CMP of register r against immediate i, using the byte
-   immediate form (0x83) when allowed, else the accumulator short form
-   (0x3d) or the generic form (0x81) followed by a long immediate. */
-LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
-{
-    if (!(optimize_imm8 && isbyte(i))) {
-        /* long-sized immediate */
-        if (!(optimize_accum && isaccum(r))) {
-            emit_byte(0x81);
-            emit_byte(0xf8+r);
-        }
-        else
-            emit_byte(0x3d);
-        emit_long(i);
-    }
-    else {
-        /* byte-sized immediate */
-        emit_byte(0x83);
-        emit_byte(0xf8+r);
-        emit_byte(i);
-    }
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
-
-/* Emit a 16-bit register-to-register CMP of d against s: 0x66 prefix,
-   opcode 0x39, then the operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x39);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
-
-/* Emit an 8-bit CMP of the byte at absolute address d against the
-   immediate s: opcode 0x80, operand byte 0x3d, the 32-bit address, then
-   the immediate byte. */
-LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
-{
-    emit_byte(0x80);
-    emit_byte(0x3d);
-    emit_long(d);
-    emit_byte(s);
-}
-LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
-
-/* Emit an 8-bit CMP of register d against immediate i: the one-byte
-   opcode 0x3c when d is the accumulator (and optimize_accum is on),
-   otherwise 0x80 plus the operand byte 0xf8+d. The immediate follows. */
-LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
-{
-    if (!(optimize_accum && isaccum(d))) {
-        emit_byte(0x80);
-        emit_byte(0xf8+d);
-    }
-    else
-        emit_byte(0x3c);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
-
-/* Emit an 8-bit register-to-register CMP of d against s: opcode 0x38,
-   then the operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
-{
- emit_byte(0x38);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
-
-/* Emit a 32-bit CMP of register d against the long at
-   offset + index*factor. factor must be 1, 2, 4 or 8 (it is turned into
-   the 2-bit scale field of the third emitted byte); any other value
-   aborts. */
-LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
-{
-    int scale_bits;
-
-    if (factor == 1)
-        scale_bits = 0;
-    else if (factor == 2)
-        scale_bits = 1;
-    else if (factor == 4)
-        scale_bits = 2;
-    else if (factor == 8)
-        scale_bits = 3;
-    else
-        abort();
-
-    emit_byte(0x39);
-    emit_byte(0x04+8*d);
-    emit_byte(5+8*index+0x40*scale_bits);
-    emit_long(offset);
-}
-LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
-
-/* Emit a 32-bit register-to-register XOR (d ^= s): opcode 0x31, then the
-   operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
-{
- emit_byte(0x31);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
-
-/* Emit a 16-bit register-to-register XOR (d ^= s): 0x66 prefix, opcode
-   0x31, then the operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x31);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
-
-/* Emit an 8-bit register-to-register XOR (d ^= s): opcode 0x30, then the
-   operand byte 0xc0+8*s+d. */
-LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
-{
- emit_byte(0x30);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
-
-/* Emit a 32-bit SUB of immediate s from the long at absolute address d.
-   Layout: opcode (0x83 for a byte immediate, 0x81 for a long one),
-   operand byte 0x2d, the 32-bit address, then the immediate. */
-LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
-{
-    const bool short_imm = optimize_imm8 && isbyte(s);
-
-    emit_byte(short_imm ? 0x83 : 0x81);
-    emit_byte(0x2d);
-    emit_long(d);
-    if (short_imm)
-        emit_byte(s);
-    else
-        emit_long(s);
-}
-LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
-
-/* Emit a 32-bit CMP of the long at absolute address d against immediate
-   s. Layout: opcode (0x83 for a byte immediate, 0x81 for a long one),
-   operand byte 0x3d, the 32-bit address, then the immediate. */
-LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-{
-    const bool short_imm = optimize_imm8 && isbyte(s);
-
-    emit_byte(short_imm ? 0x83 : 0x81);
-    emit_byte(0x3d);
-    emit_long(d);
-    if (short_imm)
-        emit_byte(s);
-    else
-        emit_long(s);
-}
-LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-
-/* Emit a 32-bit exchange of registers r1 and r2: opcode 0x87, then the
-   operand byte 0xc0+8*r1+r2. */
-LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-{
- emit_byte(0x87);
- emit_byte(0xc0+8*r1+r2);
-}
-LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-
-/* Emit an 8-bit exchange of registers r1 and r2: opcode 0x86, then the
-   operand byte. Register numbers are masked with 0xf so that the
-   upper-half register indices (e.g. AH_INDEX, defined as 0x10+4+EAX_INDEX)
-   map onto their hardware encodings. */
-LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
-{
-    emit_byte(0x86);
-    emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
-}
-LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
-
-/*************************************************************************
- * FIXME: mem access modes probably wrong *
- *************************************************************************/
-
-/* Emit the single-byte PUSHF instruction (0x9c), which pushes the flags
-   register onto the stack. */
-LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
-{
- emit_byte(0x9c);
-}
-LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
-
-/* Emit the single-byte POPF instruction (0x9d), which pops the top of
-   stack into the flags register. */
-LOWFUNC(WRITE,READ,0,raw_popfl,(void))
-{
- emit_byte(0x9d);
-}
-LENDFUNC(WRITE,READ,0,raw_popfl,(void))
-
-/* Generate floating-point instructions */
-/* Emit an x87 add with a memory operand at absolute address s: bytes
-   0xdc 0x05 followed by the 32-bit address.
-   NOTE(review): 0xdc /0 is presumably the 64-bit (double) form of FADD;
-   confirm against the instruction reference. */
-static inline void x86_fadd_m(MEMR s)
-{
- emit_byte(0xdc);
- emit_byte(0x05);
- emit_long(s);
-}
-
-#endif
-
-/*************************************************************************
- * Unoptimizable stuff --- jump *
- *************************************************************************/
-
-/* Emit an indirect call through register r. With USE_NEW_RTASM the
-   CALLsr macro does the encoding; otherwise the bytes 0xff, 0xd0+r are
-   written directly. */
-static __inline__ void raw_call_r(R4 r)
-{
-#if USE_NEW_RTASM
- CALLsr(r);
-#else
- emit_byte(0xff);
- emit_byte(0xd0+r);
-#endif
-}
-
-/* Emit an indirect call through the memory slot at base + r*m.
-   m is the index scale and must be 1, 2, 4 or 8; in the hand-encoded
-   path any other value aborts. */
-static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
-{
-#if USE_NEW_RTASM
-    CALLsm(base, X86_NOREG, r, m);
-#else
-    int scale_bits;
-
-    if (m == 1)
-        scale_bits = 0;
-    else if (m == 2)
-        scale_bits = 1;
-    else if (m == 4)
-        scale_bits = 2;
-    else if (m == 8)
-        scale_bits = 3;
-    else
-        abort();
-
-    emit_byte(0xff);
-    emit_byte(0x14);
-    emit_byte(0x05+8*r+0x40*scale_bits);
-    emit_long(base);
-#endif
-}
-
-/* Emit an indirect jump through register r. With USE_NEW_RTASM the
-   JMPsr macro does the encoding; otherwise the bytes 0xff, 0xe0+r are
-   written directly. */
-static __inline__ void raw_jmp_r(R4 r)
-{
-#if USE_NEW_RTASM
- JMPsr(r);
-#else
- emit_byte(0xff);
- emit_byte(0xe0+r);
-#endif
-}
-
-/* Emit an indirect jump through the memory slot at base + r*m.
-   m is the index scale and must be 1, 2, 4 or 8; in the hand-encoded
-   path any other value aborts. */
-static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
-{
-#if USE_NEW_RTASM
-    JMPsm(base, X86_NOREG, r, m);
-#else
-    int scale_bits;
-
-    if (m == 1)
-        scale_bits = 0;
-    else if (m == 2)
-        scale_bits = 1;
-    else if (m == 4)
-        scale_bits = 2;
-    else if (m == 8)
-        scale_bits = 3;
-    else
-        abort();
-
-    emit_byte(0xff);
-    emit_byte(0x24);
-    emit_byte(0x05+8*r+0x40*scale_bits);
-    emit_long(base);
-#endif
-}
-
-/* Emit an indirect jump through the memory slot at absolute address
-   base: bytes 0xff 0x25 followed by the 32-bit address. */
-static __inline__ void raw_jmp_m(uae_u32 base)
-{
- emit_byte(0xff);
- emit_byte(0x25);
- emit_long(base);
-}
-
-
-/* Emit a direct relative CALL to address t. In the hand-encoded path the
-   opcode 0xe8 is followed by a 32-bit displacement computed as
-   t - target - 4, with target being the emitter's output pointer.
-   The pointer is converted through uintptr, as raw_jl/raw_jz/raw_jnz do,
-   rather than through a 32-bit cast that would truncate the pointer on
-   64-bit hosts; the low 32 bits handed to emit_long are the same. */
-static __inline__ void raw_call(uae_u32 t)
-{
-#if USE_NEW_RTASM
-    CALLm(t);
-#else
-    emit_byte(0xe8);
-    emit_long(t-(uintptr)target-4);
-#endif
-}
-
-/* Emit a direct relative JMP to address t. In the hand-encoded path the
-   opcode 0xe9 is followed by a 32-bit displacement computed as
-   t - target - 4, with target being the emitter's output pointer.
-   The pointer is converted through uintptr, as raw_jl/raw_jz/raw_jnz do,
-   rather than through a 32-bit cast that would truncate the pointer on
-   64-bit hosts; the low 32 bits handed to emit_long are the same. */
-static __inline__ void raw_jmp(uae_u32 t)
-{
-#if USE_NEW_RTASM
-    JMPm(t);
-#else
-    emit_byte(0xe9);
-    emit_long(t-(uintptr)target-4);
-#endif
-}
-
-/* Emit a long-form conditional jump "less" (0x0f 0x8c) to address t,
-   with a 32-bit displacement of t - target - 4. */
-static __inline__ void raw_jl(uae_u32 t)
-{
- emit_byte(0x0f);
- emit_byte(0x8c);
- emit_long(t-(uintptr)target-4);
-}
-
-/* Emit a long-form conditional jump "zero" (0x0f 0x84) to address t,
-   with a 32-bit displacement of t - target - 4. */
-static __inline__ void raw_jz(uae_u32 t)
-{
- emit_byte(0x0f);
- emit_byte(0x84);
- emit_long(t-(uintptr)target-4);
-}
-
-/* Emit a long-form conditional jump "not zero" (0x0f 0x85) to address t,
-   with a 32-bit displacement of t - target - 4. */
-static __inline__ void raw_jnz(uae_u32 t)
-{
- emit_byte(0x0f);
- emit_byte(0x85);
- emit_long(t-(uintptr)target-4);
-}
-
-/* Emit only the two opcode bytes of a long-form JNZ (0x0f 0x85); no
-   displacement is written here. */
-static __inline__ void raw_jnz_l_oponly(void)
-{
- emit_byte(0x0f);
- emit_byte(0x85);
-}
-
-/* Emit only the two opcode bytes of a long-form conditional jump for
-   condition code cc (0x0f, 0x80+cc); no displacement is written here. */
-static __inline__ void raw_jcc_l_oponly(int cc)
-{
- emit_byte(0x0f);
- emit_byte(0x80+cc);
-}
-
-/* Emit only the opcode byte of a short-form JNZ (0x75); no displacement
-   is written here. */
-static __inline__ void raw_jnz_b_oponly(void)
-{
- emit_byte(0x75);
-}
-
-/* Emit only the opcode byte of a short-form JZ (0x74); no displacement
-   is written here. */
-static __inline__ void raw_jz_b_oponly(void)
-{
- emit_byte(0x74);
-}
-
-/* Emit only the opcode byte of a short-form conditional jump for
-   condition code cc (0x70+cc); no displacement is written here. */
-static __inline__ void raw_jcc_b_oponly(int cc)
-{
- emit_byte(0x70+cc);
-}
-
-/* Emit only the opcode byte of a long-form unconditional JMP (0xe9); no
-   displacement is written here. */
-static __inline__ void raw_jmp_l_oponly(void)
-{
- emit_byte(0xe9);
-}
-
-/* Emit only the opcode byte of a short-form unconditional JMP (0xeb); no
-   displacement is written here. */
-static __inline__ void raw_jmp_b_oponly(void)
-{
- emit_byte(0xeb);
-}
-
-/* Emit a near RET (0xc3). */
-static __inline__ void raw_ret(void)
-{
- emit_byte(0xc3);
-}
-
-/* Emit a one-byte NOP (0x90). */
-static __inline__ void raw_nop(void)
-{
- emit_byte(0x90);
-}
-
-/* Emit exactly nbytes of padding that executes as no-ops.
-   On x86-64 the padding is a series of NOPs, each preceded by up to
-   three 0x66 prefixes, with lengths balanced across the NOPs. On 32-bit
-   builds it is whole 16-byte f32_16 blocks followed by one pattern from
-   f32_patt for the remaining 1..15 bytes. */
-static __inline__ void raw_emit_nop_filler(int nbytes)
-{
- /* Source: GNU Binutils 2.12.90.0.15 */
- /* Various efficient no-op patterns for aligning code labels.
- Note: Don't try to assemble the instructions in the comments.
- 0L and 0w are not legal. */
- static const uae_u8 f32_1[] =
- {0x90}; /* nop */
- static const uae_u8 f32_2[] =
- {0x89,0xf6}; /* movl %esi,%esi */
- static const uae_u8 f32_3[] =
- {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
- static const uae_u8 f32_4[] =
- {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
- static const uae_u8 f32_5[] =
- {0x90, /* nop */
- 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
- static const uae_u8 f32_6[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
- static const uae_u8 f32_7[] =
- {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
- static const uae_u8 f32_8[] =
- {0x90, /* nop */
- 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
- static const uae_u8 f32_9[] =
- {0x89,0xf6, /* movl %esi,%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_10[] =
- {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_11[] =
- {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_12[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
- 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
- static const uae_u8 f32_13[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_14[] =
- {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_15[] =
- {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
- 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
- static const uae_u8 f32_16[] =
- {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
- 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
- /* Indexed by (length - 1) for lengths 1..15; f32_16 is not listed
- because it is only used for full 16-byte blocks below. */
- static const uae_u8 *const f32_patt[] = {
- f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
- f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
- };
- static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
-
-#if defined(__x86_64__)
- /* The recommended way to pad 64bit code is to use NOPs preceded by
- maximally four 0x66 prefixes. Balance the size of nops. */
- if (nbytes == 0)
- return;
-
- int i;
- int nnops = (nbytes + 3) / 4;
- int len = nbytes / nnops;
- int remains = nbytes - nnops * len;
-
- /* The first `remains` NOPs are len+1 bytes long (len prefixes + 0x90),
- the rest are len bytes long; the total is nbytes. */
- for (i = 0; i < remains; i++) {
- emit_block(prefixes, len);
- raw_nop();
- }
- for (; i < nnops; i++) {
- emit_block(prefixes, len - 1);
- raw_nop();
- }
-#else
- int nloops = nbytes / 16;
- while (nloops-- > 0)
- emit_block(f32_16, sizeof(f32_16));
-
- nbytes %= 16;
- if (nbytes)
- emit_block(f32_patt[nbytes - 1], nbytes);
-#endif
-}
-
-
-/*************************************************************************
- * Flag handling, to and fro UAE flag register *
- *************************************************************************/
-
-/* Record that FLAGTMP no longer lives in native register r: it is marked
-   as being in memory with no register attached, and r's hold count
-   drops from 1 to 0. Aborts if r was not holding exactly one value. */
-static __inline__ void raw_flags_evicted(int r)
-{
-    /* FLAGTMP has just been "evicted" to its memory slot */
-    live.state[FLAGTMP].realreg=-1;
-    live.state[FLAGTMP].status=INMEM;
-
-    if (live.nat[r].nholds!=1)
-        abort();    /* r should have held FLAGTMP and nothing else */
-    live.nat[r].nholds=0;
-}
-
-/* LAHF/SETcc variant of "save native flags into FLAGTMP".
-   The flags are written straight to FLAGTMP's memory slot: byte 0 gets
-   the result of SETcc with condition 0 (the V flag, per the comments
-   below), byte 1 gets AH as loaded by LAHF. r is then reported as
-   evicted via raw_flags_evicted(). */
-#define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */
-static __inline__ void raw_flags_to_reg_FLAGREG(int r)
-{
- raw_lahf(0); /* Most flags in AH */
- //raw_setcc(r,0); /* V flag in AL */
- raw_setcc_m((uintptr)live.state[FLAGTMP].mem,0);
-
-#if 1 /* Let's avoid those nasty partial register stalls */
- //raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,r);
- raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,AH_INDEX);
- raw_flags_evicted(r);
-#endif
-}
-
-/* LAHF/SETcc variant of "restore native flags from register r".
-   Emits an 8-bit compare of r with -127 followed by SAHF.
-   NOTE(review): presumably the low byte of r holds 0 or 1 (the saved V
-   flag), so that 1 - (-127) overflows and sets OF while 0 - (-127)
-   does not; confirm against the save path. */
-#define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */
-static __inline__ void raw_reg_to_flags_FLAGREG(int r)
-{
- raw_cmp_b_ri(r,-127); /* set V */
- raw_sahf(0);
-}
-
-/* LAHF/SAHF variant of "set the Z flag from a register".
-   Emitted sequence: copy s to tmp, LAHF, clear bit 14 of s (mask
-   0xffffbfff), reduce tmp to its bit 14 and invert that bit, OR tmp
-   into s, SAHF. Bit 14 of the register is bit 6 of AH, i.e. the ZF
-   position in the LAHF/SAHF layout.
-   NOTE(review): the resulting Z is the inverse of bit 14 of the value s
-   held on entry; confirm that callers supply s in that form. */
-#define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */
-static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
-{
- raw_mov_l_rr(tmp,s);
- raw_lahf(s); /* flags into ah */
- raw_and_l_ri(s,0xffffbfff);
- raw_and_l_ri(tmp,0x00004000);
- raw_xor_l_ri(tmp,0x00004000);
- raw_or_l(s,tmp);
- raw_sahf(s);
-}
-
-/* The LAHF/SETcc variant needs no run-time initialization. */
-static __inline__ void raw_flags_init_FLAGREG(void) { }
-
-/* PUSHF/POP variant of "save native flags into FLAGTMP".
-   Emits PUSHF, pops the flags into r, stores r as a long into FLAGTMP's
-   memory slot, then reports r as evicted via raw_flags_evicted(). */
-#define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */
-static __inline__ void raw_flags_to_reg_FLAGSTK(int r)
-{
- raw_pushfl();
- raw_pop_l_r(r);
- raw_mov_l_mr((uintptr)live.state[FLAGTMP].mem,r);
- raw_flags_evicted(r);
-}
-
-/* PUSH/POPF variant of "restore native flags from register r":
-   emits a push of r followed by POPF. */
-#define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */
-static __inline__ void raw_reg_to_flags_FLAGSTK(int r)
-{
- raw_push_l_r(r);
- raw_popfl();
-}
-
-/* PUSHF/POPF variant of "set the Z flag from a register".
-   Emitted sequence: copy s to tmp, PUSHF and pop the flags into s,
-   clear bit 6 of s (mask 0xffffffbf, the ZF position in EFLAGS), reduce
-   tmp to its bit 6 and invert that bit, OR tmp into s, then push s and
-   POPF.
-   NOTE(review): the resulting Z is the inverse of bit 6 of the value s
-   held on entry; confirm that callers supply s in that form. */
-#define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */
-static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp)
-{
- raw_mov_l_rr(tmp,s);
- raw_pushfl();
- raw_pop_l_r(s);
- raw_and_l_ri(s,0xffffffbf);
- raw_and_l_ri(tmp,0x00000040);
- raw_xor_l_ri(tmp,0x00000040);
- raw_or_l(s,tmp);
- raw_push_l_r(s);
- raw_popfl();
-}
-
-/* The PUSHF/POPF variant needs no run-time initialization. */
-static __inline__ void raw_flags_init_FLAGSTK(void) { }
-
-#if defined(__x86_64__)
-/* Try to use the LAHF/SETO method on x86_64 since it is faster.
- This can't be the default because some older CPUs don't support
- LAHF/SAHF in long mode. */
-/* Register constraint for raw_flags_to_reg_FLAGGEN; a run-time variable
-   (not a macro) because raw_flags_init_FLAGGEN() sets it according to
-   have_lahf_lm. */
-static int FLAG_NREG1_FLAGGEN = 0;
-/* Run-time selected "save native flags into FLAGTMP".
-   With have_lahf_lm: r must be 0 (asserted). SETcc(0) puts the V flag
-   in the low byte, the LEA scales it by 8 so it lands on bit 3, and
-   that byte is stored at FLAGTMP+1 (bit 11 of the long, the EFLAGS OF
-   position). LAHF then loads the remaining flags into AH, which is
-   stored at FLAGTMP+0. Without have_lahf_lm the PUSHF-based
-   raw_flags_to_reg_FLAGSTK() is used. */
-static __inline__ void raw_flags_to_reg_FLAGGEN(int r)
-{
- if (have_lahf_lm) {
- // NOTE: the interpreter uses the normal EFLAGS layout
- // pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
- // sahf/lahf CF(8) ZF(14) SF(15) OF( 0)
- assert(r == 0);
- raw_setcc(r,0); /* V flag in AL */
- raw_lea_l_r_scaled(0,0,8); /* move it to its EFLAGS location */
- raw_mov_b_mr(((uintptr)live.state[FLAGTMP].mem)+1,0);
- raw_lahf(0); /* most flags in AH */
- raw_mov_b_mr((uintptr)live.state[FLAGTMP].mem,AH_INDEX);
- raw_flags_evicted(r);
- }
- else
- raw_flags_to_reg_FLAGSTK(r);
-}
-
-/* Register constraint for raw_reg_to_flags_FLAGGEN; set at run time by
-   raw_flags_init_FLAGGEN(). */
-static int FLAG_NREG2_FLAGGEN = 0;
-/* Run-time selected "restore native flags from register r".
-   With have_lahf_lm: swap the two low bytes of register 0 (AL <-> AH),
-   compare the low byte of r with -120 to set V, then SAHF. Otherwise
-   fall back to raw_reg_to_flags_FLAGSTK().
-   NOTE(review): the xchg always targets register 0 while the compare
-   uses r; presumably r is always 0 on this path. Confirm. */
-static __inline__ void raw_reg_to_flags_FLAGGEN(int r)
-{
- if (have_lahf_lm) {
- raw_xchg_b_rr(0,AH_INDEX);
- raw_cmp_b_ri(r,-120); /* set V */
- raw_sahf(0);
- }
- else
- raw_reg_to_flags_FLAGSTK(r);
-}
-
-/* Register constraint for raw_flags_set_zero_FLAGGEN (initially 0). */
-static int FLAG_NREG3_FLAGGEN = 0;
-/* Run-time selected "set the Z flag from a register": dispatches to the
-   FLAGREG implementation when have_lahf_lm is set, else to FLAGSTK. */
-static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp)
-{
- if (have_lahf_lm)
- raw_flags_set_zero_FLAGREG(s, tmp);
- else
- raw_flags_set_zero_FLAGSTK(s, tmp);
-}
-
-/* Select the register constraints of the generic (run-time chosen) flag
-   helpers: the FLAGREG values when LAHF/SAHF is usable in long mode
-   (have_lahf_lm), the FLAGSTK values otherwise. Each of the three
-   FLAG_NREGn_FLAGGEN variables receives its own counterpart; the third
-   assignment targets FLAG_NREG3_FLAGGEN, not FLAG_NREG1_FLAGGEN again. */
-static __inline__ void raw_flags_init_FLAGGEN(void)
-{
-    if (have_lahf_lm) {
-        FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
-        FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
-        FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGREG;
-    }
-    else {
-        FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
-        FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
-        FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGSTK;
-    }
-}
-#endif
-
-/* Pick the flag-handling implementation at compile time: FLAGREG when
-   SAHF_SETO_PROFITABLE is defined, else FLAGGEN on x86-64, else
-   FLAGSTK. */
-#ifdef SAHF_SETO_PROFITABLE
-#define FLAG_SUFFIX FLAGREG
-#elif defined __x86_64__
-#define FLAG_SUFFIX FLAGGEN
-#else
-#define FLAG_SUFFIX FLAGSTK
-#endif
-
-/* Two-level paste so that FLAG_SUFFIX is expanded before ## is applied:
-   FLAG_GLUE(x) yields x_<suffix>. */
-#define FLAG_GLUE_2(x, y) x ## _ ## y
-#define FLAG_GLUE_1(x, y) FLAG_GLUE_2(x, y)
-#define FLAG_GLUE(x) FLAG_GLUE_1(x, FLAG_SUFFIX)
-
-/* Generic names mapped onto the selected implementation. */
-#define raw_flags_init FLAG_GLUE(raw_flags_init)
-#define FLAG_NREG1 FLAG_GLUE(FLAG_NREG1)
-#define raw_flags_to_reg FLAG_GLUE(raw_flags_to_reg)
-#define FLAG_NREG2 FLAG_GLUE(FLAG_NREG2)
-#define raw_reg_to_flags FLAG_GLUE(raw_reg_to_flags)
-#define FLAG_NREG3 FLAG_GLUE(FLAG_NREG3)
-#define raw_flags_set_zero FLAG_GLUE(raw_flags_set_zero)
-
-/* Load the saved flag register r from its memory slot into native
-   register target using one 32-bit move. The disabled alternative
-   would load two separate bytes instead. */
-/* Apparently, there are enough instructions between flag store and
- flag reload to avoid the partial memory stall */
-static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
-{
-#if 1
- raw_mov_l_rm(target,(uintptr)live.state[r].mem);
-#else
- raw_mov_b_rm(target,(uintptr)live.state[r].mem);
- raw_mov_b_rm(target+4,((uintptr)live.state[r].mem)+1);
-#endif
-}
-
-/* FLAGX is stored as a single byte. Load it from r's memory slot into
-   native register target with the narrowest move the register supports:
-   byte if it can be byte-addressed, else word, else long. */
-static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
-{
-    if (live.nat[target].canbyte) {
-        raw_mov_b_rm(target,(uintptr)live.state[r].mem);
-        return;
-    }
-    if (live.nat[target].canword) {
-        raw_mov_w_rm(target,(uintptr)live.state[r].mem);
-        return;
-    }
-    raw_mov_l_rm(target,(uintptr)live.state[r].mem);
-}
-
-/* Emit code lowering the native stack pointer by off bytes; nothing is
-   emitted when off is zero. */
-static __inline__ void raw_dec_sp(int off)
-{
-    if (off == 0)
-        return;
-    raw_sub_l_ri(ESP_INDEX,off);
-}
-
-/* Emit code raising the native stack pointer by off bytes; nothing is
-   emitted when off is zero. */
-static __inline__ void raw_inc_sp(int off)
-{
-    if (off == 0)
-        return;
-    raw_add_l_ri(ESP_INDEX,off);
-}
-
-/*************************************************************************
- * Handling mistaken direct memory access *
- *************************************************************************/
-
-// gb-- I don't need that part for JIT Basilisk II
-#if defined(NATMEM_OFFSET) && 0
-#include <asm/sigcontext.h>
-#include <signal.h>
-
-/* Direction of the faulting access as decoded by vec(). */
-#define SIG_READ 1
-#define SIG_WRITE 2
-
-/* in_handler: set to 1 by vec() after it installs a patch; the patch
-   code generated into veccode stores 0 back. */
-static int in_handler=0;
-/* Scratch buffer that receives the fix-up code generated by vec(). */
-static uae_u8 veccode[256];
-
-/* SIGSEGV handler for faults caused by direct memory access from
-   compiled code (this whole section is compiled out by "&& 0").
-
-   It decodes the instruction at the faulting EIP, accepting only a few
-   MOV forms (opcodes 0x8a/0x88/0x8b/0x89 with an 8- or 32-bit
-   displacement, optionally preceded by a 0x66 prefix), and determines
-   the register, direction, access size and instruction length. The
-   access is then performed through get_xxx()/put_xxx() on
-   addr - NATMEM_OFFSET. With comp_oldsegv the saved EIP is simply
-   advanced past the instruction; otherwise the faulting instruction is
-   overwritten with a jump to code generated into veccode, which
-   restores the original bytes and jumps back. Finally the block
-   containing the faulting instruction is looked up in the active and
-   dormant lists and invalidated.
-
-   If the instruction is not recognized, diagnostics are logged and the
-   default SIGSEGV action is reinstated. */
-static void vec(int x, struct sigcontext sc)
-{
- uae_u8* i=(uae_u8*)sc.eip;
- uae_u32 addr=sc.cr2;
- int r=-1;
- int size=4;
- int dir=-1;
- int len=0;
- int j;
-
- write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
- if (!canbang)
- write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
- if (in_handler)
- write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
-
- /* Only decode faults that lie inside the compiled-code buffer. */
- if (canbang && i>=compiled_code && i<=current_compile_p) {
- if (*i==0x66) {
- i++;
- size=2;
- len++;
- }
-
- /* (i[1]&0xc0)==0x80: 32-bit displacement form, 6 bytes;
- (i[1]&0xc0)==0x40: 8-bit displacement form, 3 bytes. */
- switch(i[0]) {
- case 0x8a:
- if ((i[1]&0xc0)==0x80) {
- r=(i[1]>>3)&7;
- dir=SIG_READ;
- size=1;
- len+=6;
- break;
- }
- break;
- case 0x88:
- if ((i[1]&0xc0)==0x80) {
- r=(i[1]>>3)&7;
- dir=SIG_WRITE;
- size=1;
- len+=6;
- break;
- }
- break;
- case 0x8b:
- if ((i[1]&0xc0)==0x80) {
- r=(i[1]>>3)&7;
- dir=SIG_READ;
- len+=6;
- break;
- }
- if ((i[1]&0xc0)==0x40) {
- r=(i[1]>>3)&7;
- dir=SIG_READ;
- len+=3;
- break;
- }
- break;
- case 0x89:
- if ((i[1]&0xc0)==0x80) {
- r=(i[1]>>3)&7;
- dir=SIG_WRITE;
- len+=6;
- break;
- }
- if ((i[1]&0xc0)==0x40) {
- r=(i[1]>>3)&7;
- dir=SIG_WRITE;
- len+=3;
- break;
- }
- break;
- }
- }
-
- if (r!=-1) {
- void* pr=NULL;
- write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
-
- /* Map the register number to its slot in the saved context. For
- byte accesses, numbers 4..7 select the second byte of
- eax/ecx/edx/ebx; register 4 with a larger size has no mapping. */
- switch(r) {
- case 0: pr=&(sc.eax); break;
- case 1: pr=&(sc.ecx); break;
- case 2: pr=&(sc.edx); break;
- case 3: pr=&(sc.ebx); break;
- case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
- case 5: pr=(size>1)?
- (void*)(&(sc.ebp)):
- (void*)(((uae_u8*)&(sc.ecx))+1); break;
- case 6: pr=(size>1)?
- (void*)(&(sc.esi)):
- (void*)(((uae_u8*)&(sc.edx))+1); break;
- case 7: pr=(size>1)?
- (void*)(&(sc.edi)):
- (void*)(((uae_u8*)&(sc.ebx))+1); break;
- default: abort();
- }
- if (pr) {
- blockinfo* bi;
-
- if (currprefs.comp_oldsegv) {
- /* Old method: emulate the access here and skip the instruction. */
- addr-=NATMEM_OFFSET;
-
- if ((addr>=0x10000000 && addr<0x40000000) ||
- (addr>=0x50000000)) {
- write_log("Suspicious address in %x SEGV handler.\n",addr);
- }
- if (dir==SIG_READ) {
- switch(size) {
- case 1: *((uae_u8*)pr)=get_byte(addr); break;
- case 2: *((uae_u16*)pr)=get_word(addr); break;
- case 4: *((uae_u32*)pr)=get_long(addr); break;
- default: abort();
- }
- }
- else { /* write */
- switch(size) {
- case 1: put_byte(addr,*((uae_u8*)pr)); break;
- case 2: put_word(addr,*((uae_u16*)pr)); break;
- case 4: put_long(addr,*((uae_u32*)pr)); break;
- default: abort();
- }
- }
- write_log("Handled one access!\n");
- fflush(stdout);
- segvcount++;
- sc.eip+=len;
- }
- else {
- /* New method: patch the faulting instruction with a jump to
- veccode and generate the fix-up code there. */
- void* tmp=target;
- int i;
- uae_u8 vecbuf[5];
-
- addr-=NATMEM_OFFSET;
-
- if ((addr>=0x10000000 && addr<0x40000000) ||
- (addr>=0x50000000)) {
- write_log("Suspicious address in %x SEGV handler.\n",addr);
- }
-
- /* Save the 5 bytes about to be overwritten by the jump. */
- target=(uae_u8*)sc.eip;
- for (i=0;i<5;i++)
- vecbuf[i]=target[i];
- emit_byte(0xe9);
- emit_long((uintptr)veccode-(uintptr)target-4);
- write_log("Create jump to %p\n",veccode);
-
- write_log("Handled one access!\n");
- fflush(stdout);
- segvcount++;
-
- target=veccode;
-
- if (dir==SIG_READ) {
- /* NOTE(review): the word and long cases also call get_byte(),
- whereas the comp_oldsegv path above uses get_word() and
- get_long(). This looks like a copy/paste slip; confirm
- before re-enabling this code. */
- switch(size) {
- case 1: raw_mov_b_ri(r,get_byte(addr)); break;
- case 2: raw_mov_w_ri(r,get_byte(addr)); break;
- case 4: raw_mov_l_ri(r,get_byte(addr)); break;
- default: abort();
- }
- }
- else { /* write */
- switch(size) {
- case 1: put_byte(addr,*((uae_u8*)pr)); break;
- case 2: put_word(addr,*((uae_u16*)pr)); break;
- case 4: put_long(addr,*((uae_u32*)pr)); break;
- default: abort();
- }
- }
- /* Generated code: restore the original bytes, clear in_handler,
- then jump to the instruction following the faulting one. */
- for (i=0;i<5;i++)
- raw_mov_b_mi(sc.eip+i,vecbuf[i]);
- raw_mov_l_mi((uintptr)&in_handler,0);
- emit_byte(0xe9);
- emit_long(sc.eip+len-(uintptr)target-4);
- in_handler=1;
- target=tmp;
- }
- /* Find the block whose code range contains the faulting
- instruction and invalidate it. */
- bi=active;
- while (bi) {
- if (bi->handler &&
- (uae_u8*)bi->direct_handler<=i &&
- (uae_u8*)bi->nexthandler>i) {
- write_log("deleted trigger (%p<%p<%p) %p\n",
- bi->handler,
- i,
- bi->nexthandler,
- bi->pc_p);
- invalidate_block(bi);
- raise_in_cl_list(bi);
- set_special(0);
- return;
- }
- bi=bi->next;
- }
- /* Not found in the active list. Might be a rom routine that
- is in the dormant list */
- bi=dormant;
- while (bi) {
- if (bi->handler &&
- (uae_u8*)bi->direct_handler<=i &&
- (uae_u8*)bi->nexthandler>i) {
- write_log("deleted trigger (%p<%p<%p) %p\n",
- bi->handler,
- i,
- bi->nexthandler,
- bi->pc_p);
- invalidate_block(bi);
- raise_in_cl_list(bi);
- set_special(0);
- return;
- }
- bi=bi->next;
- }
- write_log("Huh? Could not find trigger!\n");
- return;
- }
- }
- /* Unrecognized instruction, or no usable register slot: dump the
- first instruction bytes and give up. */
- write_log("Can't handle access!\n");
- for (j=0;j<10;j++) {
- write_log("instruction byte %2d is %02x\n",j,i[j]);
- }
- write_log("Please send the above info (starting at \"fault address\") to\n"
- "bmeyer@csse.monash.edu.au\n"
- "This shouldn't happen ;-)\n");
- fflush(stdout);
- signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
-}
-#endif
-
-
-/*************************************************************************
- * Checking for CPU features *
- *************************************************************************/
-
-/* Host CPU description, filled in by raw_init_cpu(). */
-struct cpuinfo_x86 {
- uae_u8 x86; // CPU family
- uae_u8 x86_vendor; // CPU vendor
- uae_u8 x86_processor; // CPU canonical processor type
- uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
- uae_u32 x86_hwcap; // Feature flags: the EDX output of CPUID level 1
- uae_u8 x86_model; // CPU model number
- uae_u8 x86_mask; // Low four bits of the CPUID level 1 EAX output
- int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
- char x86_vendor_id[16]; // NUL-terminated 12-character vendor string
-};
-struct cpuinfo_x86 cpuinfo;
-
-/* CPU vendor codes stored in cpuinfo_x86::x86_vendor; assigned by
-   x86_get_cpu_vendor() from the CPUID vendor string. */
-enum {
- X86_VENDOR_INTEL = 0,
- X86_VENDOR_CYRIX = 1,
- X86_VENDOR_AMD = 2,
- X86_VENDOR_UMC = 3,
- X86_VENDOR_NEXGEN = 4,
- X86_VENDOR_CENTAUR = 5,
- X86_VENDOR_RISE = 6,
- X86_VENDOR_TRANSMETA = 7,
- X86_VENDOR_NSC = 8,
- X86_VENDOR_UNKNOWN = 0xff
-};
-
-/* Canonical processor types stored in cpuinfo_x86::x86_processor. The
-   values index x86_processor_string_table[] and x86_alignments[], so
-   the order here must match both tables. X86_PROCESSOR_max is the
-   element count and also serves as the "not yet determined" value. */
-enum {
- X86_PROCESSOR_I386, /* 80386 */
- X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
- X86_PROCESSOR_PENTIUM,
- X86_PROCESSOR_PENTIUMPRO,
- X86_PROCESSOR_K6,
- X86_PROCESSOR_ATHLON,
- X86_PROCESSOR_PENTIUM4,
- X86_PROCESSOR_X86_64,
- X86_PROCESSOR_max
-};
-
-/* Printable names, indexed by the X86_PROCESSOR_* values. */
-static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
- "80386",
- "80486",
- "Pentium",
- "PentiumPro",
- "K6",
- "Athlon",
- "Pentium4",
- "x86-64"
-};
-
-/* Per-processor code alignment settings, indexed by the X86_PROCESSOR_*
-   values. raw_init_cpu() reads align_loop and align_jump from here
-   when tune_alignment is set. */
-static struct ptt {
- const int align_loop;
- const int align_loop_max_skip;
- const int align_jump;
- const int align_jump_max_skip;
- const int align_func;
-}
-x86_alignments[X86_PROCESSOR_max] = {
- { 4, 3, 4, 3, 4 }, /* X86_PROCESSOR_I386 */
- { 16, 15, 16, 15, 16 }, /* X86_PROCESSOR_I486 */
- { 16, 7, 16, 7, 16 }, /* X86_PROCESSOR_PENTIUM */
- { 16, 15, 16, 7, 16 }, /* X86_PROCESSOR_PENTIUMPRO */
- { 32, 7, 32, 7, 32 }, /* X86_PROCESSOR_K6 */
- { 16, 7, 16, 7, 16 }, /* X86_PROCESSOR_ATHLON */
- { 0, 0, 0, 0, 0 }, /* X86_PROCESSOR_PENTIUM4 */
- { 16, 7, 16, 7, 16 } /* X86_PROCESSOR_X86_64 */
-};
-
-/* Translate the CPUID vendor string in c->x86_vendor_id into one of the
-   X86_VENDOR_* codes and store it in c->x86_vendor. Strings that match
-   no known vendor yield X86_VENDOR_UNKNOWN. */
-static void
-x86_get_cpu_vendor(struct cpuinfo_x86 *c)
-{
-    static const struct {
-        const char *id;
-        int vendor;
-    } known_vendors[] = {
-        { "GenuineIntel", X86_VENDOR_INTEL },
-        { "AuthenticAMD", X86_VENDOR_AMD },
-        { "CyrixInstead", X86_VENDOR_CYRIX },
-        { "Geode by NSC", X86_VENDOR_NSC },
-        { "UMC UMC UMC ", X86_VENDOR_UMC },
-        { "CentaurHauls", X86_VENDOR_CENTAUR },
-        { "NexGenDriven", X86_VENDOR_NEXGEN },
-        { "RiseRiseRise", X86_VENDOR_RISE },
-        { "GenuineTMx86", X86_VENDOR_TRANSMETA },
-        { "TransmetaCPU", X86_VENDOR_TRANSMETA }
-    };
-    const char *name = c->x86_vendor_id;
-    int found = X86_VENDOR_UNKNOWN;
-
-    for (unsigned k = 0; k < sizeof(known_vendors) / sizeof(known_vendors[0]); k++) {
-        if (strcmp(name, known_vendors[k].id) == 0) {
-            found = known_vendors[k].vendor;
-            break;
-        }
-    }
-    c->x86_vendor = found;
-}
-
-/* Execute the CPUID instruction for level op and return its outputs.
-   A small stub is generated into a freshly acquired, executable 4096
-   byte region: it saves eax/ecx/edx/ebx, loads op into eax, runs CPUID,
-   stores the four result registers into static scratch variables,
-   restores the saved registers and returns. The stub is called once,
-   the results are copied to every non-NULL output pointer, and the
-   region is released. The code emitter's target pointer is saved and
-   restored around the generation. Aborts if the region cannot be
-   acquired. */
-static void
-cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
-{
- const int CPUID_SPACE = 4096;
- uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE);
- if (cpuid_space == VM_MAP_FAILED)
- abort();
- vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
-
- /* Static so that the generated stub can address them absolutely. */
- static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx;
- uae_u8* tmp=get_target();
-
- s_op = op;
- set_target(cpuid_space);
- raw_push_l_r(0); /* eax */
- raw_push_l_r(1); /* ecx */
- raw_push_l_r(2); /* edx */
- raw_push_l_r(3); /* ebx */
- raw_mov_l_rm(0,(uintptr)&s_op);
- raw_cpuid(0);
- raw_mov_l_mr((uintptr)&s_eax,0);
- raw_mov_l_mr((uintptr)&s_ebx,3);
- raw_mov_l_mr((uintptr)&s_ecx,1);
- raw_mov_l_mr((uintptr)&s_edx,2);
- raw_pop_l_r(3);
- raw_pop_l_r(2);
- raw_pop_l_r(1);
- raw_pop_l_r(0);
- raw_ret();
- set_target(tmp);
-
- ((cpuop_func*)cpuid_space)(0);
- if (eax != NULL) *eax = s_eax;
- if (ebx != NULL) *ebx = s_ebx;
- if (ecx != NULL) *ecx = s_ecx;
- if (edx != NULL) *edx = s_edx;
-
- vm_release(cpuid_space, CPUID_SPACE);
-}
-
-/* Detect the host CPU and configure the code generator accordingly.
-   Fills the global cpuinfo (vendor, family/model/mask, brand id,
-   feature flags, canonical processor type), sets have_lahf_lm,
-   have_cmov and have_rat_stall, optionally sets align_loops and
-   align_jumps from x86_alignments[], logs a summary line and finally
-   calls raw_flags_init(). On x86-64 builds it aborts if CMOV is not
-   reported. */
-static void
-raw_init_cpu(void)
-{
- struct cpuinfo_x86 *c = &cpuinfo;
-
- /* Defaults */
- c->x86_processor = X86_PROCESSOR_max;
- c->x86_vendor = X86_VENDOR_UNKNOWN;
- c->cpuid_level = -1; /* CPUID not detected */
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
- c->x86_vendor_id[0] = '\0'; /* Unset */
- c->x86_hwcap = 0;
-
- /* Get vendor name */
- /* The 12 vendor characters are assembled from the ebx, edx and ecx
- outputs in that order, hence string offsets 0, 8 and 4 for the
- ebx, ecx and edx arguments. */
- c->x86_vendor_id[12] = '\0';
- cpuid(0x00000000,
- (uae_u32 *)&c->cpuid_level,
- (uae_u32 *)&c->x86_vendor_id[0],
- (uae_u32 *)&c->x86_vendor_id[8],
- (uae_u32 *)&c->x86_vendor_id[4]);
- x86_get_cpu_vendor(c);
-
- /* Intel-defined flags: level 0x00000001 */
- c->x86_brand_id = 0;
- if ( c->cpuid_level >= 0x00000001 ) {
- uae_u32 tfms, brand_id;
- cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
- c->x86 = (tfms >> 8) & 15;
- if (c->x86 == 0xf)
- c->x86 += (tfms >> 20) & 0xff; /* extended family */
- c->x86_model = (tfms >> 4) & 15;
- /* NOTE(review): the extended model bits are merged in only when the
- base model is 0xf; confirm this against the vendors' CPUID
- documentation, which may key this on the family instead. */
- if (c->x86_model == 0xf)
- c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */
- c->x86_brand_id = brand_id & 0xff;
- c->x86_mask = tfms & 15;
- } else {
- /* Have CPUID level 0 only - unheard of */
- c->x86 = 4;
- }
-
- /* AMD-defined flags: level 0x80000001 */
- uae_u32 xlvl;
- cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
- if ( (xlvl & 0xffff0000) == 0x80000000 ) {
- if ( xlvl >= 0x80000001 ) {
- uae_u32 features, extra_features;
- /* features = edx output, extra_features = ecx output */
- cpuid(0x80000001, NULL, NULL, &extra_features, &features);
- if (features & (1 << 29)) {
- /* Assume x86-64 if long mode is supported */
- c->x86_processor = X86_PROCESSOR_X86_64;
- }
- if (extra_features & (1 << 0))
- have_lahf_lm = true;
- }
- }
-
- /* Canonicalize processor ID */
- /* NOTE(review): families 3..6 assign x86_processor unconditionally and
- so replace an X86_PROCESSOR_X86_64 chosen above; only family 15
- keeps it. Confirm that this is intended. */
- switch (c->x86) {
- case 3:
- c->x86_processor = X86_PROCESSOR_I386;
- break;
- case 4:
- c->x86_processor = X86_PROCESSOR_I486;
- break;
- case 5:
- if (c->x86_vendor == X86_VENDOR_AMD)
- c->x86_processor = X86_PROCESSOR_K6;
- else
- c->x86_processor = X86_PROCESSOR_PENTIUM;
- break;
- case 6:
- if (c->x86_vendor == X86_VENDOR_AMD)
- c->x86_processor = X86_PROCESSOR_ATHLON;
- else
- c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
- break;
- case 15:
- if (c->x86_processor == X86_PROCESSOR_max) {
- switch (c->x86_vendor) {
- case X86_VENDOR_INTEL:
- c->x86_processor = X86_PROCESSOR_PENTIUM4;
- break;
- case X86_VENDOR_AMD:
- /* Assume a 32-bit Athlon processor if not in long mode */
- c->x86_processor = X86_PROCESSOR_ATHLON;
- break;
- }
- }
- break;
- }
- /* Still undetermined: report what was found and fall back to i386. */
- if (c->x86_processor == X86_PROCESSOR_max) {
- c->x86_processor = X86_PROCESSOR_I386;
- fprintf(stderr, "Error: unknown processor type, assuming i386\n");
- fprintf(stderr, " Family : %d\n", c->x86);
- fprintf(stderr, " Model : %d\n", c->x86_model);
- fprintf(stderr, " Mask : %d\n", c->x86_mask);
- fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
- if (c->x86_brand_id)
- fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
- }
-
- /* Have CMOV support? */
- have_cmov = c->x86_hwcap & (1 << 15);
-#if defined(__x86_64__)
- if (!have_cmov) {
- write_log("x86-64 implementations are bound to have CMOV!\n");
- abort();
- }
-#endif
-
- /* Can the host CPU suffer from partial register stalls? */
- have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
-#if 1
- /* It appears that partial register writes are a bad idea even on
- AMD K7 cores, even though they are not supposed to have the
- dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
- if (c->x86_processor == X86_PROCESSOR_ATHLON)
- have_rat_stall = true;
-#endif
-
- /* Alignments */
- if (tune_alignment) {
- align_loops = x86_alignments[c->x86_processor].align_loop;
- align_jumps = x86_alignments[c->x86_processor].align_jump;
- }
-
- /* The first %s receives the vendor string, the bracketed one the
- canonical processor name. */
- write_log("Max CPUID level=%d Processor is %s [%s]\n",
- c->cpuid_level, c->x86_vendor_id,
- x86_processor_string_table[c->x86_processor]);
-
- raw_flags_init();
-}
-
-/* Probe how the host CPU's BSF instruction treats the flags.
-   For every combination of initial ZF, CF, OF and SF, and for the
-   operand values -1, 0 and 1, the flags are loaded with push/popf, BSF
-   is executed and the flags are read back with pushf/pop. The run
-   counts as a mismatch if ZF does not equal (value == 0) or if SF, OF
-   or CF differ from their initial values.
-   Returns true when no mismatch was seen; otherwise logs a message and
-   returns false. */
-static bool target_check_bsf(void)
-{
- bool mismatch = false;
- for (int g_ZF = 0; g_ZF <= 1; g_ZF++) {
- for (int g_CF = 0; g_CF <= 1; g_CF++) {
- for (int g_OF = 0; g_OF <= 1; g_OF++) {
- for (int g_SF = 0; g_SF <= 1; g_SF++) {
- for (int value = -1; value <= 1; value++) {
- /* Place each flag at its EFLAGS bit position. */
- unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
- unsigned long tmp = value;
- __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
- : "+r" (flags), "+r" (tmp) : : "cc");
- int OF = (flags >> 11) & 1;
- int SF = (flags >> 7) & 1;
- int ZF = (flags >> 6) & 1;
- int CF = flags & 1;
- /* tmp is reused as the expected ZF value. */
- tmp = (value == 0);
- if (ZF != tmp || SF != g_SF || OF != g_OF || CF != g_CF)
- mismatch = true;
- }
- }}}}
- if (mismatch)
- write_log("Target CPU defines all flags on BSF instruction\n");
- return !mismatch;
-}
-
-
-/*************************************************************************
- * FPU stuff *
- *************************************************************************/
-
-
-/* Reset the FPU stack bookkeeping: the stack is marked empty (tos = -1)
-   and every FP register gets the stack position -2. */
-static __inline__ void raw_fp_init(void)
-{
-    int reg = 0;
-
-    live.tos=-1; /* Stack is empty */
-    while (reg < N_FREGS) {
-        live.spos[reg]=-2;
-        reg++;
-    }
-}
-
-/* Emit code that discards every entry of the tracked FPU stack, then
-   reset the bookkeeping with raw_fp_init(). Entries are dropped two at
-   a time with the byte pair 0xde 0xd9 while at least two remain, and
-   one at a time with 0xdd 0xd8 after that. */
-static __inline__ void raw_fp_cleanup_drop(void)
-{
-#if 0
- /* using FINIT instead of popping all the entries.
- Seems to have side effects --- there is display corruption in
- Quake when this is used */
- if (live.tos>1) {
- emit_byte(0x9b);
- emit_byte(0xdb);
- emit_byte(0xe3);
- live.tos=-1;
- }
-#endif
- /* Two entries per iteration. */
- while (live.tos>=1) {
- emit_byte(0xde);
- emit_byte(0xd9);
- live.tos-=2;
- }
- /* At most one entry left. */
- while (live.tos>=0) {
- emit_byte(0xdd);
- emit_byte(0xd8);
- live.tos--;
- }
- raw_fp_init();
-}
-
-/* Make FP register r the top of the modelled x87 stack.
-   - If r is not on the stack yet, a '1' is pushed purely to grow the stack
-     and the new top slot is assigned to r.
-   - If r is already on top, nothing is emitted.
-   - Otherwise r is exchanged with the current top and the bookkeeping of
-     both registers is swapped. */
-static __inline__ void make_tos(int r)
-{
-    const int slot = live.spos[r];
-
-    if (slot < 0) {
-        /* Register not yet on stack: push '1', just to grow it */
-        emit_byte(0xd9);
-        emit_byte(0xe8);
-        live.tos++;
-        live.spos[r] = live.tos;
-        live.onstack[live.tos] = r;
-    }
-    else if (slot != live.tos) {
-        const int former_top = live.onstack[live.tos];
-
-        /* exchange it with top of stack */
-        emit_byte(0xd9);
-        emit_byte(0xc8 + live.tos - slot);
-
-        live.onstack[live.tos] = r;
-        live.spos[r] = live.tos;
-        live.onstack[slot] = former_top;
-        live.spos[former_top] = slot;
-    }
-}
-
-/* Arrange the modelled stack so that r ends up on top and r2 directly
-   below it.  r2 is first brought to the top; if r is already on the stack,
-   the top two entries are exchanged (moving r2 into the second position)
-   before r is brought to the top.  If r is not on the stack, make_tos(r)
-   simply extends the stack above r2. */
-static __inline__ void make_tos2(int r, int r2)
-{
-    make_tos(r2);   /* the reg meant for position 2 goes on top first */
-
-    if (live.spos[r] >= 0) {
-        /* r is on the stack: swap the top two so r2 sits in position 2 */
-        emit_byte(0xd9);
-        emit_byte(0xc9);
-
-        const int below = live.onstack[live.tos - 1];
-        live.onstack[live.tos] = below;
-        live.spos[below] = live.tos;
-        live.onstack[live.tos - 1] = r2;
-        live.spos[r2] = live.tos - 1;
-    }
-
-    make_tos(r);    /* and r into position 1 (extends the stack if needed) */
-}
-
-/* Return the distance of FP register r from the top of the modelled stack
-   (0 means r is the top).  Aborts if r is not on the stack, or if its
-   recorded slot lies above the current top. */
-static __inline__ int stackpos(int r)
-{
-    const int slot = live.spos[r];
-
-    if (slot < 0)
-        abort();
-    if (slot > live.tos) {
-        printf("Looking for spos for fnreg %d\n",r);
-        abort();
-    }
-    return live.tos - slot;
-}
-
-/* Ensure FP register r has a slot on the modelled stack; registers that
-   already have one are left where they are. */
-static __inline__ void usereg(int r)
-{
-    if (live.spos[r] >= 0)
-        return;
-    make_tos(r);
-}
-
-/* Bind a freshly produced value to FP register r.  This is called with one
-   FP value sitting in a slot *above* the modelled tos.  If r already has a
-   slot, the value is stored into that slot and popped; otherwise the extra
-   slot simply becomes r's slot and no code is emitted. */
-static __inline__ void tos_make(int r)
-{
-    if (live.spos[r] >= 0) {
-        /* store top of stack in reg, and pop it */
-        emit_byte(0xdd);
-        emit_byte(0xd8 + (live.tos + 1) - live.spos[r]);
-        return;
-    }
-
-    live.tos++;
-    live.spos[r] = live.tos;
-    live.onstack[live.tos] = r;
-}
-
-/* FP helper functions.
-   DEFINE_OP generates one raw_<NAME>(uint32 m) emitter per x87 memory
-   operation.  Two alternative definitions exist, selected by
-   USE_NEW_RTASM; DEFINE_OP is undefined again after the list. */
-#if USE_NEW_RTASM
-/* Run-time assembler variant: forwards to the generator macro GEN with the
-   address m and no base/index register. */
-#define DEFINE_OP(NAME, GEN) \
-static inline void raw_##NAME(uint32 m) \
-{ \
- GEN(m, X86_NOREG, X86_NOREG, 1); \
-}
-DEFINE_OP(fstl, FSTLm);
-DEFINE_OP(fstpl, FSTPLm);
-DEFINE_OP(fldl, FLDLm);
-DEFINE_OP(fildl, FILDLm);
-DEFINE_OP(fistl, FISTLm);
-DEFINE_OP(flds, FLDSm);
-DEFINE_OP(fsts, FSTSm);
-DEFINE_OP(fstpt, FSTPTm);
-DEFINE_OP(fldt, FLDTm);
-#else
-/* Hand-encoded variant: emits the two bytes OP1, OP2 followed by the
-   32-bit value m. */
-#define DEFINE_OP(NAME, OP1, OP2) \
-static inline void raw_##NAME(uint32 m) \
-{ \
- emit_byte(OP1); \
- emit_byte(OP2); \
- emit_long(m); \
-}
-DEFINE_OP(fstl, 0xdd, 0x15);
-DEFINE_OP(fstpl, 0xdd, 0x1d);
-DEFINE_OP(fldl, 0xdd, 0x05);
-DEFINE_OP(fildl, 0xdb, 0x05);
-DEFINE_OP(fistl, 0xdb, 0x15);
-DEFINE_OP(flds, 0xd9, 0x05);
-DEFINE_OP(fsts, 0xd9, 0x15);
-DEFINE_OP(fstpt, 0xdb, 0x3d);
-DEFINE_OP(fldt, 0xdb, 0x2d);
-#endif
-#undef DEFINE_OP
-
-/* Store FP register r to memory operand m via raw_fstl.  r is brought to
-   the top of the modelled stack first and stays on the stack afterwards. */
-LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
-{
- make_tos(r);
- raw_fstl(m);
-}
-LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
-
-/* Store FP register r to memory operand m via raw_fstpl and drop r from
-   the modelled stack: r is brought to the top, stored with the popping
-   store, and the bookkeeping is updated to mark the slot free and r as no
-   longer on the stack. */
-LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
-{
-    make_tos(r);
-    raw_fstpl(m);
-    /* the popping store removed the top entry: mirror that in the model */
-    live.onstack[live.tos]=-1;
-    live.tos--;
-    live.spos[r]=-2;
-}
-/* The closing macro names this function, matching its LOWFUNC. */
-LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
-
-/* Load memory operand m via raw_fldl and bind the loaded value to FP
-   register r (tos_make stores into r's existing slot or adopts the new
-   one). */
-LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
-{
- raw_fldl(m);
- tos_make(r);
-}
-LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
-
-/* Load memory operand m via raw_fildl (the integer-load helper) and bind
-   the loaded value to FP register r. */
-LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
-{
- raw_fildl(m);
- tos_make(r);
-}
-LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
-
-/* Store FP register r to memory operand m via raw_fistl (the integer-store
-   helper).  r is brought to the top first and stays on the stack. */
-LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
-{
- make_tos(r);
- raw_fistl(m);
-}
-LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
-
-/* Load memory operand m via raw_flds (the single-precision load helper)
-   and bind the loaded value to FP register r. */
-LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
-{
- raw_flds(m);
- tos_make(r);
-}
-LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
-
-/* Store FP register r to memory operand m via raw_fsts (the
-   single-precision store helper).  r is brought to the top first and stays
-   on the stack. */
-LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
-{
- make_tos(r);
- raw_fsts(m);
-}
-LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
-
-/* Store FP register r to memory operand m in extended format while keeping
-   r on the stack.  The x87 can only write a long double with a popping
-   store, so a temporary copy of r is pushed and that copy is stored and
-   popped; the modelled stack is therefore unchanged. */
-LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
-{
-    usereg(r);
-    const int depth = stackpos(r);
-
-    /* Get a copy to the top of stack */
-    emit_byte(0xd9);
-    emit_byte(0xc0 + depth);
-
-    /* store and pop it */
-    raw_fstpt(m);
-}
-LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
-
-/* Store FP register r to memory operand m in extended format and drop r
-   from the modelled stack: r is brought to the top, written with the
-   popping store raw_fstpt, and the bookkeeping marks the slot free and r
-   as no longer on the stack. */
-LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
-{
-    make_tos(r);
-    raw_fstpt(m);   /* store and pop it */
-    /* mirror the pop in the model */
-    live.onstack[live.tos]=-1;
-    live.tos--;
-    live.spos[r]=-2;
-}
-/* The closing macro names this function, matching its LOWFUNC. */
-LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
-
-/* Load memory operand m via raw_fldt (the extended-format load helper) and
-   bind the loaded value to FP register r. */
-LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
-{
- raw_fldt(m);
- tos_make(r);
-}
-LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
-
-/* Push the constant selected by opcode 0xd9 0xeb (x87 FLDPI, i.e. pi --
-   per the function name) and bind it to FP register r. */
-LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
-{
- emit_byte(0xd9);
- emit_byte(0xeb);
- tos_make(r);
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
-
-/* Push the constant selected by opcode 0xd9 0xec (x87 FLDLG2, i.e.
-   log10(2) -- per the function name) and bind it to FP register r. */
-LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
-{
- emit_byte(0xd9);
- emit_byte(0xec);
- tos_make(r);
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
-
-/* Push the constant selected by opcode 0xd9 0xea (the same bytes are
-   commented as "fldl2e" in raw_fetox_rr, i.e. log2(e)) and bind it to FP
-   register r. */
-LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
-{
- emit_byte(0xd9);
- emit_byte(0xea);
- tos_make(r);
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
-
-/* Push the constant selected by opcode 0xd9 0xed (x87 FLDLN2, i.e. ln(2)
-   -- per the function name) and bind it to FP register r. */
-LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
-{
- emit_byte(0xd9);
- emit_byte(0xed);
- tos_make(r);
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
-
-/* Push the constant 1 (opcode 0xd9 0xe8, the same "push '1'" sequence used
-   by make_tos) and bind it to FP register r. */
-LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
-{
- emit_byte(0xd9);
- emit_byte(0xe8);
- tos_make(r);
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
-
-/* Push the constant 0 (opcode 0xd9 0xee, the same "Push 0" sequence used
-   by raw_fflags_into_flags) and bind it to FP register r. */
-LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
-{
- emit_byte(0xd9);
- emit_byte(0xee);
- tos_make(r);
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
-
-/* Copy FP register s into FP register d.
-   When s is already on top and d already has a slot, a single
-   non-popping store into d's slot suffices.  In every other case s is
-   duplicated onto the top and tos_make() binds that copy to d (popping it
-   into d's slot if d already has one). */
-LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
-{
-    usereg(s);
-    const int src_depth = stackpos(s);
-
-    if (src_depth != 0 || live.spos[d] < 0) {
-        /* duplicate source on tos */
-        emit_byte(0xd9);
-        emit_byte(0xc0 + src_depth);
-        /* store to destination, pop if necessary */
-        tos_make(d);
-    }
-    else {
-        /* source is on top of stack, and we already have the dest */
-        const int dst_depth = stackpos(d);
-        emit_byte(0xdd);
-        emit_byte(0xd0 + dst_depth);
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
-
-/* Emit opcode 0xd9 with a second byte of 0xa8+index, followed by the
-   32-bit value base.
-   NOTE(review): judging by the name this encodes "fldcw base(index)", i.e.
-   loading the FPU control word from base plus the index register -- confirm
-   against the x87 encoding tables; index must be a plain 0..7 register
-   number for the byte arithmetic to be valid. */
-LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
-{
- emit_byte(0xd9);
- emit_byte(0xa8+index);
- emit_long(base);
-}
-LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
-
-
-/* d = sqrt(s).  When d and s are the same register the operation is done
-   in place on the top of stack; otherwise s is duplicated, the root is
-   taken on the copy and the copy is bound to d. */
-LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xfa);            /* take square root */
-    }
-    else {
-        usereg(s);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd9);
-        emit_byte(0xc0 + src_depth); /* duplicate source */
-        emit_byte(0xd9);
-        emit_byte(0xfa);            /* take square root */
-        tos_make(d);                /* store to destination */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
-
-/* d = fabs(s).  In place when d and s coincide; otherwise s is duplicated,
-   the absolute value is taken on the copy and the copy is bound to d. */
-LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xe1);            /* take fabs */
-    }
-    else {
-        usereg(s);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd9);
-        emit_byte(0xc0 + src_depth); /* duplicate source */
-        emit_byte(0xd9);
-        emit_byte(0xe1);            /* take fabs */
-        tos_make(d);                /* store to destination */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
-
-/* d = frndint(s).  In place when d and s coincide; otherwise s is
-   duplicated, the copy is rounded and then bound to d. */
-LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xfc);            /* take frndint */
-    }
-    else {
-        usereg(s);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd9);
-        emit_byte(0xc0 + src_depth); /* duplicate source */
-        emit_byte(0xd9);
-        emit_byte(0xfc);            /* take frndint */
-        tos_make(d);                /* store to destination */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
-
-/* d = cos(s).  In place when d and s coincide; otherwise s is duplicated,
-   the cosine is taken on the copy and the copy is bound to d. */
-LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xff);            /* take cos */
-    }
-    else {
-        usereg(s);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd9);
-        emit_byte(0xc0 + src_depth); /* duplicate source */
-        emit_byte(0xd9);
-        emit_byte(0xff);            /* take cos */
-        tos_make(d);                /* store to destination */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
-
-/* d = sin(s).  In place when d and s coincide; otherwise s is duplicated,
-   the sine is taken on the copy and the copy is bound to d. */
-LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xfe);            /* take sin */
-    }
-    else {
-        usereg(s);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd9);
-        emit_byte(0xc0 + src_depth); /* duplicate source */
-        emit_byte(0xd9);
-        emit_byte(0xfe);            /* take sin */
-        tos_make(d);                /* store to destination */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
-
-/* Constant 1.0 kept in memory so generated code can add it without using
-   an extra x87 stack slot. */
-static const double one = 1;
-
-/* d = 2^s.  The source is split into its rounded part and the remainder:
-   f2xm1 is applied to the remainder, 1 is added back from memory, and the
-   result is scaled by the rounded part.  Two temporaries are pushed on top
-   of the live registers, so the stack may be up to 8 high at the peak. */
-LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
-{
-    usereg(s);
-    const int src_depth = stackpos(s);
-
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);    /* duplicate source */
-
-    emit_byte(0xd9);
-    emit_byte(0xc0);                /* duplicate top of stack. Now up to 8 high */
-    emit_byte(0xd9);
-    emit_byte(0xfc);                /* rndint */
-    emit_byte(0xd9);
-    emit_byte(0xc9);                /* swap top two elements */
-    emit_byte(0xd8);
-    emit_byte(0xe1);                /* subtract rounded from original */
-    emit_byte(0xd9);
-    emit_byte(0xf0);                /* f2xm1 */
-    x86_fadd_m((uintptr)&one);      /* Add '1' without using extra stack space */
-    emit_byte(0xd9);
-    emit_byte(0xfd);                /* and scale it */
-    emit_byte(0xdd);
-    emit_byte(0xd9);                /* take the rounded value off */
-
-    tos_make(d);                    /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
-
-/* d = e^s.  The source copy is first multiplied by log2(e); the product is
-   then raised as a power of two using the same
-   round / subtract / f2xm1 / add 1 / scale sequence as raw_ftwotox_rr. */
-LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
-{
-    usereg(s);
-    const int src_depth = stackpos(s);
-
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);    /* duplicate source */
-    emit_byte(0xd9);
-    emit_byte(0xea);                /* fldl2e */
-    emit_byte(0xde);
-    emit_byte(0xc9);                /* fmulp --- multiply source by log2(e) */
-
-    emit_byte(0xd9);
-    emit_byte(0xc0);                /* duplicate top of stack. Now up to 8 high */
-    emit_byte(0xd9);
-    emit_byte(0xfc);                /* rndint */
-    emit_byte(0xd9);
-    emit_byte(0xc9);                /* swap top two elements */
-    emit_byte(0xd8);
-    emit_byte(0xe1);                /* subtract rounded from original */
-    emit_byte(0xd9);
-    emit_byte(0xf0);                /* f2xm1 */
-    x86_fadd_m((uintptr)&one);      /* Add '1' without using extra stack space */
-    emit_byte(0xd9);
-    emit_byte(0xfd);                /* and scale it */
-    emit_byte(0xdd);
-    emit_byte(0xd9);                /* take the rounded value off */
-
-    tos_make(d);                    /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
-
-/* d = log2(s).  A copy of s and the constant 1 are placed on the stack,
-   exchanged so that the copy is on top, and 1*log2(x) is taken; the result
-   is then bound to d. */
-LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
-{
-    usereg(s);
-    const int src_depth = stackpos(s);
-
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);    /* duplicate source */
-    emit_byte(0xd9);
-    emit_byte(0xe8);                /* push '1' */
-    emit_byte(0xd9);
-    emit_byte(0xc9);                /* swap top two */
-    emit_byte(0xd9);
-    emit_byte(0xf1);                /* take 1*log2(x) */
-
-    tos_make(d);                    /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
-
-
-/* d = -s.  In place when d and s coincide; otherwise s is duplicated, the
-   sign of the copy is changed and the copy is bound to d. */
-LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xe0);            /* take fchs */
-    }
-    else {
-        usereg(s);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd9);
-        emit_byte(0xc0 + src_depth); /* duplicate source */
-        emit_byte(0xd9);
-        emit_byte(0xe0);            /* take fchs */
-        tos_make(d);                /* store to destination */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
-
-/* d += s.  Both registers are given stack slots first.  If s happens to be
-   on top, the form that adds the top into d's slot is used; otherwise d is
-   brought to the top and s is added into it. */
-LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-
-    if (live.spos[s] != live.tos) {
-        make_tos(d);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd8);
-        emit_byte(0xc0 + src_depth);    /* add source to dest */
-    }
-    else {
-        /* Source is on top of stack */
-        const int dst_depth = stackpos(d);
-        emit_byte(0xdc);
-        emit_byte(0xc0 + dst_depth);    /* add source to dest */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
-
-/* d -= s.  Both registers are given stack slots first.  If s happens to be
-   on top, the form that subtracts the top from d's slot is used; otherwise
-   d is brought to the top and s is subtracted from it. */
-LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-
-    if (live.spos[s] != live.tos) {
-        make_tos(d);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd8);
-        emit_byte(0xe0 + src_depth);    /* sub src from dest */
-    }
-    else {
-        /* Source is on top of stack */
-        const int dst_depth = stackpos(d);
-        emit_byte(0xdc);
-        emit_byte(0xe8 + dst_depth);    /* sub source from dest */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
-
-/* Compare d with s.  Both registers are given stack slots, d is brought to
-   the top, and the compare against s's stack position is emitted.  Neither
-   register is modified or popped. */
-LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
-{
-    usereg(s);
-    usereg(d);
-    make_tos(d);
-
-    const int src_depth = stackpos(s);
-    emit_byte(0xdd);
-    emit_byte(0xe0 + src_depth);    /* cmp dest with source */
-}
-LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
-
-/* d *= s.  Both registers are given stack slots first.  If s happens to be
-   on top, the form that multiplies d's slot by the top is used; otherwise
-   d is brought to the top and multiplied by s. */
-LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-
-    if (live.spos[s] != live.tos) {
-        make_tos(d);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd8);
-        emit_byte(0xc8 + src_depth);    /* mul dest by source */
-    }
-    else {
-        /* Source is on top of stack */
-        const int dst_depth = stackpos(d);
-        emit_byte(0xdc);
-        emit_byte(0xc8 + dst_depth);    /* mul dest by source */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
-
-/* d /= s.  Both registers are given stack slots first.  If s happens to be
-   on top, the form that divides d's slot by the top is used; otherwise d
-   is brought to the top and divided by s. */
-LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-
-    if (live.spos[s] != live.tos) {
-        make_tos(d);
-        const int src_depth = stackpos(s);
-        emit_byte(0xd8);
-        emit_byte(0xf0 + src_depth);    /* div dest by source */
-    }
-    else {
-        /* Source is on top of stack */
-        const int dst_depth = stackpos(d);
-        emit_byte(0xdc);
-        emit_byte(0xf8 + dst_depth);    /* div dest by source */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
-
-/* d = remainder of d by s.  The remainder instruction works on the top two
-   stack entries, so make_tos2() places d on top with s directly beneath.
-   If s is not found at depth 1 afterwards, the generator reports the
-   problem and aborts. */
-LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-    make_tos2(d, s);
-
-    const int src_depth = stackpos(s);
-    if (src_depth == 1) {
-        emit_byte(0xd9);
-        emit_byte(0xf8);    /* take rem from dest by source */
-    }
-    else {
-        printf("Failed horribly in raw_frem_rr! ds is %d\n",src_depth);
-        abort();
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
-
-/* d = "rem1" remainder of d by s.  As in raw_frem_rr, make_tos2() places d
-   on top with s directly beneath; if s is not found at depth 1 afterwards,
-   the generator reports the problem and aborts. */
-LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-    make_tos2(d, s);
-
-    const int src_depth = stackpos(s);
-    if (src_depth == 1) {
-        emit_byte(0xd9);
-        emit_byte(0xf5);    /* take rem1 from dest by source */
-    }
-    else {
-        printf("Failed horribly in raw_frem1_rr! ds is %d\n",src_depth);
-        abort();
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
-
-
-/* Bring FP register r to the top of the modelled stack and emit ftst
-   (0xd9 0xe4) on it.  The stack model is not otherwise changed. */
-LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
-{
- make_tos(r);
- emit_byte(0xd9); /* ftst */
- emit_byte(0xe4);
-}
-LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
-
-/* %eax register is clobbered if target processor doesn't support fucomi */
-#define FFLAG_NREG_CLOBBER_CONDITION !have_cmov
-#define FFLAG_NREG EAX_INDEX
-
-/* Transfer the condition of FP register r (compared against 0) into the
-   native CPU flags.  A 0 is pushed and exchanged with r so that r is on top
-   and the 0 sits in r's slot; the two are compared; finally the top is
-   stored back into that slot and popped, which restores r and discards
-   the 0.
-   With have_cmov set the compare writes the flags directly (fucomi);
-   otherwise fucom is followed by fstsw ax / sahf, which is why %eax is
-   clobbered on that path. */
-static __inline__ void raw_fflags_into_flags(int r)
-{
-    usereg(r);
-    const int depth = stackpos(r);
-
-    emit_byte(0xd9);
-    emit_byte(0xee);                /* Push 0 */
-    emit_byte(0xd9);
-    emit_byte(0xc9 + depth);        /* swap top two around */
-
-    if (!have_cmov) {
-        emit_byte(0xdd);
-        emit_byte(0xe1 + depth);    /* fucom them */
-        emit_byte(0x9b);
-        emit_byte(0xdf);
-        emit_byte(0xe0);            /* fstsw ax */
-        raw_sahf(0);                /* sahf */
-    }
-    else {
-        // gb-- fucomi is for P6 cores only, not K6-2 then...
-        emit_byte(0xdb);
-        emit_byte(0xe9 + depth);    /* fucomi them */
-    }
-
-    emit_byte(0xdd);
-    emit_byte(0xd9 + depth);        /* store value back, and get rid of 0 */
-}
+++ /dev/null
-/******************** -*- mode: C; tab-width: 8 -*- ********************
- *
- * Run-time assembler for IA-32 and AMD64
- *
- ***********************************************************************/
-
-
-/***********************************************************************
- *
- * This file is derived from CCG.
- *
- * Copyright 1999, 2000, 2001, 2002, 2003 Ian Piumarta
- *
- * Adaptations and enhancements for AMD64 support, Copyright 2003-2008
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- ***********************************************************************/
-
-#ifndef X86_RTASM_H
-#define X86_RTASM_H
-
-/* NOTES
- *
- * o Best viewed on a 1024x768 screen with fixed-6x10 font ;-)
- *
- * TODO
- *
- * o Fix FIXMEs
- * o i387 FPU instructions
- * o SSE instructions
- * o Optimize for cases where register numbers are not integral constants
- */
-
-/* --- Configuration ------------------------------------------------------- */
-
-/* Each option below may be defined by the includer beforehand; the value
-   given here is only the default used when it is not already defined. */
-
-/* Define to settle a "flat" register set, i.e. different regno for
- each size variant. */
-#ifndef X86_FLAT_REGISTERS
-#define X86_FLAT_REGISTERS 1
-#endif
-
-/* Define to generate x86-64 code. */
-#ifndef X86_TARGET_64BIT
-#define X86_TARGET_64BIT 0
-#endif
-
-/* Define to optimize ALU instructions. */
-#ifndef X86_OPTIMIZE_ALU
-#define X86_OPTIMIZE_ALU 1
-#endif
-
-/* Define to optimize rotate/shift instructions. */
-#ifndef X86_OPTIMIZE_ROTSHI
-#define X86_OPTIMIZE_ROTSHI 1
-#endif
-
-/* Define to optimize absolute addresses for RIP relative addressing. */
-#ifndef X86_RIP_RELATIVE_ADDR
-#define X86_RIP_RELATIVE_ADDR 1
-#endif
-
-
-/* --- Macros -------------------------------------------------------------- */
-
-/* Functions used to emit code.
- *
- * x86_emit_byte(B)
- * x86_emit_word(W)
- * x86_emit_long(L)
- */
-
-/* Get pointer to current code
- *
- * x86_get_target()
- */
-
-/* Abort assembler, fatal failure.
- *
- * x86_emit_failure(MSG)
- */
-
-/* Expression form of x86_emit_failure(): calls it and evaluates to 0, so it
-   can appear as one arm of a ?: that otherwise yields an integer. */
-#define x86_emit_failure0(MSG) (x86_emit_failure(MSG),0)
-
-
-/* --- Register set -------------------------------------------------------- */
-
-/* Base values of the register classes.  With X86_FLAT_REGISTERS each class
-   gets its own 0x10-wide range and X86_NOREG is 0; without it all classes
-   except the high 8-bit registers start at 0 and X86_NOREG is -1. */
-enum {
- X86_RIP = -2,
-#if X86_FLAT_REGISTERS
- X86_NOREG = 0,
- X86_Reg8L_Base = 0x10,
- X86_Reg8H_Base = 0x20,
- X86_Reg16_Base = 0x30,
- X86_Reg32_Base = 0x40,
- X86_Reg64_Base = 0x50,
- X86_RegMMX_Base = 0x60,
- X86_RegXMM_Base = 0x70,
-#else
- X86_NOREG = -1,
- X86_Reg8L_Base = 0,
- X86_Reg8H_Base = 16,
- X86_Reg16_Base = 0,
- X86_Reg32_Base = 0,
- X86_Reg64_Base = 0,
- X86_RegMMX_Base = 0,
- X86_RegXMM_Base = 0,
-#endif
-};
-
-/* 8-bit registers: sixteen low-byte registers counted up from
-   X86_Reg8L_Base, then AH/CH/DH/BH starting at X86_Reg8H_Base + 4. */
-enum {
- X86_AL = X86_Reg8L_Base,
- X86_CL, X86_DL, X86_BL,
- X86_SPL, X86_BPL, X86_SIL, X86_DIL,
- X86_R8B, X86_R9B, X86_R10B, X86_R11B,
- X86_R12B, X86_R13B, X86_R14B, X86_R15B,
- X86_AH = X86_Reg8H_Base + 4,
- X86_CH, X86_DH, X86_BH
-};
-
-/* 16-bit registers, counted up from X86_Reg16_Base. */
-enum {
- X86_AX = X86_Reg16_Base,
- X86_CX, X86_DX, X86_BX,
- X86_SP, X86_BP, X86_SI, X86_DI,
- X86_R8W, X86_R9W, X86_R10W, X86_R11W,
- X86_R12W, X86_R13W, X86_R14W, X86_R15W
-};
-
-/* 32-bit registers, counted up from X86_Reg32_Base. */
-enum {
- X86_EAX = X86_Reg32_Base,
- X86_ECX, X86_EDX, X86_EBX,
- X86_ESP, X86_EBP, X86_ESI, X86_EDI,
- X86_R8D, X86_R9D, X86_R10D, X86_R11D,
- X86_R12D, X86_R13D, X86_R14D, X86_R15D
-};
-
-/* 64-bit registers, counted up from X86_Reg64_Base. */
-enum {
- X86_RAX = X86_Reg64_Base,
- X86_RCX, X86_RDX, X86_RBX,
- X86_RSP, X86_RBP, X86_RSI, X86_RDI,
- X86_R8, X86_R9, X86_R10, X86_R11,
- X86_R12, X86_R13, X86_R14, X86_R15
-};
-
-/* MMX registers, counted up from X86_RegMMX_Base. */
-enum {
- X86_MM0 = X86_RegMMX_Base,
- X86_MM1, X86_MM2, X86_MM3,
- X86_MM4, X86_MM5, X86_MM6, X86_MM7,
-};
-
-/* XMM registers, counted up from X86_RegXMM_Base. */
-enum {
- X86_XMM0 = X86_RegXMM_Base,
- X86_XMM1, X86_XMM2, X86_XMM3,
- X86_XMM4, X86_XMM5, X86_XMM6, X86_XMM7,
- X86_XMM8, X86_XMM9, X86_XMM10, X86_XMM11,
- X86_XMM12, X86_XMM13, X86_XMM14, X86_XMM15
-};
-
-/* Register control and access
- *
- * _r0P(R) Null register?
- * _rIP(R) RIP register?
- * _rXP(R) Extended register?
- *
- * _rC(R) Class of register (only valid if X86_FLAT_REGISTERS)
- * _rR(R) Full register number
- * _rN(R) Short register number for encoding
- *
- * _r1(R) 8-bit register ID
- * _r2(R) 16-bit register ID
- * _r4(R) 32-bit register ID
- * _r8(R) 64-bit register ID
- * _rM(R) MMX register ID
- * _rX(R) XMM register ID
- * _rA(R) Address register ID used for EA calculation
- */
-
-/* _rIP() is constant 0 unless X86_TARGET_64BIT is set. */
-#define _r0P(R) ((int)(R) == (int)X86_NOREG)
-#define _rIP(R) (X86_TARGET_64BIT ? ((int)(R) == (int)X86_RIP) : 0)
-
-#if X86_FLAT_REGISTERS
-/* Flat numbering: the high nibble is the class, the low nibble the
-   register number, and the low three bits the encoding number.  A register
-   is "extended" when it is positive and its number exceeds 7. */
-#define _rC(R) ((R) & 0xf0)
-#define _rR(R) ((R) & 0x0f)
-#define _rN(R) ((R) & 0x07)
-#define _rXP(R) ((R) > 0 && _rR(R) > 7)
-#else
-/* Non-flat numbering: R is the register number itself (no _rC here);
-   numbers 8..15 count as extended. */
-#define _rN(R) ((R) & 0x07)
-#define _rR(R) (int(R))
-#define _rXP(R) (_rR(R) > 7 && _rR(R) < 16)
-#endif
-
-/* Register-ID accessors.  Without _ASM_SAFETY, or without flat register
-   numbering, they all reduce to the 3-bit encoding number.  With both
-   enabled each accessor first verifies that the register belongs to the
-   expected class and reports an assembler failure otherwise. */
-#if !defined(_ASM_SAFETY) || ! X86_FLAT_REGISTERS
-#define _r1(R) _rN(R)
-#define _r2(R) _rN(R)
-#define _r4(R) _rN(R)
-#define _r8(R) _rN(R)
-#define _rA(R) _rN(R)
-#define _rM(R) _rN(R)
-#define _rX(R) _rN(R)
-#else
-/* The class must be compared for equality against the two 8-bit bases:
-   masking with (X86_Reg8L_Base | X86_Reg8H_Base) == 0x30 would also accept
-   the 16-bit (0x30), 64-bit (0x50) and XMM (0x70) classes. */
-#define _r1(R) ( ((_rC(R) == X86_Reg8L_Base) || (_rC(R) == X86_Reg8H_Base)) ? _rN(R) : x86_emit_failure0( "8-bit register required"))
-#define _r2(R) ( (_rC(R) == X86_Reg16_Base) ? _rN(R) : x86_emit_failure0("16-bit register required"))
-#define _r4(R) ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("32-bit register required"))
-#define _r8(R) ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("64-bit register required"))
-/* Address registers must be 64-bit when targeting x86-64, 32-bit otherwise. */
-#define _rA(R) ( X86_TARGET_64BIT ? \
- ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("not a valid 64-bit base/index expression")) : \
- ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("not a valid 32-bit base/index expression")) )
-#define _rM(R) ( (_rC(R) == X86_RegMMX_Base) ? _rN(R) : x86_emit_failure0("MMX register required"))
-#define _rX(R) ( (_rC(R) == X86_RegXMM_Base) ? _rN(R) : x86_emit_failure0("SSE register required"))
-#endif
-
-/* Stack-pointer and special-register predicates.
-   _rSP()     stack pointer of the target mode (RSP or ESP)
-   _r1e8lP(R) R lies in the range X86_SPL..X86_DIL
-   _rbpP/_rspP     full register number equals that of RBP / RSP
-   _rbp13P/_rsp12P 3-bit encoding number equals that of RBP / RSP; as the
-                   names suggest this also matches R13 / R12 */
-#define _rSP() (X86_TARGET_64BIT ? (int)X86_RSP : (int)X86_ESP)
-#define _r1e8lP(R) (int(R) >= X86_SPL && int(R) <= X86_DIL)
-#define _rbpP(R) (_rR(R) == _rR(X86_RBP))
-#define _rspP(R) (_rR(R) == _rR(X86_RSP))
-#define _rbp13P(R) (_rN(R) == _rN(X86_RBP))
-#define _rsp12P(R) (_rN(R) == _rN(X86_RSP))
-
-
-/* ========================================================================= */
-/* --- UTILITY ------------------------------------------------------------- */
-/* ========================================================================= */
-
-/* Short aliases for the integer types used by the emitter macros. */
-typedef signed char _sc;
-typedef unsigned char _uc;
-typedef signed short _ss;
-typedef unsigned short _us;
-typedef signed int _sl;
-typedef unsigned int _ul;
-
-/* Value casts; the argument goes through unsigned long first. */
-#define _UC(X) ((_uc )(unsigned long)(X))
-#define _US(X) ((_us )(unsigned long)(X))
-#define _SL(X) ((_sl )(unsigned long)(X))
-#define _UL(X) ((_ul )(unsigned long)(X))
-
-/* Pointer casts. */
-#define _PUC(X) ((_uc *)(X))
-#define _PUS(X) ((_us *)(X))
-#define _PSL(X) ((_sl *)(X))
-#define _PUL(X) ((_ul *)(X))
-
-/* Shorthands for the x86_emit_byte/word/long/quad emitters. */
-#define _B(B) x86_emit_byte((B))
-#define _W(W) x86_emit_word((W))
-#define _L(L) x86_emit_long((L))
-#define _Q(Q) x86_emit_quad((Q))
-
-/* Bit-field range predicates.
-   _MASK(N)   mask with the low N bits set
-   _siP(N,I)  I fits an N-bit signed field (bits N-1 and above all equal)
-   _uiP(N,I)  I fits an N-bit unsigned field (no bits above the low N)
-   _suiP(N,I) I fits as either signed or unsigned */
-#define _MASK(N) ((unsigned)((1<<(N)))-1)
-#define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N)))
-#define _uiP(N,I) (!(((unsigned)(I))&~_MASK(N)))
-#define _suiP(N,I) (_siP(N,I) | _uiP(N,I))
-
-/* Field extractors: truncate I to W bits.  Without _ASM_SAFETY no range
-   check is made; with it, an out-of-range value reports an assembler
-   failure instead. */
-#ifndef _ASM_SAFETY
-#define _ck_s(W,I) (_UL(I) & _MASK(W))
-#define _ck_u(W,I) (_UL(I) & _MASK(W))
-#define _ck_su(W,I) (_UL(I) & _MASK(W))
-#define _ck_d(W,I) (_UL(I) & _MASK(W))
-#else
-#define _ck_s(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "signed integer `"#I"' too large for "#W"-bit field"))
-#define _ck_u(W,I) (_uiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0("unsigned integer `"#I"' too large for "#W"-bit field"))
-#define _ck_su(W,I) (_suiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "integer `"#I"' too large for "#W"-bit field"))
-#define _ck_d(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "displacement `"#I"' too large for "#W"-bit field"))
-#endif
-
-/* Convenience predicates: zero, and 8/16-bit signed or unsigned fit. */
-#define _s0P(I) ((I)==0)
-#define _s8P(I) _siP(8,I)
-#define _s16P(I) _siP(16,I)
-#define _u8P(I) _uiP(8,I)
-#define _u16P(I) _uiP(16,I)
-
-/* 8/16-bit fields accepting either signedness. */
-#define _su8(I) _ck_su(8,I)
-#define _su16(I) _ck_su(16,I)
-
-/* _sN(I): I as an N-bit signed field, via _ck_s (N = 1..31). */
-#define _s1(I) _ck_s( 1,I)
-#define _s2(I) _ck_s( 2,I)
-#define _s3(I) _ck_s( 3,I)
-#define _s4(I) _ck_s( 4,I)
-#define _s5(I) _ck_s( 5,I)
-#define _s6(I) _ck_s( 6,I)
-#define _s7(I) _ck_s( 7,I)
-#define _s8(I) _ck_s( 8,I)
-#define _s9(I) _ck_s( 9,I)
-#define _s10(I) _ck_s(10,I)
-#define _s11(I) _ck_s(11,I)
-#define _s12(I) _ck_s(12,I)
-#define _s13(I) _ck_s(13,I)
-#define _s14(I) _ck_s(14,I)
-#define _s15(I) _ck_s(15,I)
-#define _s16(I) _ck_s(16,I)
-#define _s17(I) _ck_s(17,I)
-#define _s18(I) _ck_s(18,I)
-#define _s19(I) _ck_s(19,I)
-#define _s20(I) _ck_s(20,I)
-#define _s21(I) _ck_s(21,I)
-#define _s22(I) _ck_s(22,I)
-#define _s23(I) _ck_s(23,I)
-#define _s24(I) _ck_s(24,I)
-#define _s25(I) _ck_s(25,I)
-#define _s26(I) _ck_s(26,I)
-#define _s27(I) _ck_s(27,I)
-#define _s28(I) _ck_s(28,I)
-#define _s29(I) _ck_s(29,I)
-#define _s30(I) _ck_s(30,I)
-#define _s31(I) _ck_s(31,I)
-/* _uN(I): I as an N-bit unsigned field, via _ck_u (N = 1..31). */
-#define _u1(I) _ck_u( 1,I)
-#define _u2(I) _ck_u( 2,I)
-#define _u3(I) _ck_u( 3,I)
-#define _u4(I) _ck_u( 4,I)
-#define _u5(I) _ck_u( 5,I)
-#define _u6(I) _ck_u( 6,I)
-#define _u7(I) _ck_u( 7,I)
-#define _u8(I) _ck_u( 8,I)
-#define _u9(I) _ck_u( 9,I)
-#define _u10(I) _ck_u(10,I)
-#define _u11(I) _ck_u(11,I)
-#define _u12(I) _ck_u(12,I)
-#define _u13(I) _ck_u(13,I)
-#define _u14(I) _ck_u(14,I)
-#define _u15(I) _ck_u(15,I)
-#define _u16(I) _ck_u(16,I)
-#define _u17(I) _ck_u(17,I)
-#define _u18(I) _ck_u(18,I)
-#define _u19(I) _ck_u(19,I)
-#define _u20(I) _ck_u(20,I)
-#define _u21(I) _ck_u(21,I)
-#define _u22(I) _ck_u(22,I)
-#define _u23(I) _ck_u(23,I)
-#define _u24(I) _ck_u(24,I)
-#define _u25(I) _ck_u(25,I)
-#define _u26(I) _ck_u(26,I)
-#define _u27(I) _ck_u(27,I)
-#define _u28(I) _ck_u(28,I)
-#define _u29(I) _ck_u(29,I)
-#define _u30(I) _ck_u(30,I)
-#define _u31(I) _ck_u(31,I)
-
-/* ========================================================================= */
-/* --- ASSEMBLER ----------------------------------------------------------- */
-/* ========================================================================= */
-
-/* Binary-looking names for the 2-bit and 3-bit field values. */
-#define _b00 0
-#define _b01 1
-#define _b10 2
-#define _b11 3
-
-#define _b000 0
-#define _b001 1
-#define _b010 2
-#define _b011 3
-#define _b100 4
-#define _b101 5
-#define _b110 6
-#define _b111 7
-
-/* _OFF4(D): D minus the current emit position, as 32-bit values.
-   _CKD8(D): that offset as an 8-bit displacement.
-   NOTE(review): the (_uc) cast truncates the offset before _ck_d sees it,
-   so the range check looks like it can never fire -- confirm intent. */
-#define _OFF4(D) (_UL(D) - _UL(x86_get_target()))
-#define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) )
-
-/* Emit a placeholder of the field's size, then patch it in place with the
-   offset computed relative to the position just after the field. */
-#define _D8(D) (_B(0), ((*(_PUC(x86_get_target())-1))= _CKD8(D)))
-#define _D32(D) (_L(0), ((*(_PUL(x86_get_target())-1))= _OFF4(D)))
-
-/* Field validators for ModR/M and SIB components: pass-through without
-   _ASM_SAFETY, range-checked (mod/scale <= 3, others <= 7) with it. */
-#ifndef _ASM_SAFETY
-# define _M(M) (M)
-# define _r(R) (R)
-# define _m(M) (M)
-# define _s(S) (S)
-# define _i(I) (I)
-# define _b(B) (B)
-#else
-# define _M(M) (((M)>3) ? x86_emit_failure0("internal error: mod = " #M) : (M))
-# define _r(R) (((R)>7) ? x86_emit_failure0("internal error: reg = " #R) : (R))
-# define _m(M) (((M)>7) ? x86_emit_failure0("internal error: r/m = " #M) : (M))
-# define _s(S) (((S)>3) ? x86_emit_failure0("internal error: memory scale = " #S) : (S))
-# define _i(I) (((I)>7) ? x86_emit_failure0("internal error: memory index = " #I) : (I))
-# define _b(B) (((B)>7) ? x86_emit_failure0("internal error: memory base = " #B) : (B))
-#endif
-
-/* Emit one ModR/M byte (mod:2 | reg:3 | r/m:3) or one SIB byte
-   (scale:2 | index:3 | base:3). */
-#define _Mrm(Md,R,M) _B((_M(Md)<<6)|(_r(R)<<3)|_m(M))
-#define _SIB(Sc,I, B) _B((_s(Sc)<<6)|(_i(I)<<3)|_b(B))
-
-/* Map a scale factor of 1, 2, 4 or 8 to its 2-bit encoding; any other
-   value is reported as an assembler failure. */
-#define _SCL(S) ((((S)==1) ? _b00 : \
- (((S)==2) ? _b01 : \
- (((S)==4) ? _b10 : \
- (((S)==8) ? _b11 : x86_emit_failure0("illegal scale: " #S))))))
-
-
-/* --- Memory subformats - urgh! ------------------------------------------- */
-
-/* Naming: _r_<disp><components>, where the disp part is D (32-bit
-   displacement only), 0 (none), 1 (8-bit) or 4 (32-bit), and the
-   components are B (base), I (index), S (scale).  Each macro emits the
-   ModR/M byte, an optional SIB byte and the displacement. */
-
-/* _r_D() is RIP addressing mode if X86_TARGET_64BIT, use _r_DSIB() instead */
-#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_L((_sl)(D)))
-#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((_sl)(D)))
-#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) )
-#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) )
-#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_B((_sc)(D)))
-#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((_sc)(D)))
-#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_L((_sl)(D)))
-#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((_sl)(D)))
-#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((_sl)(D)))
-
-/* Pick the shortest displacement form: none when D is 0 and the base does
-   not encode like RBP/R13, 8-bit when D fits a signed byte, else 32-bit. */
-#define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B ))))
-#define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S))))
-
-/* Use RIP-addressing in 64-bit mode, if possible */
-#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \
- ((uintptr)x86_get_target() + 4 + (O) - (D) <= 0xffffffff))
-
-/* General memory operand dispatcher.  O is the number of bytes that follow
-   the displacement (callers pass the immediate size); it is used only for
-   the RIP-relative computation.  Cases, in order:
-     no index, no base : absolute (32-bit), RIP-relative or SIB-absolute
-     no index, RIP base: RIP-relative with D taken as given
-     no index, base encodes like RSP/R12: forced SIB form
-     no index, other base: base + displacement
-     index, no base    : index*scale + disp32
-     index and base    : full form; RSP as index is rejected */
-#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \
- (_x86_RIP_addressing_possible(D, O) ? \
- _r_D(R, (D) - ((uintptr)x86_get_target() + 4 + (O))) : \
- _r_DSIB(R,D))) : \
- (_rIP(B) ? _r_D (R,D ) : \
- (_rsp12P(B) ? _r_DBIS(R,D,_rSP(),_rSP(),1) : \
- _r_DB (R,D, B )))) : \
- (_r0P(B) ? _r_4IS (R,D, I,S) : \
- (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \
- x86_emit_failure("illegal index register: %esp"))))
-
-
-/* --- Instruction formats ------------------------------------------------- */
-
-/* Mode guards: _m32only/_m64only evaluate X only in the matching target
-   mode and report a failure otherwise; _m64 evaluates X in 64-bit mode and
-   is a no-op in 32-bit mode. */
-#define _m32only(X) (! X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 64-bit mode"))
-#define _m64only(X) ( X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 32-bit mode"))
-#define _m64(X) ( X86_TARGET_64BIT ? X : ((void)0) )
-
-/* _format Opcd ModR/M dN(rB,rI,Sc) imm... */
-
-/* Naming: O = one opcode byte, OO = two opcode bytes, r = register folded
-   into the opcode, s = opcode bit 1 set when the immediate fits a signed
-   byte, Mrm = explicit ModR/M, r_X = register plus memory operand,
-   trailing B/W/L/Q = immediate of that size, sW/sL = immediate emitted as
-   a byte when it fits and at full size otherwise, D8/D32 = relative
-   displacement.  _sWO/_sLO give the byte count _sW/_sL will emit; it is
-   passed to _r_X as the number of bytes after the displacement. */
-#define _d16() ( _B(0x66 ) )
-#define _O( OP ) ( _B( OP ) )
-#define _Or( OP,R ) ( _B( (OP)|_r(R)) )
-#define _OO( OP ) ( _B((OP)>>8), _B(( (OP) )&0xff) )
-#define _OOr( OP,R ) ( _B((OP)>>8), _B(( (OP)|_r(R))&0xff) )
-#define _Os( OP,B ) ( _s8P(B) ? _B(((OP)|_b10)) : _B(OP) )
-#define _sW( W ) ( _s8P(W) ? _B(W):_W(W) )
-#define _sL( L ) ( _s8P(L) ? _B(L):_L(L) )
-#define _sWO( W ) ( _s8P(W) ? 1 : 2 )
-#define _sLO( L ) ( _s8P(L) ? 1 : 4 )
-#define _O_B( OP ,B ) ( _O ( OP ) ,_B(B) )
-#define _O_W( OP ,W ) ( _O ( OP ) ,_W(W) )
-#define _O_L( OP ,L ) ( _O ( OP ) ,_L(L) )
-#define _OO_L( OP ,L ) ( _OO ( OP ) ,_L(L) )
-#define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) )
-#define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) )
-#define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) )
-#define _Os_sW( OP ,W ) ( _Os ( OP,W) ,_sW(W) )
-#define _Os_sL( OP ,L ) ( _Os ( OP,L) ,_sL(L) )
-#define _O_W_B( OP ,W,B) ( _O ( OP ) ,_W(W),_B(B))
-#define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_B(B) )
-#define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_W(W) )
-#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_L(L) )
-#define _Or_Q( OP,R ,Q ) ( _Or ( OP,R) ,_Q(Q) )
-#define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) )
-#define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) )
-#define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_B(B) )
-#define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_W(W) )
-#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_L(L) )
-#define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP ),_Mrm(MO,R,M ) ,_B(B) )
-#define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) )
-#define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) )
-#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,0) )
-#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,0) )
-#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) )
-#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,2) ,_W(W) )
-#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,4) ,_L(L) )
-#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) )
-#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS,_sWO(W)),_sW(W))
-#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS,_sLO(L)),_sL(L))
-#define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) )
-#define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) )
-#define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) )
-
-
-/* --- REX prefixes -------------------------------------------------------- */
-
-/* Building blocks.  _d64 emits a prefix byte 0x40 | W<<3 | R<<2 | X<<1 | B.
-   __REXwrxb emits it only when one of the four bits is set or L (force)
-   is non-zero.  The other helpers derive the R and B bits from whether the
-   given registers are extended; a RIP base never sets B. */
-#define _VOID() ((void)0)
-#define _BIT(X) (!!(X))
-#define _d64(W,R,X,B) (_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B)))
-
-#define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? _d64(W,R,X,B) : _VOID())
-#define __REXwrx_(L,W,R,X,MR) (__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR))))
-#define __REXw_x_(L,W,R,X,MR) (__REXwrx_(L,W,_BIT(_rXP(R)),X,MR))
-#define __REX_reg(RR) (__REXwrxb(0,0,0,00,_BIT(_rXP(RR))))
-#define __REX_mem(MB,MI) (__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB))))
-
-/* Byte-operand prefixes: the prefix is forced when an operand is one of
-   SPL/BPL/SIL/DIL.  No-ops outside 64-bit mode (_m64). */
-// FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH)
-#define _REXBrr(RR,MR) _m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR))
-#define _REXBmr(MB,MI,RD) _m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB))
-#define _REXBrm(RS,MB,MI) _REXBmr(MB,MI,RS)
-
-/* Prefixes with W = 0; no-ops outside 64-bit mode (_m64). */
-#define _REXBLrr(RR,MR) _m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR))
-#define _REXLrr(RR,MR) _m64(__REXw_x_(0,0,RR,0,MR))
-#define _REXLmr(MB,MI,RD) _m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB))
-#define _REXLrm(RS,MB,MI) _REXLmr(MB,MI,RS)
-#define _REXLr(RR) _m64(__REX_reg(RR))
-#define _REXLm(MB,MI) _m64(__REX_mem(MB,MI))
-
-/* Quad-operand prefixes; using them outside 64-bit mode is a failure
-   (_m64only).  The rr/mr/rm forms set W = 1.
-   NOTE(review): _REXQr/_REXQm go through __REX_reg/__REX_mem, which pass
-   W = 0 -- confirm that this is intended. */
-#define _REXQrr(RR,MR) _m64only(__REXw_x_(0,1,RR,0,MR))
-#define _REXQmr(MB,MI,RD) _m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB))
-#define _REXQrm(RS,MB,MI) _REXQmr(MB,MI,RS)
-#define _REXQr(RR) _m64only(__REX_reg(RR))
-#define _REXQm(MB,MI) _m64only(__REX_mem(MB,MI))
-
-
-/* ========================================================================= */
-/* --- Fully-qualified intrinsic instructions ------------------------------ */
-/* ========================================================================= */
-
-/* OPCODE + i = immediate operand
- * + r = register operand
- * + m = memory operand (disp,base,index,scale)
- * + sr/sm = a star preceding a register or memory
- * + 0 = top of stack register (for FPU instructions)
- *
- * NOTE in x86-64 mode: a memory operand with only a valid
- * displacement value will lead to the expected absolute mode. If
- * RIP addressing is necessary, X86_RIP shall be used as the base
- * register argument.
- */
-
-/* --- ALU instructions ---------------------------------------------------- */
-/* Operation selectors for the _ALU* macros: each value is shifted left by
- * three to build the opcode of the register/memory forms and is passed as the
- * `r` argument of the 0x80/0x81 immediate forms. */
-enum {
- X86_ADD = 0,
- X86_OR = 1,
- X86_ADC = 2,
- X86_SBB = 3,
- X86_AND = 4,
- X86_SUB = 5,
- X86_XOR = 6,
- X86_CMP = 7,
-};
-/* Generic two-operand ALU encoders, one set per operand size (B/W/L/Q).
- *   rr: registers RS and RD          ir: immediate IM and register RD
- *   mr: memory operand and RD        im: immediate IM and memory operand
- *   rm: register RS and memory operand
- * (RS/RD presumably name the source and destination registers.)
- * Opcode = (OP << 3) + k: k is 0 (byte) or 1 for rr and rm, 2 (byte) or 3 for
- * mr, and 4 (byte) or 5 for the short form that ir selects when
- * X86_OPTIMIZE_ALU is non-zero and RD is X86_AL/AX/EAX/RAX.  Otherwise ir and
- * im use opcode 0x80 (byte) or 0x81 with OP in the `r` position.
- * W forms emit _d16() first; Q forms use the _REXQ* prefixes and reuse the
- * same immediate emitters as the L forms. */
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) ))
-#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2 ,_r1(RD) ,MD,MB,MI,MS ))
-#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) ,_r1(RS) ,MD,MB,MI,MS ))
-#define _ALUBir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
- (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \
- (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) )
-#define _ALUBim(OP, IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0x80 ,OP ,MD,MB,MI,MS ,_su8(IM)))
-
-#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) ))
-#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS ))
-#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS ))
-#define _ALUWir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
- (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \
- (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) )
-#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM)))
-
-#define _ALULrr(OP, RS, RD) (_REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r4(RS),_r4(RD) ))
-#define _ALULmr(OP, MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r4(RD) ,MD,MB,MI,MS ))
-#define _ALULrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r4(RS) ,MD,MB,MI,MS ))
-#define _ALULir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \
- (_REXLrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \
- (_REXLrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r4(RD) ,IM )) )
-#define _ALULim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM ))
-
-#define _ALUQrr(OP, RS, RD) (_REXQrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r8(RS),_r8(RD) ))
-#define _ALUQmr(OP, MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r8(RD) ,MD,MB,MI,MS ))
-#define _ALUQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r8(RS) ,MD,MB,MI,MS ))
-#define _ALUQir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \
- (_REXQrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \
- (_REXQrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r8(RD) ,IM )) )
-#define _ALUQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM ))
-/* ADC: the _ALU{B,W,L,Q}* encoders specialised with OP = X86_ADC. */
-#define ADCBrr(RS, RD) _ALUBrr(X86_ADC, RS, RD)
-#define ADCBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADC, MD, MB, MI, MS, RD)
-#define ADCBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADC, RS, MD, MB, MI, MS)
-#define ADCBir(IM, RD) _ALUBir(X86_ADC, IM, RD)
-#define ADCBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADC, IM, MD, MB, MI, MS)
-
-#define ADCWrr(RS, RD) _ALUWrr(X86_ADC, RS, RD)
-#define ADCWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADC, MD, MB, MI, MS, RD)
-#define ADCWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADC, RS, MD, MB, MI, MS)
-#define ADCWir(IM, RD) _ALUWir(X86_ADC, IM, RD)
-#define ADCWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADC, IM, MD, MB, MI, MS)
-
-#define ADCLrr(RS, RD) _ALULrr(X86_ADC, RS, RD)
-#define ADCLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADC, MD, MB, MI, MS, RD)
-#define ADCLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADC, RS, MD, MB, MI, MS)
-#define ADCLir(IM, RD) _ALULir(X86_ADC, IM, RD)
-#define ADCLim(IM, MD, MB, MI, MS) _ALULim(X86_ADC, IM, MD, MB, MI, MS)
-
-#define ADCQrr(RS, RD) _ALUQrr(X86_ADC, RS, RD)
-#define ADCQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADC, MD, MB, MI, MS, RD)
-#define ADCQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADC, RS, MD, MB, MI, MS)
-#define ADCQir(IM, RD) _ALUQir(X86_ADC, IM, RD)
-#define ADCQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADC, IM, MD, MB, MI, MS)
-/* ADD: the _ALU{B,W,L,Q}* encoders specialised with OP = X86_ADD. */
-#define ADDBrr(RS, RD) _ALUBrr(X86_ADD, RS, RD)
-#define ADDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADD, MD, MB, MI, MS, RD)
-#define ADDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADD, RS, MD, MB, MI, MS)
-#define ADDBir(IM, RD) _ALUBir(X86_ADD, IM, RD)
-#define ADDBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADD, IM, MD, MB, MI, MS)
-
-#define ADDWrr(RS, RD) _ALUWrr(X86_ADD, RS, RD)
-#define ADDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADD, MD, MB, MI, MS, RD)
-#define ADDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADD, RS, MD, MB, MI, MS)
-#define ADDWir(IM, RD) _ALUWir(X86_ADD, IM, RD)
-#define ADDWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADD, IM, MD, MB, MI, MS)
-
-#define ADDLrr(RS, RD) _ALULrr(X86_ADD, RS, RD)
-#define ADDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADD, MD, MB, MI, MS, RD)
-#define ADDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADD, RS, MD, MB, MI, MS)
-#define ADDLir(IM, RD) _ALULir(X86_ADD, IM, RD)
-#define ADDLim(IM, MD, MB, MI, MS) _ALULim(X86_ADD, IM, MD, MB, MI, MS)
-
-#define ADDQrr(RS, RD) _ALUQrr(X86_ADD, RS, RD)
-#define ADDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADD, MD, MB, MI, MS, RD)
-#define ADDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADD, RS, MD, MB, MI, MS)
-#define ADDQir(IM, RD) _ALUQir(X86_ADD, IM, RD)
-#define ADDQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADD, IM, MD, MB, MI, MS)
-/* AND: the _ALU{B,W,L,Q}* encoders specialised with OP = X86_AND. */
-#define ANDBrr(RS, RD) _ALUBrr(X86_AND, RS, RD)
-#define ANDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_AND, MD, MB, MI, MS, RD)
-#define ANDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_AND, RS, MD, MB, MI, MS)
-#define ANDBir(IM, RD) _ALUBir(X86_AND, IM, RD)
-#define ANDBim(IM, MD, MB, MI, MS) _ALUBim(X86_AND, IM, MD, MB, MI, MS)
-
-#define ANDWrr(RS, RD) _ALUWrr(X86_AND, RS, RD)
-#define ANDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_AND, MD, MB, MI, MS, RD)
-#define ANDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_AND, RS, MD, MB, MI, MS)
-#define ANDWir(IM, RD) _ALUWir(X86_AND, IM, RD)
-#define ANDWim(IM, MD, MB, MI, MS) _ALUWim(X86_AND, IM, MD, MB, MI, MS)
-
-#define ANDLrr(RS, RD) _ALULrr(X86_AND, RS, RD)
-#define ANDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_AND, MD, MB, MI, MS, RD)
-#define ANDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_AND, RS, MD, MB, MI, MS)
-#define ANDLir(IM, RD) _ALULir(X86_AND, IM, RD)
-#define ANDLim(IM, MD, MB, MI, MS) _ALULim(X86_AND, IM, MD, MB, MI, MS)
-
-#define ANDQrr(RS, RD) _ALUQrr(X86_AND, RS, RD)
-#define ANDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_AND, MD, MB, MI, MS, RD)
-#define ANDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_AND, RS, MD, MB, MI, MS)
-#define ANDQir(IM, RD) _ALUQir(X86_AND, IM, RD)
-#define ANDQim(IM, MD, MB, MI, MS) _ALUQim(X86_AND, IM, MD, MB, MI, MS)
-/* CMP: the _ALU{B,W,L,Q}* encoders specialised with OP = X86_CMP. */
-#define CMPBrr(RS, RD) _ALUBrr(X86_CMP, RS, RD)
-#define CMPBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_CMP, MD, MB, MI, MS, RD)
-#define CMPBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_CMP, RS, MD, MB, MI, MS)
-#define CMPBir(IM, RD) _ALUBir(X86_CMP, IM, RD)
-#define CMPBim(IM, MD, MB, MI, MS) _ALUBim(X86_CMP, IM, MD, MB, MI, MS)
-
-#define CMPWrr(RS, RD) _ALUWrr(X86_CMP, RS, RD)
-#define CMPWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_CMP, MD, MB, MI, MS, RD)
-#define CMPWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_CMP, RS, MD, MB, MI, MS)
-#define CMPWir(IM, RD) _ALUWir(X86_CMP, IM, RD)
-#define CMPWim(IM, MD, MB, MI, MS) _ALUWim(X86_CMP, IM, MD, MB, MI, MS)
-
-#define CMPLrr(RS, RD) _ALULrr(X86_CMP, RS, RD)
-#define CMPLmr(MD, MB, MI, MS, RD) _ALULmr(X86_CMP, MD, MB, MI, MS, RD)
-#define CMPLrm(RS, MD, MB, MI, MS) _ALULrm(X86_CMP, RS, MD, MB, MI, MS)
-#define CMPLir(IM, RD) _ALULir(X86_CMP, IM, RD)
-#define CMPLim(IM, MD, MB, MI, MS) _ALULim(X86_CMP, IM, MD, MB, MI, MS)
-
-#define CMPQrr(RS, RD) _ALUQrr(X86_CMP, RS, RD)
-#define CMPQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_CMP, MD, MB, MI, MS, RD)
-#define CMPQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_CMP, RS, MD, MB, MI, MS)
-#define CMPQir(IM, RD) _ALUQir(X86_CMP, IM, RD)
-#define CMPQim(IM, MD, MB, MI, MS) _ALUQim(X86_CMP, IM, MD, MB, MI, MS)
-/* OR: the _ALU{B,W,L,Q}* encoders specialised with OP = X86_OR. */
-#define ORBrr(RS, RD) _ALUBrr(X86_OR, RS, RD)
-#define ORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_OR, MD, MB, MI, MS, RD)
-#define ORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_OR, RS, MD, MB, MI, MS)
-#define ORBir(IM, RD) _ALUBir(X86_OR, IM, RD)
-#define ORBim(IM, MD, MB, MI, MS) _ALUBim(X86_OR, IM, MD, MB, MI, MS)
-
-#define ORWrr(RS, RD) _ALUWrr(X86_OR, RS, RD)
-#define ORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_OR, MD, MB, MI, MS, RD)
-#define ORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_OR, RS, MD, MB, MI, MS)
-#define ORWir(IM, RD) _ALUWir(X86_OR, IM, RD)
-#define ORWim(IM, MD, MB, MI, MS) _ALUWim(X86_OR, IM, MD, MB, MI, MS)
-
-#define ORLrr(RS, RD) _ALULrr(X86_OR, RS, RD)
-#define ORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_OR, MD, MB, MI, MS, RD)
-#define ORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_OR, RS, MD, MB, MI, MS)
-#define ORLir(IM, RD) _ALULir(X86_OR, IM, RD)
-#define ORLim(IM, MD, MB, MI, MS) _ALULim(X86_OR, IM, MD, MB, MI, MS)
-
-#define ORQrr(RS, RD) _ALUQrr(X86_OR, RS, RD)
-#define ORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_OR, MD, MB, MI, MS, RD)
-#define ORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_OR, RS, MD, MB, MI, MS)
-#define ORQir(IM, RD) _ALUQir(X86_OR, IM, RD)
-#define ORQim(IM, MD, MB, MI, MS) _ALUQim(X86_OR, IM, MD, MB, MI, MS)
-/* SBB: the _ALU{B,W,L,Q}* encoders specialised with OP = X86_SBB. */
-#define SBBBrr(RS, RD) _ALUBrr(X86_SBB, RS, RD)
-#define SBBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SBB, MD, MB, MI, MS, RD)
-#define SBBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SBB, RS, MD, MB, MI, MS)
-#define SBBBir(IM, RD) _ALUBir(X86_SBB, IM, RD)
-#define SBBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SBB, IM, MD, MB, MI, MS)
-
-#define SBBWrr(RS, RD) _ALUWrr(X86_SBB, RS, RD)
-#define SBBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SBB, MD, MB, MI, MS, RD)
-#define SBBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SBB, RS, MD, MB, MI, MS)
-#define SBBWir(IM, RD) _ALUWir(X86_SBB, IM, RD)
-#define SBBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SBB, IM, MD, MB, MI, MS)
-
-#define SBBLrr(RS, RD) _ALULrr(X86_SBB, RS, RD)
-#define SBBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SBB, MD, MB, MI, MS, RD)
-#define SBBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SBB, RS, MD, MB, MI, MS)
-#define SBBLir(IM, RD) _ALULir(X86_SBB, IM, RD)
-#define SBBLim(IM, MD, MB, MI, MS) _ALULim(X86_SBB, IM, MD, MB, MI, MS)
-
-#define SBBQrr(RS, RD) _ALUQrr(X86_SBB, RS, RD)
-#define SBBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SBB, MD, MB, MI, MS, RD)
-#define SBBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SBB, RS, MD, MB, MI, MS)
-#define SBBQir(IM, RD) _ALUQir(X86_SBB, IM, RD)
-#define SBBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SBB, IM, MD, MB, MI, MS)
-/* SUB: the _ALU{B,W,L,Q}* encoders specialised with OP = X86_SUB. */
-#define SUBBrr(RS, RD) _ALUBrr(X86_SUB, RS, RD)
-#define SUBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SUB, MD, MB, MI, MS, RD)
-#define SUBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SUB, RS, MD, MB, MI, MS)
-#define SUBBir(IM, RD) _ALUBir(X86_SUB, IM, RD)
-#define SUBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SUB, IM, MD, MB, MI, MS)
-
-#define SUBWrr(RS, RD) _ALUWrr(X86_SUB, RS, RD)
-#define SUBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SUB, MD, MB, MI, MS, RD)
-#define SUBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SUB, RS, MD, MB, MI, MS)
-#define SUBWir(IM, RD) _ALUWir(X86_SUB, IM, RD)
-#define SUBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SUB, IM, MD, MB, MI, MS)
-
-#define SUBLrr(RS, RD) _ALULrr(X86_SUB, RS, RD)
-#define SUBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SUB, MD, MB, MI, MS, RD)
-#define SUBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SUB, RS, MD, MB, MI, MS)
-#define SUBLir(IM, RD) _ALULir(X86_SUB, IM, RD)
-#define SUBLim(IM, MD, MB, MI, MS) _ALULim(X86_SUB, IM, MD, MB, MI, MS)
-
-#define SUBQrr(RS, RD) _ALUQrr(X86_SUB, RS, RD)
-#define SUBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SUB, MD, MB, MI, MS, RD)
-#define SUBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SUB, RS, MD, MB, MI, MS)
-#define SUBQir(IM, RD) _ALUQir(X86_SUB, IM, RD)
-#define SUBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SUB, IM, MD, MB, MI, MS)
-/* XOR: the _ALU{B,W,L,Q}* encoders specialised with OP = X86_XOR. */
-#define XORBrr(RS, RD) _ALUBrr(X86_XOR, RS, RD)
-#define XORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_XOR, MD, MB, MI, MS, RD)
-#define XORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_XOR, RS, MD, MB, MI, MS)
-#define XORBir(IM, RD) _ALUBir(X86_XOR, IM, RD)
-#define XORBim(IM, MD, MB, MI, MS) _ALUBim(X86_XOR, IM, MD, MB, MI, MS)
-
-#define XORWrr(RS, RD) _ALUWrr(X86_XOR, RS, RD)
-#define XORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_XOR, MD, MB, MI, MS, RD)
-#define XORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_XOR, RS, MD, MB, MI, MS)
-#define XORWir(IM, RD) _ALUWir(X86_XOR, IM, RD)
-#define XORWim(IM, MD, MB, MI, MS) _ALUWim(X86_XOR, IM, MD, MB, MI, MS)
-
-#define XORLrr(RS, RD) _ALULrr(X86_XOR, RS, RD)
-#define XORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_XOR, MD, MB, MI, MS, RD)
-#define XORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_XOR, RS, MD, MB, MI, MS)
-#define XORLir(IM, RD) _ALULir(X86_XOR, IM, RD)
-#define XORLim(IM, MD, MB, MI, MS) _ALULim(X86_XOR, IM, MD, MB, MI, MS)
-
-#define XORQrr(RS, RD) _ALUQrr(X86_XOR, RS, RD)
-#define XORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_XOR, MD, MB, MI, MS, RD)
-#define XORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_XOR, RS, MD, MB, MI, MS)
-#define XORQir(IM, RD) _ALUQir(X86_XOR, IM, RD)
-#define XORQim(IM, MD, MB, MI, MS) _ALUQim(X86_XOR, IM, MD, MB, MI, MS)
-
-
-/* --- Shift/Rotate instructions ------------------------------------------- */
-/* Operation selectors for the _ROTSHI* macros, passed in the `r` position of
- * opcodes 0xc0/0xc1 and 0xd0-0xd3.  Value 6 is not assigned here. */
-enum {
- X86_ROL = 0,
- X86_ROR = 1,
- X86_RCL = 2,
- X86_RCR = 3,
- X86_SHL = 4,
- X86_SHR = 5,
- X86_SAR = 7,
-};
-/* Generic rotate/shift encoders, one set per operand size (B/W/L/Q).
- *   ir/im: immediate count.  When X86_OPTIMIZE_ROTSHI is non-zero and IM == 1
- *          the opcode 0xd0 (byte) or 0xd1 is used with no count byte;
- *          otherwise 0xc0 (byte) or 0xc1 is followed by _u8(IM).
- *   rr/rm: count in a register.  RS must compare equal to X86_CL, in which
- *          case opcode 0xd2 (byte) or 0xd3 is used; for any other RS the
- *          macro evaluates x86_emit_failure() with a fixed message instead.
- * W forms emit _d16() first; Q forms use the _REXQ* prefixes. */
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define _ROTSHIBir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXBrr(0, RD), _O_Mrm (0xd0 ,_b11,OP,_r1(RD) )) : \
- (_REXBrr(0, RD), _O_Mrm_B (0xc0 ,_b11,OP,_r1(RD) ,_u8(IM))) )
-#define _ROTSHIBim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXBrm(0, MB, MI), _O_r_X (0xd0 ,OP ,MD,MB,MI,MS )) : \
- (_REXBrm(0, MB, MI), _O_r_X_B (0xc0 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
-#define _ROTSHIBrr(OP,RS,RD) (((RS) == X86_CL) ? \
- (_REXBrr(RS, RD), _O_Mrm (0xd2 ,_b11,OP,_r1(RD) )) : \
- x86_emit_failure("source register must be CL" ) )
-#define _ROTSHIBrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
- (_REXBrm(RS, MB, MI), _O_r_X (0xd2 ,OP ,MD,MB,MI,MS )) : \
- x86_emit_failure("source register must be CL" ) )
-
-#define _ROTSHIWir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_d16(), _REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r2(RD) )) : \
- (_d16(), _REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r2(RD) ,_u8(IM))) )
-#define _ROTSHIWim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
- (_d16(), _REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
-#define _ROTSHIWrr(OP,RS,RD) (((RS) == X86_CL) ? \
- (_d16(), _REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r2(RD) )) : \
- x86_emit_failure("source register must be CL" ) )
-#define _ROTSHIWrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
- (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
- x86_emit_failure("source register must be CL" ) )
-
-#define _ROTSHILir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r4(RD) )) : \
- (_REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r4(RD) ,_u8(IM))) )
-#define _ROTSHILim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
- (_REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
-#define _ROTSHILrr(OP,RS,RD) (((RS) == X86_CL) ? \
- (_REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r4(RD) )) : \
- x86_emit_failure("source register must be CL" ) )
-#define _ROTSHILrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
- (_REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
- x86_emit_failure("source register must be CL" ) )
-
-#define _ROTSHIQir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXQrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r8(RD) )) : \
- (_REXQrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r8(RD) ,_u8(IM))) )
-#define _ROTSHIQim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXQrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
- (_REXQrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
-#define _ROTSHIQrr(OP,RS,RD) (((RS) == X86_CL) ? \
- (_REXQrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r8(RD) )) : \
- x86_emit_failure("source register must be CL" ) )
-#define _ROTSHIQrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
- (_REXQrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
- x86_emit_failure("source register must be CL" ) )
-/* ROL: the _ROTSHI{B,W,L,Q}* encoders specialised with OP = X86_ROL. */
-#define ROLBir(IM, RD) _ROTSHIBir(X86_ROL, IM, RD)
-#define ROLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS)
-#define ROLBrr(RS, RD) _ROTSHIBrr(X86_ROL, RS, RD)
-#define ROLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS)
-
-#define ROLWir(IM, RD) _ROTSHIWir(X86_ROL, IM, RD)
-#define ROLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS)
-#define ROLWrr(RS, RD) _ROTSHIWrr(X86_ROL, RS, RD)
-#define ROLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS)
-
-#define ROLLir(IM, RD) _ROTSHILir(X86_ROL, IM, RD)
-#define ROLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROL, IM, MD, MB, MI, MS)
-#define ROLLrr(RS, RD) _ROTSHILrr(X86_ROL, RS, RD)
-#define ROLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS)
-
-#define ROLQir(IM, RD) _ROTSHIQir(X86_ROL, IM, RD)
-#define ROLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS)
-#define ROLQrr(RS, RD) _ROTSHIQrr(X86_ROL, RS, RD)
-#define ROLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS)
-/* ROR: the _ROTSHI{B,W,L,Q}* encoders specialised with OP = X86_ROR. */
-#define RORBir(IM, RD) _ROTSHIBir(X86_ROR, IM, RD)
-#define RORBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS)
-#define RORBrr(RS, RD) _ROTSHIBrr(X86_ROR, RS, RD)
-#define RORBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS)
-
-#define RORWir(IM, RD) _ROTSHIWir(X86_ROR, IM, RD)
-#define RORWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS)
-#define RORWrr(RS, RD) _ROTSHIWrr(X86_ROR, RS, RD)
-#define RORWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROR, RS, MD, MB, MI, MS)
-
-#define RORLir(IM, RD) _ROTSHILir(X86_ROR, IM, RD)
-#define RORLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROR, IM, MD, MB, MI, MS)
-#define RORLrr(RS, RD) _ROTSHILrr(X86_ROR, RS, RD)
-#define RORLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS)
-
-#define RORQir(IM, RD) _ROTSHIQir(X86_ROR, IM, RD)
-#define RORQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS)
-#define RORQrr(RS, RD) _ROTSHIQrr(X86_ROR, RS, RD)
-#define RORQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS)
-/* RCL: the _ROTSHI{B,W,L,Q}* encoders specialised with OP = X86_RCL. */
-#define RCLBir(IM, RD) _ROTSHIBir(X86_RCL, IM, RD)
-#define RCLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS)
-#define RCLBrr(RS, RD) _ROTSHIBrr(X86_RCL, RS, RD)
-#define RCLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS)
-
-#define RCLWir(IM, RD) _ROTSHIWir(X86_RCL, IM, RD)
-#define RCLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS)
-#define RCLWrr(RS, RD) _ROTSHIWrr(X86_RCL, RS, RD)
-#define RCLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS)
-
-#define RCLLir(IM, RD) _ROTSHILir(X86_RCL, IM, RD)
-#define RCLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCL, IM, MD, MB, MI, MS)
-#define RCLLrr(RS, RD) _ROTSHILrr(X86_RCL, RS, RD)
-#define RCLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS)
-
-#define RCLQir(IM, RD) _ROTSHIQir(X86_RCL, IM, RD)
-#define RCLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS)
-#define RCLQrr(RS, RD) _ROTSHIQrr(X86_RCL, RS, RD)
-#define RCLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS)
-/* RCR: the _ROTSHI{B,W,L,Q}* encoders specialised with OP = X86_RCR. */
-#define RCRBir(IM, RD) _ROTSHIBir(X86_RCR, IM, RD)
-#define RCRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS)
-#define RCRBrr(RS, RD) _ROTSHIBrr(X86_RCR, RS, RD)
-#define RCRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS)
-
-#define RCRWir(IM, RD) _ROTSHIWir(X86_RCR, IM, RD)
-#define RCRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS)
-#define RCRWrr(RS, RD) _ROTSHIWrr(X86_RCR, RS, RD)
-#define RCRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS)
-
-#define RCRLir(IM, RD) _ROTSHILir(X86_RCR, IM, RD)
-#define RCRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCR, IM, MD, MB, MI, MS)
-#define RCRLrr(RS, RD) _ROTSHILrr(X86_RCR, RS, RD)
-#define RCRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS)
-
-#define RCRQir(IM, RD) _ROTSHIQir(X86_RCR, IM, RD)
-#define RCRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS)
-#define RCRQrr(RS, RD) _ROTSHIQrr(X86_RCR, RS, RD)
-#define RCRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS)
-/* SHL: the _ROTSHI{B,W,L,Q}* encoders specialised with OP = X86_SHL. */
-#define SHLBir(IM, RD) _ROTSHIBir(X86_SHL, IM, RD)
-#define SHLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS)
-#define SHLBrr(RS, RD) _ROTSHIBrr(X86_SHL, RS, RD)
-#define SHLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS)
-
-#define SHLWir(IM, RD) _ROTSHIWir(X86_SHL, IM, RD)
-#define SHLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS)
-#define SHLWrr(RS, RD) _ROTSHIWrr(X86_SHL, RS, RD)
-#define SHLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS)
-
-#define SHLLir(IM, RD) _ROTSHILir(X86_SHL, IM, RD)
-#define SHLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHL, IM, MD, MB, MI, MS)
-#define SHLLrr(RS, RD) _ROTSHILrr(X86_SHL, RS, RD)
-#define SHLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS)
-
-#define SHLQir(IM, RD) _ROTSHIQir(X86_SHL, IM, RD)
-#define SHLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS)
-#define SHLQrr(RS, RD) _ROTSHIQrr(X86_SHL, RS, RD)
-#define SHLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS)
-/* SHR: the _ROTSHI{B,W,L,Q}* encoders specialised with OP = X86_SHR. */
-#define SHRBir(IM, RD) _ROTSHIBir(X86_SHR, IM, RD)
-#define SHRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS)
-#define SHRBrr(RS, RD) _ROTSHIBrr(X86_SHR, RS, RD)
-#define SHRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS)
-
-#define SHRWir(IM, RD) _ROTSHIWir(X86_SHR, IM, RD)
-#define SHRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHR, IM, MD, MB, MI, MS)
-#define SHRWrr(RS, RD) _ROTSHIWrr(X86_SHR, RS, RD)
-#define SHRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS)
-
-#define SHRLir(IM, RD) _ROTSHILir(X86_SHR, IM, RD)
-#define SHRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHR, IM, MD, MB, MI, MS)
-#define SHRLrr(RS, RD) _ROTSHILrr(X86_SHR, RS, RD)
-#define SHRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS)
-
-#define SHRQir(IM, RD) _ROTSHIQir(X86_SHR, IM, RD)
-#define SHRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS)
-#define SHRQrr(RS, RD) _ROTSHIQrr(X86_SHR, RS, RD)
-#define SHRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS)
-/* SAL: object-like aliases that expand to the SHL macro of the same size and
- * operand shape, so they take exactly the same arguments. */
-#define SALBir SHLBir
-#define SALBim SHLBim
-#define SALBrr SHLBrr
-#define SALBrm SHLBrm
-
-#define SALWir SHLWir
-#define SALWim SHLWim
-#define SALWrr SHLWrr
-#define SALWrm SHLWrm
-
-#define SALLir SHLLir
-#define SALLim SHLLim
-#define SALLrr SHLLrr
-#define SALLrm SHLLrm
-
-#define SALQir SHLQir
-#define SALQim SHLQim
-#define SALQrr SHLQrr
-#define SALQrm SHLQrm
-/* SAR: the _ROTSHI{B,W,L,Q}* encoders specialised with OP = X86_SAR. */
-#define SARBir(IM, RD) _ROTSHIBir(X86_SAR, IM, RD)
-#define SARBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS)
-#define SARBrr(RS, RD) _ROTSHIBrr(X86_SAR, RS, RD)
-#define SARBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS)
-
-#define SARWir(IM, RD) _ROTSHIWir(X86_SAR, IM, RD)
-#define SARWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS)
-#define SARWrr(RS, RD) _ROTSHIWrr(X86_SAR, RS, RD)
-#define SARWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS)
-
-#define SARLir(IM, RD) _ROTSHILir(X86_SAR, IM, RD)
-#define SARLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SAR, IM, MD, MB, MI, MS)
-#define SARLrr(RS, RD) _ROTSHILrr(X86_SAR, RS, RD)
-#define SARLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS)
-
-#define SARQir(IM, RD) _ROTSHIQir(X86_SAR, IM, RD)
-#define SARQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS)
-#define SARQrr(RS, RD) _ROTSHIQrr(X86_SAR, RS, RD)
-#define SARQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS)
-
-
-/* --- Bit test instructions ----------------------------------------------- */
-/* Operation selectors for the _BT* macros: passed in the `r` position of the
- * 0x0fba immediate forms, and OR-ed (shifted left by three) into 0x0f83 to
- * build the opcode of the register forms. */
-enum {
- X86_BT = 4,
- X86_BTS = 5,
- X86_BTR = 6,
- X86_BTC = 7,
-};
-/* Generic bit-test encoders for W, L and Q operands.
- *   ir/im: two-byte opcode 0x0fba with OP in the `r` position, followed by
- *          the bit index as _u8(IM).
- *   rr/rm: two-byte opcode 0x0f83 | (OP << 3), bit index in register RS.
- * W forms emit _d16() first; Q forms use the _REXQ* prefixes. */
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define _BTWir(OP, IM, RD) (_d16(), _REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r2(RD) ,_u8(IM)))
-#define _BTWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
-#define _BTWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD) ))
-#define _BTWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r2(RS) ,MD,MB,MI,MS ))
-
-#define _BTLir(OP, IM, RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r4(RD) ,_u8(IM)))
-#define _BTLim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
-#define _BTLrr(OP, RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD) ))
-#define _BTLrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r4(RS) ,MD,MB,MI,MS ))
-
-#define _BTQir(OP, IM, RD) (_REXQrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r8(RD) ,_u8(IM)))
-#define _BTQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
-#define _BTQrr(OP, RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD) ))
-#define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS ))
-/* BT: the _BT{W,L,Q}* encoders specialised with OP = X86_BT. */
-#define BTWir(IM, RD) _BTWir(X86_BT, IM, RD)
-#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS)
-#define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD)
-#define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS)
-
-#define BTLir(IM, RD) _BTLir(X86_BT, IM, RD)
-#define BTLim(IM, MD, MB, MI, MS) _BTLim(X86_BT, IM, MD, MB, MI, MS)
-#define BTLrr(RS, RD) _BTLrr(X86_BT, RS, RD)
-#define BTLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BT, RS, MD, MB, MI, MS)
-
-#define BTQir(IM, RD) _BTQir(X86_BT, IM, RD)
-#define BTQim(IM, MD, MB, MI, MS) _BTQim(X86_BT, IM, MD, MB, MI, MS)
-#define BTQrr(RS, RD) _BTQrr(X86_BT, RS, RD)
-#define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS)
-/* BTC: the _BT{W,L,Q}* encoders specialised with OP = X86_BTC. */
-#define BTCWir(IM, RD) _BTWir(X86_BTC, IM, RD)
-#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS)
-#define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD)
-#define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS)
-
-#define BTCLir(IM, RD) _BTLir(X86_BTC, IM, RD)
-#define BTCLim(IM, MD, MB, MI, MS) _BTLim(X86_BTC, IM, MD, MB, MI, MS)
-#define BTCLrr(RS, RD) _BTLrr(X86_BTC, RS, RD)
-#define BTCLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTC, RS, MD, MB, MI, MS)
-
-#define BTCQir(IM, RD) _BTQir(X86_BTC, IM, RD)
-#define BTCQim(IM, MD, MB, MI, MS) _BTQim(X86_BTC, IM, MD, MB, MI, MS)
-#define BTCQrr(RS, RD) _BTQrr(X86_BTC, RS, RD)
-#define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS)
-/* BTR: the _BT{W,L,Q}* encoders specialised with OP = X86_BTR. */
-#define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD)
-#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS)
-#define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD)
-#define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS)
-
-#define BTRLir(IM, RD) _BTLir(X86_BTR, IM, RD)
-#define BTRLim(IM, MD, MB, MI, MS) _BTLim(X86_BTR, IM, MD, MB, MI, MS)
-#define BTRLrr(RS, RD) _BTLrr(X86_BTR, RS, RD)
-#define BTRLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTR, RS, MD, MB, MI, MS)
-
-#define BTRQir(IM, RD) _BTQir(X86_BTR, IM, RD)
-#define BTRQim(IM, MD, MB, MI, MS) _BTQim(X86_BTR, IM, MD, MB, MI, MS)
-#define BTRQrr(RS, RD) _BTQrr(X86_BTR, RS, RD)
-#define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS)
-/* BTS: the _BT{W,L,Q}* encoders specialised with OP = X86_BTS. */
-#define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD)
-#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS)
-#define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD)
-#define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS)
-
-#define BTSLir(IM, RD) _BTLir(X86_BTS, IM, RD)
-#define BTSLim(IM, MD, MB, MI, MS) _BTLim(X86_BTS, IM, MD, MB, MI, MS)
-#define BTSLrr(RS, RD) _BTLrr(X86_BTS, RS, RD)
-#define BTSLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTS, RS, MD, MB, MI, MS)
-
-#define BTSQir(IM, RD) _BTQir(X86_BTS, IM, RD)
-#define BTSQim(IM, MD, MB, MI, MS) _BTQim(X86_BTS, IM, MD, MB, MI, MS)
-#define BTSQrr(RS, RD) _BTQrr(X86_BTS, RS, RD)
-#define BTSQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTS, RS, MD, MB, MI, MS)
-
-
-/* --- Move instructions --------------------------------------------------- */
-/* MOV encoders per operand size (B/W/L/Q).
- *   rr, rm: opcode 0x88 (byte) or 0x89.
- *   mr:     opcode 0x8a (byte) or 0x8b.
- *   ir:     opcode base 0xb0 (byte) or 0xb8 combined with the register by
- *           _Or_B/_Or_W/_Or_L/_Or_Q, which also receive the immediate.
- *   im:     opcode 0xc6 (byte) or 0xc7 followed by the immediate.
- * W forms emit _d16() first; Q forms use the _REXQ* prefixes.
- * MOVQim goes through _O_X_L exactly like MOVLim; only the prefix differs.
- * NOTE(review): that presumably means a 32-bit immediate that the CPU
- * sign-extends to 64 bits - confirm against the ISA manual. */
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define MOVBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) ))
-#define MOVBmr(MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS ))
-#define MOVBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS ))
-#define MOVBir(IM, R) (_REXBrr(0, R), _Or_B (0xb0,_r1(R) ,_su8(IM)))
-#define MOVBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM)))
-
-#define MOVWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) ))
-#define MOVWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS ))
-#define MOVWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS ))
-#define MOVWir(IM, R) (_d16(), _REXLrr(0, R), _Or_W (0xb8,_r2(R) ,_su16(IM)))
-#define MOVWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM)))
-
-#define MOVLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) ))
-#define MOVLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS ))
-#define MOVLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS ))
-#define MOVLir(IM, R) (_REXLrr(0, R), _Or_L (0xb8,_r4(R) ,IM ))
-#define MOVLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ))
-
-#define MOVQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x89 ,_b11,_r8(RS),_r8(RD) ))
-#define MOVQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS ))
-#define MOVQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS ))
-#define MOVQir(IM, R) (_REXQrr(0, R), _Or_Q (0xb8,_r8(R) ,IM ))
-#define MOVQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ))
-
-
-/* --- Unary and Multiply/Divide instructions ------------------------------ */
-/* Operation selectors for the _UNARY* macros, passed in the `r` position of
- * opcodes 0xf6/0xf7.  Values 0 and 1 are not assigned here. */
-enum {
- X86_NOT = 2,
- X86_NEG = 3,
- X86_MUL = 4,
- X86_IMUL = 5,
- X86_DIV = 6,
- X86_IDIV = 7,
-};
-/* Generic single-operand encoders: opcode 0xf6 for byte operands and 0xf7 for
- * W/L/Q, with OP in the `r` position; `r` forms take a register RS, `m` forms
- * a memory operand.  W forms emit _d16() first; Q forms use _REXQ* prefixes.
- * The byte memory form spells its prefix _REXBrm(0, MB, MI) while the W/L/Q
- * memory forms spell it _REX?mr(MB, MI, 0); since each _REX?rm(RS,MB,MI) is
- * defined as _REX?mr(MB,MI,RS), both spellings expand to the same call. */
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define _UNARYBr(OP, RS) (_REXBrr(0, RS), _O_Mrm (0xf6 ,_b11,OP ,_r1(RS) ))
-#define _UNARYBm(OP, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xf6 ,OP ,MD,MB,MI,MS ))
-#define _UNARYWr(OP, RS) (_d16(), _REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r2(RS) ))
-#define _UNARYWm(OP, MD, MB, MI, MS) (_d16(), _REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
-#define _UNARYLr(OP, RS) (_REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r4(RS) ))
-#define _UNARYLm(OP, MD, MB, MI, MS) (_REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
-#define _UNARYQr(OP, RS) (_REXQrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r8(RS) ))
-#define _UNARYQm(OP, MD, MB, MI, MS) (_REXQmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
-/* Named one-operand emitters.  Each macro only binds OP in the matching
- * _UNARY{B,W,L,Q}{r,m} helper: B/W/L/Q selects the operand size, "r" takes
- * a register RS and "m" a memory operand MD,MB,MI,MS.
- */
-#define NOTBr(RS) _UNARYBr(X86_NOT, RS)
-#define NOTBm(MD, MB, MI, MS) _UNARYBm(X86_NOT, MD, MB, MI, MS)
-#define NOTWr(RS) _UNARYWr(X86_NOT, RS)
-#define NOTWm(MD, MB, MI, MS) _UNARYWm(X86_NOT, MD, MB, MI, MS)
-#define NOTLr(RS) _UNARYLr(X86_NOT, RS)
-#define NOTLm(MD, MB, MI, MS) _UNARYLm(X86_NOT, MD, MB, MI, MS)
-#define NOTQr(RS) _UNARYQr(X86_NOT, RS)
-#define NOTQm(MD, MB, MI, MS) _UNARYQm(X86_NOT, MD, MB, MI, MS)
-
-#define NEGBr(RS) _UNARYBr(X86_NEG, RS)
-#define NEGBm(MD, MB, MI, MS) _UNARYBm(X86_NEG, MD, MB, MI, MS)
-#define NEGWr(RS) _UNARYWr(X86_NEG, RS)
-#define NEGWm(MD, MB, MI, MS) _UNARYWm(X86_NEG, MD, MB, MI, MS)
-#define NEGLr(RS) _UNARYLr(X86_NEG, RS)
-#define NEGLm(MD, MB, MI, MS) _UNARYLm(X86_NEG, MD, MB, MI, MS)
-#define NEGQr(RS) _UNARYQr(X86_NEG, RS)
-#define NEGQm(MD, MB, MI, MS) _UNARYQm(X86_NEG, MD, MB, MI, MS)
-
-#define MULBr(RS) _UNARYBr(X86_MUL, RS)
-#define MULBm(MD, MB, MI, MS) _UNARYBm(X86_MUL, MD, MB, MI, MS)
-#define MULWr(RS) _UNARYWr(X86_MUL, RS)
-#define MULWm(MD, MB, MI, MS) _UNARYWm(X86_MUL, MD, MB, MI, MS)
-#define MULLr(RS) _UNARYLr(X86_MUL, RS)
-#define MULLm(MD, MB, MI, MS) _UNARYLm(X86_MUL, MD, MB, MI, MS)
-#define MULQr(RS) _UNARYQr(X86_MUL, RS)
-#define MULQm(MD, MB, MI, MS) _UNARYQm(X86_MUL, MD, MB, MI, MS)
-
-#define IMULBr(RS) _UNARYBr(X86_IMUL, RS)
-#define IMULBm(MD, MB, MI, MS) _UNARYBm(X86_IMUL, MD, MB, MI, MS)
-#define IMULWr(RS) _UNARYWr(X86_IMUL, RS)
-#define IMULWm(MD, MB, MI, MS) _UNARYWm(X86_IMUL, MD, MB, MI, MS)
-#define IMULLr(RS) _UNARYLr(X86_IMUL, RS)
-#define IMULLm(MD, MB, MI, MS) _UNARYLm(X86_IMUL, MD, MB, MI, MS)
-#define IMULQr(RS) _UNARYQr(X86_IMUL, RS)
-#define IMULQm(MD, MB, MI, MS) _UNARYQm(X86_IMUL, MD, MB, MI, MS)
-
-#define DIVBr(RS) _UNARYBr(X86_DIV, RS)
-#define DIVBm(MD, MB, MI, MS) _UNARYBm(X86_DIV, MD, MB, MI, MS)
-#define DIVWr(RS) _UNARYWr(X86_DIV, RS)
-#define DIVWm(MD, MB, MI, MS) _UNARYWm(X86_DIV, MD, MB, MI, MS)
-#define DIVLr(RS) _UNARYLr(X86_DIV, RS)
-#define DIVLm(MD, MB, MI, MS) _UNARYLm(X86_DIV, MD, MB, MI, MS)
-#define DIVQr(RS) _UNARYQr(X86_DIV, RS)
-#define DIVQm(MD, MB, MI, MS) _UNARYQm(X86_DIV, MD, MB, MI, MS)
-
-#define IDIVBr(RS) _UNARYBr(X86_IDIV, RS)
-#define IDIVBm(MD, MB, MI, MS) _UNARYBm(X86_IDIV, MD, MB, MI, MS)
-#define IDIVWr(RS) _UNARYWr(X86_IDIV, RS)
-#define IDIVWm(MD, MB, MI, MS) _UNARYWm(X86_IDIV, MD, MB, MI, MS)
-#define IDIVLr(RS) _UNARYLr(X86_IDIV, RS)
-#define IDIVLm(MD, MB, MI, MS) _UNARYLm(X86_IDIV, MD, MB, MI, MS)
-#define IDIVQr(RS) _UNARYQr(X86_IDIV, RS)
-#define IDIVQm(MD, MB, MI, MS) _UNARYQm(X86_IDIV, MD, MB, MI, MS)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * Two- and three-operand IMUL emitters: opcode 0x0faf takes a register and
- * a register/memory operand, opcode 0x69 additionally takes an immediate IM.
- * Word forms emit _d16() first and pass IM through _su16().
- *
- * In every rr macro of this file the first _REX?rr() argument names the
- * register sitting in the ModRM "r" column and the second the one in the
- * "m" column.  IMUL?ir() puts RD in both columns, so RD is passed for both
- * REX arguments; with 0 as first argument the "r" column would presumably
- * lose its REX extension when RD is one of R8..R15.
- *
- * NOTE(review): IMUL?irr() places RS in the "r" column and RD in the "m"
- * column, the opposite of IMUL?rr()/IMUL?imr() where RD sits in "r".  Left
- * unchanged -- confirm the intended operand order against the callers.
- */
-
-#define IMULWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) ))
-#define IMULWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS ))
-
-#define IMULWirr(IM,RS,RD) (_d16(), _REXLrr(RS, RD), _Os_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) ))
-#define IMULWimr(IM,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _Os_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) ))
-
-#define IMULLir(IM, RD) (_REXLrr(RD, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM ))
-#define IMULLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) ))
-#define IMULLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS ))
-
-#define IMULQir(IM, RD) (_REXQrr(RD, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RD),_r8(RD) ,IM ))
-#define IMULQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r8(RD),_r8(RS) ))
-#define IMULQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0faf ,_r8(RD) ,MD,MB,MI,MS ))
-
-#define IMULLirr(IM,RS,RD) (_REXLrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM ))
-#define IMULLimr(IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM ))
-
-#define IMULQirr(IM,RS,RD) (_REXQrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RS),_r8(RD) ,IM ))
-#define IMULQimr(IM,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r8(RD) ,MD,MB,MI,MS ,IM ))
-
-
-/* --- Control Flow related instructions ----------------------------------- */
-
-enum { /* condition codes 0x0..0xf; OR-ed into the opcode by the JCC*, SETCC* and CMOV* macros. Several names share one value. */
- X86_CC_O = 0x0,
- X86_CC_NO = 0x1,
- X86_CC_NAE = 0x2,
- X86_CC_B = 0x2,
- X86_CC_C = 0x2,
- X86_CC_AE = 0x3,
- X86_CC_NB = 0x3,
- X86_CC_NC = 0x3,
- X86_CC_E = 0x4,
- X86_CC_Z = 0x4,
- X86_CC_NE = 0x5,
- X86_CC_NZ = 0x5,
- X86_CC_BE = 0x6,
- X86_CC_NA = 0x6,
- X86_CC_A = 0x7,
- X86_CC_NBE = 0x7,
- X86_CC_S = 0x8,
- X86_CC_NS = 0x9,
- X86_CC_P = 0xa,
- X86_CC_PE = 0xa,
- X86_CC_NP = 0xb,
- X86_CC_PO = 0xb,
- X86_CC_L = 0xc,
- X86_CC_NGE = 0xc,
- X86_CC_GE = 0xd,
- X86_CC_NL = 0xd,
- X86_CC_LE = 0xe,
- X86_CC_NG = 0xe,
- X86_CC_G = 0xf,
- X86_CC_NLE = 0xf,
-};
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * Call emitters.  CALLm(M) is opcode 0xe8 with M cast to int and handed to
- * _O_D32.  CALLsr(R) and CALLsm(D,B,I,S) are opcode 0xff with _b010 in the
- * "r" column, taking the target from register R or from memory D,B,I,S.
- * CALLsr picks _CALLQsr or _CALLLsr depending on X86_TARGET_64BIT.
- *
- * NOTE(review): _CALLQsr uses _REXLrr (not _REXQrr) together with _r8();
- * presumably deliberate -- confirm.
- */
-
-// FIXME: no prefix is available to encode a 32-bit operand size in 64-bit mode
-#define CALLm(M) _O_D32 (0xe8 ,(int)(M) )
-#define _CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) ))
-#define _CALLQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) ))
-#define CALLsr(R) ( X86_TARGET_64BIT ? _CALLQsr(R) : _CALLLsr(R))
-#define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S ))
-
-/* FIXME: no prefix is available to encode a 32-bit operand size in 64-bit mode
- *
- * Unconditional jump emitters.  JMPSm(M) is opcode 0xeb via _O_D8, JMPm(M)
- * opcode 0xe9 via _O_D32; both cast M to int.  JMPsr(R) and JMPsm(D,B,I,S)
- * are opcode 0xff with _b100 in the "r" column, taking the target from
- * register R or memory D,B,I,S.  JMPsr picks _JMPQsr or _JMPLsr depending on
- * X86_TARGET_64BIT.
- */
-#define JMPSm(M) _O_D8 (0xeb ,(int)(M) )
-#define JMPm(M) _O_D32 (0xe9 ,(int)(M) )
-#define _JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) ))
-#define _JMPQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) ))
-#define JMPsr(R) ( X86_TARGET_64BIT ? _JMPQsr(R) : _JMPLsr(R))
-#define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S ))
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * Short conditional jumps: opcode 0x70|CC, CC being an X86_CC_* value.
- * JCCSii passes D (cast to int, then _sc) to _O_B; JCCSim passes D (cast to
- * int) to _O_D8.  The J<cond>Sm(D) macros below bind CC in JCCSim.
- */
-#define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) )
-#define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) )
-#define JOSm(D) JCCSim(X86_CC_O, D)
-#define JNOSm(D) JCCSim(X86_CC_NO, D)
-#define JBSm(D) JCCSim(X86_CC_B, D)
-#define JNAESm(D) JCCSim(X86_CC_NAE, D)
-#define JNBSm(D) JCCSim(X86_CC_NB, D)
-#define JAESm(D) JCCSim(X86_CC_AE, D)
-#define JESm(D) JCCSim(X86_CC_E, D)
-#define JZSm(D) JCCSim(X86_CC_Z, D)
-#define JNESm(D) JCCSim(X86_CC_NE, D)
-#define JNZSm(D) JCCSim(X86_CC_NZ, D)
-#define JBESm(D) JCCSim(X86_CC_BE, D)
-#define JNASm(D) JCCSim(X86_CC_NA, D)
-#define JNBESm(D) JCCSim(X86_CC_NBE, D)
-#define JASm(D) JCCSim(X86_CC_A, D)
-#define JSSm(D) JCCSim(X86_CC_S, D)
-#define JNSSm(D) JCCSim(X86_CC_NS, D)
-#define JPSm(D) JCCSim(X86_CC_P, D)
-#define JPESm(D) JCCSim(X86_CC_PE, D)
-#define JNPSm(D) JCCSim(X86_CC_NP, D)
-#define JPOSm(D) JCCSim(X86_CC_PO, D)
-#define JLSm(D) JCCSim(X86_CC_L, D)
-#define JNGESm(D) JCCSim(X86_CC_NGE, D)
-#define JNLSm(D) JCCSim(X86_CC_NL, D)
-#define JGESm(D) JCCSim(X86_CC_GE, D)
-#define JLESm(D) JCCSim(X86_CC_LE, D)
-#define JNGSm(D) JCCSim(X86_CC_NG, D)
-#define JNLESm(D) JCCSim(X86_CC_NLE, D)
-#define JGSm(D) JCCSim(X86_CC_G, D)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * Near conditional jumps: two-byte opcode 0x0f80|CC, CC being an X86_CC_*
- * value.  JCCii passes D (cast to int) to _OO_L, JCCim passes it to
- * _OO_D32.  The J<cond>m(D) macros below bind CC in JCCim.
- */
-#define JCCii(CC, D) _OO_L (0x0f80|(CC) ,(int)(D) )
-#define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(int)(D) )
-#define JOm(D) JCCim(X86_CC_O, D)
-#define JNOm(D) JCCim(X86_CC_NO, D)
-#define JBm(D) JCCim(X86_CC_B, D)
-#define JNAEm(D) JCCim(X86_CC_NAE, D)
-#define JNBm(D) JCCim(X86_CC_NB, D)
-#define JAEm(D) JCCim(X86_CC_AE, D)
-#define JEm(D) JCCim(X86_CC_E, D)
-#define JZm(D) JCCim(X86_CC_Z, D)
-#define JNEm(D) JCCim(X86_CC_NE, D)
-#define JNZm(D) JCCim(X86_CC_NZ, D)
-#define JBEm(D) JCCim(X86_CC_BE, D)
-#define JNAm(D) JCCim(X86_CC_NA, D)
-#define JNBEm(D) JCCim(X86_CC_NBE, D)
-#define JAm(D) JCCim(X86_CC_A, D)
-#define JSm(D) JCCim(X86_CC_S, D)
-#define JNSm(D) JCCim(X86_CC_NS, D)
-#define JPm(D) JCCim(X86_CC_P, D)
-#define JPEm(D) JCCim(X86_CC_PE, D)
-#define JNPm(D) JCCim(X86_CC_NP, D)
-#define JPOm(D) JCCim(X86_CC_PO, D)
-#define JLm(D) JCCim(X86_CC_L, D)
-#define JNGEm(D) JCCim(X86_CC_NGE, D)
-#define JNLm(D) JCCim(X86_CC_NL, D)
-#define JGEm(D) JCCim(X86_CC_GE, D)
-#define JLEm(D) JCCim(X86_CC_LE, D)
-#define JNGm(D) JCCim(X86_CC_NG, D)
-#define JNLEm(D) JCCim(X86_CC_NLE, D)
-#define JGm(D) JCCim(X86_CC_G, D)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * SETcc on a byte register: two-byte opcode 0x0f90|CC with _b000 in the "r"
- * column and RD, checked with _r1(), in the "m" column.  The SET<cond>r(RD)
- * macros below bind CC to the matching X86_CC_* value.
- */
-#define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) ))
-#define SETOr(RD) SETCCir(X86_CC_O, RD)
-#define SETNOr(RD) SETCCir(X86_CC_NO, RD)
-#define SETBr(RD) SETCCir(X86_CC_B, RD)
-#define SETNAEr(RD) SETCCir(X86_CC_NAE, RD)
-#define SETNBr(RD) SETCCir(X86_CC_NB, RD)
-#define SETAEr(RD) SETCCir(X86_CC_AE, RD)
-#define SETEr(RD) SETCCir(X86_CC_E, RD)
-#define SETZr(RD) SETCCir(X86_CC_Z, RD)
-#define SETNEr(RD) SETCCir(X86_CC_NE, RD)
-#define SETNZr(RD) SETCCir(X86_CC_NZ, RD)
-#define SETBEr(RD) SETCCir(X86_CC_BE, RD)
-#define SETNAr(RD) SETCCir(X86_CC_NA, RD)
-#define SETNBEr(RD) SETCCir(X86_CC_NBE, RD)
-#define SETAr(RD) SETCCir(X86_CC_A, RD)
-#define SETSr(RD) SETCCir(X86_CC_S, RD)
-#define SETNSr(RD) SETCCir(X86_CC_NS, RD)
-#define SETPr(RD) SETCCir(X86_CC_P, RD)
-#define SETPEr(RD) SETCCir(X86_CC_PE, RD)
-#define SETNPr(RD) SETCCir(X86_CC_NP, RD)
-#define SETPOr(RD) SETCCir(X86_CC_PO, RD)
-#define SETLr(RD) SETCCir(X86_CC_L, RD)
-#define SETNGEr(RD) SETCCir(X86_CC_NGE, RD)
-#define SETNLr(RD) SETCCir(X86_CC_NL, RD)
-#define SETGEr(RD) SETCCir(X86_CC_GE, RD)
-#define SETLEr(RD) SETCCir(X86_CC_LE, RD)
-#define SETNGr(RD) SETCCir(X86_CC_NG, RD)
-#define SETNLEr(RD) SETCCir(X86_CC_NLE, RD)
-#define SETGr(RD) SETCCir(X86_CC_G, RD)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * SETcc on a memory byte: two-byte opcode 0x0f90|CC with _b000 in the "r"
- * column and the destination given as MD,MB,MI,MS.  The SET<cond>m(D,B,I,S)
- * macros below bind CC to the matching X86_CC_* value.
- */
-#define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS ))
-#define SETOm(D, B, I, S) SETCCim(X86_CC_O, D, B, I, S)
-#define SETNOm(D, B, I, S) SETCCim(X86_CC_NO, D, B, I, S)
-#define SETBm(D, B, I, S) SETCCim(X86_CC_B, D, B, I, S)
-#define SETNAEm(D, B, I, S) SETCCim(X86_CC_NAE, D, B, I, S)
-#define SETNBm(D, B, I, S) SETCCim(X86_CC_NB, D, B, I, S)
-#define SETAEm(D, B, I, S) SETCCim(X86_CC_AE, D, B, I, S)
-#define SETEm(D, B, I, S) SETCCim(X86_CC_E, D, B, I, S)
-#define SETZm(D, B, I, S) SETCCim(X86_CC_Z, D, B, I, S)
-#define SETNEm(D, B, I, S) SETCCim(X86_CC_NE, D, B, I, S)
-#define SETNZm(D, B, I, S) SETCCim(X86_CC_NZ, D, B, I, S)
-#define SETBEm(D, B, I, S) SETCCim(X86_CC_BE, D, B, I, S)
-#define SETNAm(D, B, I, S) SETCCim(X86_CC_NA, D, B, I, S)
-#define SETNBEm(D, B, I, S) SETCCim(X86_CC_NBE, D, B, I, S)
-#define SETAm(D, B, I, S) SETCCim(X86_CC_A, D, B, I, S)
-#define SETSm(D, B, I, S) SETCCim(X86_CC_S, D, B, I, S)
-#define SETNSm(D, B, I, S) SETCCim(X86_CC_NS, D, B, I, S)
-#define SETPm(D, B, I, S) SETCCim(X86_CC_P, D, B, I, S)
-#define SETPEm(D, B, I, S) SETCCim(X86_CC_PE, D, B, I, S)
-#define SETNPm(D, B, I, S) SETCCim(X86_CC_NP, D, B, I, S)
-#define SETPOm(D, B, I, S) SETCCim(X86_CC_PO, D, B, I, S)
-#define SETLm(D, B, I, S) SETCCim(X86_CC_L, D, B, I, S)
-#define SETNGEm(D, B, I, S) SETCCim(X86_CC_NGE, D, B, I, S)
-#define SETNLm(D, B, I, S) SETCCim(X86_CC_NL, D, B, I, S)
-#define SETGEm(D, B, I, S) SETCCim(X86_CC_GE, D, B, I, S)
-#define SETLEm(D, B, I, S) SETCCim(X86_CC_LE, D, B, I, S)
-#define SETNGm(D, B, I, S) SETCCim(X86_CC_NG, D, B, I, S)
-#define SETNLEm(D, B, I, S) SETCCim(X86_CC_NLE, D, B, I, S)
-#define SETGm(D, B, I, S) SETCCim(X86_CC_G, D, B, I, S)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * Conditional moves: two-byte opcode 0x0f40|CC, CC being an X86_CC_* value.
- * The destination RD goes in the "r" column; the source is register RS
- * ("rr") or memory MD,MB,MI,MS ("mr").  W forms emit _d16() first and use
- * _r2(), L forms use _r4(), Q forms _REXQ*() and _r8().
- */
-#define CMOVWrr(CC,RS,RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) ))
-#define CMOVWmr(CC,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r2(RD) ,MD,MB,MI,MS ))
-#define CMOVLrr(CC,RS,RD) (_REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r4(RD),_r4(RS) ))
-#define CMOVLmr(CC,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r4(RD) ,MD,MB,MI,MS ))
-#define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r8(RD),_r8(RS) ))
-#define CMOVQmr(CC,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r8(RD) ,MD,MB,MI,MS ))
-
-
-/* --- Push/Pop instructions ----------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * POP emitters.  Register forms are opcode 0x58 combined with the register
- * through _Or; memory forms are opcode 0x8f with _b000 in the "r" column.
- * W and L forms are wrapped in _m32only(), Q forms in _m64only() with a
- * _REXQ*() prefix.  NOTE(review): the wrappers presumably restrict the macro
- * to 32-bit resp. 64-bit targets -- confirm against their definitions.
- */
-
-#define POPWr(RD) _m32only((_d16(), _Or (0x58,_r2(RD) )))
-#define POPWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
-
-#define POPLr(RD) _m32only( _Or (0x58,_r4(RD) ))
-#define POPLm(MD, MB, MI, MS) _m32only( _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))
-
-#define POPQr(RD) _m64only((_REXQr(RD), _Or (0x58,_r8(RD) )))
-#define POPQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
-/* 16-bit PUSH emitters, each wrapped in _m32only() and prefixed by _d16():
- * register (0x50 combined with RS via _Or), memory (0xff with _b110 in the
- * "r" column) and immediate (0x68).  PUSHWm hands _O_r_X the same six
- * arguments as PUSHLm/PUSHQm: opcode, _b110, MD, MB, MI, MS -- there must be
- * no empty argument between the opcode and _b110.
- */
-#define PUSHWr(RS) _m32only((_d16(), _Or (0x50,_r2(RS) )))
-#define PUSHWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
-#define PUSHWi(IM) _m32only((_d16(), _Os_sW (0x68 ,IM )))
-/* 32-bit and 64-bit PUSH emitters followed by the operand-less stack
- * instructions.  PUSH: register = 0x50 via _Or, memory = 0xff with _b110 in
- * the "r" column, immediate = 0x68 via _Os_sL.  L forms are wrapped in
- * _m32only(), Q forms in _m64only(); PUSHQi emits no _REXQ*() prefix.
- * POPA/PUSHA are POPAD/PUSHAD (0x61/0x60) preceded by _d16().
- */
-#define PUSHLr(RS) _m32only( _Or (0x50,_r4(RS) ))
-#define PUSHLm(MD, MB, MI, MS) _m32only( _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))
-#define PUSHLi(IM) _m32only( _Os_sL (0x68 ,IM ))
-
-#define PUSHQr(RS) _m64only((_REXQr(RS), _Or (0x50,_r8(RS) )))
-#define PUSHQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
-#define PUSHQi(IM) _m64only( _Os_sL (0x68 ,IM ))
-
-#define POPA() (_d16(), _O (0x61 ))
-#define POPAD() _O (0x61 )
-
-#define PUSHA() (_d16(), _O (0x60 ))
-#define PUSHAD() _O (0x60 )
-
-#define POPF() _O (0x9d )
-#define PUSHF() _O (0x9c )
-
-
-/* --- Test instructions --------------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * TEST emitters for byte/word/long/quad operands.
- *   rr/rm: opcode 0x84 (byte) or 0x85 (wider), RS in the "r" column.
- *   ir:    when X86_OPTIMIZE_ALU is set and RD is the accumulator of that
- *          size (X86_AL/AX/EAX/RAX) the short opcode 0xa8/0xa9 is emitted;
- *          otherwise opcode 0xf6/0xf7 with _b000 in the "r" column.
- *   im:    opcode 0xf6/0xf7 with _b000 in the "r" column.
- * Word forms emit _d16() first.  The Q immediate forms pass IM through the
- * same _L emitters as the L forms.
- */
-
-#define TESTBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) ))
-#define TESTBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS ))
-#define TESTBir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
- (_REXBrr(0, RD), _O_B (0xa8 ,_u8(IM))) : \
- (_REXBrr(0, RD), _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))) )
-#define TESTBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM)))
-
-#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ))
-#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ))
-#define TESTWir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
- (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \
- (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) )
-#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)))
-
-#define TESTLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) ))
-#define TESTLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS ))
-#define TESTLir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \
- (_REXLrr(0, RD), _O_L (0xa9 ,IM )) : \
- (_REXLrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )) )
-#define TESTLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ))
-
-#define TESTQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x85 ,_b11,_r8(RS),_r8(RD) ))
-#define TESTQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x85 ,_r8(RS) ,MD,MB,MI,MS ))
-#define TESTQir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \
- (_REXQrr(0, RD), _O_L (0xa9 ,IM )) : \
- (_REXQrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r8(RD) ,IM )) )
-#define TESTQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ))
-
-
-/* --- Exchange instructions ----------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * CMPXCHG (0x0fb0 byte / 0x0fb1 wider), XADD (0x0fc0 / 0x0fc1) and XCHG
- * (0x86 / 0x87) emitters.  In every form RS goes in the "r" column and the
- * other operand, register RD ("rr") or memory MD,MB,MI,MS ("rm"), in the
- * "m" column.  Word forms emit _d16() first; Q forms use _REXQ*() and _r8().
- */
-
-#define CMPXCHGBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) ))
-#define CMPXCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS ))
-
-#define CMPXCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) ))
-#define CMPXCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS ))
-
-#define CMPXCHGLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) ))
-#define CMPXCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS ))
-
-#define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r8(RS),_r8(RD) ))
-#define CMPXCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r8(RS) ,MD,MB,MI,MS ))
-
-#define XADDBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) ))
-#define XADDBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS ))
-
-#define XADDWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) ))
-#define XADDWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS ))
-
-#define XADDLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) ))
-#define XADDLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS ))
-
-#define XADDQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r8(RS),_r8(RD) ))
-#define XADDQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r8(RS) ,MD,MB,MI,MS ))
-
-#define XCHGBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) ))
-#define XCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS ))
-
-#define XCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) ))
-#define XCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS ))
-
-#define XCHGLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) ))
-#define XCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS ))
-
-#define XCHGQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x87 ,_b11,_r8(RS),_r8(RD) ))
-#define XCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x87 ,_r8(RS) ,MD,MB,MI,MS ))
-
-
-/* --- Increment/Decrement instructions ------------------------------------ */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * INC/DEC emitters.  The general encoding is opcode 0xfe (byte) or 0xff
- * (wider) with _b000 (INC) or _b001 (DEC) in the "r" column.  For the W and
- * L register forms the one-byte encoding 0x40 (INC) / 0x48 (DEC) combined
- * with the register via _Or is used when X86_TARGET_64BIT is false; on
- * 64-bit targets they fall back to the 0xff form.
- */
-
-#define DECBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS ))
-#define DECBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) ))
-
-#define DECWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
-#define DECWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x48,_r2(RD) )) : \
- (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r2(RD) )))
-
-#define DECLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
-#define DECLr(RD) (! X86_TARGET_64BIT ? _Or (0x48,_r4(RD) ) : \
- (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r4(RD) )))
-
-#define DECQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
-#define DECQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r8(RD) ))
-
-#define INCBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS ))
-#define INCBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) ))
-
-#define INCWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
-#define INCWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x40,_r2(RD) )) : \
- (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r2(RD) )) )
-
-#define INCLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
-#define INCLr(RD) (! X86_TARGET_64BIT ? _Or (0x40,_r4(RD) ) : \
- (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r4(RD) )))
-
-#define INCQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
-#define INCQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r8(RD) ))
-
-
-/* --- Misc instructions --------------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * Bit-scan emitters: BSF is two-byte opcode 0x0fbc, BSR 0x0fbd.  The
- * destination RD goes in the "r" column; the source is register RS ("rr")
- * or memory MD,MB,MI,MS ("mr").  Word forms emit _d16() first.
- */
-
-#define BSFWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r2(RD),_r2(RS) ))
-#define BSFWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r2(RD) ,MD,MB,MI,MS ))
-#define BSRWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r2(RD),_r2(RS) ))
-#define BSRWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r2(RD) ,MD,MB,MI,MS ))
-
-#define BSFLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r4(RD),_r4(RS) ))
-#define BSFLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r4(RD) ,MD,MB,MI,MS ))
-#define BSRLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r4(RD),_r4(RS) ))
-#define BSRLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r4(RD) ,MD,MB,MI,MS ))
-
-#define BSFQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r8(RD),_r8(RS) ))
-#define BSFQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r8(RD) ,MD,MB,MI,MS ))
-#define BSRQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r8(RD),_r8(RS) ))
-#define BSRQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r8(RD) ,MD,MB,MI,MS ))
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * Widening moves.  Names read MOV{S,Z}<src size><dst size>: S forms use
- * opcodes 0x0fbe (byte source) / 0x0fbf (word source), Z forms 0x0fb6 /
- * 0x0fb7, and MOVSLQ uses opcode 0x63.  The destination RD goes in the "r"
- * column and is checked with the accessor of the destination size; a
- * register source RS is checked with the accessor of the source size.
- * Byte-source rr forms with W/L destinations use _REXBLrr(); the *WQ and
- * MOVSLQ forms are wrapped in _m64only().
- */
-
-#define MOVSBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) ))
-#define MOVSBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS ))
-#define MOVZBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) ))
-#define MOVZBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS ))
-
-#define MOVSBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) ))
-#define MOVSBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS ))
-#define MOVZBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) ))
-#define MOVZBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS ))
-
-#define MOVSBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r8(RD),_r1(RS) ))
-#define MOVSBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r8(RD) ,MD,MB,MI,MS ))
-#define MOVZBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r8(RD),_r1(RS) ))
-#define MOVZBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r8(RD) ,MD,MB,MI,MS ))
-
-#define MOVSWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) ))
-#define MOVSWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS ))
-#define MOVZWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) ))
-#define MOVZWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS ))
-
-#define MOVSWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) )))
-#define MOVSWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS )))
-#define MOVZWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) )))
-#define MOVZWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS )))
-
-#define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, RS), _O_Mrm (0x63 ,_b11,_r8(RD),_r4(RS) )))
-#define MOVSLQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS )))
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * LEA emitters: opcode 0x8d with destination RD in the "r" column and the
- * address expression given as MD,MB,MI,MS.  RD is checked with the accessor
- * matching the operand size, like every other L/Q macro pair in this file:
- * _r4() for LEALmr, _r8() for LEAQmr.
- */
-
-#define LEALmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS ))
-#define LEAQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8d ,_r8(RD) ,MD,MB,MI,MS ))
-/* BSWAP (two-byte opcode 0x0fc8 combined with register R via _OOr) and the
- * operand-less single-byte flag instructions CLC/STC/CMC/CLD/STD.
- */
-#define BSWAPLr(R) (_REXLrr(0, R), _OOr (0x0fc8,_r4(R) ))
-#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) ))
-
-#define CLC() _O (0xf8 )
-#define STC() _O (0xf9 )
-#define CMC() _O (0xf5 )
-
-#define CLD() _O (0xfc )
-#define STD() _O (0xfd )
-/* Accumulator sign-extension instructions and LAHF/SAHF.
- *
- * Opcode 0x98 is emitted as CBTW (with _d16()), CWTL (plain) or CLTQ (with
- * _REXQrr(0, 0)); opcode 0x99 likewise as CWTD, CLTD or CQTO.  CBW/CWDE/CDQE
- * and CWD/CDQ/CQO are alternative names for the same macros.
- *
- * The 64-bit forms hand _m64only() a single parenthesised comma expression,
- * exactly like the other _m64only() users in this file (e.g. POPQr); an
- * unparenthesised "prefix, opcode" pair would be split into two macro
- * arguments.
- */
-#define CBTW() (_d16(), _O (0x98 ))
-#define CWTL() _O (0x98 )
-#define CLTQ() _m64only((_REXQrr(0, 0), _O (0x98 )))
-
-#define CBW CBTW
-#define CWDE CWTL
-#define CDQE CLTQ
-
-#define CWTD() (_d16(), _O (0x99 ))
-#define CLTD() _O (0x99 )
-#define CQTO() _m64only((_REXQrr(0, 0), _O (0x99 )))
-
-#define CWD CWTD
-#define CDQ CLTD
-#define CQO CQTO
-
-#define LAHF() _O (0x9f )
-#define SAHF() _O (0x9e )
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * Operand-less two-byte instructions, both in the 0x0f opcode map:
- * CPUID is 0f a2 and RDTSC is 0f 31.
- */
-
-#define CPUID() _OO (0x0fa2 )
-#define RDTSC() _OO (0x0f31 )
-/* Procedure frame and return instructions plus NOP.  ENTERii is opcode 0xc8
- * followed by W (through _su16) and B (through _su8); RETi is opcode 0xc2
- * followed by IM (through _su16); LEAVE, RET and NOP are single bytes.
- */
-#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B))
-
-#define LEAVE() _O (0xc9 )
-#define RET() _O (0xc3 )
-#define RETi(IM) _O_W (0xc2 ,_su16(IM))
-
-#define NOP() _O (0x90 )
-
-
-/* --- Media 64-bit instructions ------------------------------------------- */
-
-enum { /* low opcode byte of each MMX instruction; the __MMX* macros OR it into 0x0f00. "2P" entries are presumably the ones meant for the _2P_MMX* macros (0x0f, 0x38, OP) -- confirm. The shift-by-immediate *i entries share opcodes and differ only by the /digit in their comment. */
- X86_MMX_PABSB = 0x1c, // 2P
- X86_MMX_PABSW = 0x1d, // 2P
- X86_MMX_PABSD = 0x1e, // 2P
- X86_MMX_PACKSSWB = 0x63,
- X86_MMX_PACKSSDW = 0x6b,
- X86_MMX_PACKUSWB = 0x67,
- X86_MMX_PADDB = 0xfc,
- X86_MMX_PADDW = 0xfd,
- X86_MMX_PADDD = 0xfe,
- X86_MMX_PADDQ = 0xd4,
- X86_MMX_PADDSB = 0xec,
- X86_MMX_PADDSW = 0xed,
- X86_MMX_PADDUSB = 0xdc,
- X86_MMX_PADDUSW = 0xdd,
- X86_MMX_PAND = 0xdb,
- X86_MMX_PANDN = 0xdf,
- X86_MMX_PAVGB = 0xe0,
- X86_MMX_PAVGW = 0xe3,
- X86_MMX_PCMPEQB = 0x74,
- X86_MMX_PCMPEQW = 0x75,
- X86_MMX_PCMPEQD = 0x76,
- X86_MMX_PCMPGTB = 0x64,
- X86_MMX_PCMPGTW = 0x65,
- X86_MMX_PCMPGTD = 0x66,
- X86_MMX_PEXTRW = 0xc5, // 64, /r ib
- X86_MMX_PHADDW = 0x01, // 2P
- X86_MMX_PHADDD = 0x02, // 2P
- X86_MMX_PHADDSW = 0x03, // 2P
- X86_MMX_PHSUBW = 0x05, // 2P
- X86_MMX_PHSUBD = 0x06, // 2P
- X86_MMX_PHSUBSW = 0x07, // 2P
- X86_MMX_PINSRW = 0xc4, // 64, /r ib
- X86_MMX_PMADDUBSW = 0x04, // 2P
- X86_MMX_PMADDWD = 0xf5,
- X86_MMX_PMAXSW = 0xee,
- X86_MMX_PMAXUB = 0xde,
- X86_MMX_PMINSW = 0xea,
- X86_MMX_PMINUB = 0xda,
- X86_MMX_PMOVMSKB = 0xd7, // 64
- X86_MMX_PMULHRSW = 0x0b, // 2P
- X86_MMX_PMULHUW = 0xe4,
- X86_MMX_PMULHW = 0xe5,
- X86_MMX_PMULLW = 0xd5,
- X86_MMX_PMULUDQ = 0xf4,
- X86_MMX_POR = 0xeb,
- X86_MMX_PSADBW = 0xf6,
- X86_MMX_PSHUFB = 0x00, // 2P
- X86_MMX_PSHUFW = 0x70, // /r ib
- X86_MMX_PSIGNB = 0x08, // 2P
- X86_MMX_PSIGNW = 0x09, // 2P
- X86_MMX_PSIGND = 0x0a, // 2P
- X86_MMX_PSLLW = 0xf1,
- X86_MMX_PSLLWi = 0x71, // /6 ib
- X86_MMX_PSLLD = 0xf2,
- X86_MMX_PSLLDi = 0x72, // /6 ib
- X86_MMX_PSLLQ = 0xf3,
- X86_MMX_PSLLQi = 0x73, // /6 ib
- X86_MMX_PSRAW = 0xe1,
- X86_MMX_PSRAWi = 0x71, // /4 ib
- X86_MMX_PSRAD = 0xe2,
- X86_MMX_PSRADi = 0x72, // /4 ib
- X86_MMX_PSRLW = 0xd1,
- X86_MMX_PSRLWi = 0x71, // /2 ib
- X86_MMX_PSRLD = 0xd2,
- X86_MMX_PSRLDi = 0x72, // /2 ib
- X86_MMX_PSRLQ = 0xd3,
- X86_MMX_PSRLQi = 0x73, // /2 ib
- X86_MMX_PSUBB = 0xf8,
- X86_MMX_PSUBW = 0xf9,
- X86_MMX_PSUBD = 0xfa,
- X86_MMX_PSUBQ = 0xfb,
- X86_MMX_PSUBSB = 0xe8,
- X86_MMX_PSUBSW = 0xe9,
- X86_MMX_PSUBUSB = 0xd8,
- X86_MMX_PSUBUSW = 0xd9,
- X86_MMX_PUNPCKHBW = 0x68,
- X86_MMX_PUNPCKHWD = 0x69,
- X86_MMX_PUNPCKHDQ = 0x6a,
- X86_MMX_PUNPCKLBW = 0x60,
- X86_MMX_PUNPCKLWD = 0x61,
- X86_MMX_PUNPCKLDQ = 0x62,
- X86_MMX_PXOR = 0xef,
-};
-/* Lowest-level MMX emitters.
- *
- *   OP        low opcode byte, OR-ed into 0x0f00
- *   RS / RD   source / destination register
- *   RSA / RDA accessor macro applied to RS / RD (e.g. _rM, _r4, _r8), so one
- *             helper serves MMX and general-purpose register operands
- *   IM        8-bit immediate, passed through _u8()
- *   MD,MB,MI,MS memory operand
- *
- * RD always lands in the "r" column, except in the rm forms where RS does.
- * __MMXL* use _REXL*() prefixes, __MMXQ* use _REXQ*().  The imr forms have
- * no RS parameter: their REX prefix is built from MB, MI and RD, exactly as
- * in the mr forms.  __MMX1L* emit an extra 0x0f byte via _B() followed by
- * the two-byte opcode (PX << 8) | OP.
- */
-#define __MMXLrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __MMXLmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __MMXLrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-#define __MMXLirr(OP,IM,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM)))
-#define __MMXLimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM)))
-#define __MMXQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __MMXQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __MMXQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-#define __MMXQirr(OP,IM,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM)))
-#define __MMXQimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM)))
-#define __MMX1Lrr(PX,OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _B(0x0f),_OO_Mrm(((PX)<<8)|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __MMX1Lmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __MMX1Lrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-/* Convenience wrappers for the common case where every register operand is
- * an MMX register: they forward to the __MMX* helpers with _rM as the
- * register accessor.  The _2P_ variants forward to __MMX1L* with PX fixed
- * to 0x38.
- */
-#define _MMXLrr(OP,RS,RD) __MMXLrr(OP,RS,_rM,RD,_rM)
-#define _MMXLmr(OP,MD,MB,MI,MS,RD) __MMXLmr(OP,MD,MB,MI,MS,RD,_rM)
-#define _MMXLrm(OP,RS,MD,MB,MI,MS) __MMXLrm(OP,RS,_rM,MD,MB,MI,MS)
-#define _MMXQrr(OP,RS,RD) __MMXQrr(OP,RS,_rM,RD,_rM)
-#define _MMXQmr(OP,MD,MB,MI,MS,RD) __MMXQmr(OP,MD,MB,MI,MS,RD,_rM)
-#define _MMXQrm(OP,RS,MD,MB,MI,MS) __MMXQrm(OP,RS,_rM,MD,MB,MI,MS)
-#define _2P_MMXLrr(OP,RS,RD) __MMX1Lrr(0x38, OP,RS,_rM,RD,_rM)
-#define _2P_MMXLmr(OP,MD,MB,MI,MS,RD) __MMX1Lmr(0x38, OP,MD,MB,MI,MS,RD,_rM)
-#define _2P_MMXLrm(OP,RS,MD,MB,MI,MS) __MMX1Lrm(0x38, OP,RS,_rM,MD,MB,MI,MS)
-/* MMX data movement.
- *
- * MMX_MOV{D,Q}MDrr: opcode 0x6e, general register RS (_r4/_r8) into MMX
- * register RD.  MMX_MOV{D,Q}MSrr: opcode 0x7e, MMX register RS into general
- * register RD; the arguments are swapped when forwarding so that the MMX
- * register still lands in the "r" column.  The Q variants go through
- * __MMXQrr, i.e. use a _REXQrr() prefix.
- *
- * MMX_MOVD{mr,rm} use 0x6e/0x7e with a memory operand; MMX_MOVQ{rr,mr,rm}
- * use 0x6f (load / register copy) and 0x7f (store) through the _MMXL*
- * wrappers.
- */
-#define MMX_MOVDMDrr(RS, RD) __MMXLrr(0x6e, RS,_r4, RD,_rM)
-#define MMX_MOVQMDrr(RS, RD) __MMXQrr(0x6e, RS,_r8, RD,_rM)
-#define MMX_MOVDMSrr(RS, RD) __MMXLrr(0x7e, RD,_r4, RS,_rM)
-#define MMX_MOVQMSrr(RS, RD) __MMXQrr(0x7e, RD,_r8, RS,_rM)
-
-#define MMX_MOVDmr(MD, MB, MI, MS, RD) _MMXLmr(0x6e, MD, MB, MI, MS, RD)
-#define MMX_MOVDrm(RS, MD, MB, MI, MS) _MMXLrm(0x7e, RS, MD, MB, MI, MS)
-#define MMX_MOVQrr(RS, RD) _MMXLrr(0x6f, RS, RD)
-#define MMX_MOVQmr(MD, MB, MI, MS, RD) _MMXLmr(0x6f, MD, MB, MI, MS, RD)
-#define MMX_MOVQrm(RS, MD, MB, MI, MS) _MMXLrm(0x7f, RS, MD, MB, MI, MS)
-
-// Original MMX instructions
-#define MMX_PACKSSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKSSWB,RS,RD)
-#define MMX_PACKSSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSWB, MD, MB, MI, MS, RD)
-#define MMX_PACKSSDWrr(RS, RD) _MMXLrr(X86_MMX_PACKSSDW,RS,RD)
-#define MMX_PACKSSDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSDW, MD, MB, MI, MS, RD)
-#define MMX_PACKUSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKUSWB,RS,RD)
-#define MMX_PACKUSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKUSWB, MD, MB, MI, MS, RD)
-#define MMX_PADDBrr(RS, RD) _MMXLrr(X86_MMX_PADDB,RS,RD)
-#define MMX_PADDBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDB, MD, MB, MI, MS, RD)
-#define MMX_PADDWrr(RS, RD) _MMXLrr(X86_MMX_PADDW,RS,RD)
-#define MMX_PADDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDW, MD, MB, MI, MS, RD)
-#define MMX_PADDDrr(RS, RD) _MMXLrr(X86_MMX_PADDD,RS,RD)
-#define MMX_PADDDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDD, MD, MB, MI, MS, RD)
-#define MMX_PADDQrr(RS, RD) _MMXLrr(X86_MMX_PADDQ,RS,RD)
-#define MMX_PADDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDQ, MD, MB, MI, MS, RD)
-#define MMX_PADDSBrr(RS, RD) _MMXLrr(X86_MMX_PADDSB,RS,RD)
-#define MMX_PADDSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSB, MD, MB, MI, MS, RD)
-#define MMX_PADDSWrr(RS, RD) _MMXLrr(X86_MMX_PADDSW,RS,RD)
-#define MMX_PADDSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSW, MD, MB, MI, MS, RD)
-#define MMX_PADDUSBrr(RS, RD) _MMXLrr(X86_MMX_PADDUSB,RS,RD)
-#define MMX_PADDUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSB, MD, MB, MI, MS, RD)
-#define MMX_PADDUSWrr(RS, RD) _MMXLrr(X86_MMX_PADDUSW,RS,RD)
-#define MMX_PADDUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSW, MD, MB, MI, MS, RD)
-#define MMX_PANDrr(RS, RD) _MMXLrr(X86_MMX_PAND,RS,RD)
-#define MMX_PANDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAND, MD, MB, MI, MS, RD)
-#define MMX_PANDNrr(RS, RD) _MMXLrr(X86_MMX_PANDN,RS,RD)
-#define MMX_PANDNmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PANDN, MD, MB, MI, MS, RD)
-#define MMX_PAVGBrr(RS, RD) _MMXLrr(X86_MMX_PAVGB,RS,RD)
-#define MMX_PAVGBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGB, MD, MB, MI, MS, RD)
-#define MMX_PAVGWrr(RS, RD) _MMXLrr(X86_MMX_PAVGW,RS,RD)
-#define MMX_PAVGWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGW, MD, MB, MI, MS, RD)
-#define MMX_PCMPEQBrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQB,RS,RD)
-#define MMX_PCMPEQBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQB, MD, MB, MI, MS, RD)
-#define MMX_PCMPEQWrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQW,RS,RD)
-#define MMX_PCMPEQWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQW, MD, MB, MI, MS, RD)
-#define MMX_PCMPEQDrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQD,RS,RD)
-#define MMX_PCMPEQDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQD, MD, MB, MI, MS, RD)
-#define MMX_PCMPGTBrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTB,RS,RD)
-#define MMX_PCMPGTBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTB, MD, MB, MI, MS, RD)
-#define MMX_PCMPGTWrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTW,RS,RD)
-#define MMX_PCMPGTWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTW, MD, MB, MI, MS, RD)
-#define MMX_PCMPGTDrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTD,RS,RD)
-#define MMX_PCMPGTDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTD, MD, MB, MI, MS, RD)
-#define MMX_PMADDWDrr(RS, RD) _MMXLrr(X86_MMX_PMADDWD,RS,RD)
-#define MMX_PMADDWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMADDWD, MD, MB, MI, MS, RD)
-#define MMX_PMAXSWrr(RS, RD) _MMXLrr(X86_MMX_PMAXSW,RS,RD)
-#define MMX_PMAXSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXSW, MD, MB, MI, MS, RD)
-#define MMX_PMAXUBrr(RS, RD) _MMXLrr(X86_MMX_PMAXUB,RS,RD)
-#define MMX_PMAXUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXUB, MD, MB, MI, MS, RD)
-#define MMX_PMINSWrr(RS, RD) _MMXLrr(X86_MMX_PMINSW,RS,RD)
-#define MMX_PMINSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINSW, MD, MB, MI, MS, RD)
-#define MMX_PMINUBrr(RS, RD) _MMXLrr(X86_MMX_PMINUB,RS,RD)
-#define MMX_PMINUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINUB, MD, MB, MI, MS, RD)
-#define MMX_PMULHUWrr(RS, RD) _MMXLrr(X86_MMX_PMULHUW,RS,RD)
-#define MMX_PMULHUWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHUW, MD, MB, MI, MS, RD)
-#define MMX_PMULHWrr(RS, RD) _MMXLrr(X86_MMX_PMULHW,RS,RD)
-#define MMX_PMULHWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHW, MD, MB, MI, MS, RD)
-#define MMX_PMULLWrr(RS, RD) _MMXLrr(X86_MMX_PMULLW,RS,RD)
-#define MMX_PMULLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULLW, MD, MB, MI, MS, RD)
-#define MMX_PMULUDQrr(RS, RD) _MMXLrr(X86_MMX_PMULUDQ,RS,RD)
-#define MMX_PMULUDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULUDQ, MD, MB, MI, MS, RD)
-#define MMX_PORrr(RS, RD) _MMXLrr(X86_MMX_POR,RS,RD)
-#define MMX_PORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_POR, MD, MB, MI, MS, RD)
-#define MMX_PSADBWrr(RS, RD) _MMXLrr(X86_MMX_PSADBW,RS,RD)
-#define MMX_PSADBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSADBW, MD, MB, MI, MS, RD)
-#define MMX_PSLLWir(IM, RD) __MMXLirr(X86_MMX_PSLLWi, IM, RD,_rM, _b110,_rN)
-#define MMX_PSLLWrr(RS, RD) _MMXLrr(X86_MMX_PSLLW,RS,RD)
-#define MMX_PSLLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLW, MD, MB, MI, MS, RD)
-#define MMX_PSLLDir(IM, RD) __MMXLirr(X86_MMX_PSLLDi, IM, RD,_rM, _b110,_rN)
-#define MMX_PSLLDrr(RS, RD) _MMXLrr(X86_MMX_PSLLD,RS,RD)
-#define MMX_PSLLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLD, MD, MB, MI, MS, RD)
-#define MMX_PSLLQir(IM, RD) __MMXLirr(X86_MMX_PSLLQi, IM, RD,_rM, _b110,_rN)
-#define MMX_PSLLQrr(RS, RD) _MMXLrr(X86_MMX_PSLLQ,RS,RD)
-#define MMX_PSLLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLQ, MD, MB, MI, MS, RD)
-#define MMX_PSRAWir(IM, RD) __MMXLirr(X86_MMX_PSRAWi, IM, RD,_rM, _b100,_rN)
-#define MMX_PSRAWrr(RS, RD) _MMXLrr(X86_MMX_PSRAW,RS,RD)
-#define MMX_PSRAWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAW, MD, MB, MI, MS, RD)
-#define MMX_PSRADir(IM, RD) __MMXLirr(X86_MMX_PSRADi, IM, RD,_rM, _b100,_rN)
-#define MMX_PSRADrr(RS, RD) _MMXLrr(X86_MMX_PSRAD,RS,RD)
-#define MMX_PSRADmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAD, MD, MB, MI, MS, RD)
-#define MMX_PSRLWir(IM, RD) __MMXLirr(X86_MMX_PSRLWi, IM, RD,_rM, _b010,_rN)
-#define MMX_PSRLWrr(RS, RD) _MMXLrr(X86_MMX_PSRLW,RS,RD)
-#define MMX_PSRLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLW, MD, MB, MI, MS, RD)
-#define MMX_PSRLDir(IM, RD) __MMXLirr(X86_MMX_PSRLDi, IM, RD,_rM, _b010,_rN)
-#define MMX_PSRLDrr(RS, RD) _MMXLrr(X86_MMX_PSRLD,RS,RD)
-#define MMX_PSRLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLD, MD, MB, MI, MS, RD)
-#define MMX_PSRLQir(IM, RD) __MMXLirr(X86_MMX_PSRLQi, IM, RD,_rM, _b010,_rN)
-#define MMX_PSRLQrr(RS, RD) _MMXLrr(X86_MMX_PSRLQ,RS,RD)
-#define MMX_PSRLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLQ, MD, MB, MI, MS, RD)
-#define MMX_PSUBBrr(RS, RD) _MMXLrr(X86_MMX_PSUBB,RS,RD)
-#define MMX_PSUBBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBB, MD, MB, MI, MS, RD)
-#define MMX_PSUBWrr(RS, RD) _MMXLrr(X86_MMX_PSUBW,RS,RD)
-#define MMX_PSUBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBW, MD, MB, MI, MS, RD)
-#define MMX_PSUBDrr(RS, RD) _MMXLrr(X86_MMX_PSUBD,RS,RD)
-#define MMX_PSUBDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBD, MD, MB, MI, MS, RD)
-#define MMX_PSUBQrr(RS, RD) _MMXLrr(X86_MMX_PSUBQ,RS,RD)
-#define MMX_PSUBQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBQ, MD, MB, MI, MS, RD)
-#define MMX_PSUBSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBSB,RS,RD)
-#define MMX_PSUBSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSB, MD, MB, MI, MS, RD)
-#define MMX_PSUBSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBSW,RS,RD)
-#define MMX_PSUBSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSW, MD, MB, MI, MS, RD)
-#define MMX_PSUBUSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSB,RS,RD)
-#define MMX_PSUBUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSB, MD, MB, MI, MS, RD)
-#define MMX_PSUBUSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSW,RS,RD)
-#define MMX_PSUBUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSW, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKHBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHBW,RS,RD)
-#define MMX_PUNPCKHBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHBW, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKHWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHWD,RS,RD)
-#define MMX_PUNPCKHWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHWD, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKHDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHDQ,RS,RD)
-#define MMX_PUNPCKHDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHDQ, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKLBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLBW,RS,RD)
-#define MMX_PUNPCKLBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLBW, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKLWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLWD,RS,RD)
-#define MMX_PUNPCKLWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLWD, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKLDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLDQ,RS,RD)
-#define MMX_PUNPCKLDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLDQ, MD, MB, MI, MS, RD)
-#define MMX_PXORrr(RS, RD) _MMXLrr(X86_MMX_PXOR,RS,RD)
-#define MMX_PXORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PXOR, MD, MB, MI, MS, RD)
-
-#define MMX_PSHUFWirr(IM, RS, RD) __MMXLirr(X86_MMX_PSHUFW, IM, RS,_rM, RD,_rM)
-#define MMX_PSHUFWimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PSHUFW, IM, MD, MB, MI, MS, RD,_rM)
-#define MMX_PEXTRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r4)
-#define MMX_PEXTRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r8)
-#define MMX_PINSRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM)
-#define MMX_PINSRWLimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_r4)
-#define MMX_PINSRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM)
-#define MMX_PINSRWQimr(IM, MD, MB, MI, MS, RD) __MMXQimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_r8)
-
-// Additionnal MMX instructions, brought by SSSE3 ISA
-#define MMX_PABSBrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSB,RS,RD)
-#define MMX_PABSBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSB, MD, MB, MI, MS, RD)
-#define MMX_PABSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSW,RS,RD)
-#define MMX_PABSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSW, MD, MB, MI, MS, RD)
-#define MMX_PABSDrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSD,RS,RD)
-#define MMX_PABSDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSD, MD, MB, MI, MS, RD)
-#define MMX_PHADDWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDW,RS,RD)
-#define MMX_PHADDWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDW, MD, MB, MI, MS, RD)
-#define MMX_PHADDDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDD,RS,RD)
-#define MMX_PHADDDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDD, MD, MB, MI, MS, RD)
-#define MMX_PHADDSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDSW,RS,RD)
-#define MMX_PHADDSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDSW, MD, MB, MI, MS, RD)
-#define MMX_PHSUBWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBW,RS,RD)
-#define MMX_PHSUBWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBW, MD, MB, MI, MS, RD)
-#define MMX_PHSUBDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBD,RS,RD)
-#define MMX_PHSUBDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBD, MD, MB, MI, MS, RD)
-#define MMX_PHSUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBSW,RS,RD)
-#define MMX_PHSUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBSW, MD, MB, MI, MS, RD)
-#define MMX_PMADDUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMADDUBSW,RS,RD)
-#define MMX_PMADDUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMADDUBSW, MD, MB, MI, MS, RD)
-#define MMX_PMULHRSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMULHRSW,RS,RD)
-#define MMX_PMULHRSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMULHRSW, MD, MB, MI, MS, RD)
-#define MMX_PSHUFBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSHUFB,RS,RD)
-#define MMX_PSHUFBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSHUFB, MD, MB, MI, MS, RD)
-#define MMX_PSIGNBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNB,RS,RD)
-#define MMX_PSIGNBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNB, MD, MB, MI, MS, RD)
-#define MMX_PSIGNWrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNW,RS,RD)
-#define MMX_PSIGNWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNW, MD, MB, MI, MS, RD)
-#define MMX_PSIGNDrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGND,RS,RD)
-#define MMX_PSIGNDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGND, MD, MB, MI, MS, RD)
-
-#define EMMS() _OO (0x0f77 )
-
-
-/* --- Media 128-bit instructions ------------------------------------------ */
-
-enum {
- X86_SSE_CC_EQ = 0,
- X86_SSE_CC_LT = 1,
- X86_SSE_CC_GT = 1,
- X86_SSE_CC_LE = 2,
- X86_SSE_CC_GE = 2,
- X86_SSE_CC_U = 3,
- X86_SSE_CC_NEQ = 4,
- X86_SSE_CC_NLT = 5,
- X86_SSE_CC_NGT = 5,
- X86_SSE_CC_NLE = 6,
- X86_SSE_CC_NGE = 6,
- X86_SSE_CC_O = 7
-};
-
-enum {
- X86_SSE_UCOMI = 0x2e,
- X86_SSE_COMI = 0x2f,
- X86_SSE_CMP = 0xc2,
- X86_SSE_SQRT = 0x51,
- X86_SSE_RSQRT = 0x52,
- X86_SSE_RCP = 0x53,
- X86_SSE_AND = 0x54,
- X86_SSE_ANDN = 0x55,
- X86_SSE_OR = 0x56,
- X86_SSE_XOR = 0x57,
- X86_SSE_ADD = 0x58,
- X86_SSE_MUL = 0x59,
- X86_SSE_SUB = 0x5c,
- X86_SSE_MIN = 0x5d,
- X86_SSE_DIV = 0x5e,
- X86_SSE_MAX = 0x5f,
- X86_SSE_CVTDQ2PD = 0xe6,
- X86_SSE_CVTDQ2PS = 0x5b,
- X86_SSE_CVTPD2DQ = 0xe6,
- X86_SSE_CVTPD2PI = 0x2d,
- X86_SSE_CVTPD2PS = 0x5a,
- X86_SSE_CVTPI2PD = 0x2a,
- X86_SSE_CVTPI2PS = 0x2a,
- X86_SSE_CVTPS2DQ = 0x5b,
- X86_SSE_CVTPS2PD = 0x5a,
- X86_SSE_CVTPS2PI = 0x2d,
- X86_SSE_CVTSD2SI = 0x2d,
- X86_SSE_CVTSD2SS = 0x5a,
- X86_SSE_CVTSI2SD = 0x2a,
- X86_SSE_CVTSI2SS = 0x2a,
- X86_SSE_CVTSS2SD = 0x5a,
- X86_SSE_CVTSS2SI = 0x2d,
- X86_SSE_CVTTPD2PI = 0x2c,
- X86_SSE_CVTTPD2DQ = 0xe6,
- X86_SSE_CVTTPS2DQ = 0x5b,
- X86_SSE_CVTTPS2PI = 0x2c,
- X86_SSE_CVTTSD2SI = 0x2c,
- X86_SSE_CVTTSS2SI = 0x2c,
- X86_SSE_MOVMSK = 0x50,
- X86_SSE_PACKSSDW = 0x6b,
- X86_SSE_PACKSSWB = 0x63,
- X86_SSE_PACKUSWB = 0x67,
- X86_SSE_PADDB = 0xfc,
- X86_SSE_PADDD = 0xfe,
- X86_SSE_PADDQ = 0xd4,
- X86_SSE_PADDSB = 0xec,
- X86_SSE_PADDSW = 0xed,
- X86_SSE_PADDUSB = 0xdc,
- X86_SSE_PADDUSW = 0xdd,
- X86_SSE_PADDW = 0xfd,
- X86_SSE_PAND = 0xdb,
- X86_SSE_PANDN = 0xdf,
- X86_SSE_PAVGB = 0xe0,
- X86_SSE_PAVGW = 0xe3,
- X86_SSE_PCMPEQB = 0x74,
- X86_SSE_PCMPEQD = 0x76,
- X86_SSE_PCMPEQW = 0x75,
- X86_SSE_PCMPGTB = 0x64,
- X86_SSE_PCMPGTD = 0x66,
- X86_SSE_PCMPGTW = 0x65,
- X86_SSE_PMADDWD = 0xf5,
- X86_SSE_PMAXSW = 0xee,
- X86_SSE_PMAXUB = 0xde,
- X86_SSE_PMINSW = 0xea,
- X86_SSE_PMINUB = 0xda,
- X86_SSE_PMOVMSKB = 0xd7,
- X86_SSE_PMULHUW = 0xe4,
- X86_SSE_PMULHW = 0xe5,
- X86_SSE_PMULLW = 0xd5,
- X86_SSE_PMULUDQ = 0xf4,
- X86_SSE_POR = 0xeb,
- X86_SSE_PSADBW = 0xf6,
- X86_SSE_PSLLD = 0xf2,
- X86_SSE_PSLLQ = 0xf3,
- X86_SSE_PSLLW = 0xf1,
- X86_SSE_PSRAD = 0xe2,
- X86_SSE_PSRAW = 0xe1,
- X86_SSE_PSRLD = 0xd2,
- X86_SSE_PSRLQ = 0xd3,
- X86_SSE_PSRLW = 0xd1,
- X86_SSE_PSUBB = 0xf8,
- X86_SSE_PSUBD = 0xfa,
- X86_SSE_PSUBQ = 0xfb,
- X86_SSE_PSUBSB = 0xe8,
- X86_SSE_PSUBSW = 0xe9,
- X86_SSE_PSUBUSB = 0xd8,
- X86_SSE_PSUBUSW = 0xd9,
- X86_SSE_PSUBW = 0xf9,
- X86_SSE_PUNPCKHBW = 0x68,
- X86_SSE_PUNPCKHDQ = 0x6a,
- X86_SSE_PUNPCKHQDQ = 0x6d,
- X86_SSE_PUNPCKHWD = 0x69,
- X86_SSE_PUNPCKLBW = 0x60,
- X86_SSE_PUNPCKLDQ = 0x62,
- X86_SSE_PUNPCKLQDQ = 0x6c,
- X86_SSE_PUNPCKLWD = 0x61,
- X86_SSE_PXOR = 0xef,
- X86_SSSE3_PSHUFB = 0x00,
-};
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define _SSSE3Lrr(OP1,OP2,RS,RSA,RD,RDA) (_B(0x66), _REXLrr(RD,RD), _B(0x0f), _OO_Mrm (((OP1)<<8)|(OP2) ,_b11,RDA(RD),RSA(RS) ))
-#define _SSSE3Lmr(OP1,OP2,MD,MB,MI,MS,RD,RDA) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X (((OP1)<<8)|(OP2) ,RDA(RD) ,MD,MB,MI,MS ))
-#define _SSSE3Lirr(OP1,OP2,IM,RS,RD) (_B(0x66), _REXLrr(RD, RS), _B(0x0f), _OO_Mrm_B (((OP1)<<8)|(OP2) ,_b11,_rX(RD),_rX(RS) ,_u8(IM)))
-#define _SSSE3Limr(OP1,OP2,IM,MD,MB,MI,MS,RD) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X_B (((OP1)<<8)|(OP2) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM)))
-
-#define __SSELir(OP,MO,IM,RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0f00|(OP) ,_b11,MO ,_rX(RD) ,_u8(IM)))
-#define __SSELim(OP,MO,IM,MD,MB,MI,MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0f00|(OP) ,MO ,MD,MB,MI,MS ,_u8(IM)))
-#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-#define __SSELirr(OP,IM,RS,RD) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,_rX(RD),_rX(RS) ,_u8(IM)))
-#define __SSELimr(OP,IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM)))
-
-#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-
-#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSELrr(OP, RS, RSA, RD, RDA))
-#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA))
-#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS))
-#define _SSELir(PX,OP,MO,IM,RD) (_B(PX), __SSELir(OP, MO, IM, RD))
-#define _SSELim(PX,OP,MO,IM,MD,MB,MI,MS) (_B(PX), __SSELim(OP, MO, IM, MD, MB, MI, MS))
-#define _SSELirr(PX,OP,IM,RS,RD) (_B(PX), __SSELirr(OP, IM, RS, RD))
-#define _SSELimr(PX,OP,IM,MD,MB,MI,MS,RD) (_B(PX), __SSELimr(OP, IM, MD, MB, MI, MS, RD))
-
-#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA))
-#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA))
-#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS))
-
-#define _SSEPSrr(OP,RS,RD) __SSELrr( OP, RS,_rX, RD,_rX)
-#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr( OP, MD, MB, MI, MS, RD,_rX)
-#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm( OP, RS,_rX, MD, MB, MI, MS)
-#define _SSEPSirr(OP,IM,RS,RD) __SSELirr( OP, IM, RS, RD)
-#define _SSEPSimr(OP,IM,MD,MB,MI,MS,RD) __SSELimr( OP, IM, MD, MB, MI, MS, RD)
-
-#define _SSEPDrr(OP,RS,RD) _SSELrr(0x66, OP, RS,_rX, RD,_rX)
-#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0x66, OP, MD, MB, MI, MS, RD,_rX)
-#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, MS)
-#define _SSEPDirr(OP,IM,RS,RD) _SSELirr(0x66, OP, IM, RS, RD)
-#define _SSEPDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0x66, OP, IM, MD, MB, MI, MS, RD)
-
-#define _SSESSrr(OP,RS,RD) _SSELrr(0xf3, OP, RS,_rX, RD,_rX)
-#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf3, OP, MD, MB, MI, MS, RD,_rX)
-#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, MS)
-#define _SSESSirr(OP,IM,RS,RD) _SSELirr(0xf3, OP, IM, RS, RD)
-#define _SSESSimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf3, OP, IM, MD, MB, MI, MS, RD)
-
-#define _SSESDrr(OP,RS,RD) _SSELrr(0xf2, OP, RS,_rX, RD,_rX)
-#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf2, OP, MD, MB, MI, MS, RD,_rX)
-#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, MS)
-#define _SSESDirr(OP,IM,RS,RD) _SSELirr(0xf2, OP, IM, RS, RD)
-#define _SSESDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf2, OP, IM, MD, MB, MI, MS, RD)
-
-#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD)
-#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
-#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD)
-#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
-
-#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD)
-#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
-#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD)
-#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
-
-#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD)
-#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
-#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD)
-#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
-
-#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD)
-#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
-#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD)
-#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
-
-#define CMPPSrr(IM, RS, RD) _SSEPSirr(X86_SSE_CMP, IM, RS, RD)
-#define CMPPSmr(IM, MD, MB, MI, MS, RD) _SSEPSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
-#define CMPPDrr(IM, RS, RD) _SSEPDirr(X86_SSE_CMP, IM, RS, RD)
-#define CMPPDmr(IM, MD, MB, MI, MS, RD) _SSEPDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
-
-#define CMPSSrr(IM, RS, RD) _SSESSirr(X86_SSE_CMP, IM, RS, RD)
-#define CMPSSmr(IM, MD, MB, MI, MS, RD) _SSESSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
-#define CMPSDrr(IM, RS, RD) _SSESDirr(X86_SSE_CMP, IM, RS, RD)
-#define CMPSDmr(IM, MD, MB, MI, MS, RD) _SSESDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
-
-#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD)
-#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
-#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD)
-#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
-
-#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD)
-#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
-#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD)
-#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
-
-#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD)
-#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
-#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD)
-#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
-
-#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD)
-#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
-#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD)
-#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
-
-#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD)
-#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
-#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD)
-#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
-
-#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD)
-#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
-#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD)
-#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
-
-#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD)
-#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
-#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD)
-#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
-
-#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD)
-#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
-#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD)
-#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
-
-#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD)
-#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
-#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD)
-#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
-
-#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD)
-#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
-#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD)
-#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
-
-#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD)
-#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
-#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD)
-#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
-
-#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD)
-#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
-#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD)
-#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
-
-#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD)
-#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
-#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD)
-#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
-
-#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD)
-#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
-#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD)
-#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
-
-#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD)
-#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
-#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD)
-#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
-
-#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD)
-#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
-#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD)
-#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
-
-#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD)
-#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
-#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD)
-#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
-
-#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD)
-#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
-#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD)
-#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
-
-#define MOVAPSrr(RS, RD) _SSEPSrr(0x28, RS, RD)
-#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr(0x28, MD, MB, MI, MS, RD)
-#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPSrm(0x29, RS, MD, MB, MI, MS)
-
-#define MOVAPDrr(RS, RD) _SSEPDrr(0x28, RS, RD)
-#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr(0x28, MD, MB, MI, MS, RD)
-#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPDrm(0x29, RS, MD, MB, MI, MS)
-
-#define CVTDQ2PDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTDQ2PD, RS,_rX, RD,_rX)
-#define CVTDQ2PDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTDQ2PD, MD, MB, MI, MS, RD,_rX)
-#define CVTDQ2PSrr(RS, RD) __SSELrr( X86_SSE_CVTDQ2PS, RS,_rX, RD,_rX)
-#define CVTDQ2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTDQ2PS, MD, MB, MI, MS, RD,_rX)
-#define CVTPD2DQrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTPD2DQ, RS,_rX, RD,_rX)
-#define CVTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTPD2DQ, MD, MB, MI, MS, RD,_rX)
-#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PI, RS,_rX, RD,_rM)
-#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PI, MD, MB, MI, MS, RD,_rM)
-#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PS, RS,_rX, RD,_rX)
-#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PS, MD, MB, MI, MS, RD,_rX)
-#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPI2PD, RS,_rM, RD,_rX)
-#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPI2PD, MD, MB, MI, MS, RD,_rX)
-#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTPI2PS, RS,_rM, RD,_rX)
-#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPI2PS, MD, MB, MI, MS, RD,_rX)
-#define CVTPS2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPS2DQ, RS,_rX, RD,_rX)
-#define CVTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPS2DQ, MD, MB, MI, MS, RD,_rX)
-#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PD, RS,_rX, RD,_rX)
-#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PD, MD, MB, MI, MS, RD,_rX)
-#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PI, RS,_rX, RD,_rM)
-#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PI, MD, MB, MI, MS, RD,_rM)
-#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r4)
-#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r4)
-#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r8)
-#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r8)
-#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SS, RS,_rX, RD,_rX)
-#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SS, MD, MB, MI, MS, RD,_rX)
-#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI2SD, RS,_r4, RD,_rX)
-#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX)
-#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI2SD, RS,_r8, RD,_rX)
-#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX)
-#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI2SS, RS,_r4, RD,_rX)
-#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX)
-#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI2SS, RS,_r8, RD,_rX)
-#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX)
-#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SD, RS,_rX, RD,_rX)
-#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SD, MD, MB, MI, MS, RD,_rX)
-#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r4)
-#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r4)
-#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r8)
-#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r8)
-#define CVTTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2PI, RS,_rX, RD,_rM)
-#define CVTTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2PI, MD, MB, MI, MS, RD,_rM)
-#define CVTTPD2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2DQ, RS,_rX, RD,_rX)
-#define CVTTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2DQ, MD, MB, MI, MS, RD,_rX)
-#define CVTTPS2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTPS2DQ, RS,_rX, RD,_rX)
-#define CVTTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTPS2DQ, MD, MB, MI, MS, RD,_rX)
-#define CVTTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTTPS2PI, RS,_rX, RD,_rM)
-#define CVTTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTTPS2PI, MD, MB, MI, MS, RD,_rM)
-#define CVTTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r4)
-#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r4)
-#define CVTTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r8)
-#define CVTTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r8)
-#define CVTTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r4)
-#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r4)
-#define CVTTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r8)
-#define CVTTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r8)
-
-#define MOVDXDrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX)
-#define MOVDXDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
-#define MOVQXDrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX)
-#define MOVQXDmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
-
-#define MOVDXSrr(RS, RD) _SSELrr(0x66, 0x7e, RD,_r4, RS,_rX)
-#define MOVDXSrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
-#define MOVQXSrr(RS, RD) _SSEQrr(0x66, 0x7e, RD,_r8, RS,_rX)
-#define MOVQXSrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
-
-#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM)
-#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM)
-#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM)
-#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM)
-
-#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RS,_rM, RD,_r4)
-#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS)
-#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RS,_rM, RD,_r8)
-#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS)
-
-#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM)
-#define MOVMSKPSrr(RS, RD) __SSELrr( 0x50, RS,_rX, RD,_r4)
-#define MOVMSKPDrr(RS, RD) _SSELrr(0x66, 0x50, RS,_rX, RD,_r4)
-
-#define MOVHLPSrr(RS, RD) __SSELrr( 0x12, RS,_rX, RD,_rX)
-#define MOVLHPSrr(RS, RD) __SSELrr( 0x16, RS,_rX, RD,_rX)
-
-#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX)
-#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX)
-#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS)
-
-#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX)
-#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX)
-#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS)
-
-#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x16, MD, MB, MI, MS, RD,_rX)
-#define MOVHPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x17, RS,_rX, MD, MB, MI, MS)
-#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x16, MD, MB, MI, MS, RD,_rX)
-#define MOVHPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x17, RS,_rX, MD, MB, MI, MS)
-
-#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x12, MD, MB, MI, MS, RD,_rX)
-#define MOVLPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x13, RS,_rX, MD, MB, MI, MS)
-#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x12, MD, MB, MI, MS, RD,_rX)
-#define MOVLPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x13, RS,_rX, MD, MB, MI, MS)
-
-
-/* --- FLoating-Point instructions ----------------------------------------- */
-
-#define _ESCmi(D,B,I,S,OP) (_REXLrm(0,B,I), _O_r_X(0xd8|(OP & 7), (OP >> 3), D,B,I,S))
-
-#define FLDr(R) _OOr(0xd9c0,_rN(R))
-#define FLDLm(D,B,I,S) _ESCmi(D,B,I,S,005)
-#define FLDSm(D,B,I,S) _ESCmi(D,B,I,S,001)
-#define FLDTm(D,B,I,S) _ESCmi(D,B,I,S,053)
-
-#define FSTr(R) _OOr(0xddd0,_rN(R))
-#define FSTSm(D,B,I,S) _ESCmi(D,B,I,S,021)
-#define FSTLm(D,B,I,S) _ESCmi(D,B,I,S,025)
-
-#define FSTPr(R) _OOr(0xddd8,_rN(R))
-#define FSTPSm(D,B,I,S) _ESCmi(D,B,I,S,031)
-#define FSTPLm(D,B,I,S) _ESCmi(D,B,I,S,035)
-#define FSTPTm(D,B,I,S) _ESCmi(D,B,I,S,073)
-
-#define FADDr0(R) _OOr(0xd8c0,_rN(R))
-#define FADD0r(R) _OOr(0xdcc0,_rN(R))
-#define FADDP0r(R) _OOr(0xdec0,_rN(R))
-#define FADDSm(D,B,I,S) _ESCmi(D,B,I,S,000)
-#define FADDLm(D,B,I,S) _ESCmi(D,B,I,S,004)
-
-#define FSUBSm(D,B,I,S) _ESCmi(D,B,I,S,040)
-#define FSUBLm(D,B,I,S) _ESCmi(D,B,I,S,044)
-#define FSUBr0(R) _OOr(0xd8e0,_rN(R))
-#define FSUB0r(R) _OOr(0xdce8,_rN(R))
-#define FSUBP0r(R) _OOr(0xdee8,_rN(R))
-
-#define FSUBRr0(R) _OOr(0xd8e8,_rN(R))
-#define FSUBR0r(R) _OOr(0xdce0,_rN(R))
-#define FSUBRP0r(R) _OOr(0xdee0,_rN(R))
-#define FSUBRSm(D,B,I,S) _ESCmi(D,B,I,S,050)
-#define FSUBRLm(D,B,I,S) _ESCmi(D,B,I,S,054)
-
-#define FMULr0(R) _OOr(0xd8c8,_rN(R))
-#define FMUL0r(R) _OOr(0xdcc8,_rN(R))
-#define FMULP0r(R) _OOr(0xdec8,_rN(R))
-#define FMULSm(D,B,I,S) _ESCmi(D,B,I,S,010)
-#define FMULLm(D,B,I,S) _ESCmi(D,B,I,S,014)
-
-#define FDIVr0(R) _OOr(0xd8f0,_rN(R))
-#define FDIV0r(R) _OOr(0xdcf8,_rN(R))
-#define FDIVP0r(R) _OOr(0xdef8,_rN(R))
-#define FDIVSm(D,B,I,S) _ESCmi(D,B,I,S,060)
-#define FDIVLm(D,B,I,S) _ESCmi(D,B,I,S,064)
-
-#define FDIVRr0(R) _OOr(0xd8f8,_rN(R))
-#define FDIVR0r(R) _OOr(0xdcf0,_rN(R))
-#define FDIVRP0r(R) _OOr(0xdef0,_rN(R))
-#define FDIVRSm(D,B,I,S) _ESCmi(D,B,I,S,070)
-#define FDIVRLm(D,B,I,S) _ESCmi(D,B,I,S,074)
-
-#define FCMOVBr0(R) _OOr(0xdac0,_rN(R))
-#define FCMOVBEr0(R) _OOr(0xdad0,_rN(R))
-#define FCMOVEr0(R) _OOr(0xdac8,_rN(R))
-#define FCMOVNBr0(R) _OOr(0xdbc0,_rN(R))
-#define FCMOVNBEr0(R) _OOr(0xdbd0,_rN(R))
-#define FCMOVNEr0(R) _OOr(0xdbc8,_rN(R))
-#define FCMOVNUr0(R) _OOr(0xdbd8,_rN(R))
-#define FCMOVUr0(R) _OOr(0xdad8,_rN(R))
-#define FCOMIr0(R) _OOr(0xdbf0,_rN(R))
-#define FCOMIPr0(R) _OOr(0xdff0,_rN(R))
-
-#define FCOMr(R) _OOr(0xd8d0,_rN(R))
-#define FCOMSm(D,B,I,S) _ESCmi(D,B,I,S,020)
-#define FCOMLm(D,B,I,S) _ESCmi(D,B,I,S,024)
-
-#define FCOMPr(R) _OOr(0xd8d8,_rN(R))
-#define FCOMPSm(D,B,I,S) _ESCmi(D,B,I,S,030)
-#define FCOMPLm(D,B,I,S) _ESCmi(D,B,I,S,034)
-
-#define FUCOMIr0(R) _OOr(0xdbe8,_rN(R))
-#define FUCOMIPr0(R) _OOr(0xdfe8,_rN(R))
-#define FUCOMPr(R) _OOr(0xdde8,_rN(R))
-#define FUCOMr(R) _OOr(0xdde0,_rN(R))
-
-#define FIADDLm(D,B,I,S) _ESCmi(D,B,I,S,002)
-#define FICOMLm(D,B,I,S) _ESCmi(D,B,I,S,022)
-#define FICOMPLm(D,B,I,S) _ESCmi(D,B,I,S,032)
-#define FIDIVLm(D,B,I,S) _ESCmi(D,B,I,S,062)
-#define FIDIVRLm(D,B,I,S) _ESCmi(D,B,I,S,072)
-#define FILDLm(D,B,I,S) _ESCmi(D,B,I,S,003)
-#define FILDQm(D,B,I,S) _ESCmi(D,B,I,S,057)
-#define FIMULLm(D,B,I,S) _ESCmi(D,B,I,S,012)
-#define FISTLm(D,B,I,S) _ESCmi(D,B,I,S,023)
-#define FISTPLm(D,B,I,S) _ESCmi(D,B,I,S,033)
-#define FISTPQm(D,B,I,S) _ESCmi(D,B,I,S,077)
-#define FISUBLm(D,B,I,S) _ESCmi(D,B,I,S,042)
-#define FISUBRLm(D,B,I,S) _ESCmi(D,B,I,S,052)
-
-#define FREEr(R) _OOr(0xddc0,_rN(R))
-#define FXCHr(R) _OOr(0xd9c8,_rN(R))
-
-#endif /* X86_RTASM_H */
+++ /dev/null
-/*
- * compiler/compemu.h - Public interface and definitions
- *
- * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
- *
- * Adaptation for Basilisk II and improvements, copyright 2000-2005
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef COMPEMU_H
-#define COMPEMU_H
-
-#ifdef JIT
-
-#if defined __i386__ || defined __x86_64__
-#include "flags_x86.h"
-#else
-#error "Unsupported JIT compiler for this architecture"
-#endif
-
-#if JIT_DEBUG
-/* dump some information (m68k block, x86 block addresses) about the compiler state */
-extern void compiler_dumpstate(void);
-#endif
-
-/* Now that we do block chaining, and also have linked lists on each tag,
- TAGMASK can be much smaller and still do its job. Saves several megs
- of memory! */
-#define TAGMASK 0x0000ffff
-#define TAGSIZE (TAGMASK+1)
-#define MAXRUN 1024
-#define cacheline(x) (((uintptr)x)&TAGMASK)
-
-extern uae_u8* start_pc_p;
-extern uae_u32 start_pc;
-
-struct blockinfo_t;
-
-struct cpu_history {
- uae_u16 * location;
-};
-
-union cacheline {
- cpuop_func * handler;
- blockinfo_t * bi;
-};
-
-/* Use new spill/reload strategy when calling external functions */
-#define USE_OPTIMIZED_CALLS 0
-#if USE_OPTIMIZED_CALLS
-#error implementation in progress
-#endif
-
-/* (gb) When on, this option can save save up to 30% compilation time
- * when many lazy flushes occur (e.g. apps in MacOS 8.x).
- */
-#define USE_SEPARATE_BIA 1
-
-/* Use chain of checksum_info_t to compute the block checksum */
-#define USE_CHECKSUM_INFO 1
-
-/* Use code inlining, aka follow-up of constant jumps */
-#define USE_INLINING 1
-
-/* Inlining requires the chained checksuming information */
-#if USE_INLINING
-#undef USE_CHECKSUM_INFO
-#define USE_CHECKSUM_INFO 1
-#endif
-
-/* Does flush_icache_range() only check for blocks falling in the requested range? */
-#define LAZY_FLUSH_ICACHE_RANGE 0
-
-#define USE_F_ALIAS 1
-#define USE_OFFSET 1
-#define COMP_DEBUG 1
-
-#if COMP_DEBUG
-#define Dif(x) if (x)
-#else
-#define Dif(x) if (0)
-#endif
-
-#define SCALE 2
-
-#define BYTES_PER_INST 10240 /* paranoid ;-) */
-#define LONGEST_68K_INST 16 /* The number of bytes the longest possible
- 68k instruction takes */
-#define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums
- for. Anything larger will be flushed
- unconditionally even with SOFT_FLUSH */
-#define MAX_HOLD_BI 3 /* One for the current block, and up to two
- for jump targets */
-
-#define INDIVIDUAL_INST 0
-#if 1
-// gb-- my format from readcpu.cpp is not the same
-#define FLAG_X 0x0010
-#define FLAG_N 0x0008
-#define FLAG_Z 0x0004
-#define FLAG_V 0x0002
-#define FLAG_C 0x0001
-#else
-#define FLAG_C 0x0010
-#define FLAG_V 0x0008
-#define FLAG_Z 0x0004
-#define FLAG_N 0x0002
-#define FLAG_X 0x0001
-#endif
-#define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V)
-#define FLAG_ZNV (FLAG_Z | FLAG_N | FLAG_V)
-
-#define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */
-
-#if defined(__x86_64__)
-#define N_REGS 16 /* really only 15, but they are numbered 0-3,5-15 */
-#else
-#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
-#endif
-#define N_FREGS 6 /* That leaves us two positions on the stack to play with */
-
-/* Functions exposed to newcpu, or to what was moved from newcpu.c to
- * compemu_support.c */
-extern void compiler_init(void);
-extern void compiler_exit(void);
-extern bool compiler_use_jit(void);
-extern void init_comp(void);
-extern void flush(int save_regs);
-extern void small_flush(int save_regs);
-extern void set_target(uae_u8* t);
-extern uae_u8* get_target(void);
-extern void freescratch(void);
-extern void build_comp(void);
-extern void set_cache_state(int enabled);
-extern int get_cache_state(void);
-extern uae_u32 get_jitted_size(void);
-extern void (*flush_icache)(int n);
-extern void alloc_cache(void);
-extern int check_for_cache_miss(void);
-
-/* JIT FPU compilation */
-extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
-extern void comp_fbcc_opp (uae_u32 opcode);
-extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
-
-extern uae_u32 needed_flags;
-extern uae_u8* comp_pc_p;
-extern void* pushall_call_handler;
-
-#define VREGS 32
-#define VFREGS 16
-
-#define INMEM 1
-#define CLEAN 2
-#define DIRTY 3
-#define UNDEF 4
-#define ISCONST 5
-
-typedef struct {
- uae_u32* mem;
- uae_u32 val;
- uae_u8 is_swapped;
- uae_u8 status;
- uae_s8 realreg; /* gb-- realreg can hold -1 */
- uae_u8 realind; /* The index in the holds[] array */
- uae_u8 needflush;
- uae_u8 validsize;
- uae_u8 dirtysize;
- uae_u8 dummy;
-} reg_status;
-
-typedef struct {
- uae_u32* mem;
- double val;
- uae_u8 status;
- uae_s8 realreg; /* gb-- realreg can hold -1 */
- uae_u8 realind;
- uae_u8 needflush;
-} freg_status;
-
-#define PC_P 16
-#define FLAGX 17
-#define FLAGTMP 18
-#define NEXT_HANDLER 19
-#define S1 20
-#define S2 21
-#define S3 22
-#define S4 23
-#define S5 24
-#define S6 25
-#define S7 26
-#define S8 27
-#define S9 28
-#define S10 29
-#define S11 30
-#define S12 31
-
-#define FP_RESULT 8
-#define FS1 9
-#define FS2 10
-#define FS3 11
-
-typedef struct {
- uae_u32 touched;
- uae_s8 holds[VREGS];
- uae_u8 nholds;
- uae_u8 canbyte;
- uae_u8 canword;
- uae_u8 locked;
-} n_status;
-
-typedef struct {
- uae_u32 touched;
- uae_s8 holds[VFREGS];
- uae_u8 nholds;
- uae_u8 locked;
-} fn_status;
-
-/* For flag handling */
-#define NADA 1
-#define TRASH 2
-#define VALID 3
-
-/* needflush values */
-#define NF_SCRATCH 0
-#define NF_TOMEM 1
-#define NF_HANDLER 2
-
-typedef struct {
- /* Integer part */
- reg_status state[VREGS];
- n_status nat[N_REGS];
- uae_u32 flags_on_stack;
- uae_u32 flags_in_flags;
- uae_u32 flags_are_important;
- /* FPU part */
- freg_status fate[VFREGS];
- fn_status fat[N_FREGS];
-
- /* x86 FPU part */
- uae_s8 spos[N_FREGS];
- uae_s8 onstack[6];
- uae_s8 tos;
-} bigstate;
-
-typedef struct {
- /* Integer part */
- char virt[VREGS];
- char nat[N_REGS];
-} smallstate;
-
-extern bigstate live;
-extern int touchcnt;
-
-
-#define IMM uae_s32
-#define R1 uae_u32
-#define R2 uae_u32
-#define R4 uae_u32
-#define W1 uae_u32
-#define W2 uae_u32
-#define W4 uae_u32
-#define RW1 uae_u32
-#define RW2 uae_u32
-#define RW4 uae_u32
-#define MEMR uae_u32
-#define MEMW uae_u32
-#define MEMRW uae_u32
-
-#define FW uae_u32
-#define FR uae_u32
-#define FRW uae_u32
-
-#define MIDFUNC(nargs,func,args) void func args
-#define MENDFUNC(nargs,func,args)
-#define COMPCALL(func) func
-
-#define LOWFUNC(flags,mem,nargs,func,args) static __inline__ void func args
-#define LENDFUNC(flags,mem,nargs,func,args)
-
-/* What we expose to the outside */
-#define DECLARE_MIDFUNC(func) extern void func
-DECLARE_MIDFUNC(bt_l_ri(R4 r, IMM i));
-DECLARE_MIDFUNC(bt_l_rr(R4 r, R4 b));
-DECLARE_MIDFUNC(btc_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(btc_l_rr(RW4 r, R4 b));
-DECLARE_MIDFUNC(bts_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(bts_l_rr(RW4 r, R4 b));
-DECLARE_MIDFUNC(btr_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(btr_l_rr(RW4 r, R4 b));
-DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s));
-DECLARE_MIDFUNC(call_r(R4 r));
-DECLARE_MIDFUNC(sub_l_mi(IMM d, IMM s));
-DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s));
-DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s));
-DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s));
-DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i));
-DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i));
-DECLARE_MIDFUNC(rol_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(rol_l_rr(RW4 d, R1 r));
-DECLARE_MIDFUNC(rol_w_rr(RW2 d, R1 r));
-DECLARE_MIDFUNC(rol_b_rr(RW1 d, R1 r));
-DECLARE_MIDFUNC(shll_l_rr(RW4 d, R1 r));
-DECLARE_MIDFUNC(shll_w_rr(RW2 d, R1 r));
-DECLARE_MIDFUNC(shll_b_rr(RW1 d, R1 r));
-DECLARE_MIDFUNC(ror_b_ri(R1 r, IMM i));
-DECLARE_MIDFUNC(ror_w_ri(R2 r, IMM i));
-DECLARE_MIDFUNC(ror_l_ri(R4 r, IMM i));
-DECLARE_MIDFUNC(ror_l_rr(R4 d, R1 r));
-DECLARE_MIDFUNC(ror_w_rr(R2 d, R1 r));
-DECLARE_MIDFUNC(ror_b_rr(R1 d, R1 r));
-DECLARE_MIDFUNC(shrl_l_rr(RW4 d, R1 r));
-DECLARE_MIDFUNC(shrl_w_rr(RW2 d, R1 r));
-DECLARE_MIDFUNC(shrl_b_rr(RW1 d, R1 r));
-DECLARE_MIDFUNC(shra_l_rr(RW4 d, R1 r));
-DECLARE_MIDFUNC(shra_w_rr(RW2 d, R1 r));
-DECLARE_MIDFUNC(shra_b_rr(RW1 d, R1 r));
-DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i));
-DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i));
-DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i));
-DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i));
-DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i));
-DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i));
-DECLARE_MIDFUNC(setcc(W1 d, IMM cc));
-DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc));
-DECLARE_MIDFUNC(cmov_b_rr(RW1 d, R1 s, IMM cc));
-DECLARE_MIDFUNC(cmov_w_rr(RW2 d, R2 s, IMM cc));
-DECLARE_MIDFUNC(cmov_l_rr(RW4 d, R4 s, IMM cc));
-DECLARE_MIDFUNC(cmov_l_rm(RW4 d, IMM s, IMM cc));
-DECLARE_MIDFUNC(bsf_l_rr(W4 d, R4 s));
-DECLARE_MIDFUNC(pop_m(IMM d));
-DECLARE_MIDFUNC(push_m(IMM d));
-DECLARE_MIDFUNC(pop_l(W4 d));
-DECLARE_MIDFUNC(push_l_i(IMM i));
-DECLARE_MIDFUNC(push_l(R4 s));
-DECLARE_MIDFUNC(clear_16(RW4 r));
-DECLARE_MIDFUNC(clear_8(RW4 r));
-DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, R2 s));
-DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, R1 s));
-DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, R2 s));
-DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, R1 s));
-DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s));
-DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s));
-DECLARE_MIDFUNC(imul_32_32(RW4 d, R4 s));
-DECLARE_MIDFUNC(mul_32_32(RW4 d, R4 s));
-DECLARE_MIDFUNC(mov_b_rr(W1 d, R1 s));
-DECLARE_MIDFUNC(mov_w_rr(W2 d, R2 s));
-DECLARE_MIDFUNC(mov_l_rrm_indexed(W4 d,R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_w_rrm_indexed(W2 d, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_b_rrm_indexed(W1 d, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_l_mrr_indexed(R4 baser, R4 index, IMM factor, R4 s));
-DECLARE_MIDFUNC(mov_w_mrr_indexed(R4 baser, R4 index, IMM factor, R2 s));
-DECLARE_MIDFUNC(mov_b_mrr_indexed(R4 baser, R4 index, IMM factor, R1 s));
-DECLARE_MIDFUNC(mov_l_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R4 s));
-DECLARE_MIDFUNC(mov_w_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R2 s));
-DECLARE_MIDFUNC(mov_b_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R1 s));
-DECLARE_MIDFUNC(mov_l_brrm_indexed(W4 d, IMM base, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_w_brrm_indexed(W2 d, IMM base, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_b_brrm_indexed(W1 d, IMM base, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_l_rR(W4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_w_rR(W2 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_b_rR(W1 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_l_brR(W4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_w_brR(W2 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_b_brR(W1 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_l_Ri(R4 d, IMM i, IMM offset));
-DECLARE_MIDFUNC(mov_w_Ri(R4 d, IMM i, IMM offset));
-DECLARE_MIDFUNC(mov_b_Ri(R4 d, IMM i, IMM offset));
-DECLARE_MIDFUNC(mov_l_Rr(R4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_w_Rr(R4 d, R2 s, IMM offset));
-DECLARE_MIDFUNC(mov_b_Rr(R4 d, R1 s, IMM offset));
-DECLARE_MIDFUNC(lea_l_brr(W4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, R4 s, R4 index, IMM factor, IMM offset));
-DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, R4 s, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_l_bRr(R4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_w_bRr(R4 d, R2 s, IMM offset));
-DECLARE_MIDFUNC(mov_b_bRr(R4 d, R1 s, IMM offset));
-DECLARE_MIDFUNC(bswap_32(RW4 r));
-DECLARE_MIDFUNC(bswap_16(RW2 r));
-DECLARE_MIDFUNC(mov_l_rr(W4 d, R4 s));
-DECLARE_MIDFUNC(mov_l_mr(IMM d, R4 s));
-DECLARE_MIDFUNC(mov_w_mr(IMM d, R2 s));
-DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s));
-DECLARE_MIDFUNC(mov_b_mr(IMM d, R1 s));
-DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s));
-DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s));
-DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s));
-DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s));
-DECLARE_MIDFUNC(add_l_mi(IMM d, IMM s) );
-DECLARE_MIDFUNC(add_w_mi(IMM d, IMM s) );
-DECLARE_MIDFUNC(add_b_mi(IMM d, IMM s) );
-DECLARE_MIDFUNC(test_l_ri(R4 d, IMM i));
-DECLARE_MIDFUNC(test_l_rr(R4 d, R4 s));
-DECLARE_MIDFUNC(test_w_rr(R2 d, R2 s));
-DECLARE_MIDFUNC(test_b_rr(R1 d, R1 s));
-DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i));
-DECLARE_MIDFUNC(and_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(and_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(and_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(or_l_rm(RW4 d, IMM s));
-DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i));
-DECLARE_MIDFUNC(or_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(or_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(or_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(adc_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(adc_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(adc_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(add_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(add_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(add_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i));
-DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i));
-DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i));
-DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i));
-DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i));
-DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i));
-DECLARE_MIDFUNC(sbb_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(sbb_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(sbb_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(sub_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(sub_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(sub_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(cmp_l(R4 d, R4 s));
-DECLARE_MIDFUNC(cmp_l_ri(R4 r, IMM i));
-DECLARE_MIDFUNC(cmp_w(R2 d, R2 s));
-DECLARE_MIDFUNC(cmp_b(R1 d, R1 s));
-DECLARE_MIDFUNC(xor_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(xor_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(xor_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(live_flags(void));
-DECLARE_MIDFUNC(dont_care_flags(void));
-DECLARE_MIDFUNC(duplicate_carry(void));
-DECLARE_MIDFUNC(restore_carry(void));
-DECLARE_MIDFUNC(start_needflags(void));
-DECLARE_MIDFUNC(end_needflags(void));
-DECLARE_MIDFUNC(make_flags_live(void));
-DECLARE_MIDFUNC(call_r_11(R4 r, W4 out1, R4 in1, IMM osize, IMM isize));
-DECLARE_MIDFUNC(call_r_02(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2));
-DECLARE_MIDFUNC(forget_about(W4 r));
-DECLARE_MIDFUNC(nop(void));
-
-DECLARE_MIDFUNC(f_forget_about(FW r));
-DECLARE_MIDFUNC(fmov_pi(FW r));
-DECLARE_MIDFUNC(fmov_log10_2(FW r));
-DECLARE_MIDFUNC(fmov_log2_e(FW r));
-DECLARE_MIDFUNC(fmov_loge_2(FW r));
-DECLARE_MIDFUNC(fmov_1(FW r));
-DECLARE_MIDFUNC(fmov_0(FW r));
-DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m));
-DECLARE_MIDFUNC(fmovi_rm(FW r, MEMR m));
-DECLARE_MIDFUNC(fmovi_mr(MEMW m, FR r));
-DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m));
-DECLARE_MIDFUNC(fmovs_mr(MEMW m, FR r));
-DECLARE_MIDFUNC(fmov_mr(MEMW m, FR r));
-DECLARE_MIDFUNC(fmov_ext_mr(MEMW m, FR r));
-DECLARE_MIDFUNC(fmov_ext_rm(FW r, MEMR m));
-DECLARE_MIDFUNC(fmov_rr(FW d, FR s));
-DECLARE_MIDFUNC(fldcw_m_indexed(R4 index, IMM base));
-DECLARE_MIDFUNC(ftst_r(FR r));
-DECLARE_MIDFUNC(dont_care_fflags(void));
-DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s));
-DECLARE_MIDFUNC(fabs_rr(FW d, FR s));
-DECLARE_MIDFUNC(frndint_rr(FW d, FR s));
-DECLARE_MIDFUNC(fsin_rr(FW d, FR s));
-DECLARE_MIDFUNC(fcos_rr(FW d, FR s));
-DECLARE_MIDFUNC(ftwotox_rr(FW d, FR s));
-DECLARE_MIDFUNC(fetox_rr(FW d, FR s));
-DECLARE_MIDFUNC(flog2_rr(FW d, FR s));
-DECLARE_MIDFUNC(fneg_rr(FW d, FR s));
-DECLARE_MIDFUNC(fadd_rr(FRW d, FR s));
-DECLARE_MIDFUNC(fsub_rr(FRW d, FR s));
-DECLARE_MIDFUNC(fmul_rr(FRW d, FR s));
-DECLARE_MIDFUNC(frem_rr(FRW d, FR s));
-DECLARE_MIDFUNC(frem1_rr(FRW d, FR s));
-DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s));
-DECLARE_MIDFUNC(fcmp_rr(FR d, FR s));
-DECLARE_MIDFUNC(fflags_into_flags(W2 tmp));
-#undef DECLARE_MIDFUNC
-
-extern int failure;
-#define FAIL(x) do { failure|=x; } while (0)
-
-/* Convenience functions exposed to gencomp */
-extern uae_u32 m68k_pc_offset;
-extern void readbyte(int address, int dest, int tmp);
-extern void readword(int address, int dest, int tmp);
-extern void readlong(int address, int dest, int tmp);
-extern void writebyte(int address, int source, int tmp);
-extern void writeword(int address, int source, int tmp);
-extern void writelong(int address, int source, int tmp);
-extern void writeword_clobber(int address, int source, int tmp);
-extern void writelong_clobber(int address, int source, int tmp);
-extern void get_n_addr(int address, int dest, int tmp);
-extern void get_n_addr_jmp(int address, int dest, int tmp);
-extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
-/* Set native Z flag only if register is zero */
-extern void set_zero(int r, int tmp);
-extern int kill_rodent(int r);
-extern void sync_m68k_pc(void);
-extern uae_u32 get_const(int r);
-extern int is_const(int r);
-extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond);
-
-#define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1))
-#define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o)))
-#define comp_get_ilong(o) do_get_mem_long((uae_u32 *)(comp_pc_p + (o)))
-
-struct blockinfo_t;
-
-typedef struct dep_t {
- uae_u32* jmp_off;
- struct blockinfo_t* target;
- struct blockinfo_t* source;
- struct dep_t** prev_p;
- struct dep_t* next;
-} dependency;
-
-typedef struct checksum_info_t {
- uae_u8 *start_p;
- uae_u32 length;
- struct checksum_info_t *next;
-} checksum_info;
-
-typedef struct blockinfo_t {
- uae_s32 count;
- cpuop_func* direct_handler_to_use;
- cpuop_func* handler_to_use;
- /* The direct handler does not check for the correct address */
-
- cpuop_func* handler;
- cpuop_func* direct_handler;
-
- cpuop_func* direct_pen;
- cpuop_func* direct_pcc;
-
- uae_u8* pc_p;
-
- uae_u32 c1;
- uae_u32 c2;
-#if USE_CHECKSUM_INFO
- checksum_info *csi;
-#else
- uae_u32 len;
- uae_u32 min_pcp;
-#endif
-
- struct blockinfo_t* next_same_cl;
- struct blockinfo_t** prev_same_cl_p;
- struct blockinfo_t* next;
- struct blockinfo_t** prev_p;
-
- uae_u8 optlevel;
- uae_u8 needed_flags;
- uae_u8 status;
- uae_u8 havestate;
-
- dependency dep[2]; /* Holds things we depend on */
- dependency* deplist; /* List of things that depend on this */
- smallstate env;
-
-#if JIT_DEBUG
- /* (gb) size of the compiled block (direct handler) */
- uae_u32 direct_handler_size;
-#endif
-} blockinfo;
-
-#define BI_INVALID 0
-#define BI_ACTIVE 1
-#define BI_NEED_RECOMP 2
-#define BI_NEED_CHECK 3
-#define BI_CHECKING 4
-#define BI_COMPILING 5
-#define BI_FINALIZING 6
-
-void execute_normal(void);
-void exec_nostats(void);
-void do_nothing(void);
-
-#else
-
-static __inline__ void flush_icache(int) { }
-static __inline__ void build_comp() { }
-
-#endif /* !USE_JIT */
-
-#endif /* COMPEMU_H */
+++ /dev/null
-/*
- * compiler/codegen_x86.cpp - IA-32 code generator
- *
- * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
- *
- * Adaptation for Basilisk II and improvements, copyright 2000-2005
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* This should eventually end up in machdep/, but for now, x86 is the
- only target, and it's easier this way... */
-
-#include "flags_x86.h"
-
-/*************************************************************************
- * Some basic information about the the target CPU *
- *************************************************************************/
-
-#define EAX_INDEX 0
-#define ECX_INDEX 1
-#define EDX_INDEX 2
-#define EBX_INDEX 3
-#define ESP_INDEX 4
-#define EBP_INDEX 5
-#define ESI_INDEX 6
-#define EDI_INDEX 7
-#if defined(__x86_64__)
-#define R8_INDEX 8
-#define R9_INDEX 9
-#define R10_INDEX 10
-#define R11_INDEX 11
-#define R12_INDEX 12
-#define R13_INDEX 13
-#define R14_INDEX 14
-#define R15_INDEX 15
-#endif
-/* XXX this has to match X86_Reg8H_Base + 4 */
-#define AH_INDEX (0x10+4+EAX_INDEX)
-#define CH_INDEX (0x10+4+ECX_INDEX)
-#define DH_INDEX (0x10+4+EDX_INDEX)
-#define BH_INDEX (0x10+4+EBX_INDEX)
-
-/* The register in which subroutines return an integer return value */
-#define REG_RESULT EAX_INDEX
-
-/* The registers subroutines take their first and second argument in */
-#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
-/* Handle the _fastcall parameters of ECX and EDX */
-#define REG_PAR1 ECX_INDEX
-#define REG_PAR2 EDX_INDEX
-#elif defined(__x86_64__)
-#define REG_PAR1 EDI_INDEX
-#define REG_PAR2 ESI_INDEX
-#else
-#define REG_PAR1 EAX_INDEX
-#define REG_PAR2 EDX_INDEX
-#endif
-
-#define REG_PC_PRE EAX_INDEX /* The register we use for preloading regs.pc_p */
-#if defined( _MSC_VER ) && !defined( USE_NORMAL_CALLING_CONVENTION )
-#define REG_PC_TMP EAX_INDEX
-#else
-#define REG_PC_TMP ECX_INDEX /* Another register that is not the above */
-#endif
-
-#define SHIFTCOUNT_NREG ECX_INDEX /* Register that can be used for shiftcount.
- -1 if any reg will do */
-#define MUL_NREG1 EAX_INDEX /* %eax will hold the low 32 bits after a 32x32 mul */
-#define MUL_NREG2 EDX_INDEX /* %edx will hold the high 32 bits */
-
-#define STACK_ALIGN 16
-#define STACK_OFFSET sizeof(void *)
-
-uae_s8 always_used[]={4,-1};
-#if defined(__x86_64__)
-uae_s8 can_byte[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
-uae_s8 can_word[]={0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,-1};
-#else
-uae_s8 can_byte[]={0,1,2,3,-1};
-uae_s8 can_word[]={0,1,2,3,5,6,7,-1};
-#endif
-
-#if USE_OPTIMIZED_CALLS
-/* Make sure interpretive core does not use cpuopti */
-uae_u8 call_saved[]={0,0,0,1,1,1,1,1};
-#error FIXME: code not ready
-#else
-/* cpuopti mutate instruction handlers to assume registers are saved
- by the caller */
-uae_u8 call_saved[]={0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0};
-#endif
-
-/* This *should* be the same as call_saved. But:
- - We might not really know which registers are saved, and which aren't,
- so we need to preserve some, but don't want to rely on everyone else
- also saving those registers
- - Special registers (such like the stack pointer) should not be "preserved"
- by pushing, even though they are "saved" across function calls
-*/
-#if defined(__x86_64__)
-/* callee-saved registers as defined by Linux AMD64 ABI: rbx, rbp, rsp, r12 - r15 */
-/* preserve r11 because it's generally used to hold pointers to functions */
-static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,0,0,0,0,0,1,1,1,1,1};
-#else
-/* callee-saved registers as defined by System V IA-32 ABI: edi, esi, ebx, ebp */
-static const uae_u8 need_to_preserve[]={0,0,0,1,0,1,1,1};
-#endif
-
-/* Whether classes of instructions do or don't clobber the native flags */
-#define CLOBBER_MOV
-#define CLOBBER_LEA
-#define CLOBBER_CMOV
-#define CLOBBER_POP
-#define CLOBBER_PUSH
-#define CLOBBER_SUB clobber_flags()
-#define CLOBBER_SBB clobber_flags()
-#define CLOBBER_CMP clobber_flags()
-#define CLOBBER_ADD clobber_flags()
-#define CLOBBER_ADC clobber_flags()
-#define CLOBBER_AND clobber_flags()
-#define CLOBBER_OR clobber_flags()
-#define CLOBBER_XOR clobber_flags()
-
-#define CLOBBER_ROL clobber_flags()
-#define CLOBBER_ROR clobber_flags()
-#define CLOBBER_SHLL clobber_flags()
-#define CLOBBER_SHRL clobber_flags()
-#define CLOBBER_SHRA clobber_flags()
-#define CLOBBER_TEST clobber_flags()
-#define CLOBBER_CL16
-#define CLOBBER_CL8
-#define CLOBBER_SE32
-#define CLOBBER_SE16
-#define CLOBBER_SE8
-#define CLOBBER_ZE32
-#define CLOBBER_ZE16
-#define CLOBBER_ZE8
-#define CLOBBER_SW16 clobber_flags()
-#define CLOBBER_SW32
-#define CLOBBER_SETCC
-#define CLOBBER_MUL clobber_flags()
-#define CLOBBER_BT clobber_flags()
-#define CLOBBER_BSF clobber_flags()
-
-/* The older code generator is now deprecated. */
-#define USE_NEW_RTASM 1
-
-#if USE_NEW_RTASM
-
-#if defined(__x86_64__)
-#define X86_TARGET_64BIT 1
-/* The address override prefix causes a 5 cycles penalty on Intel Core
- processors. Another solution would be to decompose the load in an LEA,
- MOV (to zero-extend), MOV (from memory): is it better? */
-#define ADDR32 x86_emit_byte(0x67),
-#else
-#define ADDR32 /**/
-#endif
-#define X86_FLAT_REGISTERS 0
-#define X86_OPTIMIZE_ALU 1
-#define X86_OPTIMIZE_ROTSHI 1
-#include "codegen_x86.h"
-
-#define x86_emit_byte(B) emit_byte(B)
-#define x86_emit_word(W) emit_word(W)
-#define x86_emit_long(L) emit_long(L)
-#define x86_emit_quad(Q) emit_quad(Q)
-#define x86_get_target() get_target()
-#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
-
-static void jit_fail(const char *msg, const char *file, int line, const char *function)
-{
- fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
- function, file, line, msg);
- abort();
-}
-
-LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
-{
-#if defined(__x86_64__)
- PUSHQr(r);
-#else
- PUSHLr(r);
-#endif
-}
-LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
-
-LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
-{
-#if defined(__x86_64__)
- POPQr(r);
-#else
- POPLr(r);
-#endif
-}
-LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
-
-LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
-{
-#if defined(__x86_64__)
- POPQm(d, X86_NOREG, X86_NOREG, 1);
-#else
- POPLm(d, X86_NOREG, X86_NOREG, 1);
-#endif
-}
-LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
-
-LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
-{
- BTLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
-{
- BTLrr(b, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
-{
- BTCLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
-{
- BTCLrr(b, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
-{
- BTRLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
-{
- BTRLrr(b, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
-{
- BTSLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
-{
- BTSLrr(b, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
-{
- SUBWir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
-
-LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
-{
- MOVLmr(s, X86_NOREG, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
-{
- MOVLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
-{
- MOVWim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
-{
- MOVBim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
-{
- ROLBim(i, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
-{
- ROLBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
-{
- ROLWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
-{
- ROLLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
-{
- ROLLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
-{
- ROLWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
-{
- ROLBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
-{
- SHLLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
-{
- SHLWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
-{
- SHLBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
-{
- RORBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
-{
- RORWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
-{
- ORLmr(s, X86_NOREG, X86_NOREG, 1, d);
-}
-LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
-{
- RORLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
-{
- RORLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
-{
- RORWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
-{
- RORBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
-{
- SHRLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
-{
- SHRWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
-{
- SHRBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
-{
- SARLrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
-{
- SARWrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
-{
- SARBrr(r, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
-{
- SHLLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
-{
- SHLWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
-{
- SHLBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
-{
- SHRLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
-{
- SHRWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
-{
- SHRBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
-{
- SARLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
-{
- SARWir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
-{
- SARBir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
-{
- SAHF();
-}
-LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
-
-LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
-{
- CPUID();
-}
-LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
-
-LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
-{
- LAHF();
-}
-LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
-
-LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
-{
- SETCCir(cc, d);
-}
-LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
-
-LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
-{
- SETCCim(cc, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
-
-/*
- * Conditional byte move: d <- s when condition cc holds. This variant
- * never emits a CMOV; it always emits a short conditional jump on cc^1
- * around a plain byte MOV.
- */
-LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
-{
- /* replacement using branch and mov */
- /* Remember where the jump's displacement byte will be; assumes JCCSii
-    emits one opcode byte followed by an 8-bit displacement -- TODO confirm */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cc^1, 0);
- MOVBrr(s, d);
- /* Back-patch the placeholder 0: distance from the byte after the
-    displacement to the current emission target (i.e. past the MOV) */
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
-}
-LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
-
-/*
- * Conditional 16-bit move: d <- s when condition cc holds. Uses CMOV when
- * have_cmov is set, otherwise a short jump on cc^1 around a word MOV.
- */
-LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
-{
- if (have_cmov)
- CMOVWrr(cc, s, d);
- else { /* replacement using branch and mov */
- /* Location of the jump's displacement byte; assumes a one-byte opcode
-    precedes it -- TODO confirm against JCCSii */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cc^1, 0);
- MOVWrr(s, d);
- /* Patch the displacement so the jump ends right after the MOV */
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
- }
-}
-LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
-
-/*
- * Conditional 32-bit move: d <- s when condition cc holds. Uses CMOV when
- * have_cmov is set, otherwise a short jump on cc^1 around a long MOV.
- */
-LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
-{
- if (have_cmov)
- CMOVLrr(cc, s, d);
- else { /* replacement using branch and mov */
- /* Location of the jump's displacement byte; assumes a one-byte opcode
-    precedes it -- TODO confirm against JCCSii */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cc^1, 0);
- MOVLrr(s, d);
- /* Patch the displacement so the jump ends right after the MOV */
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
- }
-}
-LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
-
-LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
-{
- BSFLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
-{
- MOVSLQrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
-{
- MOVSWLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
-
-LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
-{
- MOVSBLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
-{
- MOVZWLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
-
-LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
-{
- MOVZBLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
-{
- IMULLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
-
-/*
- * Signed multiply using the one-operand IMUL form. The operands are fixed:
- * d must be MUL_NREG1 and s must be MUL_NREG2, otherwise the problem is
- * logged and the process aborts. Only s is passed to the emitter.
- * NOTE(review): presumably the 64-bit product is split across d and s
- * (both are declared RW4) -- confirm against the MUL_NREG definitions.
- */
-LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
-{
- if (d!=MUL_NREG1 || s!=MUL_NREG2) {
- write_log("Bad register in IMUL: d=%d, s=%d\n",d,s);
- abort();
- }
- IMULLr(s);
-}
-LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
-
-/*
- * Unsigned counterpart of raw_imul_64_32: one-operand MUL with fixed
- * operands. d must be MUL_NREG1 and s must be MUL_NREG2, otherwise the
- * problem is logged and the process aborts. Only s is passed to the emitter.
- */
-LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
-{
- if (d!=MUL_NREG1 || s!=MUL_NREG2) {
- write_log("Bad register in MUL: d=%d, s=%d\n",d,s);
- abort();
- }
- MULLr(s);
-}
-LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
-
-/*
- * Placeholder for a two-operand unsigned 32x32 multiply. It emits nothing
- * and aborts unconditionally, so it must never be reached at run time.
- */
-LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
-{
- abort(); /* %^$&%^$%#^ x86! */
-}
-LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
-{
- MOVBrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
-{
- MOVWrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
-
-LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVLmr(0, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVWmr(0, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVBmr(0, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-{
- ADDR32 MOVLrm(s, 0, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-{
- ADDR32 MOVWrm(s, 0, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-{
- ADDR32 MOVBrm(s, 0, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-{
- ADDR32 MOVLrm(s, base, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-{
- ADDR32 MOVWrm(s, base, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-{
- ADDR32 MOVBrm(s, base, baser, index, factor);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-
-LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVLmr(base, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVWmr(base, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- ADDR32 MOVBmr(base, baser, index, factor, d);
-}
-LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-{
- ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
-}
-LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-
-/*
- * Conditional 32-bit load: d <- mem[base + index*factor] when cond holds.
- * No base register is used (X86_NOREG); base is the displacement.
- * Uses CMOV when have_cmov is set, otherwise a short jump on cond^1
- * around an ordinary load.
- */
-LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
-{
- if (have_cmov)
- ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d);
- else { /* replacement using branch and mov */
- /* Location of the jump's displacement byte; assumes a one-byte opcode
-    precedes it -- TODO confirm against JCCSii */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cond^1, 0);
- ADDR32 MOVLmr(base, X86_NOREG, index, factor, d);
- /* The load's length varies, so the displacement is computed after
-    emission and patched in */
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
- }
-}
-LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
-
-/*
- * Conditional 32-bit load from the displacement-only address mem (no base
- * or index register): d <- [mem] when cond holds. Uses CMOV when have_cmov
- * is set, otherwise a short jump on cond^1 around an ordinary load.
- */
-LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
-{
- if (have_cmov)
- CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d);
- else { /* replacement using branch and mov */
- /* Location of the jump's displacement byte; assumes a one-byte opcode
-    precedes it -- TODO confirm against JCCSii */
- int8 *target_p = (int8 *)x86_get_target() + 1;
- JCCSii(cond^1, 0);
- MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d);
- /* Patch the displacement so the jump ends right after the load */
- *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1);
- }
-}
-LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
-
-LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
-{
- ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
-{
- ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
-{
- ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
-{
- ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
-{
- ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
-{
- ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
-{
- ADDR32 MOVLim(i, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
-{
- ADDR32 MOVWim(i, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
-{
- ADDR32 MOVBim(i, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
-{
- ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
-{
- ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
-{
- ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
-
-LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
-{
- LEALmr(offset, s, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-{
- LEALmr(offset, s, index, factor, d);
-}
-LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-
-LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-{
- LEALmr(0, s, index, factor, d);
-}
-LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-
-/*
- * d <- index * factor, computed with LEA using no base register
- * (X86_NOREG) and a zero displacement.
- * The declared argument count is 3, matching the (d, index, factor)
- * parameter list; it was previously given as 4.
- */
-LOWFUNC(NONE,NONE,3,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
-{
-    LEALmr(0, X86_NOREG, index, factor, d);
-}
-LENDFUNC(NONE,NONE,3,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
-{
- ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
-{
- ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
-{
- ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
-
-LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
-{
- BSWAPLr(r);
-}
-LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
-
-/*
- * Swap the two bytes of the 16-bit register r by rotating it left by 8.
- * Unlike raw_bswap_32 this is declared as writing the flags.
- */
-LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
-{
- ROLWir(8, r);
-}
-LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
-
-LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
-{
- MOVLrr(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
-{
- MOVLrm(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
-{
- MOVWrm(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
-
-LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
-{
- MOVWmr(s, X86_NOREG, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
-{
- MOVBrm(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
-
-LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
-{
- MOVBmr(s, X86_NOREG, X86_NOREG, 1, d);
-}
-LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
-{
- MOVLir(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
-{
- MOVWir(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
-{
- MOVBir(s, d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
-
-LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
-{
- ADCLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
-{
- ADDLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
-{
- ADDWim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
-{
- ADDBim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
-{
- TESTLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
-{
- TESTLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
-{
- TESTWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
-{
- TESTBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
-{
- XORLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
-{
- ANDLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
-{
- ANDWir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
-{
- ANDLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
-{
- ANDWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
-{
- ANDBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
-{
- ORLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
-{
- ORLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
-{
- ORWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
-{
- ORBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
-{
- ADCLrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
-{
- ADCWrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
-{
- ADCBrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
-{
- ADDLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
-{
- ADDWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
-{
- ADDBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
-{
- SUBLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
-{
- SUBBir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
-{
- ADDLir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
-{
- ADDWir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
-{
- ADDBir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
-{
- SBBLrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
-{
- SBBWrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
-{
- SBBBrr(s, d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
-{
- SUBLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
-{
- SUBWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
-{
- SUBBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
-{
- CMPLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
-{
- CMPLir(i, r);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
-{
- CMPWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
-
-/*
- * Compare the byte at the displacement-only address d (no base or index
- * register) with the immediate s. Declared as writing the flags and
- * reading memory.
- */
-LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
-{
-    CMPBim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-/* The closing marker names this function; it previously read raw_cmp_l_mi. */
-LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
-{
- CMPBir(i, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
-{
- CMPBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
-
-LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
-{
- ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d);
-}
-LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
-{
- XORLrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
-{
- XORWrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
-{
- XORBrr(s, d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
-{
- SUBLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
-
-LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-{
- CMPLim(s, d, X86_NOREG, X86_NOREG, 1);
-}
-LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-{
- XCHGLrr(r2, r1);
-}
-LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-
-/*
- * Exchange the byte registers r1 and r2.
- * NOTE(review): both operands are declared RW4 although the emitted
- * exchange is byte-sized; other byte operations in this file use RW1.
- * Confirm whether the wider declaration is intentional.
- */
-LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
-{
- XCHGBrr(r2, r1);
-}
-LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
-
-LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
-{
- PUSHF();
-}
-LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
-
-LOWFUNC(WRITE,READ,0,raw_popfl,(void))
-{
- POPF();
-}
-LENDFUNC(WRITE,READ,0,raw_popfl,(void))
-
-/* Generate floating-point instructions */
-static inline void x86_fadd_m(MEMR s)
-{
- FADDDm(s,X86_NOREG,X86_NOREG,1);
-}
-
-#else
-
-const bool optimize_accum = true;
-const bool optimize_imm8 = true;
-const bool optimize_shift_once = true;
-
-/*************************************************************************
- * Actual encoding of the instructions on the target CPU *
- *************************************************************************/
-
-/* Returns 1 when r is the accumulator register index (EAX_INDEX), else 0. */
-static __inline__ int isaccum(int r)
-{
-    if (r != EAX_INDEX)
-        return 0;
-    return 1;
-}
-
-/* Returns 1 when x fits in a signed 8-bit immediate (-128..127), else 0. */
-static __inline__ int isbyte(uae_s32 x)
-{
-    return !(x < -128 || x > 127);
-}
-
-/* Returns 1 when x fits in a signed 16-bit immediate (-32768..32767), else 0. */
-static __inline__ int isword(uae_s32 x)
-{
-    return !(x < -32768 || x > 32767);
-}
-
-LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
-{
- emit_byte(0x50+r);
-}
-LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r))
-
-LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
-{
- emit_byte(0x58+r);
-}
-LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r))
-
-LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
-{
- emit_byte(0x8f);
- emit_byte(0x05);
- emit_long(d);
-}
-LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d))
-
-LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
-{
- emit_byte(0x0f);
- emit_byte(0xba);
- emit_byte(0xe0+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
-{
- emit_byte(0x0f);
- emit_byte(0xa3);
- emit_byte(0xc0+8*b+r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
-{
- emit_byte(0x0f);
- emit_byte(0xba);
- emit_byte(0xf8+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
-{
- emit_byte(0x0f);
- emit_byte(0xbb);
- emit_byte(0xc0+8*b+r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b))
-
-
-LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
-{
- emit_byte(0x0f);
- emit_byte(0xba);
- emit_byte(0xf0+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
-{
- emit_byte(0x0f);
- emit_byte(0xb3);
- emit_byte(0xc0+8*b+r);
-}
-LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b))
-
-LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
-{
- emit_byte(0x0f);
- emit_byte(0xba);
- emit_byte(0xe8+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
-{
- emit_byte(0x0f);
- emit_byte(0xab);
- emit_byte(0xc0+8*b+r);
-}
-LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b))
-
-/*
- * 16-bit subtract of immediate i from register d. Always emits the 0x66
- * operand-size prefix, then picks the shortest encoding: a sign-extended
- * 8-bit immediate when i fits in a byte, else a 16-bit immediate (using
- * the accumulator short form when d is the accumulator and
- * optimize_accum is enabled).
- */
-LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
-{
-    emit_byte(0x66);
-    if (!isbyte(i)) {
-        /* full 16-bit immediate */
-        if (!(optimize_accum && isaccum(d))) {
-            emit_byte(0x81);
-            emit_byte(0xe8+d);
-        }
-        else
-            emit_byte(0x2d);
-        emit_word(i);
-        return;
-    }
-    /* sign-extended 8-bit immediate */
-    emit_byte(0x83);
-    emit_byte(0xe8+d);
-    emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
-
-
-LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
-{
- emit_byte(0x8b);
- emit_byte(0x05+8*d);
- emit_long(s);
-}
-LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
-{
- emit_byte(0xc7);
- emit_byte(0x05);
- emit_long(d);
- emit_long(s);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
-{
- emit_byte(0x66);
- emit_byte(0xc7);
- emit_byte(0x05);
- emit_long(d);
- emit_word(s);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
-{
- emit_byte(0xc6);
- emit_byte(0x05);
- emit_long(d);
- emit_byte(s);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s))
-
-/*
- * Rotate the byte at absolute address d left by immediate i. When
- * optimize_shift_once is enabled and i is 1, the shorter rotate-by-one
- * opcode (0xd0) is used and no count byte is emitted.
- */
-LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0x05);
-    emit_long(d);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i))
-
-/*
- * Rotate byte register r left by immediate i. Uses the rotate-by-one
- * opcode (0xd0, no count byte) when optimize_shift_once is enabled and
- * i is 1; otherwise opcode 0xc0 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xc0+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
-{
- emit_byte(0x66);
- emit_byte(0xc1);
- emit_byte(0xc0+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i))
-
-/*
- * Rotate 32-bit register r left by immediate i. Uses the rotate-by-one
- * opcode (0xd1, no count byte) when optimize_shift_once is enabled and
- * i is 1; otherwise opcode 0xc1 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xc0+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i))
-
-/*
- * Rotate 32-bit register d left by a register-held count. Only d is
- * encoded; r does not appear in the emitted bytes. Opcode 0xd3 is the
- * count-in-CL form, so r is presumably required to be CL by the caller
- * -- confirm against the register allocator.
- */
-LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
-{
- emit_byte(0xd3);
- emit_byte(0xc0+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r))
-
-/*
- * Rotate 16-bit register d left by a register-held count (0x66 prefix
- * plus opcode 0xd3). Only d is encoded; r is unused here and is
- * presumably expected to be CL -- confirm.
- */
-LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
-{
- emit_byte(0x66);
- emit_byte(0xd3);
- emit_byte(0xc0+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r))
-
-/*
- * Rotate byte register d left by a register-held count (opcode 0xd2).
- * Only d is encoded; r is unused here and is presumably expected to be
- * CL -- confirm.
- */
-LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
-{
- emit_byte(0xd2);
- emit_byte(0xc0+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r))
-
-/*
- * Shift 32-bit register d left by a register-held count (opcode 0xd3,
- * ModRM 0xe0+d). Only d is encoded; r is unused here and is presumably
- * expected to be CL -- confirm.
- */
-LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
-{
- emit_byte(0xd3);
- emit_byte(0xe0+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
-{
- emit_byte(0x66);
- emit_byte(0xd3);
- emit_byte(0xe0+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
-{
- emit_byte(0xd2);
- emit_byte(0xe0+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r))
-
-/*
- * Rotate byte register r right by immediate i. Uses the rotate-by-one
- * opcode (0xd0, no count byte) when optimize_shift_once is enabled and
- * i is 1; otherwise opcode 0xc0 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xc8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
-{
- emit_byte(0x66);
- emit_byte(0xc1);
- emit_byte(0xc8+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i))
-
-// gb-- used for making an fpcr value in compemu_fpp.cpp
-LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
-{
- emit_byte(0x0b);
- emit_byte(0x05+8*d);
- emit_long(s);
-}
-LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s))
-
-/*
- * Rotate 32-bit register r right by immediate i. Uses the rotate-by-one
- * opcode (0xd1, no count byte) when optimize_shift_once is enabled and
- * i is 1; otherwise opcode 0xc1 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xc8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i))
-
-/*
- * Rotate 32-bit register d right by a register-held count (opcode 0xd3,
- * ModRM 0xc8+d). Only d is encoded; r is unused here and is presumably
- * expected to be CL -- confirm.
- */
-LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
-{
- emit_byte(0xd3);
- emit_byte(0xc8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
-{
- emit_byte(0x66);
- emit_byte(0xd3);
- emit_byte(0xc8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
-{
- emit_byte(0xd2);
- emit_byte(0xc8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r))
-
-/*
- * Logical right shift of 32-bit register d by a register-held count
- * (opcode 0xd3, ModRM 0xe8+d). Only d is encoded; r is unused here and
- * is presumably expected to be CL -- confirm.
- */
-LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
-{
- emit_byte(0xd3);
- emit_byte(0xe8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
-{
- emit_byte(0x66);
- emit_byte(0xd3);
- emit_byte(0xe8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
-{
- emit_byte(0xd2);
- emit_byte(0xe8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r))
-
-/*
- * Arithmetic right shift of 32-bit register d by a register-held count
- * (opcode 0xd3, ModRM 0xf8+d). Only d is encoded; r is unused here and
- * is presumably expected to be CL -- confirm.
- */
-LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
-{
- emit_byte(0xd3);
- emit_byte(0xf8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
-{
- emit_byte(0x66);
- emit_byte(0xd3);
- emit_byte(0xf8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
-{
- emit_byte(0xd2);
- emit_byte(0xf8+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r))
-
-/*
- * Shift 32-bit register r left by immediate i. Uses the shift-by-one
- * opcode (0xd1, no count byte) when optimize_shift_once is enabled and
- * i is 1; otherwise opcode 0xc1 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xe0+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
-{
- emit_byte(0x66);
- emit_byte(0xc1);
- emit_byte(0xe0+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i))
-
-/*
- * Shift byte register r left by immediate i. Uses the shift-by-one
- * opcode (0xd0, no count byte) when optimize_shift_once is enabled and
- * i is 1; otherwise opcode 0xc0 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xe0+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i))
-
-/*
- * Logical right shift of 32-bit register r by immediate i. Uses the
- * shift-by-one opcode (0xd1, no count byte) when optimize_shift_once is
- * enabled and i is 1; otherwise opcode 0xc1 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xe8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
-{
- emit_byte(0x66);
- emit_byte(0xc1);
- emit_byte(0xe8+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i))
-
-/*
- * Logical right shift of byte register r by immediate i. Uses the
- * shift-by-one opcode (0xd0, no count byte) when optimize_shift_once is
- * enabled and i is 1; otherwise opcode 0xc0 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xe8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i))
-
-/*
- * Arithmetic right shift of 32-bit register r by immediate i. Uses the
- * shift-by-one opcode (0xd1, no count byte) when optimize_shift_once is
- * enabled and i is 1; otherwise opcode 0xc1 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd1 : 0xc1);
-    emit_byte(0xf8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
-{
- emit_byte(0x66);
- emit_byte(0xc1);
- emit_byte(0xf8+r);
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i))
-
-/*
- * Arithmetic right shift of byte register r by immediate i. Uses the
- * shift-by-one opcode (0xd0, no count byte) when optimize_shift_once is
- * enabled and i is 1; otherwise opcode 0xc0 followed by the count.
- */
-LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
-{
-    const int once = optimize_shift_once && (i == 1);
-    emit_byte(once ? 0xd0 : 0xc0);
-    emit_byte(0xf8+r);
-    if (!once)
-        emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i))
-
-LOWFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
-{
- emit_byte(0x9e);
-}
-LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah))
-
-LOWFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
-{
- emit_byte(0x0f);
- emit_byte(0xa2);
-}
-LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax))
-
-LOWFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
-{
- emit_byte(0x9f);
-}
-LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah))
-
-LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
-{
- emit_byte(0x0f);
- emit_byte(0x90+cc);
- emit_byte(0xc0+d);
-}
-LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc))
-
-LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
-{
- emit_byte(0x0f);
- emit_byte(0x90+cc);
- emit_byte(0x05);
- emit_long(d);
-}
-LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc))
-
-/*
- * Conditional byte move: d <- s when condition cc holds. Emitted as a
- * short conditional jump on the inverted condition (cc^1) that skips a
- * byte-sized register MOV.
- */
-LOWFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
-{
-    /* replacement using branch and mov */
-    int uncc=(cc^1);
-    emit_byte(0x70+uncc);
-    /* The MOV below is exactly two bytes (opcode 0x88 + ModRM). The
-       displacement was previously 3, which made the jump land one byte
-       into whatever instruction follows. */
-    emit_byte(2);  /* skip next 2 bytes if not cc=true */
-    emit_byte(0x88);
-    emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(READ,NONE,3,raw_cmov_b_rr,(RW1 d, R1 s, IMM cc))
-
-/*
- * Conditional 16-bit move: d <- s when condition cc holds. With have_cmov
- * set, emits 0x66 0x0f 0x40+cc and a ModRM with d in the reg field and s
- * in the r/m field. Otherwise emits a short jump on cc^1 over a
- * three-byte word MOV (0x66 0x89 ModRM).
- */
-LOWFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
-{
- if (have_cmov) {
- emit_byte(0x66);
- emit_byte(0x0f);
- emit_byte(0x40+cc);
- emit_byte(0xc0+8*d+s);
- }
- else { /* replacement using branch and mov */
- int uncc=(cc^1);
- emit_byte(0x70+uncc);
- emit_byte(3); /* skip next 3 bytes if not cc=true */
- /* MOV uses opcode 0x89, so s is in the reg field and d in r/m */
- emit_byte(0x66);
- emit_byte(0x89);
- emit_byte(0xc0+8*s+d);
- }
-}
-LENDFUNC(READ,NONE,3,raw_cmov_w_rr,(RW2 d, R2 s, IMM cc))
-
-/*
- * Conditional 32-bit move: d <- s when condition cc holds. With have_cmov
- * set, emits 0x0f 0x40+cc and a ModRM with d in the reg field and s in
- * the r/m field. Otherwise emits a short jump on cc^1 over a two-byte
- * MOV (0x89 ModRM).
- */
-LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
-{
- if (have_cmov) {
- emit_byte(0x0f);
- emit_byte(0x40+cc);
- emit_byte(0xc0+8*d+s);
- }
- else { /* replacement using branch and mov */
- int uncc=(cc^1);
- emit_byte(0x70+uncc);
- emit_byte(2); /* skip next 2 bytes if not cc=true */
- /* MOV uses opcode 0x89, so s is in the reg field and d in r/m */
- emit_byte(0x89);
- emit_byte(0xc0+8*s+d);
- }
-}
-LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc))
-
-LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
-{
- emit_byte(0x0f);
- emit_byte(0xbc);
- emit_byte(0xc0+8*d+s);
-}
-LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
-{
- emit_byte(0x0f);
- emit_byte(0xbf);
- emit_byte(0xc0+8*d+s);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s))
-
-LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
-{
- emit_byte(0x0f);
- emit_byte(0xbe);
- emit_byte(0xc0+8*d+s);
-}
-LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
-{
- emit_byte(0x0f);
- emit_byte(0xb7);
- emit_byte(0xc0+8*d+s);
-}
-LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s))
-
-LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
-{
- emit_byte(0x0f);
- emit_byte(0xb6);
- emit_byte(0xc0+8*d+s);
-}
-LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
-{
- emit_byte(0x0f);
- emit_byte(0xaf);
- emit_byte(0xc0+8*d+s);
-}
-LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
-{
- if (d!=MUL_NREG1 || s!=MUL_NREG2)
- abort();
- emit_byte(0xf7);
- emit_byte(0xea);
-}
-LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s))
-
-LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
-{
- if (d!=MUL_NREG1 || s!=MUL_NREG2) {
- printf("Bad register in MUL: d=%d, s=%d\n",d,s);
- abort();
- }
- emit_byte(0xf7);
- emit_byte(0xe2);
-}
-LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s))
-
-LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
-{
- abort(); /* %^$&%^$%#^ x86! */
- emit_byte(0x0f);
- emit_byte(0xaf);
- emit_byte(0xc0+8*d+s);
-}
-LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
-{
- emit_byte(0x88);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x89);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s))
-
-LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-{
- int isebp=(baser==5)?0x40:0;
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
-
- emit_byte(0x8b);
- emit_byte(0x04+8*d+isebp);
- emit_byte(baser+8*index+0x40*fi);
- if (isebp)
- emit_byte(0x00);
-}
-LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-{
- int fi;
- int isebp;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
- isebp=(baser==5)?0x40:0;
-
- emit_byte(0x66);
- emit_byte(0x8b);
- emit_byte(0x04+8*d+isebp);
- emit_byte(baser+8*index+0x40*fi);
- if (isebp)
- emit_byte(0x00);
-}
-LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-{
- int fi;
- int isebp;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
- isebp=(baser==5)?0x40:0;
-
- emit_byte(0x8a);
- emit_byte(0x04+8*d+isebp);
- emit_byte(baser+8*index+0x40*fi);
- if (isebp)
- emit_byte(0x00);
-}
-LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-{
- int fi;
- int isebp;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
-
- isebp=(baser==5)?0x40:0;
-
- emit_byte(0x89);
- emit_byte(0x04+8*s+isebp);
- emit_byte(baser+8*index+0x40*fi);
- if (isebp)
- emit_byte(0x00);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-{
- int fi;
- int isebp;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
- isebp=(baser==5)?0x40:0;
-
- emit_byte(0x66);
- emit_byte(0x89);
- emit_byte(0x04+8*s+isebp);
- emit_byte(baser+8*index+0x40*fi);
- if (isebp)
- emit_byte(0x00);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-
-LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-{
- int fi;
- int isebp;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
- isebp=(baser==5)?0x40:0;
-
- emit_byte(0x88);
- emit_byte(0x04+8*s+isebp);
- emit_byte(baser+8*index+0x40*fi);
- if (isebp)
- emit_byte(0x00);
-}
-LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-{
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
- emit_byte(0x89);
- emit_byte(0x84+8*s);
- emit_byte(baser+8*index+0x40*fi);
- emit_long(base);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-{
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
- emit_byte(0x66);
- emit_byte(0x89);
- emit_byte(0x84+8*s);
- emit_byte(baser+8*index+0x40*fi);
- emit_long(base);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-
-LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-{
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
- emit_byte(0x88);
- emit_byte(0x84+8*s);
- emit_byte(baser+8*index+0x40*fi);
- emit_long(base);
-}
-LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-
-LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
- emit_byte(0x8b);
- emit_byte(0x84+8*d);
- emit_byte(baser+8*index+0x40*fi);
- emit_long(base);
-}
-LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
- emit_byte(0x66);
- emit_byte(0x8b);
- emit_byte(0x84+8*d);
- emit_byte(baser+8*index+0x40*fi);
- emit_long(base);
-}
-LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-{
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
- emit_byte(0x8a);
- emit_byte(0x84+8*d);
- emit_byte(baser+8*index+0x40*fi);
- emit_long(base);
-}
-LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-{
- int fi;
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default:
- fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
- abort();
- }
- emit_byte(0x8b);
- emit_byte(0x04+8*d);
- emit_byte(0x05+8*index+64*fi);
- emit_long(base);
-}
-LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-
-LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
-{
- int fi;
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default:
- fprintf(stderr,"Bad factor %d in mov_l_rm_indexed!\n",factor);
- abort();
- }
- if (have_cmov) {
- emit_byte(0x0f);
- emit_byte(0x40+cond);
- emit_byte(0x04+8*d);
- emit_byte(0x05+8*index+64*fi);
- emit_long(base);
- }
- else { /* replacement using branch and mov */
- int uncc=(cond^1);
- emit_byte(0x70+uncc);
- emit_byte(7); /* skip next 7 bytes if not cc=true */
- emit_byte(0x8b);
- emit_byte(0x04+8*d);
- emit_byte(0x05+8*index+64*fi);
- emit_long(base);
- }
-}
-LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond))
-
-LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
-{
- if (have_cmov) {
- emit_byte(0x0f);
- emit_byte(0x40+cond);
- emit_byte(0x05+8*d);
- emit_long(mem);
- }
- else { /* replacement using branch and mov */
- int uncc=(cond^1);
- emit_byte(0x70+uncc);
- emit_byte(6); /* skip next 6 bytes if not cc=true */
- emit_byte(0x8b);
- emit_byte(0x05+8*d);
- emit_long(mem);
- }
-}
-LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond))
-
-LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0x8b);
- emit_byte(0x40+8*d+s);
- emit_byte(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0x66);
- emit_byte(0x8b);
- emit_byte(0x40+8*d+s);
- emit_byte(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0x8a);
- emit_byte(0x40+8*d+s);
- emit_byte(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
-{
- emit_byte(0x8b);
- emit_byte(0x80+8*d+s);
- emit_long(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
-{
- emit_byte(0x66);
- emit_byte(0x8b);
- emit_byte(0x80+8*d+s);
- emit_long(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
-{
- emit_byte(0x8a);
- emit_byte(0x80+8*d+s);
- emit_long(offset);
-}
-LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0xc7);
- emit_byte(0x40+d);
- emit_byte(offset);
- emit_long(i);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0x66);
- emit_byte(0xc7);
- emit_byte(0x40+d);
- emit_byte(offset);
- emit_word(i);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0xc6);
- emit_byte(0x40+d);
- emit_byte(offset);
- emit_byte(i);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0x89);
- emit_byte(0x40+8*s+d);
- emit_byte(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0x66);
- emit_byte(0x89);
- emit_byte(0x40+8*s+d);
- emit_byte(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
-{
- Dif(!isbyte(offset)) abort();
- emit_byte(0x88);
- emit_byte(0x40+8*s+d);
- emit_byte(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset))
-
-LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
-{
- if (optimize_imm8 && isbyte(offset)) {
- emit_byte(0x8d);
- emit_byte(0x40+8*d+s);
- emit_byte(offset);
- }
- else {
- emit_byte(0x8d);
- emit_byte(0x80+8*d+s);
- emit_long(offset);
- }
-}
-LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-{
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
- if (optimize_imm8 && isbyte(offset)) {
- emit_byte(0x8d);
- emit_byte(0x44+8*d);
- emit_byte(0x40*fi+8*index+s);
- emit_byte(offset);
- }
- else {
- emit_byte(0x8d);
- emit_byte(0x84+8*d);
- emit_byte(0x40*fi+8*index+s);
- emit_long(offset);
- }
-}
-LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-
-LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-{
- int isebp=(s==5)?0x40:0;
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
-
- emit_byte(0x8d);
- emit_byte(0x04+8*d+isebp);
- emit_byte(0x40*fi+8*index+s);
- if (isebp)
- emit_byte(0);
-}
-LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
-{
- if (optimize_imm8 && isbyte(offset)) {
- emit_byte(0x89);
- emit_byte(0x40+8*s+d);
- emit_byte(offset);
- }
- else {
- emit_byte(0x89);
- emit_byte(0x80+8*s+d);
- emit_long(offset);
- }
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
-{
- emit_byte(0x66);
- emit_byte(0x89);
- emit_byte(0x80+8*s+d);
- emit_long(offset);
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset))
-
-LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
-{
- if (optimize_imm8 && isbyte(offset)) {
- emit_byte(0x88);
- emit_byte(0x40+8*s+d);
- emit_byte(offset);
- }
- else {
- emit_byte(0x88);
- emit_byte(0x80+8*s+d);
- emit_long(offset);
- }
-}
-LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset))
-
-LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
-{
- emit_byte(0x0f);
- emit_byte(0xc8+r);
-}
-LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r))
-
-LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
-{
- emit_byte(0x66);
- emit_byte(0xc1);
- emit_byte(0xc0+r);
- emit_byte(0x08);
-}
-LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r))
-
-LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
-{
- emit_byte(0x89);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
-{
- emit_byte(0x89);
- emit_byte(0x05+8*s);
- emit_long(d);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x89);
- emit_byte(0x05+8*s);
- emit_long(d);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s))
-
-LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
-{
- emit_byte(0x66);
- emit_byte(0x8b);
- emit_byte(0x05+8*d);
- emit_long(s);
-}
-LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s))
-
-LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
-{
- emit_byte(0x88);
- emit_byte(0x05+8*(s&0xf)); /* XXX this handles %ah case (defined as 0x10+4) and others */
- emit_long(d);
-}
-LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s))
-
-LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
-{
- emit_byte(0x8a);
- emit_byte(0x05+8*d);
- emit_long(s);
-}
-LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
-{
- emit_byte(0xb8+d);
- emit_long(s);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
-{
- emit_byte(0x66);
- emit_byte(0xb8+d);
- emit_word(s);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
-{
- emit_byte(0xb0+d);
- emit_byte(s);
-}
-LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
-
-LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
-{
- emit_byte(0x81);
- emit_byte(0x15);
- emit_long(d);
- emit_long(s);
-}
-LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
-{
- if (optimize_imm8 && isbyte(s)) {
- emit_byte(0x83);
- emit_byte(0x05);
- emit_long(d);
- emit_byte(s);
- }
- else {
- emit_byte(0x81);
- emit_byte(0x05);
- emit_long(d);
- emit_long(s);
- }
-}
-LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
-{
- emit_byte(0x66);
- emit_byte(0x81);
- emit_byte(0x05);
- emit_long(d);
- emit_word(s);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
-{
- emit_byte(0x80);
- emit_byte(0x05);
- emit_long(d);
- emit_byte(s);
-}
-LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
-{
- if (optimize_accum && isaccum(d))
- emit_byte(0xa9);
- else {
- emit_byte(0xf7);
- emit_byte(0xc0+d);
- }
- emit_long(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
-{
- emit_byte(0x85);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x85);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
-{
- emit_byte(0x84);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
-{
- emit_byte(0x81);
- emit_byte(0xf0+d);
- emit_long(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
-{
- if (optimize_imm8 && isbyte(i)) {
- emit_byte(0x83);
- emit_byte(0xe0+d);
- emit_byte(i);
- }
- else {
- if (optimize_accum && isaccum(d))
- emit_byte(0x25);
- else {
- emit_byte(0x81);
- emit_byte(0xe0+d);
- }
- emit_long(i);
- }
-}
-LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
-{
- emit_byte(0x66);
- if (optimize_imm8 && isbyte(i)) {
- emit_byte(0x83);
- emit_byte(0xe0+d);
- emit_byte(i);
- }
- else {
- if (optimize_accum && isaccum(d))
- emit_byte(0x25);
- else {
- emit_byte(0x81);
- emit_byte(0xe0+d);
- }
- emit_word(i);
- }
-}
-LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
-{
- emit_byte(0x21);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x21);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
-{
- emit_byte(0x20);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
-{
- if (optimize_imm8 && isbyte(i)) {
- emit_byte(0x83);
- emit_byte(0xc8+d);
- emit_byte(i);
- }
- else {
- if (optimize_accum && isaccum(d))
- emit_byte(0x0d);
- else {
- emit_byte(0x81);
- emit_byte(0xc8+d);
- }
- emit_long(i);
- }
-}
-LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
-{
- emit_byte(0x09);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x09);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
-{
- emit_byte(0x08);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
-{
- emit_byte(0x11);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x11);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s))
-
-LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
-{
- emit_byte(0x10);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
-{
- emit_byte(0x01);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x01);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
-{
- emit_byte(0x00);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
-{
- if (isbyte(i)) {
- emit_byte(0x83);
- emit_byte(0xe8+d);
- emit_byte(i);
- }
- else {
- if (optimize_accum && isaccum(d))
- emit_byte(0x2d);
- else {
- emit_byte(0x81);
- emit_byte(0xe8+d);
- }
- emit_long(i);
- }
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
-{
- if (optimize_accum && isaccum(d))
- emit_byte(0x2c);
- else {
- emit_byte(0x80);
- emit_byte(0xe8+d);
- }
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
-{
- if (isbyte(i)) {
- emit_byte(0x83);
- emit_byte(0xc0+d);
- emit_byte(i);
- }
- else {
- if (optimize_accum && isaccum(d))
- emit_byte(0x05);
- else {
- emit_byte(0x81);
- emit_byte(0xc0+d);
- }
- emit_long(i);
- }
-}
-LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
-{
- emit_byte(0x66);
- if (isbyte(i)) {
- emit_byte(0x83);
- emit_byte(0xc0+d);
- emit_byte(i);
- }
- else {
- if (optimize_accum && isaccum(d))
- emit_byte(0x05);
- else {
- emit_byte(0x81);
- emit_byte(0xc0+d);
- }
- emit_word(i);
- }
-}
-LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
-{
- if (optimize_accum && isaccum(d))
- emit_byte(0x04);
- else {
- emit_byte(0x80);
- emit_byte(0xc0+d);
- }
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
-{
- emit_byte(0x19);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x19);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s))
-
-LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
-{
- emit_byte(0x18);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
-{
- emit_byte(0x29);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x29);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
-{
- emit_byte(0x28);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
-{
- emit_byte(0x39);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
-{
- if (optimize_imm8 && isbyte(i)) {
- emit_byte(0x83);
- emit_byte(0xf8+r);
- emit_byte(i);
- }
- else {
- if (optimize_accum && isaccum(r))
- emit_byte(0x3d);
- else {
- emit_byte(0x81);
- emit_byte(0xf8+r);
- }
- emit_long(i);
- }
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x39);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
-
-LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
-{
- emit_byte(0x80);
- emit_byte(0x3d);
- emit_long(d);
- emit_byte(s);
-}
-LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
-{
- if (optimize_accum && isaccum(d))
- emit_byte(0x3c);
- else {
- emit_byte(0x80);
- emit_byte(0xf8+d);
- }
- emit_byte(i);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
-
-LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
-{
- emit_byte(0x38);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s))
-
-LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
-{
- int fi;
-
- switch(factor) {
- case 1: fi=0; break;
- case 2: fi=1; break;
- case 4: fi=2; break;
- case 8: fi=3; break;
- default: abort();
- }
- emit_byte(0x39);
- emit_byte(0x04+8*d);
- emit_byte(5+8*index+0x40*fi);
- emit_long(offset);
-}
-LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
-{
- emit_byte(0x31);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
-{
- emit_byte(0x66);
- emit_byte(0x31);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s))
-
-LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
-{
- emit_byte(0x30);
- emit_byte(0xc0+8*s+d);
-}
-LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s))
-
-LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
-{
- if (optimize_imm8 && isbyte(s)) {
- emit_byte(0x83);
- emit_byte(0x2d);
- emit_long(d);
- emit_byte(s);
- }
- else {
- emit_byte(0x81);
- emit_byte(0x2d);
- emit_long(d);
- emit_long(s);
- }
-}
-LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s))
-
-LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-{
- if (optimize_imm8 && isbyte(s)) {
- emit_byte(0x83);
- emit_byte(0x3d);
- emit_long(d);
- emit_byte(s);
- }
- else {
- emit_byte(0x81);
- emit_byte(0x3d);
- emit_long(d);
- emit_long(s);
- }
-}
-LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s))
-
-LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-{
- emit_byte(0x87);
- emit_byte(0xc0+8*r1+r2);
-}
-LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-
-LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2))
-{
- emit_byte(0x86);
- emit_byte(0xc0+8*(r1&0xf)+(r2&0xf)); /* XXX this handles upper-halves registers (e.g. %ah defined as 0x10+4) */
-}
-LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2))
-
-/*************************************************************************
- * FIXME: mem access modes probably wrong *
- *************************************************************************/
-
-LOWFUNC(READ,WRITE,0,raw_pushfl,(void))
-{
- emit_byte(0x9c);
-}
-LENDFUNC(READ,WRITE,0,raw_pushfl,(void))
-
-LOWFUNC(WRITE,READ,0,raw_popfl,(void))
-{
- emit_byte(0x9d);
-}
-LENDFUNC(WRITE,READ,0,raw_popfl,(void))
-
-/* Generate floating-point instructions */
-static inline void x86_fadd_m(MEMR s)
-{
- emit_byte(0xdc);
- emit_byte(0x05);
- emit_long(s);
-}
-
-#endif
-
-/*************************************************************************
- * Unoptimizable stuff --- jump *
- *************************************************************************/
-
-static __inline__ void raw_call_r(R4 r)
-{
-#if USE_NEW_RTASM
- CALLsr(r);
-#else
- emit_byte(0xff);
- emit_byte(0xd0+r);
-#endif
-}
-
-static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
-{
-#if USE_NEW_RTASM
- CALLsm(base, X86_NOREG, r, m);
-#else
- int mu;
- switch(m) {
- case 1: mu=0; break;
- case 2: mu=1; break;
- case 4: mu=2; break;
- case 8: mu=3; break;
- default: abort();
- }
- emit_byte(0xff);
- emit_byte(0x14);
- emit_byte(0x05+8*r+0x40*mu);
- emit_long(base);
-#endif
-}
-
-static __inline__ void raw_jmp_r(R4 r)
-{
-#if USE_NEW_RTASM
- JMPsr(r);
-#else
- emit_byte(0xff);
- emit_byte(0xe0+r);
-#endif
-}
-
-static __inline__ void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
-{
-#if USE_NEW_RTASM
- JMPsm(base, X86_NOREG, r, m);
-#else
- int mu;
- switch(m) {
- case 1: mu=0; break;
- case 2: mu=1; break;
- case 4: mu=2; break;
- case 8: mu=3; break;
- default: abort();
- }
- emit_byte(0xff);
- emit_byte(0x24);
- emit_byte(0x05+8*r+0x40*mu);
- emit_long(base);
-#endif
-}
-
-static __inline__ void raw_jmp_m(uae_u32 base)
-{
- emit_byte(0xff);
- emit_byte(0x25);
- emit_long(base);
-}
-
-
-static __inline__ void raw_call(uae_u32 t)
-{
-#if USE_NEW_RTASM
- CALLm(t);
-#else
- emit_byte(0xe8);
- emit_long(t-(uae_u32)target-4);
-#endif
-}
-
-static __inline__ void raw_jmp(uae_u32 t)
-{
-#if USE_NEW_RTASM
- JMPm(t);
-#else
- emit_byte(0xe9);
- emit_long(t-(uae_u32)target-4);
-#endif
-}
-
-static __inline__ void raw_jl(uae_u32 t)
-{
- emit_byte(0x0f);
- emit_byte(0x8c);
- emit_long(t-(uintptr)target-4);
-}
-
-static __inline__ void raw_jz(uae_u32 t)
-{
- emit_byte(0x0f);
- emit_byte(0x84);
- emit_long(t-(uintptr)target-4);
-}
-
-static __inline__ void raw_jnz(uae_u32 t)
-{
- emit_byte(0x0f);
- emit_byte(0x85);
- emit_long(t-(uintptr)target-4);
-}
-
-static __inline__ void raw_jnz_l_oponly(void)
-{
- emit_byte(0x0f);
- emit_byte(0x85);
-}
-
-static __inline__ void raw_jcc_l_oponly(int cc)
-{
- emit_byte(0x0f);
- emit_byte(0x80+cc);
-}
-
-static __inline__ void raw_jnz_b_oponly(void)
-{
- emit_byte(0x75);
-}
-
-static __inline__ void raw_jz_b_oponly(void)
-{
- emit_byte(0x74);
-}
-
-static __inline__ void raw_jcc_b_oponly(int cc)
-{
- emit_byte(0x70+cc);
-}
-
-static __inline__ void raw_jmp_l_oponly(void)
-{
- emit_byte(0xe9);
-}
-
-static __inline__ void raw_jmp_b_oponly(void)
-{
- emit_byte(0xeb);
-}
-
-static __inline__ void raw_ret(void)
-{
- emit_byte(0xc3);
-}
-
-static __inline__ void raw_nop(void)
-{
- emit_byte(0x90);
-}
-
-static __inline__ void raw_emit_nop_filler(int nbytes)
-{
- /* Source: GNU Binutils 2.12.90.0.15 */
- /* Various efficient no-op patterns for aligning code labels.
- Note: Don't try to assemble the instructions in the comments.
- 0L and 0w are not legal. */
- static const uae_u8 f32_1[] =
- {0x90}; /* nop */
- static const uae_u8 f32_2[] =
- {0x89,0xf6}; /* movl %esi,%esi */
- static const uae_u8 f32_3[] =
- {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
- static const uae_u8 f32_4[] =
- {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
- static const uae_u8 f32_5[] =
- {0x90, /* nop */
- 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
- static const uae_u8 f32_6[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
- static const uae_u8 f32_7[] =
- {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
- static const uae_u8 f32_8[] =
- {0x90, /* nop */
- 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
- static const uae_u8 f32_9[] =
- {0x89,0xf6, /* movl %esi,%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_10[] =
- {0x8d,0x76,0x00, /* leal 0(%esi),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_11[] =
- {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_12[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
- 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */
- static const uae_u8 f32_13[] =
- {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_14[] =
- {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */
- 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */
- static const uae_u8 f32_15[] =
- {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
- 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
- static const uae_u8 f32_16[] =
- {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */
- 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90};
- static const uae_u8 *const f32_patt[] = {
- f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8,
- f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15
- };
- static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 };
-
-#if defined(__x86_64__)
- /* The recommended way to pad 64bit code is to use NOPs preceded by
- maximally four 0x66 prefixes. Balance the size of nops. */
- if (nbytes == 0)
- return;
-
- int i;
- int nnops = (nbytes + 3) / 4;
- int len = nbytes / nnops;
- int remains = nbytes - nnops * len;
-
- for (i = 0; i < remains; i++) {
- emit_block(prefixes, len);
- raw_nop();
- }
- for (; i < nnops; i++) {
- emit_block(prefixes, len - 1);
- raw_nop();
- }
-#else
- int nloops = nbytes / 16;
- while (nloops-- > 0)
- emit_block(f32_16, sizeof(f32_16));
-
- nbytes %= 16;
- if (nbytes)
- emit_block(f32_patt[nbytes - 1], nbytes);
-#endif
-}
-
-
-/*************************************************************************
- * Flag handling, to and fro UAE flag register *
- *************************************************************************/
-
-/* Bookkeeping once the flags held via native register r have been stored:
-   FLAGTMP is recorded as living in memory only, and r is recorded as
-   holding nothing. */
-static __inline__ void raw_flags_evicted(int r)
-{
-    /* INMEM rather than CLEAN: no register copy remains. */
-    live.state[FLAGTMP].status = INMEM;
-    live.state[FLAGTMP].realreg = -1;
-
-    /* We just "evicted" FLAGTMP, so r must have held exactly one value. */
-    if (live.nat[r].nholds == 1) {
-        live.nat[r].nholds = 0;
-        return;
-    }
-
-    /* Huh? */
-    abort();
-}
-
-#define FLAG_NREG1_FLAGREG 0 /* Set to -1 if any register will do */
-/* LAHF/SETcc flavour of "save the host flags": most flags go through AH,
-   the V flag is written with a SETcc straight to memory.  Both bytes end
-   up in FLAGTMP's memory slot (V at byte 0, AH at byte 1). */
-static __inline__ void raw_flags_to_reg_FLAGREG(int r)
-{
-    const uintptr flagtmp_mem = (uintptr)live.state[FLAGTMP].mem;
-
-    raw_lahf(0);                    /* Most flags in AH */
-    raw_setcc_m(flagtmp_mem, 0);    /* V flag directly to memory */
-
-    /* Let's avoid those nasty partial register stalls: store AH on its
-       own instead of merging it into the full register first. */
-    raw_mov_b_mr(flagtmp_mem + 1, AH_INDEX);
-    raw_flags_evicted(r);
-}
-
-#define FLAG_NREG2_FLAGREG 0 /* Set to -1 if any register will do */
-/* LAHF/SETcc flavour of "reload the host flags" from register r: a byte
-   compare against -127 re-creates V, then SAHF is emitted for the rest. */
-static __inline__ void raw_reg_to_flags_FLAGREG(int r)
-{
-    raw_cmp_b_ri(r, -127);    /* set V */
-    raw_sahf(0);
-}
-
-#define FLAG_NREG3_FLAGREG 0 /* Set to -1 if any register will do */
-/* LAHF/SAHF flavour of "set Z from a register bit".  Bit 14 (0x4000, the
-   position Z occupies after LAHF) of the incoming value of s is inverted
-   and spliced into the current flags; tmp is used as scratch. */
-static __inline__ void raw_flags_set_zero_FLAGREG(int s, int tmp)
-{
-    raw_mov_l_rr(tmp, s);               /* keep the incoming value of s */
-    raw_lahf(s);                        /* flags into ah */
-
-    raw_and_l_ri(s, 0xffffbfff);        /* drop the current bit 14 */
-
-    raw_and_l_ri(tmp, 0x00004000);      /* isolate bit 14 of the input ... */
-    raw_xor_l_ri(tmp, 0x00004000);      /* ... and invert it */
-
-    raw_or_l(s, tmp);                   /* merge */
-    raw_sahf(s);
-}
-
-/* The LAHF/SETcc flavour needs no run-time initialisation. */
-static __inline__ void raw_flags_init_FLAGREG(void)
-{
-}
-
-#define FLAG_NREG1_FLAGSTK -1 /* Set to -1 if any register will do */
-/* PUSHF/POP flavour of "save the host flags": the flags word travels over
-   the stack into r and from there into FLAGTMP's memory slot. */
-static __inline__ void raw_flags_to_reg_FLAGSTK(int r)
-{
-    const uintptr flagtmp_mem = (uintptr)live.state[FLAGTMP].mem;
-
-    raw_pushfl();
-    raw_pop_l_r(r);
-    raw_mov_l_mr(flagtmp_mem, r);
-    raw_flags_evicted(r);
-}
-
-#define FLAG_NREG2_FLAGSTK -1 /* Set to -1 if any register will do */
-/* PUSH/POPF flavour of "reload the host flags": r is pushed and popped
-   back into the flags register. */
-static __inline__ void raw_reg_to_flags_FLAGSTK(int r)
-{
-    raw_push_l_r(r);
-    raw_popfl();
-}
-
-#define FLAG_NREG3_FLAGSTK -1 /* Set to -1 if any register will do */
-/* PUSHF/POPF flavour of "set Z from a register bit".  Bit 6 (0x40) of the
-   incoming value of s is inverted and spliced into the current flags word;
-   tmp is used as scratch. */
-static __inline__ void raw_flags_set_zero_FLAGSTK(int s, int tmp)
-{
-    raw_mov_l_rr(tmp, s);               /* keep the incoming value of s */
-
-    raw_pushfl();                       /* current flags -> s */
-    raw_pop_l_r(s);
-
-    raw_and_l_ri(s, 0xffffffbf);        /* drop the current bit 6 */
-
-    raw_and_l_ri(tmp, 0x00000040);      /* isolate bit 6 of the input ... */
-    raw_xor_l_ri(tmp, 0x00000040);      /* ... and invert it */
-
-    raw_or_l(s, tmp);                   /* merge */
-
-    raw_push_l_r(s);                    /* s -> flags */
-    raw_popfl();
-}
-
-/* The PUSHF/POPF flavour needs no run-time initialisation. */
-static __inline__ void raw_flags_init_FLAGSTK(void)
-{
-}
-
-#if defined(__x86_64__)
-/* Try to use the LAHF/SETO method on x86_64 since it is faster.
- This can't be the default because some older CPUs don't support
- LAHF/SAHF in long mode. */
-static int FLAG_NREG1_FLAGGEN = 0;
-/* Run-time selected "save the host flags": falls back to the PUSHF/POP
-   flavour unless have_lahf_lm is set.  In the LAHF case the two bytes are
-   stored so that FLAGTMP's memory has the normal EFLAGS layout. */
-static __inline__ void raw_flags_to_reg_FLAGGEN(int r)
-{
-    if (!have_lahf_lm) {
-        raw_flags_to_reg_FLAGSTK(r);
-        return;
-    }
-
-    // NOTE: the interpreter uses the normal EFLAGS layout
-    //   pushf/popf CF(0) ZF( 6) SF( 7) OF(11)
-    //   sahf/lahf  CF(8) ZF(14) SF(15) OF( 0)
-    assert(r == 0);
-
-    const uintptr flagtmp_mem = (uintptr)live.state[FLAGTMP].mem;
-
-    raw_setcc(r, 0);                            /* V flag in AL */
-    raw_lea_l_r_scaled(0, 0, 8);                /* move it to its EFLAGS location */
-    raw_mov_b_mr(flagtmp_mem + 1, 0);
-    raw_lahf(0);                                /* most flags in AH */
-    raw_mov_b_mr(flagtmp_mem, AH_INDEX);
-    raw_flags_evicted(r);
-}
-
-static int FLAG_NREG2_FLAGGEN = 0;
-/* Run-time selected "reload the host flags" from register r: falls back
-   to the PUSH/POPF flavour unless have_lahf_lm is set. */
-static __inline__ void raw_reg_to_flags_FLAGGEN(int r)
-{
-    if (!have_lahf_lm) {
-        raw_reg_to_flags_FLAGSTK(r);
-        return;
-    }
-
-    raw_xchg_b_rr(0, AH_INDEX);     /* swap the two low bytes of register 0 */
-    raw_cmp_b_ri(r, -120);          /* set V */
-    raw_sahf(0);
-}
-
-static int FLAG_NREG3_FLAGGEN = 0;
-/* Run-time selected "set Z from a register bit": dispatches to the
-   LAHF/SAHF flavour when have_lahf_lm is set, to the PUSHF/POPF flavour
-   otherwise. */
-static __inline__ void raw_flags_set_zero_FLAGGEN(int s, int tmp)
-{
-    if (!have_lahf_lm) {
-        raw_flags_set_zero_FLAGSTK(s, tmp);
-        return;
-    }
-    raw_flags_set_zero_FLAGREG(s, tmp);
-}
-
-/* Pick the register constraints for the run-time selected flag helpers.
-   Each FLAG_NREGn_FLAGGEN is copied from the flavour that the matching
-   raw_*_FLAGGEN() helper dispatches to: the LAHF/SAHF one when
-   have_lahf_lm is set, the PUSHF/POPF one otherwise.
-
-   The third assignment in each branch used to target FLAG_NREG1_FLAGGEN
-   a second time, which left FLAG_NREG3_FLAGGEN at its initial value. */
-static __inline__ void raw_flags_init_FLAGGEN(void)
-{
-    if (have_lahf_lm) {
-        FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGREG;
-        FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGREG;
-        FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGREG;
-    }
-    else {
-        FLAG_NREG1_FLAGGEN = FLAG_NREG1_FLAGSTK;
-        FLAG_NREG2_FLAGGEN = FLAG_NREG2_FLAGSTK;
-        FLAG_NREG3_FLAGGEN = FLAG_NREG3_FLAGSTK;
-    }
-}
-#endif
-
-#ifdef SAHF_SETO_PROFITABLE /* build-time choice: always use the *_FLAGREG helpers */
-#define FLAG_SUFFIX FLAGREG
-#elif defined __x86_64__ /* x86-64: *_FLAGGEN helpers decide at run time via have_lahf_lm */
-#define FLAG_SUFFIX FLAGGEN
-#else /* everything else: *_FLAGSTK helpers */
-#define FLAG_SUFFIX FLAGSTK
-#endif
-
-#define FLAG_GLUE_2(x, y) x ## _ ## y /* pastes x, an underscore and y */
-#define FLAG_GLUE_1(x, y) FLAG_GLUE_2(x, y) /* extra level so FLAG_SUFFIX is expanded before pasting */
-#define FLAG_GLUE(x) FLAG_GLUE_1(x, FLAG_SUFFIX) /* x -> x_<selected suffix> */
-
-#define raw_flags_init FLAG_GLUE(raw_flags_init) /* generic names map onto the selected flavour */
-#define FLAG_NREG1 FLAG_GLUE(FLAG_NREG1) /* register constraint belonging to raw_flags_to_reg */
-#define raw_flags_to_reg FLAG_GLUE(raw_flags_to_reg)
-#define FLAG_NREG2 FLAG_GLUE(FLAG_NREG2) /* register constraint belonging to raw_reg_to_flags */
-#define raw_reg_to_flags FLAG_GLUE(raw_reg_to_flags)
-#define FLAG_NREG3 FLAG_GLUE(FLAG_NREG3) /* register constraint belonging to raw_flags_set_zero */
-#define raw_flags_set_zero FLAG_GLUE(raw_flags_set_zero)
-
-/* Load the saved flags of virtual register r into native register target
-   with one 32-bit read.
-
-   Apparently, there are enough instructions between flag store and flag
-   reload to avoid the partial memory stall, so the alternative of two
-   separate byte loads (low byte into target, next byte into target+4) is
-   not used. */
-static __inline__ void raw_load_flagreg(uae_u32 target, uae_u32 r)
-{
-    const uintptr saved_flags = (uintptr)live.state[r].mem;
-
-    raw_mov_l_rm(target, saved_flags);
-}
-
-/* FLAGX is byte sized, and we *do* write it at that size.  Load it into
-   native register target using the narrowest access that target supports
-   (byte, then word, then long). */
-static __inline__ void raw_load_flagx(uae_u32 target, uae_u32 r)
-{
-    const uintptr flagx_mem = (uintptr)live.state[r].mem;
-
-    if (live.nat[target].canbyte) {
-        raw_mov_b_rm(target, flagx_mem);
-        return;
-    }
-    if (live.nat[target].canword) {
-        raw_mov_w_rm(target, flagx_mem);
-        return;
-    }
-    raw_mov_l_rm(target, flagx_mem);
-}
-
-/* Emit "sub off from the stack pointer"; nothing is emitted for off == 0. */
-static __inline__ void raw_dec_sp(int off)
-{
-    if (off == 0)
-        return;
-    raw_sub_l_ri(ESP_INDEX, off);
-}
-
-/* Emit "add off to the stack pointer"; nothing is emitted for off == 0. */
-static __inline__ void raw_inc_sp(int off)
-{
-    if (off == 0)
-        return;
-    raw_add_l_ri(ESP_INDEX, off);
-}
-
-/*************************************************************************
- * Handling mistaken direct memory access *
- *************************************************************************/
-
-// gb-- I don't need that part for JIT Basilisk II
-#if defined(NATMEM_OFFSET) && 0
-#include <asm/sigcontext.h>
-#include <signal.h>
-
-#define SIG_READ 1
-#define SIG_WRITE 2
-
-static int in_handler=0;
-static uae_u8 veccode[256];
-
-/*
- * SIGSEGV handler for direct memory accesses made by compiled code.
- *
- * The faulting instruction at sc.eip is decoded (only a handful of MOV
- * forms, optionally preceded by a 0x66 prefix, are recognised).  The access
- * is then carried out through get_byte/get_word/get_long or
- * put_byte/put_word/put_long, either right here (currprefs.comp_oldsegv) or
- * by patching a jump to a small stub built in veccode.  Finally the
- * translated block containing the instruction is looked up in the active
- * and dormant lists and invalidated.
- *
- * If the instruction cannot be handled, diagnostics are logged and the
- * default SIGSEGV action is restored, so that returning faults "for real".
- *
- *  x  - not used
- *  sc - register context of the fault
- */
-static void vec(int x, struct sigcontext sc)
-{
-    uae_u8* i=(uae_u8*)sc.eip;      /* faulting instruction */
-    uae_u32 addr=sc.cr2;            /* faulting address */
-    int r=-1;                       /* reg field of the ModRM byte, -1 = not decoded */
-    int size=4;                     /* access size in bytes */
-    int dir=-1;                     /* SIG_READ or SIG_WRITE */
-    int len=0;                      /* length of the faulting instruction */
-    int j;
-
-    write_log("fault address is %08x at %08x\n",sc.cr2,sc.eip);
-    if (!canbang)
-        write_log("Not happy! Canbang is 0 in SIGSEGV handler!\n");
-    if (in_handler)
-        write_log("Argh --- Am already in a handler. Shouldn't happen!\n");
-
-    if (canbang && i>=compiled_code && i<=current_compile_p) {
-        if (*i==0x66) {             /* operand size prefix: 16 bit access */
-            i++;
-            size=2;
-            len++;
-        }
-
-        switch(i[0]) {
-        case 0x8a:                  /* byte read */
-        case 0x88:                  /* byte write */
-            if ((i[1]&0xc0)==0x80) {
-                r=(i[1]>>3)&7;
-                dir=(i[0]==0x8a)?SIG_READ:SIG_WRITE;
-                size=1;
-                len+=6;
-            }
-            break;
-        case 0x8b:                  /* word/long read */
-        case 0x89:                  /* word/long write */
-            if ((i[1]&0xc0)==0x80) {
-                r=(i[1]>>3)&7;
-                dir=(i[0]==0x8b)?SIG_READ:SIG_WRITE;
-                len+=6;
-            }
-            else if ((i[1]&0xc0)==0x40) {
-                r=(i[1]>>3)&7;
-                dir=(i[0]==0x8b)?SIG_READ:SIG_WRITE;
-                len+=3;
-            }
-            break;
-        }
-    }
-
-    if (r!=-1) {
-        void* pr=NULL;              /* where the register lives inside sc */
-        write_log("register was %d, direction was %d, size was %d\n",r,dir,size);
-
-        /* For byte accesses, register numbers 4..7 select the second byte
-           of eax/ecx/edx/ebx. */
-        switch(r) {
-        case 0: pr=&(sc.eax); break;
-        case 1: pr=&(sc.ecx); break;
-        case 2: pr=&(sc.edx); break;
-        case 3: pr=&(sc.ebx); break;
-        case 4: pr=(size>1)?NULL:(((uae_u8*)&(sc.eax))+1); break;
-        case 5: pr=(size>1)?
-                    (void*)(&(sc.ebp)):
-                    (void*)(((uae_u8*)&(sc.ecx))+1); break;
-        case 6: pr=(size>1)?
-                    (void*)(&(sc.esi)):
-                    (void*)(((uae_u8*)&(sc.edx))+1); break;
-        case 7: pr=(size>1)?
-                    (void*)(&(sc.edi)):
-                    (void*)(((uae_u8*)&(sc.ebx))+1); break;
-        default: abort();
-        }
-        if (pr) {
-            blockinfo* bi;
-            blockinfo* heads[2];
-            int k;
-
-            if (currprefs.comp_oldsegv) {
-                /* Emulate the access here and step over the instruction. */
-                addr-=NATMEM_OFFSET;
-
-                if ((addr>=0x10000000 && addr<0x40000000) ||
-                    (addr>=0x50000000)) {
-                    write_log("Suspicious address in %x SEGV handler.\n",addr);
-                }
-                if (dir==SIG_READ) {
-                    switch(size) {
-                    case 1: *((uae_u8*)pr)=get_byte(addr); break;
-                    case 2: *((uae_u16*)pr)=get_word(addr); break;
-                    case 4: *((uae_u32*)pr)=get_long(addr); break;
-                    default: abort();
-                    }
-                }
-                else { /* write */
-                    switch(size) {
-                    case 1: put_byte(addr,*((uae_u8*)pr)); break;
-                    case 2: put_word(addr,*((uae_u16*)pr)); break;
-                    case 4: put_long(addr,*((uae_u32*)pr)); break;
-                    default: abort();
-                    }
-                }
-                write_log("Handled one access!\n");
-                fflush(stdout);
-                segvcount++;
-                sc.eip+=len;
-            }
-            else {
-                /* Overwrite the faulting instruction with a jump to a stub
-                   in veccode.  The stub performs the access, restores the
-                   five overwritten bytes, clears in_handler and jumps back
-                   behind the faulting instruction. */
-                void* tmp=target;
-                int n;
-                uae_u8 vecbuf[5];
-
-                addr-=NATMEM_OFFSET;
-
-                if ((addr>=0x10000000 && addr<0x40000000) ||
-                    (addr>=0x50000000)) {
-                    write_log("Suspicious address in %x SEGV handler.\n",addr);
-                }
-
-                target=(uae_u8*)sc.eip;
-                for (n=0;n<5;n++)
-                    vecbuf[n]=target[n];
-                emit_byte(0xe9);
-                emit_long((uintptr)veccode-(uintptr)target-4);
-                write_log("Create jump to %p\n",veccode);
-
-                write_log("Handled one access!\n");
-                fflush(stdout);
-                segvcount++;
-
-                target=veccode;
-
-                if (dir==SIG_READ) {
-                    /* Fetch with the accessor matching the access size;
-                       get_byte() used to be called for all three sizes. */
-                    switch(size) {
-                    case 1: raw_mov_b_ri(r,get_byte(addr)); break;
-                    case 2: raw_mov_w_ri(r,get_word(addr)); break;
-                    case 4: raw_mov_l_ri(r,get_long(addr)); break;
-                    default: abort();
-                    }
-                }
-                else { /* write */
-                    switch(size) {
-                    case 1: put_byte(addr,*((uae_u8*)pr)); break;
-                    case 2: put_word(addr,*((uae_u16*)pr)); break;
-                    case 4: put_long(addr,*((uae_u32*)pr)); break;
-                    default: abort();
-                    }
-                }
-                for (n=0;n<5;n++)
-                    raw_mov_b_mi(sc.eip+n,vecbuf[n]);
-                raw_mov_l_mi((uintptr)&in_handler,0);
-                emit_byte(0xe9);
-                emit_long(sc.eip+len-(uintptr)target-4);
-                in_handler=1;
-                target=tmp;
-            }
-
-            /* Find the block containing the instruction.  The active list
-               is searched first; if it is not found there, it might be a
-               rom routine that is in the dormant list. */
-            heads[0]=active;
-            heads[1]=dormant;
-            for (k=0;k<2;k++) {
-                for (bi=heads[k];bi;bi=bi->next) {
-                    if (bi->handler &&
-                        (uae_u8*)bi->direct_handler<=i &&
-                        (uae_u8*)bi->nexthandler>i) {
-                        write_log("deleted trigger (%p<%p<%p) %p\n",
-                                  bi->handler,
-                                  i,
-                                  bi->nexthandler,
-                                  bi->pc_p);
-                        invalidate_block(bi);
-                        raise_in_cl_list(bi);
-                        set_special(0);
-                        return;
-                    }
-                }
-            }
-            write_log("Huh? Could not find trigger!\n");
-            return;
-        }
-    }
-    write_log("Can't handle access!\n");
-    for (j=0;j<10;j++) {
-        write_log("instruction byte %2d is %02x\n",j,i[j]);
-    }
-    write_log("Please send the above info (starting at \"fault address\") to\n"
-              "bmeyer@csse.monash.edu.au\n"
-              "This shouldn't happen ;-)\n");
-    fflush(stdout);
-    signal(SIGSEGV,SIG_DFL); /* returning here will cause a "real" SEGV */
-}
-#endif
-
-
-/*************************************************************************
- * Checking for CPU features *
- *************************************************************************/
-
-struct cpuinfo_x86 { // Host CPU description, filled in by raw_init_cpu()
- uae_u8 x86; // CPU family
- uae_u8 x86_vendor; // CPU vendor
- uae_u8 x86_processor; // CPU canonical processor type
- uae_u8 x86_brand_id; // CPU BrandID if supported, yield 0 otherwise
- uae_u32 x86_hwcap; // EDX as returned by CPUID level 1 (0 if that level is unavailable)
- uae_u8 x86_model; // CPU model, decoded from EAX of CPUID level 1
- uae_u8 x86_mask; // low four bits of EAX of CPUID level 1
- int cpuid_level; // Maximum supported CPUID level, -1=no CPUID
- char x86_vendor_id[16]; // 12 character vendor string from CPUID level 0, NUL terminated
-};
-struct cpuinfo_x86 cpuinfo; // the one instance, set up by raw_init_cpu()
-
-enum { /* values of cpuinfo_x86.x86_vendor; strings are the CPUID vendor IDs matched by x86_get_cpu_vendor() */
- X86_VENDOR_INTEL = 0, /* "GenuineIntel" */
- X86_VENDOR_CYRIX = 1, /* "CyrixInstead" */
- X86_VENDOR_AMD = 2, /* "AuthenticAMD" */
- X86_VENDOR_UMC = 3, /* "UMC UMC UMC " */
- X86_VENDOR_NEXGEN = 4, /* "NexGenDriven" */
- X86_VENDOR_CENTAUR = 5, /* "CentaurHauls" */
- X86_VENDOR_RISE = 6, /* "RiseRiseRise" */
- X86_VENDOR_TRANSMETA = 7, /* "GenuineTMx86" or "TransmetaCPU" */
- X86_VENDOR_NSC = 8, /* "Geode by NSC" */
- X86_VENDOR_UNKNOWN = 0xff /* any other vendor string */
-};
-
-enum { /* values of cpuinfo_x86.x86_processor; also index x86_processor_string_table and x86_alignments */
- X86_PROCESSOR_I386, /* 80386 */
- X86_PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
- X86_PROCESSOR_PENTIUM, /* family 5, vendor other than AMD */
- X86_PROCESSOR_PENTIUMPRO, /* family 6, vendor other than AMD */
- X86_PROCESSOR_K6, /* family 5, AMD */
- X86_PROCESSOR_ATHLON, /* family 6 AMD, or family 15 AMD without long mode */
- X86_PROCESSOR_PENTIUM4, /* family 15 Intel without long mode */
- X86_PROCESSOR_X86_64, /* long mode reported by CPUID 0x80000001 */
- X86_PROCESSOR_max /* number of entries; raw_init_cpu() also uses it as "not determined yet" */
-};
-
-static const char * x86_processor_string_table[X86_PROCESSOR_max] = { /* printable names, indexed by X86_PROCESSOR_* */
- "80386",
- "80486",
- "Pentium",
- "PentiumPro",
- "K6",
- "Athlon",
- "Pentium4",
- "x86-64"
-};
-
-static struct ptt { /* per-processor code alignment parameters */
- const int align_loop; /* copied to align_loops by raw_init_cpu() when tune_alignment is set */
- const int align_loop_max_skip;
- const int align_jump; /* copied to align_jumps by raw_init_cpu() when tune_alignment is set */
- const int align_jump_max_skip;
- const int align_func;
-}
-x86_alignments[X86_PROCESSOR_max] = { /* one row per X86_PROCESSOR_* value, in enum order */
- { 4, 3, 4, 3, 4 }, /* X86_PROCESSOR_I386 */
- { 16, 15, 16, 15, 16 }, /* X86_PROCESSOR_I486 */
- { 16, 7, 16, 7, 16 }, /* X86_PROCESSOR_PENTIUM */
- { 16, 15, 16, 7, 16 }, /* X86_PROCESSOR_PENTIUMPRO */
- { 32, 7, 32, 7, 32 }, /* X86_PROCESSOR_K6 */
- { 16, 7, 16, 7, 16 }, /* X86_PROCESSOR_ATHLON */
- { 0, 0, 0, 0, 0 }, /* X86_PROCESSOR_PENTIUM4 */
- { 16, 7, 16, 7, 16 } /* X86_PROCESSOR_X86_64 */
-};
-
-/* Translate the vendor string in c->x86_vendor_id into an X86_VENDOR_*
-   code stored in c->x86_vendor.  Strings that match none of the known IDs
-   yield X86_VENDOR_UNKNOWN. */
-static void
-x86_get_cpu_vendor(struct cpuinfo_x86 *c)
-{
-    static const struct {
-        const char *id;
-        int vendor;
-    } known_vendors[] = {
-        { "GenuineIntel", X86_VENDOR_INTEL },
-        { "AuthenticAMD", X86_VENDOR_AMD },
-        { "CyrixInstead", X86_VENDOR_CYRIX },
-        { "Geode by NSC", X86_VENDOR_NSC },
-        { "UMC UMC UMC ", X86_VENDOR_UMC },
-        { "CentaurHauls", X86_VENDOR_CENTAUR },
-        { "NexGenDriven", X86_VENDOR_NEXGEN },
-        { "RiseRiseRise", X86_VENDOR_RISE },
-        { "GenuineTMx86", X86_VENDOR_TRANSMETA },
-        { "TransmetaCPU", X86_VENDOR_TRANSMETA }
-    };
-    const int n_known = (int)(sizeof(known_vendors) / sizeof(known_vendors[0]));
-    int k;
-
-    c->x86_vendor = X86_VENDOR_UNKNOWN;
-    for (k = 0; k < n_known; k++) {
-        if (strcmp(c->x86_vendor_id, known_vendors[k].id) == 0) {
-            c->x86_vendor = known_vendors[k].vendor;
-            break;
-        }
-    }
-}
-
-/* Execute CPUID for leaf op and hand back the four result registers.
-   Any of eax/ebx/ecx/edx may be NULL if the caller does not want that
-   register.
-
-   A small stub is generated into a freshly acquired executable page with
-   the regular emitter (the emitter's target is saved and restored around
-   this).  The stub exchanges values with this function through static
-   variables, and the page is released again before returning.  Aborts if
-   the page cannot be acquired. */
-static void
-cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
-{
-    const int stub_size = 4096;
-    uae_u8 *stub = (uae_u8 *)vm_acquire(stub_size);
-    if (stub == VM_MAP_FAILED)
-        abort();
-    vm_protect(stub, stub_size, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
-
-    /* Mailboxes shared with the generated stub. */
-    static uae_u32 leaf, out_eax, out_ebx, out_ecx, out_edx;
-    uae_u8 *saved_target = get_target();
-    int reg;
-
-    leaf = op;
-    set_target(stub);
-
-    /* Save eax (0), ecx (1), edx (2), ebx (3). */
-    for (reg = 0; reg <= 3; reg++)
-        raw_push_l_r(reg);
-
-    raw_mov_l_rm(0, (uintptr)&leaf);
-    raw_cpuid(0);
-    raw_mov_l_mr((uintptr)&out_eax, 0);
-    raw_mov_l_mr((uintptr)&out_ebx, 3);
-    raw_mov_l_mr((uintptr)&out_ecx, 1);
-    raw_mov_l_mr((uintptr)&out_edx, 2);
-
-    /* Restore in the opposite order. */
-    for (reg = 3; reg >= 0; reg--)
-        raw_pop_l_r(reg);
-    raw_ret();
-
-    set_target(saved_target);
-
-    /* Run the stub and collect what it stored. */
-    ((cpuop_func*)stub)(0);
-    if (eax != NULL)
-        *eax = out_eax;
-    if (ebx != NULL)
-        *ebx = out_ebx;
-    if (ecx != NULL)
-        *ecx = out_ecx;
-    if (edx != NULL)
-        *edx = out_edx;
-
-    vm_release(stub, stub_size);
-}
-
-/* Identify the host CPU and derive the code generator settings from it.
-
-   Fills in the global cpuinfo (vendor, family, model, stepping, feature
-   word, canonical processor type) from CPUID levels 0, 1, 0x80000000 and
-   0x80000001, then sets have_lahf_lm, have_cmov, have_rat_stall and, when
-   tune_alignment is set, align_loops/align_jumps.  Logs a summary line and
-   finishes by calling raw_flags_init().
-
-   On x86-64 builds the function aborts if CMOV is not reported. */
-static void
-raw_init_cpu(void)
-{
-    struct cpuinfo_x86 *c = &cpuinfo;
-
-    /* Defaults */
-    c->x86_processor = X86_PROCESSOR_max;
-    c->x86_vendor = X86_VENDOR_UNKNOWN;
-    c->cpuid_level = -1;                /* CPUID not detected */
-    c->x86_model = c->x86_mask = 0;     /* So far unknown... */
-    c->x86_vendor_id[0] = '\0';         /* Unset */
-    c->x86_hwcap = 0;
-
-    /* Get vendor name: the twelve characters come back in EBX, EDX, ECX
-       (in that order), hence the 0/8/4 offsets below. */
-    c->x86_vendor_id[12] = '\0';
-    cpuid(0x00000000,
-          (uae_u32 *)&c->cpuid_level,
-          (uae_u32 *)&c->x86_vendor_id[0],
-          (uae_u32 *)&c->x86_vendor_id[8],
-          (uae_u32 *)&c->x86_vendor_id[4]);
-    x86_get_cpu_vendor(c);
-
-    /* Intel-defined flags: level 0x00000001 */
-    c->x86_brand_id = 0;
-    if ( c->cpuid_level >= 0x00000001 ) {
-        uae_u32 tfms, brand_id;
-        cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
-
-        const uae_u32 base_family = (tfms >> 8) & 15;
-        c->x86 = base_family;
-        if (base_family == 0xf)
-            c->x86 += (tfms >> 20) & 0xff;          /* extended family */
-
-        /* The extended model field is keyed on the base family (6 or 15),
-           not on the base model being 0xf as was tested here before; the
-           old test truncated the model of most family 6 processors. */
-        c->x86_model = (tfms >> 4) & 15;
-        if (base_family == 0x6 || base_family == 0xf)
-            c->x86_model |= (tfms >> 12) & 0xf0;    /* extended model */
-
-        c->x86_brand_id = brand_id & 0xff;
-        c->x86_mask = tfms & 15;
-    } else {
-        /* Have CPUID level 0 only - unheard of */
-        c->x86 = 4;
-    }
-
-    /* AMD-defined flags: level 0x80000001 */
-    uae_u32 xlvl;
-    cpuid(0x80000000, &xlvl, NULL, NULL, NULL);
-    if ( (xlvl & 0xffff0000) == 0x80000000 ) {
-        if ( xlvl >= 0x80000001 ) {
-            uae_u32 features, extra_features;
-            cpuid(0x80000001, NULL, NULL, &extra_features, &features);
-            if (features & (1 << 29)) {
-                /* Assume x86-64 if long mode is supported */
-                c->x86_processor = X86_PROCESSOR_X86_64;
-            }
-            if (extra_features & (1 << 0))
-                have_lahf_lm = true;
-        }
-    }
-
-    /* Canonicalize processor ID */
-    switch (c->x86) {
-    case 3:
-        c->x86_processor = X86_PROCESSOR_I386;
-        break;
-    case 4:
-        c->x86_processor = X86_PROCESSOR_I486;
-        break;
-    case 5:
-        if (c->x86_vendor == X86_VENDOR_AMD)
-            c->x86_processor = X86_PROCESSOR_K6;
-        else
-            c->x86_processor = X86_PROCESSOR_PENTIUM;
-        break;
-    case 6:
-        if (c->x86_vendor == X86_VENDOR_AMD)
-            c->x86_processor = X86_PROCESSOR_ATHLON;
-        else
-            c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
-        break;
-    case 15:
-        /* Only if long mode was not detected above. */
-        if (c->x86_processor == X86_PROCESSOR_max) {
-            switch (c->x86_vendor) {
-            case X86_VENDOR_INTEL:
-                c->x86_processor = X86_PROCESSOR_PENTIUM4;
-                break;
-            case X86_VENDOR_AMD:
-                /* Assume a 32-bit Athlon processor if not in long mode */
-                c->x86_processor = X86_PROCESSOR_ATHLON;
-                break;
-            default:
-                /* Left undetermined; handled by the fallback below. */
-                break;
-            }
-        }
-        break;
-    }
-    if (c->x86_processor == X86_PROCESSOR_max) {
-        c->x86_processor = X86_PROCESSOR_I386;
-        fprintf(stderr, "Error: unknown processor type, assuming i386\n");
-        fprintf(stderr, " Family : %d\n", c->x86);
-        fprintf(stderr, " Model : %d\n", c->x86_model);
-        fprintf(stderr, " Mask : %d\n", c->x86_mask);
-        fprintf(stderr, " Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor);
-        if (c->x86_brand_id)
-            fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id);
-    }
-
-    /* Have CMOV support? */
-    have_cmov = c->x86_hwcap & (1 << 15);
-#if defined(__x86_64__)
-    if (!have_cmov) {
-        write_log("x86-64 implementations are bound to have CMOV!\n");
-        abort();
-    }
-#endif
-
-    /* Can the host CPU suffer from partial register stalls? */
-    have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
-#if 1
-    /* It appears that partial register writes are a bad idea even on
-       AMD K7 cores, even though they are not supposed to have the
-       dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
-    if (c->x86_processor == X86_PROCESSOR_ATHLON)
-        have_rat_stall = true;
-#endif
-
-    /* Alignments */
-    if (tune_alignment) {
-        align_loops = x86_alignments[c->x86_processor].align_loop;
-        align_jumps = x86_alignments[c->x86_processor].align_jump;
-    }
-
-    write_log("Max CPUID level=%d Processor is %s [%s]\n",
-              c->cpuid_level, c->x86_vendor_id,
-              x86_processor_string_table[c->x86_processor]);
-
-    raw_flags_init();
-}
-
-/* Probe how the host's BSF instruction treats the flags.
-
-   For every combination of preset ZF/CF/OF/SF and for the operand values
-   -1, 0 and 1, BSF is executed and the resulting flags are compared with
-   the expectation "ZF set iff the operand was 0, CF/OF/SF unchanged".
-   Returns true when all runs met that expectation; otherwise logs a
-   message and returns false. */
-static bool target_check_bsf(void)
-{
-    /* The four EFLAGS bits under observation: CF(0) ZF(6) SF(7) OF(11). */
-    const unsigned long watched = (1UL << 0) | (1UL << 6) | (1UL << 7) | (1UL << 11);
-    bool mismatch = false;
-
-    /* combo enumerates ZF, CF, OF, SF from most to least significant bit. */
-    for (int combo = 0; combo < 16; combo++) {
-        const int g_ZF = (combo >> 3) & 1;
-        const int g_CF = (combo >> 2) & 1;
-        const int g_OF = (combo >> 1) & 1;
-        const int g_SF = combo & 1;
-
-        for (int value = -1; value <= 1; value++) {
-            unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF;
-            unsigned long tmp = value;
-            __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0"
-                                  : "+r" (flags), "+r" (tmp) : : "cc");
-
-            /* Preset SF/OF/CF must survive; ZF must reflect "operand was 0". */
-            unsigned long expected = (g_SF << 7) | (g_OF << 11) | g_CF;
-            if (value == 0)
-                expected |= 1UL << 6;
-
-            if ((flags & watched) != expected)
-                mismatch = true;
-        }
-    }
-
-    if (mismatch)
-        write_log("Target CPU defines all flags on BSF instruction\n");
-    return !mismatch;
-}
-
-
-/*************************************************************************
- * FPU stuff *
- *************************************************************************/
-
-
-/* Reset the x87 stack bookkeeping: no FP register has a stack slot
-   (spos == -2) and the tracked stack is empty (tos == -1). */
-static __inline__ void raw_fp_init(void)
-{
-    for (int reg = 0; reg < N_FREGS; ++reg)
-        live.spos[reg] = -2;
-
-    live.tos = -1;    /* Stack is empty */
-}
-
-/* Emit code that discards every entry currently tracked on the x87 stack,
-   then reset the bookkeeping.
-
-   Entries are popped two at a time (de d9) while at least two remain, and
-   one at a time (dd d8) afterwards.  Using FINIT (9b db e3) instead of
-   popping all the entries was tried, but it seems to have side effects ---
-   there is display corruption in Quake when it is used. */
-static __inline__ void raw_fp_cleanup_drop(void)
-{
-    for (; live.tos >= 1; live.tos -= 2) {
-        emit_byte(0xde);
-        emit_byte(0xd9);
-    }
-    for (; live.tos >= 0; live.tos--) {
-        emit_byte(0xdd);
-        emit_byte(0xd8);
-    }
-    raw_fp_init();
-}
-
-/* Make FP register r the top of the x87 stack.
-
-   If r has no slot yet, the stack is grown by one and r takes the new top.
-   If r is already on top nothing is emitted.  Otherwise r's slot is
-   exchanged with the top and the bookkeeping of both registers is
-   swapped. */
-static __inline__ void make_tos(int r)
-{
-    const int slot = live.spos[r];
-
-    if (slot < 0) {    /* Register not yet on stack */
-        emit_byte(0xd9);
-        emit_byte(0xe8);    /* Push '1' on the stack, just to grow it */
-        live.tos++;
-        live.spos[r] = live.tos;
-        live.onstack[live.tos] = r;
-        return;
-    }
-
-    /* Register is on stack */
-    const int top = live.tos;
-    if (slot == top)
-        return;
-
-    const int displaced = live.onstack[top];
-
-    emit_byte(0xd9);
-    emit_byte(0xc8 + (top - slot));    /* exchange it with top of stack */
-
-    live.onstack[top] = r;
-    live.spos[r] = top;
-    live.onstack[slot] = displaced;
-    live.spos[displaced] = slot;
-}
-
-/* Arrange the x87 stack so that r is on top and r2 directly below it. */
-static __inline__ void make_tos2(int r, int r2)
-{
-    /* Put the reg that's supposed to end up in position2 on top */
-    make_tos(r2);
-
-    if (live.spos[r] < 0) {    /* Register not yet on stack */
-        make_tos(r);           /* This will extend the stack */
-        return;
-    }
-
-    /* Register is on stack */
-    emit_byte(0xd9);
-    emit_byte(0xc9);           /* Move r2 into position 2 */
-
-    /* Mirror the exchange of the two topmost slots in the bookkeeping. */
-    const int below = live.tos - 1;
-    const int moved_up = live.onstack[below];
-
-    live.onstack[live.tos] = moved_up;
-    live.spos[moved_up] = live.tos;
-    live.onstack[below] = r2;
-    live.spos[r2] = below;
-
-    make_tos(r);               /* And r into 1 */
-}
-
-/* Return how far FP register r sits below the top of the x87 stack
-   (0 = top).  Aborts if r has no slot, or if its recorded slot lies
-   above the recorded top. */
-static __inline__ int stackpos(int r)
-{
-    const int slot = live.spos[r];
-
-    if (slot < 0)
-        abort();
-
-    const int depth = live.tos - slot;
-    if (depth < 0) {
-        printf("Looking for spos for fnreg %d\n",r);
-        abort();
-    }
-    return depth;
-}
-
-/* Make sure FP register r has a slot on the x87 stack; a register that
-   already has one is left where it is. */
-static __inline__ void usereg(int r)
-{
-    if (live.spos[r] >= 0)
-        return;
-    make_tos(r);
-}
-
-/* This is called with one FP value in a reg *above* tos, which it will
-   pop off the stack if necessary.
-
-   If r has no slot yet, the extra value simply becomes r's slot and the
-   tracked top moves up by one.  Otherwise the value is stored into r's
-   existing slot and popped. */
-static __inline__ void tos_make(int r)
-{
-    const int slot = live.spos[r];
-
-    if (slot >= 0) {
-        emit_byte(0xdd);
-        emit_byte(0xd8 + (live.tos + 1) - slot);    /* store top of stack in reg,
-                                                       and pop it */
-        return;
-    }
-
-    live.tos++;
-    live.spos[r] = live.tos;
-    live.onstack[live.tos] = r;
-}
-
-/* FP helper functions */
-#if USE_NEW_RTASM /* helpers built on the run-time assembler macros */
-#define DEFINE_OP(NAME, GEN) \
-static inline void raw_##NAME(uint32 m) \
-{ \
- GEN(m, X86_NOREG, X86_NOREG, 1); \
-}
-DEFINE_OP(fstl, FSTDm); /* each line defines raw_<name>(m) for an absolute address m */
-DEFINE_OP(fstpl, FSTPDm);
-DEFINE_OP(fldl, FLDDm);
-DEFINE_OP(fildl, FILDLm);
-DEFINE_OP(fistl, FISTLm);
-DEFINE_OP(flds, FLDSm);
-DEFINE_OP(fsts, FSTSm);
-DEFINE_OP(fstpt, FSTPTm);
-DEFINE_OP(fldt, FLDTm);
-#else /* helpers emitting opcode byte, ModRM byte and the 32-bit address directly */
-#define DEFINE_OP(NAME, OP1, OP2) \
-static inline void raw_##NAME(uint32 m) \
-{ \
- emit_byte(OP1); \
- emit_byte(OP2); \
- emit_long(m); \
-}
-DEFINE_OP(fstl, 0xdd, 0x15); /* DD /2: store double */
-DEFINE_OP(fstpl, 0xdd, 0x1d); /* DD /3: store double and pop */
-DEFINE_OP(fldl, 0xdd, 0x05); /* DD /0: load double */
-DEFINE_OP(fildl, 0xdb, 0x05); /* DB /0: load 32-bit integer */
-DEFINE_OP(fistl, 0xdb, 0x15); /* DB /2: store as 32-bit integer */
-DEFINE_OP(flds, 0xd9, 0x05); /* D9 /0: load single */
-DEFINE_OP(fsts, 0xd9, 0x15); /* D9 /2: store single */
-DEFINE_OP(fstpt, 0xdb, 0x3d); /* DB /7: store extended and pop */
-DEFINE_OP(fldt, 0xdb, 0x2d); /* DB /5: load extended */
-#endif
-#undef DEFINE_OP /* the generator macro is local to this section */
-
-/* Store FP register r to memory at m as a double; r is brought to the top
-   of the x87 stack first and stays there. */
-LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
-{
-    make_tos(r);      /* the store works on the top of stack */
-    raw_fstl(m);
-}
-LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
-
-/* Store FP register r to memory at m as a double and drop it from the x87
-   stack: the store pops the stack, so the bookkeeping forgets r's slot.
-   The closing LENDFUNC now names this function; it used to name
-   raw_fmov_mr. */
-LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
-{
-    make_tos(r);
-    raw_fstpl(m);                       /* store and pop */
-    live.onstack[live.tos]=-1;
-    live.tos--;
-    live.spos[r]=-2;                    /* r no longer has a stack slot */
-}
-LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
-
-/* Load a double from memory at m into FP register r. */
-LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
-{
-    raw_fldl(m);      /* value lands above the tracked top of stack */
-    tos_make(r);      /* ... and is handed to r */
-}
-LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
-
-/* Load a 32-bit integer from memory at m into FP register r. */
-LOWFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
-{
-    raw_fildl(m);     /* value lands above the tracked top of stack */
-    tos_make(r);      /* ... and is handed to r */
-}
-LENDFUNC(NONE,READ,2,raw_fmovi_rm,(FW r, MEMR m))
-
-/* Store FP register r to memory at m as a 32-bit integer; r is brought to
-   the top of the x87 stack first and stays there. */
-LOWFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
-{
-    make_tos(r);      /* the store works on the top of stack */
-    raw_fistl(m);
-}
-LENDFUNC(NONE,WRITE,2,raw_fmovi_mr,(MEMW m, FR r))
-
-/* Load a single from memory at m into FP register r. */
-LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
-{
-    raw_flds(m);      /* value lands above the tracked top of stack */
-    tos_make(r);      /* ... and is handed to r */
-}
-LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
-
-/* Store FP register r to memory at m as a single; r is brought to the top
-   of the x87 stack first and stays there. */
-LOWFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
-{
-    make_tos(r);      /* the store works on the top of stack */
-    raw_fsts(m);
-}
-LENDFUNC(NONE,WRITE,2,raw_fmovs_mr,(MEMW m, FR r))
-
-/* Store FP register r to memory at m in extended (long double) format
-   while keeping r on the x87 stack. */
-LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
-{
-    /* Stupid x87 can't write a long double to mem without popping the
-       stack!  So a temporary copy is pushed and that copy is popped. */
-    usereg(r);
-    const int depth = stackpos(r);
-
-    emit_byte(0xd9);            /* Get a copy to the top of stack */
-    emit_byte(0xc0 + depth);
-
-    raw_fstpt(m);               /* store and pop it */
-}
-LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr,(MEMW m, FR r))
-
-/* Store FP register r to memory at m in extended (long double) format and
-   drop it from the x87 stack: the store pops the stack, so the bookkeeping
-   forgets r's slot.  The closing LENDFUNC now names this function (it used
-   to name raw_fmov_ext_mr), and an unused local was removed. */
-LOWFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
-{
-    make_tos(r);
-    raw_fstpt(m);                       /* store and pop it */
-    live.onstack[live.tos]=-1;
-    live.tos--;
-    live.spos[r]=-2;                    /* r no longer has a stack slot */
-}
-LENDFUNC(NONE,WRITE,2,raw_fmov_ext_mr_drop,(MEMW m, FR r))
-
-/* Load an extended (long double) value from memory at m into FP
-   register r. */
-LOWFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
-{
-    raw_fldt(m);      /* value lands above the tracked top of stack */
-    tos_make(r);      /* ... and is handed to r */
-}
-LENDFUNC(NONE,READ,2,raw_fmov_ext_rm,(FW r, MEMR m))
-
-/* Set FP register r to the x87 built-in constant pi (d9 eb, FLDPI). */
-LOWFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
-{
-    emit_byte(0xd9);    /* push the constant ... */
-    emit_byte(0xeb);
-    tos_make(r);        /* ... and hand it to r */
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_pi,(FW r))
-
-/* Set FP register r to the x87 built-in constant log10(2)
-   (d9 ec, FLDLG2). */
-LOWFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
-{
-    emit_byte(0xd9);    /* push the constant ... */
-    emit_byte(0xec);
-    tos_make(r);        /* ... and hand it to r */
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_log10_2,(FW r))
-
-/* Set FP register r to the x87 built-in constant log2(e)
-   (d9 ea, FLDL2E). */
-LOWFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
-{
-    emit_byte(0xd9);    /* push the constant ... */
-    emit_byte(0xea);
-    tos_make(r);        /* ... and hand it to r */
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_log2_e,(FW r))
-
-/* Set FP register r to the x87 built-in constant ln(2)
-   (d9 ed, FLDLN2). */
-LOWFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
-{
-    emit_byte(0xd9);    /* push the constant ... */
-    emit_byte(0xed);
-    tos_make(r);        /* ... and hand it to r */
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_loge_2,(FW r))
-
-/* Set FP register r to 1.0 (d9 e8, FLD1). */
-LOWFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
-{
-    emit_byte(0xd9);    /* push the constant ... */
-    emit_byte(0xe8);
-    tos_make(r);        /* ... and hand it to r */
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_1,(FW r))
-
-/* Set FP register r to 0.0 (d9 ee, FLDZ). */
-LOWFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
-{
-    emit_byte(0xd9);    /* push the constant ... */
-    emit_byte(0xee);
-    tos_make(r);        /* ... and hand it to r */
-}
-LENDFUNC(NONE,NONE,1,raw_fmov_0,(FW r))
-
-/* Copy FP register s into FP register d. */
-LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
-{
-    usereg(s);
-    const int src_depth = stackpos(s);
-
-    if (src_depth != 0 || live.spos[d] < 0) {
-        /* General case: push a copy of the source and let tos_make()
-           either adopt it as d's slot or store it into d's slot. */
-        emit_byte(0xd9);
-        emit_byte(0xc0 + src_depth);    /* duplicate source on tos */
-        tos_make(d);                    /* store to destination, pop if necessary */
-        return;
-    }
-
-    /* source is on top of stack, and we already have the dest */
-    const int dst_depth = stackpos(d);
-    emit_byte(0xdd);
-    emit_byte(0xd0 + dst_depth);
-}
-LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
-
-/* Emit d9 /5 (FLDCW) with a register-plus-32-bit-displacement operand:
-   register index supplies the low ModRM bits, base the displacement. */
-LOWFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
-{
-    emit_byte(0xd9);            /* opcode */
-    emit_byte(0xa8 + index);    /* ModRM */
-    emit_long(base);            /* displacement */
-}
-LENDFUNC(NONE,READ,4,raw_fldcw_m_indexed,(R4 index, IMM base))
-
-
-/* d = sqrt(s).  When d and s are the same register the operation is done
-   in place on the top of stack; otherwise it is done on a pushed copy of s
-   which is then handed to d. */
-LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xfa);                /* take square root */
-        return;
-    }
-
-    usereg(s);
-    const int src_depth = stackpos(s);
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-    emit_byte(0xd9);
-    emit_byte(0xfa);                    /* take square root */
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
-
-/* d = fabs(s).  When d and s are the same register the operation is done
-   in place on the top of stack; otherwise it is done on a pushed copy of s
-   which is then handed to d. */
-LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xe1);                /* take fabs */
-        return;
-    }
-
-    usereg(s);
-    const int src_depth = stackpos(s);
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-    emit_byte(0xd9);
-    emit_byte(0xe1);                    /* take fabs */
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
-
-/* d = s rounded to an integer (FRNDINT).  When d and s are the same
-   register the operation is done in place on the top of stack; otherwise
-   it is done on a pushed copy of s which is then handed to d. */
-LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xfc);                /* take frndint */
-        return;
-    }
-
-    usereg(s);
-    const int src_depth = stackpos(s);
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-    emit_byte(0xd9);
-    emit_byte(0xfc);                    /* take frndint */
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
-
-/* d = cos(s).  When d and s are the same register the operation is done
-   in place on the top of stack; otherwise it is done on a pushed copy of s
-   which is then handed to d. */
-LOWFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xff);                /* take cos */
-        return;
-    }
-
-    usereg(s);
-    const int src_depth = stackpos(s);
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-    emit_byte(0xd9);
-    emit_byte(0xff);                    /* take cos */
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_fcos_rr,(FW d, FR s))
-
-/* d = sin(s).  When d and s are the same register the operation is done
-   in place on the top of stack; otherwise it is done on a pushed copy of s
-   which is then handed to d. */
-LOWFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xfe);                /* take sin */
-        return;
-    }
-
-    usereg(s);
-    const int src_depth = stackpos(s);
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-    emit_byte(0xd9);
-    emit_byte(0xfe);                    /* take sin */
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_fsin_rr,(FW d, FR s))
-
-static const double one=1;
-/* d = 2^s.  The computation is done on a pushed copy of s: the value is
-   split into its rounded part and the remainder, f2xm1 plus 1 handles the
-   remainder, and the result is scaled by the rounded part. */
-LOWFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
-{
-    /* Emitted before the memory add. */
-    static const uae_u8 split_and_exp[] = {
-        0xd9, 0xc0,     /* duplicate top of stack. Now up to 8 high */
-        0xd9, 0xfc,     /* rndint */
-        0xd9, 0xc9,     /* swap top two elements */
-        0xd8, 0xe1,     /* subtract rounded from original */
-        0xd9, 0xf0      /* f2xm1 */
-    };
-    /* Emitted after the memory add. */
-    static const uae_u8 scale_and_drop[] = {
-        0xd9, 0xfd,     /* and scale it */
-        0xdd, 0xd9      /* take the rounded value off */
-    };
-    unsigned k;
-
-    usereg(s);
-    const int src_depth = stackpos(s);
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-
-    for (k = 0; k < sizeof(split_and_exp); k++)
-        emit_byte(split_and_exp[k]);
-    x86_fadd_m((uintptr)&one);          /* Add '1' without using extra stack space */
-    for (k = 0; k < sizeof(scale_and_drop); k++)
-        emit_byte(scale_and_drop[k]);
-
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_ftwotox_rr,(FW d, FR s))
-
-/* d = e^s.  A pushed copy of s is multiplied by log2(e); from there on the
-   sequence is the same "2 to the x" computation: split into rounded part
-   and remainder, f2xm1 plus 1 on the remainder, scale by the rounded
-   part. */
-LOWFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
-{
-    /* Emitted before the memory add. */
-    static const uae_u8 to_base2_and_exp[] = {
-        0xd9, 0xea,     /* fldl2e */
-        0xde, 0xc9,     /* fmulp --- multiply source by log2(e) */
-        0xd9, 0xc0,     /* duplicate top of stack. Now up to 8 high */
-        0xd9, 0xfc,     /* rndint */
-        0xd9, 0xc9,     /* swap top two elements */
-        0xd8, 0xe1,     /* subtract rounded from original */
-        0xd9, 0xf0      /* f2xm1 */
-    };
-    /* Emitted after the memory add. */
-    static const uae_u8 scale_and_drop[] = {
-        0xd9, 0xfd,     /* and scale it */
-        0xdd, 0xd9      /* take the rounded value off */
-    };
-    unsigned k;
-
-    usereg(s);
-    const int src_depth = stackpos(s);
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-
-    for (k = 0; k < sizeof(to_base2_and_exp); k++)
-        emit_byte(to_base2_and_exp[k]);
-    x86_fadd_m((uintptr)&one);          /* Add '1' without using extra stack space */
-    for (k = 0; k < sizeof(scale_and_drop); k++)
-        emit_byte(scale_and_drop[k]);
-
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_fetox_rr,(FW d, FR s))
-
-/* d = log2(s), computed on a pushed copy of s as 1*log2(x). */
-LOWFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
-{
-    usereg(s);
-    const int src_depth = stackpos(s);
-
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-
-    emit_byte(0xd9);
-    emit_byte(0xe8);                    /* push '1' */
-
-    emit_byte(0xd9);
-    emit_byte(0xc9);                    /* swap top two */
-
-    emit_byte(0xd9);
-    emit_byte(0xf1);                    /* take 1*log2(x) */
-
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_flog2_rr,(FW d, FR s))
-
-
-/* d = -s.  When d and s are the same register the operation is done in
-   place on the top of stack; otherwise it is done on a pushed copy of s
-   which is then handed to d. */
-LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
-{
-    if (d == s) {
-        make_tos(d);
-        emit_byte(0xd9);
-        emit_byte(0xe0);                /* take fchs */
-        return;
-    }
-
-    usereg(s);
-    const int src_depth = stackpos(s);
-    emit_byte(0xd9);
-    emit_byte(0xc0 + src_depth);        /* duplicate source */
-    emit_byte(0xd9);
-    emit_byte(0xe0);                    /* take fchs */
-    tos_make(d);                        /* store to destination */
-}
-LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
-
-/* d += s.  If s already is the top of stack, the form that writes to a
-   stack slot is used (dc c0+i, i = depth of d); otherwise d is brought to
-   the top and the form that writes to the top is used (d8 c0+i, i = depth
-   of s). */
-LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
-{
-    int opcode;
-    int modrm;
-
-    usereg(s);
-    usereg(d);
-
-    if (live.spos[s] == live.tos) {
-        /* Source is on top of stack */
-        opcode = 0xdc;
-        modrm = 0xc0 + stackpos(d);
-    }
-    else {
-        make_tos(d);
-        opcode = 0xd8;
-        modrm = 0xc0 + stackpos(s);
-    }
-
-    emit_byte(opcode);
-    emit_byte(modrm);                   /* add source to dest */
-}
-LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
-
-/* d -= s.  If s already is the top of stack, the form that writes to a
-   stack slot is used (dc e8+i, i = depth of d); otherwise d is brought to
-   the top and the form that writes to the top is used (d8 e0+i, i = depth
-   of s). */
-LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
-{
-    int opcode;
-    int modrm;
-
-    usereg(s);
-    usereg(d);
-
-    if (live.spos[s] == live.tos) {
-        /* Source is on top of stack */
-        opcode = 0xdc;
-        modrm = 0xe8 + stackpos(d);
-    }
-    else {
-        make_tos(d);
-        opcode = 0xd8;
-        modrm = 0xe0 + stackpos(s);
-    }
-
-    emit_byte(opcode);
-    emit_byte(modrm);                   /* sub source from dest */
-}
-LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
-
-/* Compare d with s: d is brought to the top of the x87 stack and an
- * unordered compare against s's stack slot is emitted. */
-LOWFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
-{
-    usereg(s);
-    usereg(d);
-    make_tos(d);
-
-    const int src_pos = stackpos(s);
-
-    emit_byte(0xdd);
-    emit_byte(0xe0 + src_pos);      /* fucom st(src_pos) */
-}
-LENDFUNC(NONE,NONE,2,raw_fcmp_rr,(FR d, FR s))
-
-/* d *= s.  If s already sits on top of the x87 stack the "st(i) op= st(0)"
- * encoding is used so nothing has to be moved; otherwise d is brought to the
- * top and the "st(0) op= st(i)" encoding is used. */
-LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-
-    if (live.spos[s]!=live.tos) {
-        make_tos(d);
-        const int src_pos = stackpos(s);
-
-        emit_byte(0xd8);
-        emit_byte(0xc8 + src_pos);  /* fmul st(0), st(src_pos) */
-    }
-    else {
-        /* The source is the top of stack: multiply it into d's slot. */
-        const int dst_pos = stackpos(d);
-
-        emit_byte(0xdc);
-        emit_byte(0xc8 + dst_pos);  /* fmul st(dst_pos), st(0) */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
-
-/* d /= s.  If s already sits on top of the x87 stack the "st(i) op= st(0)"
- * encoding is used so nothing has to be moved; otherwise d is brought to the
- * top and the "st(0) op= st(i)" encoding is used. */
-LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-
-    if (live.spos[s]!=live.tos) {
-        make_tos(d);
-        const int src_pos = stackpos(s);
-
-        emit_byte(0xd8);
-        emit_byte(0xf0 + src_pos);  /* fdiv st(0), st(src_pos) */
-    }
-    else {
-        /* The source is the top of stack: divide d's slot by it. */
-        const int dst_pos = stackpos(d);
-
-        emit_byte(0xdc);
-        emit_byte(0xf8 + dst_pos);  /* fdiv st(dst_pos), st(0) */
-    }
-}
-LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
-
-/* d = remainder of d by s, using fprem.  fprem has fixed operands
- * (st(0) and st(1)), so make_tos2() is asked to arrange d on top with s
- * right below it; the generator aborts if that did not happen. */
-LOWFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-    make_tos2(d,s);
-
-    const int src_pos = stackpos(s);
-    if (src_pos != 1) {
-        /* The source must be st(1) for fprem to be usable. */
-        printf("Failed horribly in raw_frem_rr! ds is %d\n", src_pos);
-        abort();
-    }
-
-    emit_byte(0xd9);
-    emit_byte(0xf8);                /* fprem */
-}
-LENDFUNC(NONE,NONE,2,raw_frem_rr,(FRW d, FR s))
-
-/* d = remainder of d by s, using fprem1.  fprem1 has fixed operands
- * (st(0) and st(1)), so make_tos2() is asked to arrange d on top with s
- * right below it; the generator aborts if that did not happen. */
-LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
-{
-    usereg(s);
-    usereg(d);
-    make_tos2(d,s);
-
-    const int src_pos = stackpos(s);
-    if (src_pos != 1) {
-        /* The source must be st(1) for fprem1 to be usable. */
-        printf("Failed horribly in raw_frem1_rr! ds is %d\n", src_pos);
-        abort();
-    }
-
-    emit_byte(0xd9);
-    emit_byte(0xf5);                /* fprem1 */
-}
-LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
-
-
-/* Test r against 0.0: r is brought to the top of the x87 stack and the
- * two-byte ftst opcode (d9 e4) is emitted. */
-LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
-{
-    make_tos(r);
-
-    emit_byte(0xd9);
-    emit_byte(0xe4);                /* ftst */
-}
-LENDFUNC(NONE,NONE,1,raw_ftst_r,(FR r))
-
-/* %eax register is clobbered if target processor doesn't support fucomi:
- * raw_fflags_into_flags() then falls back to fucom + fstsw %ax + sahf.
- * The expansion is parenthesized so that the condition stays a single
- * operand wherever the macro is used inside a larger expression. */
-#define FFLAG_NREG_CLOBBER_CONDITION (!have_cmov)
-#define FFLAG_NREG EAX_INDEX
-
-/* Compare FPU register r with 0.0 and get the outcome into the CPU flags.
- * A zero is pushed and swapped with r, the two are compared, and the final
- * fstp puts r's value back into its slot while dropping the zero.
- * With have_cmov the flags are set directly by fucomi; without it the FPU
- * status word is routed through %ax (fucom, fwait, fnstsw %ax, sahf). */
-static __inline__ void raw_fflags_into_flags(int r)
-{
-    usereg(r);
-
-    /* Once the zero has been pushed, r lives one slot deeper than before. */
-    const int slot = stackpos(r) + 1;
-
-    emit_byte(0xd9);
-    emit_byte(0xee);                /* fldz */
-    emit_byte(0xd9);
-    emit_byte(0xc8 + slot);         /* fxch st(slot): r's value on top, 0 in r's slot */
-
-    if (!have_cmov) {
-        emit_byte(0xdd);
-        emit_byte(0xe0 + slot);     /* fucom st(slot) */
-        emit_byte(0x9b);            /* fwait */
-        emit_byte(0xdf);
-        emit_byte(0xe0);            /* fnstsw %ax */
-        raw_sahf(0);                /* sahf */
-    }
-    else {
-        // gb-- fucomi is for P6 cores only, not K6-2 then...
-        emit_byte(0xdb);
-        emit_byte(0xe8 + slot);     /* fucomi st(0), st(slot) */
-    }
-
-    emit_byte(0xdd);
-    emit_byte(0xd8 + slot);         /* fstp st(slot): value back in place, zero gone */
-}
+++ /dev/null
-/******************** -*- mode: C; tab-width: 8 -*- ********************
- *
- * Run-time assembler for IA-32 and AMD64
- *
- ***********************************************************************/
-
-
-/***********************************************************************
- *
- * This file is derived from CCG.
- *
- * Copyright 1999, 2000, 2001, 2002, 2003 Ian Piumarta
- *
- * Adaptations and enhancements for AMD64 support, Copyright 2003-2008
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- ***********************************************************************/
-
-#ifndef X86_RTASM_H
-#define X86_RTASM_H
-
-/* NOTES
- *
- * o Best viewed on a 1024x768 screen with fixed-6x10 font ;-)
- *
- * TODO
- *
- * o Fix FIXMEs
- * o SSE instructions
- * o Optimize for cases where register numbers are not integral constants
- */
-
-/* --- Configuration ------------------------------------------------------- */
-
-/* Define to 1 to select a "flat" register set, i.e. a different regno for
- each size variant (every register class then gets its own base value in
- the register enum below).  Define to 0 to let the classes share numbers. */
-#ifndef X86_FLAT_REGISTERS
-#define X86_FLAT_REGISTERS 1
-#endif
-
-/* Define to 1 to generate x86-64 code, 0 (the default) for IA-32 code. */
-#ifndef X86_TARGET_64BIT
-#define X86_TARGET_64BIT 0
-#endif
-
-/* Define to 1 to optimize ALU instructions (the _ALU?ir templates then use
- the short accumulator-immediate encodings for AL/AX/EAX/RAX). */
-#ifndef X86_OPTIMIZE_ALU
-#define X86_OPTIMIZE_ALU 1
-#endif
-
-/* Define to 1 to optimize rotate/shift instructions. */
-#ifndef X86_OPTIMIZE_ROTSHI
-#define X86_OPTIMIZE_ROTSHI 1
-#endif
-
-/* Define to 1 to turn absolute addresses into RIP-relative addressing when
- the target is in range (see _x86_RIP_addressing_possible). */
-#ifndef X86_RIP_RELATIVE_ADDR
-#define X86_RIP_RELATIVE_ADDR 1
-#endif
-
-
-/* --- Macros -------------------------------------------------------------- */
-
-/* Functions used to emit code.
- *
- * x86_emit_byte(B)
- * x86_emit_word(W)
- * x86_emit_long(L)
- */
-
-/* Get pointer to current code
- *
- * x86_get_target()
- */
-
-/* Abort assembler, fatal failure.
- *
- * x86_emit_failure(MSG)
- *
- * x86_emit_failure0(MSG) is the same call wrapped in a comma expression
- * whose value is 0, so it can stand in for an integer operand of ?:.
- */
-
-#define x86_emit_failure0(MSG) (x86_emit_failure(MSG),0)
-
-
-/* --- Register set -------------------------------------------------------- */
-
-enum {
- X86_RIP = -2, /* pseudo register, tested by _rIP() */
-#if X86_FLAT_REGISTERS
- X86_NOREG = 0, /* "no register" marker, tested by _r0P() */
- X86_Reg8L_Base = 0x10, /* flat set: one distinct high nibble per register class */
- X86_Reg8H_Base = 0x20,
- X86_Reg16_Base = 0x30,
- X86_Reg32_Base = 0x40,
- X86_Reg64_Base = 0x50,
- X86_RegMMX_Base = 0x60,
- X86_RegXMM_Base = 0x70,
- X86_RegFPU_Base = 0x80
-#else
- X86_NOREG = -1, /* "no register" marker, tested by _r0P() */
- X86_Reg8L_Base = 0, /* non-flat set: classes share numbers, only 8-bit high differs */
- X86_Reg8H_Base = 16,
- X86_Reg16_Base = 0,
- X86_Reg32_Base = 0,
- X86_Reg64_Base = 0,
- X86_RegMMX_Base = 0,
- X86_RegXMM_Base = 0,
- X86_RegFPU_Base = 0
-#endif
-};
-
-enum {
- X86_AL = X86_Reg8L_Base, /* 8-bit low registers, numbered consecutively from the class base */
- X86_CL, X86_DL, X86_BL,
- X86_SPL, X86_BPL, X86_SIL, X86_DIL,
- X86_R8B, X86_R9B, X86_R10B, X86_R11B,
- X86_R12B, X86_R13B, X86_R14B, X86_R15B,
- X86_AH = X86_Reg8H_Base + 4, /* 8-bit high registers start at +4, so _rN(AH..BH) is 4..7 */
- X86_CH, X86_DH, X86_BH
-};
-
-enum {
- X86_AX = X86_Reg16_Base, /* 16-bit registers */
- X86_CX, X86_DX, X86_BX,
- X86_SP, X86_BP, X86_SI, X86_DI,
- X86_R8W, X86_R9W, X86_R10W, X86_R11W,
- X86_R12W, X86_R13W, X86_R14W, X86_R15W
-};
-
-enum {
- X86_EAX = X86_Reg32_Base, /* 32-bit registers */
- X86_ECX, X86_EDX, X86_EBX,
- X86_ESP, X86_EBP, X86_ESI, X86_EDI,
- X86_R8D, X86_R9D, X86_R10D, X86_R11D,
- X86_R12D, X86_R13D, X86_R14D, X86_R15D
-};
-
-enum {
- X86_RAX = X86_Reg64_Base, /* 64-bit registers */
- X86_RCX, X86_RDX, X86_RBX,
- X86_RSP, X86_RBP, X86_RSI, X86_RDI,
- X86_R8, X86_R9, X86_R10, X86_R11,
- X86_R12, X86_R13, X86_R14, X86_R15
-};
-
-enum {
- X86_MM0 = X86_RegMMX_Base, /* MMX registers */
- X86_MM1, X86_MM2, X86_MM3,
- X86_MM4, X86_MM5, X86_MM6, X86_MM7,
-};
-
-enum {
- X86_XMM0 = X86_RegXMM_Base, /* XMM registers */
- X86_XMM1, X86_XMM2, X86_XMM3,
- X86_XMM4, X86_XMM5, X86_XMM6, X86_XMM7,
- X86_XMM8, X86_XMM9, X86_XMM10, X86_XMM11,
- X86_XMM12, X86_XMM13, X86_XMM14, X86_XMM15
-};
-
-enum {
- X86_ST0 = X86_RegFPU_Base, /* x87 FPU stack registers */
- X86_ST1, X86_ST2, X86_ST3,
- X86_ST4, X86_ST5, X86_ST6, X86_ST7
-};
-
-/* Register control and access
- *
- * _rST0P(R) Top-of-stack FPU register (X86_ST0)?
- * _r0P(R) Null register?
- * _rIP(R) RIP register?
- * _rXP(R) Extended register?
- *
- * _rC(R) Class of register (only valid if X86_FLAT_REGISTERS)
- * _rR(R) Full register number
- * _rN(R) Short register number for encoding
- *
- * _r1(R) 8-bit register ID
- * _r2(R) 16-bit register ID
- * _r4(R) 32-bit register ID
- * _r8(R) 64-bit register ID
- * _rM(R) MMX register ID
- * _rX(R) XMM register ID
- * _rF(R) FPU register ID
- * _rA(R) Address register ID used for EA calculation
- */
-
-#define _rST0P(R) ((int)(R) == (int)X86_ST0)
-#define _r0P(R) ((int)(R) == (int)X86_NOREG)
-#define _rIP(R) (X86_TARGET_64BIT ? ((int)(R) == (int)X86_RIP) : 0) /* always 0 for 32-bit targets */
-
-#if X86_FLAT_REGISTERS
-#define _rC(R) ((R) & 0xf0) /* high nibble: one of the X86_Reg*_Base values */
-#define _rR(R) ((R) & 0x0f) /* low nibble: register number 0..15 */
-#define _rN(R) ((R) & 0x07) /* low 3 bits, as stored in ModR/M and SIB fields */
-#define _rXP(R) ((R) > 0 && _rR(R) > 7) /* numbers 8..15; negative pseudo registers excluded */
-#else
-#define _rN(R) ((R) & 0x07) /* low 3 bits, as stored in ModR/M and SIB fields */
-#define _rR(R) (int(R))
-#define _rXP(R) (_rR(R) > 7 && _rR(R) < 16) /* 16 and up is the 8-bit high range here */
-#endif
-
-/* Register ID accessors: each yields the short encoding number _rN(R).
- * Without _ASM_SAFETY, or with a non-flat register set, no class check is
- * done.  With _ASM_SAFETY and flat registers the register class is verified
- * first and x86_emit_failure0() is invoked on a mismatch. */
-#if !defined(_ASM_SAFETY) || ! X86_FLAT_REGISTERS
-#define _r1(R) _rN(R)
-#define _r2(R) _rN(R)
-#define _r4(R) _rN(R)
-#define _r8(R) _rN(R)
-#define _rA(R) _rN(R)
-#define _rM(R) _rN(R)
-#define _rX(R) _rN(R)
-#define _rF(R) _rN(R)
-#else
-/* _r1 compares the class for equality with the two 8-bit classes: masking
- * with (X86_Reg8L_Base | X86_Reg8H_Base) == 0x30 would also let the 16-bit
- * (0x30), 64-bit (0x50), MMX (0x60) and XMM (0x70) classes through. */
-#define _r1(R) ( ((_rC(R) == X86_Reg8L_Base) || (_rC(R) == X86_Reg8H_Base)) ? _rN(R) : x86_emit_failure0( "8-bit register required"))
-#define _r2(R) ( (_rC(R) == X86_Reg16_Base) ? _rN(R) : x86_emit_failure0("16-bit register required"))
-#define _r4(R) ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("32-bit register required"))
-#define _r8(R) ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("64-bit register required"))
-#define _rA(R) ( X86_TARGET_64BIT ? \
- ( (_rC(R) == X86_Reg64_Base) ? _rN(R) : x86_emit_failure0("not a valid 64-bit base/index expression")) : \
- ( (_rC(R) == X86_Reg32_Base) ? _rN(R) : x86_emit_failure0("not a valid 32-bit base/index expression")) )
-#define _rM(R) ( (_rC(R) == X86_RegMMX_Base) ? _rN(R) : x86_emit_failure0("MMX register required"))
-#define _rX(R) ( (_rC(R) == X86_RegXMM_Base) ? _rN(R) : x86_emit_failure0("SSE register required"))
-#define _rF(R) ( (_rC(R) == X86_RegFPU_Base) ? _rN(R) : x86_emit_failure0("FPU register required"))
-#endif
-
-#define _rSP() (X86_TARGET_64BIT ? (int)X86_RSP : (int)X86_ESP) /* stack pointer of the target's address size */
-#define _r1e8lP(R) (int(R) >= X86_SPL && int(R) <= X86_DIL) /* one of SPL, BPL, SIL, DIL? */
-#define _rbpP(R) (_rR(R) == _rR(X86_RBP)) /* full register number equals that of RBP (5)? */
-#define _rspP(R) (_rR(R) == _rR(X86_RSP)) /* full register number equals that of RSP (4)? */
-#define _rbp13P(R) (_rN(R) == _rN(X86_RBP)) /* low 3 bits are 5: matches number 5 and number 13 */
-#define _rsp12P(R) (_rN(R) == _rN(X86_RSP)) /* low 3 bits are 4: matches number 4 and number 12 */
-
-
-/* ========================================================================= */
-/* --- UTILITY ------------------------------------------------------------- */
-/* ========================================================================= */
-
-typedef signed char _sc; /* short aliases for the integer types used below */
-typedef unsigned char _uc;
-typedef signed short _ss;
-typedef unsigned short _us;
-typedef signed int _sl; /* note: the "long" aliases are plain int */
-typedef unsigned int _ul;
-/* Value casts: X goes through unsigned long first, then to the target type. */
-#define _UC(X) ((_uc )(unsigned long)(X))
-#define _US(X) ((_us )(unsigned long)(X))
-#define _SL(X) ((_sl )(unsigned long)(X))
-#define _UL(X) ((_ul )(unsigned long)(X))
-/* Pointer casts. */
-#define _PUC(X) ((_uc *)(X))
-#define _PUS(X) ((_us *)(X))
-#define _PSL(X) ((_sl *)(X))
-#define _PUL(X) ((_ul *)(X))
-/* Shorthands for the emitters supplied by the includer. */
-#define _B(B) x86_emit_byte((B))
-#define _W(W) x86_emit_word((W))
-#define _L(L) x86_emit_long((L))
-#define _Q(Q) x86_emit_quad((Q))
-
-#define _MASK(N) ((unsigned)((1<<(N)))-1) /* the N low bits set */
-#define _siP(N,I) (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N))) /* I fits a signed N-bit field? (bits N-1 and up must all be equal) */
-#define _uiP(N,I) (!(((unsigned)(I))&~_MASK(N))) /* I fits an unsigned N-bit field? */
-#define _suiP(N,I) (_siP(N,I) | _uiP(N,I)) /* I fits either way? */
-/* _ck_*(W,I): I truncated to W bits; with _ASM_SAFETY the range is checked first. */
-#ifndef _ASM_SAFETY
-#define _ck_s(W,I) (_UL(I) & _MASK(W))
-#define _ck_u(W,I) (_UL(I) & _MASK(W))
-#define _ck_su(W,I) (_UL(I) & _MASK(W))
-#define _ck_d(W,I) (_UL(I) & _MASK(W))
-#else
-#define _ck_s(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "signed integer `"#I"' too large for "#W"-bit field"))
-#define _ck_u(W,I) (_uiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0("unsigned integer `"#I"' too large for "#W"-bit field"))
-#define _ck_su(W,I) (_suiP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "integer `"#I"' too large for "#W"-bit field"))
-#define _ck_d(W,I) (_siP(W,I) ? (_UL(I) & _MASK(W)) : x86_emit_failure0( "displacement `"#I"' too large for "#W"-bit field"))
-#endif
-
-#define _s0P(I) ((I)==0) /* I is zero? */
-#define _s8P(I) _siP(8,I) /* I fits a signed 8-bit field? */
-#define _s16P(I) _siP(16,I)
-#define _u8P(I) _uiP(8,I) /* I fits an unsigned 8-bit field? */
-#define _u16P(I) _uiP(16,I)
-/* Immediates accepted in either signed or unsigned form. */
-#define _su8(I) _ck_su(8,I)
-#define _su16(I) _ck_su(16,I)
-/* _sN(I) / _uN(I): I as a signed / unsigned N-bit field, via _ck_s / _ck_u. */
-#define _s1(I) _ck_s( 1,I)
-#define _s2(I) _ck_s( 2,I)
-#define _s3(I) _ck_s( 3,I)
-#define _s4(I) _ck_s( 4,I)
-#define _s5(I) _ck_s( 5,I)
-#define _s6(I) _ck_s( 6,I)
-#define _s7(I) _ck_s( 7,I)
-#define _s8(I) _ck_s( 8,I)
-#define _s9(I) _ck_s( 9,I)
-#define _s10(I) _ck_s(10,I)
-#define _s11(I) _ck_s(11,I)
-#define _s12(I) _ck_s(12,I)
-#define _s13(I) _ck_s(13,I)
-#define _s14(I) _ck_s(14,I)
-#define _s15(I) _ck_s(15,I)
-#define _s16(I) _ck_s(16,I)
-#define _s17(I) _ck_s(17,I)
-#define _s18(I) _ck_s(18,I)
-#define _s19(I) _ck_s(19,I)
-#define _s20(I) _ck_s(20,I)
-#define _s21(I) _ck_s(21,I)
-#define _s22(I) _ck_s(22,I)
-#define _s23(I) _ck_s(23,I)
-#define _s24(I) _ck_s(24,I)
-#define _s25(I) _ck_s(25,I)
-#define _s26(I) _ck_s(26,I)
-#define _s27(I) _ck_s(27,I)
-#define _s28(I) _ck_s(28,I)
-#define _s29(I) _ck_s(29,I)
-#define _s30(I) _ck_s(30,I)
-#define _s31(I) _ck_s(31,I)
-#define _u1(I) _ck_u( 1,I)
-#define _u2(I) _ck_u( 2,I)
-#define _u3(I) _ck_u( 3,I)
-#define _u4(I) _ck_u( 4,I)
-#define _u5(I) _ck_u( 5,I)
-#define _u6(I) _ck_u( 6,I)
-#define _u7(I) _ck_u( 7,I)
-#define _u8(I) _ck_u( 8,I)
-#define _u9(I) _ck_u( 9,I)
-#define _u10(I) _ck_u(10,I)
-#define _u11(I) _ck_u(11,I)
-#define _u12(I) _ck_u(12,I)
-#define _u13(I) _ck_u(13,I)
-#define _u14(I) _ck_u(14,I)
-#define _u15(I) _ck_u(15,I)
-#define _u16(I) _ck_u(16,I)
-#define _u17(I) _ck_u(17,I)
-#define _u18(I) _ck_u(18,I)
-#define _u19(I) _ck_u(19,I)
-#define _u20(I) _ck_u(20,I)
-#define _u21(I) _ck_u(21,I)
-#define _u22(I) _ck_u(22,I)
-#define _u23(I) _ck_u(23,I)
-#define _u24(I) _ck_u(24,I)
-#define _u25(I) _ck_u(25,I)
-#define _u26(I) _ck_u(26,I)
-#define _u27(I) _ck_u(27,I)
-#define _u28(I) _ck_u(28,I)
-#define _u29(I) _ck_u(29,I)
-#define _u30(I) _ck_u(30,I)
-#define _u31(I) _ck_u(31,I)
-
-/* ========================================================================= */
-/* --- ASSEMBLER ----------------------------------------------------------- */
-/* ========================================================================= */
-
-#define _b00 0 /* 2-bit field values spelled in binary */
-#define _b01 1
-#define _b10 2
-#define _b11 3
-/* 3-bit field values spelled in binary. */
-#define _b000 0
-#define _b001 1
-#define _b010 2
-#define _b011 3
-#define _b100 4
-#define _b101 5
-#define _b110 6
-#define _b111 7
-/* Relative branch displacements, measured from the current emit position. */
-#define _OFF4(D) (_UL(D) - _UL(x86_get_target())) /* 32-bit difference D - current position */
-#define _CKD8(D) _ck_d(8, ((_uc) _OFF4(D)) ) /* same, reduced to 8 bits */
-/* Emit a zero placeholder first, then overwrite it: the offset is thus taken from the end of the field. */
-#define _D8(D) (_B(0), ((*(_PUC(x86_get_target())-1))= _CKD8(D)))
-#define _D32(D) (_L(0), ((*(_PUL(x86_get_target())-1))= _OFF4(D)))
-
-#ifndef _ASM_SAFETY /* field validators: pass-through unless _ASM_SAFETY is defined */
-# define _M(M) (M)
-# define _r(R) (R)
-# define _m(M) (M)
-# define _s(S) (S)
-# define _i(I) (I)
-# define _b(B) (B)
-#else
-# define _M(M) (((M)>3) ? x86_emit_failure0("internal error: mod = " #M) : (M))
-# define _r(R) (((R)>7) ? x86_emit_failure0("internal error: reg = " #R) : (R))
-# define _m(M) (((M)>7) ? x86_emit_failure0("internal error: r/m = " #M) : (M))
-# define _s(S) (((S)>3) ? x86_emit_failure0("internal error: memory scale = " #S) : (S))
-# define _i(I) (((I)>7) ? x86_emit_failure0("internal error: memory index = " #I) : (I))
-# define _b(B) (((B)>7) ? x86_emit_failure0("internal error: memory base = " #B) : (B))
-#endif
-/* One byte each: mod(2 bits) reg(3) r/m(3), and scale(2) index(3) base(3). */
-#define _Mrm(Md,R,M) _B((_M(Md)<<6)|(_r(R)<<3)|_m(M))
-#define _SIB(Sc,I, B) _B((_s(Sc)<<6)|(_i(I)<<3)|_b(B))
-/* Scale factor 1/2/4/8 -> 2-bit scale field; any other value is a failure. */
-#define _SCL(S) ((((S)==1) ? _b00 : \
- (((S)==2) ? _b01 : \
- (((S)==4) ? _b10 : \
- (((S)==8) ? _b11 : x86_emit_failure0("illegal scale: " #S))))))
-
-
-/* --- Memory subformats - urgh! ------------------------------------------- */
-/* Naming: _r_<disp><parts>.  disp: 0 = none, 1 = 8-bit, 4 = 32-bit, D = chosen
- * from the value; parts: B = base, I = index, S = scale.  R fills the reg field. */
-/* _r_D() is RIP addressing mode if X86_TARGET_64BIT, use _r_DSIB() instead */
-#define _r_D( R, D ) (_Mrm(_b00,_rN(R),_b101 ) ,_L((_sl)(D)))
-#define _r_DSIB(R, D ) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(1),_b100 ,_b101 ),_L((_sl)(D)))
-#define _r_0B( R, B ) (_Mrm(_b00,_rN(R),_rA(B)) )
-#define _r_0BIS(R, B,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)) )
-#define _r_1B( R, D,B ) (_Mrm(_b01,_rN(R),_rA(B)) ,_B((_sc)(D)))
-#define _r_1BIS(R, D,B,I,S) (_Mrm(_b01,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_B((_sc)(D)))
-#define _r_4B( R, D,B ) (_Mrm(_b10,_rN(R),_rA(B)) ,_L((_sl)(D)))
-#define _r_4IS( R, D,I,S) (_Mrm(_b00,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_b101 ),_L((_sl)(D)))
-#define _r_4BIS(R, D,B,I,S) (_Mrm(_b10,_rN(R),_b100 ),_SIB(_SCL(S),_rA(I),_rA(B)),_L((_sl)(D)))
-/* Shortest displacement: none if D is 0 (never for base numbers 5/13), 8-bit if it fits, else 32-bit. */
-#define _r_DB( R, D,B ) ((_s0P(D) && (!_rbp13P(B)) ? _r_0B (R, B ) : (_s8P(D) ? _r_1B( R,D,B ) : _r_4B( R,D,B ))))
-#define _r_DBIS(R, D,B,I,S) ((_s0P(D) && (!_rbp13P(B)) ? _r_0BIS(R, B,I,S) : (_s8P(D) ? _r_1BIS(R,D,B,I,S) : _r_4BIS(R,D,B,I,S))))
-
-/* Use RIP-addressing in 64-bit mode, if possible.
- * D is the absolute address to reach, O the number of immediate bytes that
- * follow the 32-bit displacement.  _r_X() emits the displacement
- * D - (x86_get_target() + 4 + O) as a signed 32-bit value, so that distance
- * has to lie within [-2^31, 2^31).  Biasing it by 2^31 turns the signed range
- * test into one unsigned comparison.  (A plain unsigned "<= 0xffffffff" test
- * on the reverse difference would accept targets 2..4 GB below the
- * instruction, which do not fit, and reject every target above it.) */
-#define _x86_RIP_addressing_possible(D,O) (X86_RIP_RELATIVE_ADDR && \
- ((uintptr)(D) - ((uintptr)x86_get_target() + 4 + (O)) + 0x80000000UL <= 0xffffffffUL))
-
-#define _r_X( R, D,B,I,S,O) (_r0P(I) ? (_r0P(B) ? (!X86_TARGET_64BIT ? _r_D(R,D) : \
- (_x86_RIP_addressing_possible(D, O) ? \
- _r_D(R, (D) - ((uintptr)x86_get_target() + 4 + (O))) : \
- _r_DSIB(R,D))) : \
- (_rIP(B) ? _r_D (R,D ) : \
- (_rsp12P(B) ? _r_DBIS(R,D,_rSP(),_rSP(),1) : \
- _r_DB (R,D, B )))) : \
- (_r0P(B) ? _r_4IS (R,D, I,S) : \
- (!_rspP(I) ? _r_DBIS(R,D, B, I,S) : \
- x86_emit_failure("illegal index register: %esp")))) /* _r_X: emit the ModR/M (+SIB, +displacement) bytes for operand D(B,I,S) with R in the reg field.
- * O is the number of immediate bytes following the displacement; it only matters for the RIP-relative form.
- * No index, no base: plain disp32 (32-bit target), RIP-relative or SIB absolute (64-bit target).
- * No index, base X86_RIP: D is emitted as is.  No index, base number 4/12: a SIB byte is emitted.
- * Index without base: disp32 + scaled index.  Register number 4 as index is rejected. */
-
-
-/* --- Instruction formats ------------------------------------------------- */
-/* Mode guards: X is emitted only for the matching target; _m64 silently drops X on 32-bit targets. */
-#define _m32only(X) (! X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 64-bit mode"))
-#define _m64only(X) ( X86_TARGET_64BIT ? X : x86_emit_failure("invalid instruction in 32-bit mode"))
-#define _m64(X) ( X86_TARGET_64BIT ? X : ((void)0) )
-
-/* _format Opcd ModR/M dN(rB,rI,Sc) imm...
- *
- * Name parts: O = one opcode byte, OO = two opcode bytes, r = register
- * number OR-ed into the opcode, s = immediate shortened to 8 bits when it
- * fits, Mrm = explicit ModR/M byte, r_X = register + memory operand,
- * B/W/L/Q = 8/16/32/64-bit immediate, D8/D32 = relative displacement. */
-
-#define _d16() ( _B(0x66 ) ) /* operand-size prefix */
-#define _O( OP ) ( _B( OP ) )
-#define _Or( OP,R ) ( _B( (OP)|_r(R)) )
-#define _OO( OP ) ( _B((OP)>>8), _B(( (OP) )&0xff) ) /* high byte first */
-#define _OOr( OP,R ) ( _B((OP)>>8), _B(( (OP)|_r(R))&0xff) )
-#define _Os( OP,B ) ( _s8P(B) ? _B(((OP)|_b10)) : _B(OP) ) /* sets opcode bit 1 when B fits 8 signed bits */
-#define _sW( W ) ( _s8P(W) ? _B(W):_W(W) ) /* immediate as a byte if it fits, else a word */
-#define _sL( L ) ( _s8P(L) ? _B(L):_L(L) ) /* immediate as a byte if it fits, else a long */
-#define _sWO( W ) ( _s8P(W) ? 1 : 2 ) /* number of bytes _sW(W) emits */
-#define _sLO( L ) ( _s8P(L) ? 1 : 4 ) /* number of bytes _sL(L) emits */
-#define _O_B( OP ,B ) ( _O ( OP ) ,_B(B) )
-#define _O_W( OP ,W ) ( _O ( OP ) ,_W(W) )
-#define _O_L( OP ,L ) ( _O ( OP ) ,_L(L) )
-#define _OO_L( OP ,L ) ( _OO ( OP ) ,_L(L) )
-#define _O_D8( OP ,D ) ( _O ( OP ) ,_D8(D) )
-#define _O_D32( OP ,D ) ( _O ( OP ) ,_D32(D) )
-#define _OO_D32( OP ,D ) ( _OO ( OP ) ,_D32(D) )
-#define _Os_sW( OP ,W ) ( _Os ( OP,W) ,_sW(W) )
-#define _Os_sL( OP ,L ) ( _Os ( OP,L) ,_sL(L) )
-#define _O_W_B( OP ,W,B) ( _O ( OP ) ,_W(W),_B(B))
-#define _Or_B( OP,R ,B ) ( _Or ( OP,R) ,_B(B) )
-#define _Or_W( OP,R ,W ) ( _Or ( OP,R) ,_W(W) )
-#define _Or_L( OP,R ,L ) ( _Or ( OP,R) ,_L(L) )
-#define _Or_Q( OP,R ,Q ) ( _Or ( OP,R) ,_Q(Q) )
-#define _O_Mrm( OP ,MO,R,M ) ( _O ( OP ),_Mrm(MO,R,M ) )
-#define _OO_Mrm( OP ,MO,R,M ) ( _OO ( OP ),_Mrm(MO,R,M ) )
-#define _O_Mrm_B( OP ,MO,R,M ,B ) ( _O ( OP ),_Mrm(MO,R,M ) ,_B(B) )
-#define _O_Mrm_W( OP ,MO,R,M ,W ) ( _O ( OP ),_Mrm(MO,R,M ) ,_W(W) )
-#define _O_Mrm_L( OP ,MO,R,M ,L ) ( _O ( OP ),_Mrm(MO,R,M ) ,_L(L) )
-#define _OO_Mrm_B( OP ,MO,R,M ,B ) ( _OO ( OP ),_Mrm(MO,R,M ) ,_B(B) )
-#define _Os_Mrm_sW(OP ,MO,R,M ,W ) ( _Os ( OP,W),_Mrm(MO,R,M ),_sW(W) )
-#define _Os_Mrm_sL(OP ,MO,R,M ,L ) ( _Os ( OP,L),_Mrm(MO,R,M ),_sL(L) )
-#define _O_r_X( OP ,R ,MD,MB,MI,MS ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,0) ) /* last _r_X argument: immediate bytes that follow */
-#define _OO_r_X( OP ,R ,MD,MB,MI,MS ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,0) )
-#define _O_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) )
-#define _O_r_X_W( OP ,R ,MD,MB,MI,MS,W ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,2) ,_W(W) )
-#define _O_r_X_L( OP ,R ,MD,MB,MI,MS,L ) ( _O ( OP ),_r_X( R ,MD,MB,MI,MS,4) ,_L(L) )
-#define _OO_r_X_B( OP ,R ,MD,MB,MI,MS,B ) ( _OO ( OP ),_r_X( R ,MD,MB,MI,MS,1) ,_B(B) )
-#define _Os_r_X_sW(OP ,R ,MD,MB,MI,MS,W ) ( _Os ( OP,W),_r_X( R ,MD,MB,MI,MS,_sWO(W)),_sW(W))
-#define _Os_r_X_sL(OP ,R ,MD,MB,MI,MS,L ) ( _Os ( OP,L),_r_X( R ,MD,MB,MI,MS,_sLO(L)),_sL(L))
-#define _O_X_B( OP ,MD,MB,MI,MS,B ) ( _O_r_X_B( OP ,0 ,MD,MB,MI,MS ,B) ) /* memory-only forms: reg field is 0 */
-#define _O_X_W( OP ,MD,MB,MI,MS,W ) ( _O_r_X_W( OP ,0 ,MD,MB,MI,MS ,W) )
-#define _O_X_L( OP ,MD,MB,MI,MS,L ) ( _O_r_X_L( OP ,0 ,MD,MB,MI,MS ,L) )
-
-
-/* --- REX prefixes -------------------------------------------------------- */
-/* Prefix byte layout: 0x40 | W<<3 | R<<2 | X<<1 | B. */
-#define _VOID() ((void)0)
-#define _BIT(X) (!!(X)) /* normalize to 0 or 1 */
-#define _d64(W,R,X,B) (_B(0x40|(W)<<3|(R)<<2|(X)<<1|(B)))
-/* L forces a prefix even when all four bits are 0; otherwise none is emitted in that case. */
-#define __REXwrxb(L,W,R,X,B) ((W|R|X|B) || (L) ? _d64(W,R,X,B) : _VOID())
-#define __REXwrx_(L,W,R,X,MR) (__REXwrxb(L,W,R,X,_BIT(_rIP(MR)?0:_rXP(MR)))) /* B from register MR (0 for X86_RIP) */
-#define __REXw_x_(L,W,R,X,MR) (__REXwrx_(L,W,_BIT(_rXP(R)),X,MR)) /* R from register R as well */
-#define __REX_reg(RR) (__REXwrxb(0,0,0,00,_BIT(_rXP(RR))))
-#define __REX_mem(MB,MI) (__REXwrxb(0,0,0,_BIT(_rXP(MI)),_BIT(_rXP(MB))))
-/* Name parts: B = byte operands (prefix forced for SPL/BPL/SIL/DIL), L = no W bit, Q = W bit set (64-bit targets only). */
-// FIXME: can't mix new (SPL,BPL,SIL,DIL) with (AH,BH,CH,DH)
-#define _REXBrr(RR,MR) _m64(__REXw_x_(_r1e8lP(RR)||_r1e8lP(MR),0,RR,0,MR))
-#define _REXBmr(MB,MI,RD) _m64(__REXw_x_(_r1e8lP(RD)||_r1e8lP(MB),0,RD,_BIT(_rXP(MI)),MB))
-#define _REXBrm(RS,MB,MI) _REXBmr(MB,MI,RS)
-
-#define _REXBLrr(RR,MR) _m64(__REXw_x_(_r1e8lP(MR),0,RR,0,MR))
-#define _REXLrr(RR,MR) _m64(__REXw_x_(0,0,RR,0,MR))
-#define _REXLmr(MB,MI,RD) _m64(__REXw_x_(0,0,RD,_BIT(_rXP(MI)),MB))
-#define _REXLrm(RS,MB,MI) _REXLmr(MB,MI,RS)
-#define _REXLr(RR) _m64(__REX_reg(RR))
-#define _REXLm(MB,MI) _m64(__REX_mem(MB,MI))
-
-#define _REXQrr(RR,MR) _m64only(__REXw_x_(0,1,RR,0,MR))
-#define _REXQmr(MB,MI,RD) _m64only(__REXw_x_(0,1,RD,_BIT(_rXP(MI)),MB))
-#define _REXQrm(RS,MB,MI) _REXQmr(MB,MI,RS)
-#define _REXQr(RR) _m64only(__REX_reg(RR))
-#define _REXQm(MB,MI) _m64only(__REX_mem(MB,MI))
-
-
-/* ========================================================================= */
-/* --- Fully-qualified intrinsic instructions ------------------------------ */
-/* ========================================================================= */
-
-/* OPCODE + i = immediate operand
- * + r = register operand
- * + m = memory operand (disp,base,index,scale)
- * + sr/sm = a star preceding a register or memory
- * + 0 = top of stack register (for FPU instructions)
- *
- * NOTE in x86-64 mode: a memory operand with only a valid
- * displacement value will lead to the expected absolute mode. If
- * RIP addressing is necessary, X86_RIP shall be used as the base
- * register argument.
- */
-
-/* --- ALU instructions ---------------------------------------------------- */
-
-enum { /* ALU operation numbers; the _ALU* templates use them as (OP << 3) in the opcode or as the reg field of 0x80/0x81 */
- X86_ADD = 0,
- X86_OR = 1,
- X86_ADC = 2,
- X86_SBB = 3,
- X86_AND = 4,
- X86_SUB = 5,
- X86_XOR = 6,
- X86_CMP = 7,
-};
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm...
- *
- * _ALU<size><form>(OP, ...): size B/W/L/Q = 8/16/32/64-bit operands.
- * Forms: rr = register to register, mr = memory to register,
- * rm = register to memory, ir = immediate to register,
- * im = immediate to memory.  Source operand(s) come first, destination last.
- * A memory operand is given as MD (displacement), MB (base), MI (index),
- * MS (scale).  With X86_OPTIMIZE_ALU the ir forms pick the accumulator
- * opcode ((OP << 3) + 4 or + 5) when RD is AL/AX/EAX/RAX. */
-
-#define _ALUBrr(OP,RS, RD) (_REXBrr(RS, RD), _O_Mrm (((OP) << 3) ,_b11,_r1(RS),_r1(RD) ))
-#define _ALUBmr(OP, MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (((OP) << 3) + 2 ,_r1(RD) ,MD,MB,MI,MS ))
-#define _ALUBrm(OP, RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (((OP) << 3) ,_r1(RS) ,MD,MB,MI,MS ))
-#define _ALUBir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
- (_REXBrr(0, RD), _O_B (((OP) << 3) + 4 ,_su8(IM))) : \
- (_REXBrr(0, RD), _O_Mrm_B (0x80 ,_b11,OP ,_r1(RD) ,_su8(IM))) )
-#define _ALUBim(OP, IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0x80 ,OP ,MD,MB,MI,MS ,_su8(IM)))
-
-#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) ))
-#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS ))
-#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS ))
-#define _ALUWir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
- (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \
- (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) )
-#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM)))
-
-#define _ALULrr(OP, RS, RD) (_REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r4(RS),_r4(RD) ))
-#define _ALULmr(OP, MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r4(RD) ,MD,MB,MI,MS ))
-#define _ALULrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r4(RS) ,MD,MB,MI,MS ))
-#define _ALULir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \
- (_REXLrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \
- (_REXLrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r4(RD) ,IM )) )
-#define _ALULim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM ))
-
-#define _ALUQrr(OP, RS, RD) (_REXQrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r8(RS),_r8(RD) ))
-#define _ALUQmr(OP, MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r8(RD) ,MD,MB,MI,MS ))
-#define _ALUQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r8(RS) ,MD,MB,MI,MS ))
-#define _ALUQir(OP, IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \
- (_REXQrr(0, RD), _O_L (((OP) << 3) + 5 ,IM )) : \
- (_REXQrr(0, RD), _Os_Mrm_sL (0x81 ,_b11,OP ,_r8(RD) ,IM )) )
-#define _ALUQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _Os_r_X_sL (0x81 ,OP ,MD,MB,MI,MS ,IM ))
-
-#define ADCBrr(RS, RD) _ALUBrr(X86_ADC, RS, RD) /* ADC family: the _ALU* templates bound to X86_ADC; B/W/L/Q = operand size */
-#define ADCBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADC, MD, MB, MI, MS, RD)
-#define ADCBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADC, RS, MD, MB, MI, MS)
-#define ADCBir(IM, RD) _ALUBir(X86_ADC, IM, RD)
-#define ADCBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADC, IM, MD, MB, MI, MS)
-
-#define ADCWrr(RS, RD) _ALUWrr(X86_ADC, RS, RD)
-#define ADCWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADC, MD, MB, MI, MS, RD)
-#define ADCWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADC, RS, MD, MB, MI, MS)
-#define ADCWir(IM, RD) _ALUWir(X86_ADC, IM, RD)
-#define ADCWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADC, IM, MD, MB, MI, MS)
-
-#define ADCLrr(RS, RD) _ALULrr(X86_ADC, RS, RD)
-#define ADCLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADC, MD, MB, MI, MS, RD)
-#define ADCLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADC, RS, MD, MB, MI, MS)
-#define ADCLir(IM, RD) _ALULir(X86_ADC, IM, RD)
-#define ADCLim(IM, MD, MB, MI, MS) _ALULim(X86_ADC, IM, MD, MB, MI, MS)
-
-#define ADCQrr(RS, RD) _ALUQrr(X86_ADC, RS, RD)
-#define ADCQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADC, MD, MB, MI, MS, RD)
-#define ADCQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADC, RS, MD, MB, MI, MS)
-#define ADCQir(IM, RD) _ALUQir(X86_ADC, IM, RD)
-#define ADCQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADC, IM, MD, MB, MI, MS)
-
-#define ADDBrr(RS, RD) _ALUBrr(X86_ADD, RS, RD) /* ADD family: the _ALU* templates bound to X86_ADD; B/W/L/Q = operand size */
-#define ADDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_ADD, MD, MB, MI, MS, RD)
-#define ADDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_ADD, RS, MD, MB, MI, MS)
-#define ADDBir(IM, RD) _ALUBir(X86_ADD, IM, RD)
-#define ADDBim(IM, MD, MB, MI, MS) _ALUBim(X86_ADD, IM, MD, MB, MI, MS)
-
-#define ADDWrr(RS, RD) _ALUWrr(X86_ADD, RS, RD)
-#define ADDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_ADD, MD, MB, MI, MS, RD)
-#define ADDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_ADD, RS, MD, MB, MI, MS)
-#define ADDWir(IM, RD) _ALUWir(X86_ADD, IM, RD)
-#define ADDWim(IM, MD, MB, MI, MS) _ALUWim(X86_ADD, IM, MD, MB, MI, MS)
-
-#define ADDLrr(RS, RD) _ALULrr(X86_ADD, RS, RD)
-#define ADDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_ADD, MD, MB, MI, MS, RD)
-#define ADDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_ADD, RS, MD, MB, MI, MS)
-#define ADDLir(IM, RD) _ALULir(X86_ADD, IM, RD)
-#define ADDLim(IM, MD, MB, MI, MS) _ALULim(X86_ADD, IM, MD, MB, MI, MS)
-
-#define ADDQrr(RS, RD) _ALUQrr(X86_ADD, RS, RD)
-#define ADDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_ADD, MD, MB, MI, MS, RD)
-#define ADDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_ADD, RS, MD, MB, MI, MS)
-#define ADDQir(IM, RD) _ALUQir(X86_ADD, IM, RD)
-#define ADDQim(IM, MD, MB, MI, MS) _ALUQim(X86_ADD, IM, MD, MB, MI, MS)
-
-#define ANDBrr(RS, RD) _ALUBrr(X86_AND, RS, RD) /* AND family: the _ALU* templates bound to X86_AND; B/W/L/Q = operand size */
-#define ANDBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_AND, MD, MB, MI, MS, RD)
-#define ANDBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_AND, RS, MD, MB, MI, MS)
-#define ANDBir(IM, RD) _ALUBir(X86_AND, IM, RD)
-#define ANDBim(IM, MD, MB, MI, MS) _ALUBim(X86_AND, IM, MD, MB, MI, MS)
-
-#define ANDWrr(RS, RD) _ALUWrr(X86_AND, RS, RD)
-#define ANDWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_AND, MD, MB, MI, MS, RD)
-#define ANDWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_AND, RS, MD, MB, MI, MS)
-#define ANDWir(IM, RD) _ALUWir(X86_AND, IM, RD)
-#define ANDWim(IM, MD, MB, MI, MS) _ALUWim(X86_AND, IM, MD, MB, MI, MS)
-
-#define ANDLrr(RS, RD) _ALULrr(X86_AND, RS, RD)
-#define ANDLmr(MD, MB, MI, MS, RD) _ALULmr(X86_AND, MD, MB, MI, MS, RD)
-#define ANDLrm(RS, MD, MB, MI, MS) _ALULrm(X86_AND, RS, MD, MB, MI, MS)
-#define ANDLir(IM, RD) _ALULir(X86_AND, IM, RD)
-#define ANDLim(IM, MD, MB, MI, MS) _ALULim(X86_AND, IM, MD, MB, MI, MS)
-
-#define ANDQrr(RS, RD) _ALUQrr(X86_AND, RS, RD)
-#define ANDQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_AND, MD, MB, MI, MS, RD)
-#define ANDQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_AND, RS, MD, MB, MI, MS)
-#define ANDQir(IM, RD) _ALUQir(X86_AND, IM, RD)
-#define ANDQim(IM, MD, MB, MI, MS) _ALUQim(X86_AND, IM, MD, MB, MI, MS)
-
-#define CMPBrr(RS, RD) _ALUBrr(X86_CMP, RS, RD) /* CMP family: the _ALU* templates bound to X86_CMP; B/W/L/Q = operand size */
-#define CMPBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_CMP, MD, MB, MI, MS, RD)
-#define CMPBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_CMP, RS, MD, MB, MI, MS)
-#define CMPBir(IM, RD) _ALUBir(X86_CMP, IM, RD)
-#define CMPBim(IM, MD, MB, MI, MS) _ALUBim(X86_CMP, IM, MD, MB, MI, MS)
-
-#define CMPWrr(RS, RD) _ALUWrr(X86_CMP, RS, RD)
-#define CMPWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_CMP, MD, MB, MI, MS, RD)
-#define CMPWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_CMP, RS, MD, MB, MI, MS)
-#define CMPWir(IM, RD) _ALUWir(X86_CMP, IM, RD)
-#define CMPWim(IM, MD, MB, MI, MS) _ALUWim(X86_CMP, IM, MD, MB, MI, MS)
-
-#define CMPLrr(RS, RD) _ALULrr(X86_CMP, RS, RD)
-#define CMPLmr(MD, MB, MI, MS, RD) _ALULmr(X86_CMP, MD, MB, MI, MS, RD)
-#define CMPLrm(RS, MD, MB, MI, MS) _ALULrm(X86_CMP, RS, MD, MB, MI, MS)
-#define CMPLir(IM, RD) _ALULir(X86_CMP, IM, RD)
-#define CMPLim(IM, MD, MB, MI, MS) _ALULim(X86_CMP, IM, MD, MB, MI, MS)
-
-#define CMPQrr(RS, RD) _ALUQrr(X86_CMP, RS, RD)
-#define CMPQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_CMP, MD, MB, MI, MS, RD)
-#define CMPQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_CMP, RS, MD, MB, MI, MS)
-#define CMPQir(IM, RD) _ALUQir(X86_CMP, IM, RD)
-#define CMPQim(IM, MD, MB, MI, MS) _ALUQim(X86_CMP, IM, MD, MB, MI, MS)
-
-#define ORBrr(RS, RD) _ALUBrr(X86_OR, RS, RD) /* OR family: the _ALU* templates bound to X86_OR; B/W/L/Q = operand size */
-#define ORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_OR, MD, MB, MI, MS, RD)
-#define ORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_OR, RS, MD, MB, MI, MS)
-#define ORBir(IM, RD) _ALUBir(X86_OR, IM, RD)
-#define ORBim(IM, MD, MB, MI, MS) _ALUBim(X86_OR, IM, MD, MB, MI, MS)
-
-#define ORWrr(RS, RD) _ALUWrr(X86_OR, RS, RD)
-#define ORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_OR, MD, MB, MI, MS, RD)
-#define ORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_OR, RS, MD, MB, MI, MS)
-#define ORWir(IM, RD) _ALUWir(X86_OR, IM, RD)
-#define ORWim(IM, MD, MB, MI, MS) _ALUWim(X86_OR, IM, MD, MB, MI, MS)
-
-#define ORLrr(RS, RD) _ALULrr(X86_OR, RS, RD)
-#define ORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_OR, MD, MB, MI, MS, RD)
-#define ORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_OR, RS, MD, MB, MI, MS)
-#define ORLir(IM, RD) _ALULir(X86_OR, IM, RD)
-#define ORLim(IM, MD, MB, MI, MS) _ALULim(X86_OR, IM, MD, MB, MI, MS)
-
-#define ORQrr(RS, RD) _ALUQrr(X86_OR, RS, RD)
-#define ORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_OR, MD, MB, MI, MS, RD)
-#define ORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_OR, RS, MD, MB, MI, MS)
-#define ORQir(IM, RD) _ALUQir(X86_OR, IM, RD)
-#define ORQim(IM, MD, MB, MI, MS) _ALUQim(X86_OR, IM, MD, MB, MI, MS)
-
-#define SBBBrr(RS, RD) _ALUBrr(X86_SBB, RS, RD) /* SBB family: the _ALU* templates bound to X86_SBB; B/W/L/Q = operand size */
-#define SBBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SBB, MD, MB, MI, MS, RD)
-#define SBBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SBB, RS, MD, MB, MI, MS)
-#define SBBBir(IM, RD) _ALUBir(X86_SBB, IM, RD)
-#define SBBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SBB, IM, MD, MB, MI, MS)
-
-#define SBBWrr(RS, RD) _ALUWrr(X86_SBB, RS, RD)
-#define SBBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SBB, MD, MB, MI, MS, RD)
-#define SBBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SBB, RS, MD, MB, MI, MS)
-#define SBBWir(IM, RD) _ALUWir(X86_SBB, IM, RD)
-#define SBBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SBB, IM, MD, MB, MI, MS)
-
-#define SBBLrr(RS, RD) _ALULrr(X86_SBB, RS, RD)
-#define SBBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SBB, MD, MB, MI, MS, RD)
-#define SBBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SBB, RS, MD, MB, MI, MS)
-#define SBBLir(IM, RD) _ALULir(X86_SBB, IM, RD)
-#define SBBLim(IM, MD, MB, MI, MS) _ALULim(X86_SBB, IM, MD, MB, MI, MS)
-
-#define SBBQrr(RS, RD) _ALUQrr(X86_SBB, RS, RD)
-#define SBBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SBB, MD, MB, MI, MS, RD)
-#define SBBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SBB, RS, MD, MB, MI, MS)
-#define SBBQir(IM, RD) _ALUQir(X86_SBB, IM, RD)
-#define SBBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SBB, IM, MD, MB, MI, MS)
-
-/* SUB in all four operand sizes (B, W, L, Q). Every macro forwards its
- * operands unchanged to the size-matched _ALU* encoder with X86_SUB as the
- * operation selector. */
-#define SUBBrr(RS, RD) _ALUBrr(X86_SUB, RS, RD)
-#define SUBBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_SUB, MD, MB, MI, MS, RD)
-#define SUBBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_SUB, RS, MD, MB, MI, MS)
-#define SUBBir(IM, RD) _ALUBir(X86_SUB, IM, RD)
-#define SUBBim(IM, MD, MB, MI, MS) _ALUBim(X86_SUB, IM, MD, MB, MI, MS)
-
-#define SUBWrr(RS, RD) _ALUWrr(X86_SUB, RS, RD)
-#define SUBWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_SUB, MD, MB, MI, MS, RD)
-#define SUBWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_SUB, RS, MD, MB, MI, MS)
-#define SUBWir(IM, RD) _ALUWir(X86_SUB, IM, RD)
-#define SUBWim(IM, MD, MB, MI, MS) _ALUWim(X86_SUB, IM, MD, MB, MI, MS)
-
-#define SUBLrr(RS, RD) _ALULrr(X86_SUB, RS, RD)
-#define SUBLmr(MD, MB, MI, MS, RD) _ALULmr(X86_SUB, MD, MB, MI, MS, RD)
-#define SUBLrm(RS, MD, MB, MI, MS) _ALULrm(X86_SUB, RS, MD, MB, MI, MS)
-#define SUBLir(IM, RD) _ALULir(X86_SUB, IM, RD)
-#define SUBLim(IM, MD, MB, MI, MS) _ALULim(X86_SUB, IM, MD, MB, MI, MS)
-
-#define SUBQrr(RS, RD) _ALUQrr(X86_SUB, RS, RD)
-#define SUBQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_SUB, MD, MB, MI, MS, RD)
-#define SUBQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_SUB, RS, MD, MB, MI, MS)
-#define SUBQir(IM, RD) _ALUQir(X86_SUB, IM, RD)
-#define SUBQim(IM, MD, MB, MI, MS) _ALUQim(X86_SUB, IM, MD, MB, MI, MS)
-
-/* XOR in all four operand sizes (B, W, L, Q). Every macro forwards its
- * operands unchanged to the size-matched _ALU* encoder with X86_XOR as the
- * operation selector. */
-#define XORBrr(RS, RD) _ALUBrr(X86_XOR, RS, RD)
-#define XORBmr(MD, MB, MI, MS, RD) _ALUBmr(X86_XOR, MD, MB, MI, MS, RD)
-#define XORBrm(RS, MD, MB, MI, MS) _ALUBrm(X86_XOR, RS, MD, MB, MI, MS)
-#define XORBir(IM, RD) _ALUBir(X86_XOR, IM, RD)
-#define XORBim(IM, MD, MB, MI, MS) _ALUBim(X86_XOR, IM, MD, MB, MI, MS)
-
-#define XORWrr(RS, RD) _ALUWrr(X86_XOR, RS, RD)
-#define XORWmr(MD, MB, MI, MS, RD) _ALUWmr(X86_XOR, MD, MB, MI, MS, RD)
-#define XORWrm(RS, MD, MB, MI, MS) _ALUWrm(X86_XOR, RS, MD, MB, MI, MS)
-#define XORWir(IM, RD) _ALUWir(X86_XOR, IM, RD)
-#define XORWim(IM, MD, MB, MI, MS) _ALUWim(X86_XOR, IM, MD, MB, MI, MS)
-
-#define XORLrr(RS, RD) _ALULrr(X86_XOR, RS, RD)
-#define XORLmr(MD, MB, MI, MS, RD) _ALULmr(X86_XOR, MD, MB, MI, MS, RD)
-#define XORLrm(RS, MD, MB, MI, MS) _ALULrm(X86_XOR, RS, MD, MB, MI, MS)
-#define XORLir(IM, RD) _ALULir(X86_XOR, IM, RD)
-#define XORLim(IM, MD, MB, MI, MS) _ALULim(X86_XOR, IM, MD, MB, MI, MS)
-
-#define XORQrr(RS, RD) _ALUQrr(X86_XOR, RS, RD)
-#define XORQmr(MD, MB, MI, MS, RD) _ALUQmr(X86_XOR, MD, MB, MI, MS, RD)
-#define XORQrm(RS, MD, MB, MI, MS) _ALUQrm(X86_XOR, RS, MD, MB, MI, MS)
-#define XORQir(IM, RD) _ALUQir(X86_XOR, IM, RD)
-#define XORQim(IM, MD, MB, MI, MS) _ALUQim(X86_XOR, IM, MD, MB, MI, MS)
-
-
-/* --- Shift/Rotate instructions ------------------------------------------- */
-
-/* Operation selectors for the _ROTSHI* macros below, which pass the value
- * as OP into the "r" slot of the encoded instruction. Value 6 is not
- * assigned here: SHL is 4 and SAR is 7. */
-enum {
- X86_ROL = 0,
- X86_ROR = 1,
- X86_RCL = 2,
- X86_RCR = 3,
- X86_SHL = 4,
- X86_SHR = 5,
- X86_SAR = 7,
-};
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-/* Byte-sized shift/rotate encoders; OP is one of the X86_ROL..X86_SAR
- * selectors.
- *   ir/im: when X86_OPTIMIZE_ROTSHI is non-zero and IM == 1, opcode 0xd0 is
- *          emitted with no immediate; otherwise opcode 0xc0 followed by
- *          _u8(IM). IM appears twice in the expansion, so it should be free
- *          of side effects.
- *   rr/rm: the count register RS must be X86_CL (opcode 0xd2); any other
- *          value expands to x86_emit_failure(). */
-#define _ROTSHIBir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXBrr(0, RD), _O_Mrm (0xd0 ,_b11,OP,_r1(RD) )) : \
- (_REXBrr(0, RD), _O_Mrm_B (0xc0 ,_b11,OP,_r1(RD) ,_u8(IM))) )
-#define _ROTSHIBim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXBrm(0, MB, MI), _O_r_X (0xd0 ,OP ,MD,MB,MI,MS )) : \
- (_REXBrm(0, MB, MI), _O_r_X_B (0xc0 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
-#define _ROTSHIBrr(OP,RS,RD) (((RS) == X86_CL) ? \
- (_REXBrr(RS, RD), _O_Mrm (0xd2 ,_b11,OP,_r1(RD) )) : \
- x86_emit_failure("source register must be CL" ) )
-#define _ROTSHIBrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
- (_REXBrm(RS, MB, MI), _O_r_X (0xd2 ,OP ,MD,MB,MI,MS )) : \
- x86_emit_failure("source register must be CL" ) )
-
-/* 16-bit shift/rotate encoders: every form first emits _d16().
- *   ir/im: opcode 0xd1 (no immediate) when X86_OPTIMIZE_ROTSHI is non-zero
- *          and IM == 1, otherwise opcode 0xc1 followed by _u8(IM).
- *   rr/rm: RS must be X86_CL (opcode 0xd3); otherwise x86_emit_failure(). */
-#define _ROTSHIWir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_d16(), _REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r2(RD) )) : \
- (_d16(), _REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r2(RD) ,_u8(IM))) )
-#define _ROTSHIWim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
- (_d16(), _REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
-#define _ROTSHIWrr(OP,RS,RD) (((RS) == X86_CL) ? \
- (_d16(), _REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r2(RD) )) : \
- x86_emit_failure("source register must be CL" ) )
-#define _ROTSHIWrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
- (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
- x86_emit_failure("source register must be CL" ) )
-
-/* 32-bit shift/rotate encoders; same opcodes as the 16-bit forms but
- * without _d16() and with _r4() register checks.
- *   ir/im: opcode 0xd1 (no immediate) when X86_OPTIMIZE_ROTSHI is non-zero
- *          and IM == 1, otherwise opcode 0xc1 followed by _u8(IM).
- *   rr/rm: RS must be X86_CL (opcode 0xd3); otherwise x86_emit_failure(). */
-#define _ROTSHILir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXLrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r4(RD) )) : \
- (_REXLrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r4(RD) ,_u8(IM))) )
-#define _ROTSHILim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXLrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
- (_REXLrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
-#define _ROTSHILrr(OP,RS,RD) (((RS) == X86_CL) ? \
- (_REXLrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r4(RD) )) : \
- x86_emit_failure("source register must be CL" ) )
-#define _ROTSHILrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
- (_REXLrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
- x86_emit_failure("source register must be CL" ) )
-
-/* 64-bit shift/rotate encoders; same opcodes as the 32-bit forms but
- * prefixed through _REXQ* and with _r8() register checks.
- *   ir/im: opcode 0xd1 (no immediate) when X86_OPTIMIZE_ROTSHI is non-zero
- *          and IM == 1, otherwise opcode 0xc1 followed by _u8(IM).
- *   rr/rm: RS must be X86_CL (opcode 0xd3); otherwise x86_emit_failure(). */
-#define _ROTSHIQir(OP,IM,RD) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXQrr(0, RD), _O_Mrm (0xd1 ,_b11,OP,_r8(RD) )) : \
- (_REXQrr(0, RD), _O_Mrm_B (0xc1 ,_b11,OP,_r8(RD) ,_u8(IM))) )
-#define _ROTSHIQim(OP,IM,MD,MB,MI,MS) (X86_OPTIMIZE_ROTSHI && ((IM) == 1) ? \
- (_REXQrm(0, MB, MI), _O_r_X (0xd1 ,OP ,MD,MB,MI,MS )) : \
- (_REXQrm(0, MB, MI), _O_r_X_B (0xc1 ,OP ,MD,MB,MI,MS ,_u8(IM))) )
-#define _ROTSHIQrr(OP,RS,RD) (((RS) == X86_CL) ? \
- (_REXQrr(RS, RD), _O_Mrm (0xd3 ,_b11,OP,_r8(RD) )) : \
- x86_emit_failure("source register must be CL" ) )
-#define _ROTSHIQrm(OP,RS,MD,MB,MI,MS) (((RS) == X86_CL) ? \
- (_REXQrm(RS, MB, MI), _O_r_X (0xd3 ,OP ,MD,MB,MI,MS )) : \
- x86_emit_failure("source register must be CL" ) )
-
-/* ROL in B/W/L/Q sizes: each macro forwards to the size-matched _ROTSHI*
- * encoder with X86_ROL. The rr/rm forms inherit the RS == X86_CL check. */
-#define ROLBir(IM, RD) _ROTSHIBir(X86_ROL, IM, RD)
-#define ROLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROL, IM, MD, MB, MI, MS)
-#define ROLBrr(RS, RD) _ROTSHIBrr(X86_ROL, RS, RD)
-#define ROLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROL, RS, MD, MB, MI, MS)
-
-#define ROLWir(IM, RD) _ROTSHIWir(X86_ROL, IM, RD)
-#define ROLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROL, IM, MD, MB, MI, MS)
-#define ROLWrr(RS, RD) _ROTSHIWrr(X86_ROL, RS, RD)
-#define ROLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROL, RS, MD, MB, MI, MS)
-
-#define ROLLir(IM, RD) _ROTSHILir(X86_ROL, IM, RD)
-#define ROLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROL, IM, MD, MB, MI, MS)
-#define ROLLrr(RS, RD) _ROTSHILrr(X86_ROL, RS, RD)
-#define ROLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROL, RS, MD, MB, MI, MS)
-
-#define ROLQir(IM, RD) _ROTSHIQir(X86_ROL, IM, RD)
-#define ROLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROL, IM, MD, MB, MI, MS)
-#define ROLQrr(RS, RD) _ROTSHIQrr(X86_ROL, RS, RD)
-#define ROLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROL, RS, MD, MB, MI, MS)
-
-/* ROR in B/W/L/Q sizes: each macro forwards to the size-matched _ROTSHI*
- * encoder with X86_ROR. The rr/rm forms inherit the RS == X86_CL check. */
-#define RORBir(IM, RD) _ROTSHIBir(X86_ROR, IM, RD)
-#define RORBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_ROR, IM, MD, MB, MI, MS)
-#define RORBrr(RS, RD) _ROTSHIBrr(X86_ROR, RS, RD)
-#define RORBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_ROR, RS, MD, MB, MI, MS)
-
-#define RORWir(IM, RD) _ROTSHIWir(X86_ROR, IM, RD)
-#define RORWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_ROR, IM, MD, MB, MI, MS)
-#define RORWrr(RS, RD) _ROTSHIWrr(X86_ROR, RS, RD)
-#define RORWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_ROR, RS, MD, MB, MI, MS)
-
-#define RORLir(IM, RD) _ROTSHILir(X86_ROR, IM, RD)
-#define RORLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_ROR, IM, MD, MB, MI, MS)
-#define RORLrr(RS, RD) _ROTSHILrr(X86_ROR, RS, RD)
-#define RORLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_ROR, RS, MD, MB, MI, MS)
-
-#define RORQir(IM, RD) _ROTSHIQir(X86_ROR, IM, RD)
-#define RORQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_ROR, IM, MD, MB, MI, MS)
-#define RORQrr(RS, RD) _ROTSHIQrr(X86_ROR, RS, RD)
-#define RORQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_ROR, RS, MD, MB, MI, MS)
-
-/* RCL in B/W/L/Q sizes: each macro forwards to the size-matched _ROTSHI*
- * encoder with X86_RCL. The rr/rm forms inherit the RS == X86_CL check. */
-#define RCLBir(IM, RD) _ROTSHIBir(X86_RCL, IM, RD)
-#define RCLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCL, IM, MD, MB, MI, MS)
-#define RCLBrr(RS, RD) _ROTSHIBrr(X86_RCL, RS, RD)
-#define RCLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCL, RS, MD, MB, MI, MS)
-
-#define RCLWir(IM, RD) _ROTSHIWir(X86_RCL, IM, RD)
-#define RCLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCL, IM, MD, MB, MI, MS)
-#define RCLWrr(RS, RD) _ROTSHIWrr(X86_RCL, RS, RD)
-#define RCLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCL, RS, MD, MB, MI, MS)
-
-#define RCLLir(IM, RD) _ROTSHILir(X86_RCL, IM, RD)
-#define RCLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCL, IM, MD, MB, MI, MS)
-#define RCLLrr(RS, RD) _ROTSHILrr(X86_RCL, RS, RD)
-#define RCLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCL, RS, MD, MB, MI, MS)
-
-#define RCLQir(IM, RD) _ROTSHIQir(X86_RCL, IM, RD)
-#define RCLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCL, IM, MD, MB, MI, MS)
-#define RCLQrr(RS, RD) _ROTSHIQrr(X86_RCL, RS, RD)
-#define RCLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCL, RS, MD, MB, MI, MS)
-
-/* RCR in B/W/L/Q sizes: each macro forwards to the size-matched _ROTSHI*
- * encoder with X86_RCR. The rr/rm forms inherit the RS == X86_CL check. */
-#define RCRBir(IM, RD) _ROTSHIBir(X86_RCR, IM, RD)
-#define RCRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_RCR, IM, MD, MB, MI, MS)
-#define RCRBrr(RS, RD) _ROTSHIBrr(X86_RCR, RS, RD)
-#define RCRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_RCR, RS, MD, MB, MI, MS)
-
-#define RCRWir(IM, RD) _ROTSHIWir(X86_RCR, IM, RD)
-#define RCRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_RCR, IM, MD, MB, MI, MS)
-#define RCRWrr(RS, RD) _ROTSHIWrr(X86_RCR, RS, RD)
-#define RCRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_RCR, RS, MD, MB, MI, MS)
-
-#define RCRLir(IM, RD) _ROTSHILir(X86_RCR, IM, RD)
-#define RCRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_RCR, IM, MD, MB, MI, MS)
-#define RCRLrr(RS, RD) _ROTSHILrr(X86_RCR, RS, RD)
-#define RCRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_RCR, RS, MD, MB, MI, MS)
-
-#define RCRQir(IM, RD) _ROTSHIQir(X86_RCR, IM, RD)
-#define RCRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_RCR, IM, MD, MB, MI, MS)
-#define RCRQrr(RS, RD) _ROTSHIQrr(X86_RCR, RS, RD)
-#define RCRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_RCR, RS, MD, MB, MI, MS)
-
-/* SHL in B/W/L/Q sizes: each macro forwards to the size-matched _ROTSHI*
- * encoder with X86_SHL. The rr/rm forms inherit the RS == X86_CL check. */
-#define SHLBir(IM, RD) _ROTSHIBir(X86_SHL, IM, RD)
-#define SHLBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHL, IM, MD, MB, MI, MS)
-#define SHLBrr(RS, RD) _ROTSHIBrr(X86_SHL, RS, RD)
-#define SHLBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHL, RS, MD, MB, MI, MS)
-
-#define SHLWir(IM, RD) _ROTSHIWir(X86_SHL, IM, RD)
-#define SHLWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHL, IM, MD, MB, MI, MS)
-#define SHLWrr(RS, RD) _ROTSHIWrr(X86_SHL, RS, RD)
-#define SHLWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHL, RS, MD, MB, MI, MS)
-
-#define SHLLir(IM, RD) _ROTSHILir(X86_SHL, IM, RD)
-#define SHLLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHL, IM, MD, MB, MI, MS)
-#define SHLLrr(RS, RD) _ROTSHILrr(X86_SHL, RS, RD)
-#define SHLLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHL, RS, MD, MB, MI, MS)
-
-#define SHLQir(IM, RD) _ROTSHIQir(X86_SHL, IM, RD)
-#define SHLQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHL, IM, MD, MB, MI, MS)
-#define SHLQrr(RS, RD) _ROTSHIQrr(X86_SHL, RS, RD)
-#define SHLQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHL, RS, MD, MB, MI, MS)
-
-/* SHR in B/W/L/Q sizes: each macro forwards to the size-matched _ROTSHI*
- * encoder with X86_SHR. The rr/rm forms inherit the RS == X86_CL check. */
-#define SHRBir(IM, RD) _ROTSHIBir(X86_SHR, IM, RD)
-#define SHRBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SHR, IM, MD, MB, MI, MS)
-#define SHRBrr(RS, RD) _ROTSHIBrr(X86_SHR, RS, RD)
-#define SHRBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SHR, RS, MD, MB, MI, MS)
-
-#define SHRWir(IM, RD) _ROTSHIWir(X86_SHR, IM, RD)
-#define SHRWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SHR, IM, MD, MB, MI, MS)
-#define SHRWrr(RS, RD) _ROTSHIWrr(X86_SHR, RS, RD)
-#define SHRWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SHR, RS, MD, MB, MI, MS)
-
-#define SHRLir(IM, RD) _ROTSHILir(X86_SHR, IM, RD)
-#define SHRLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SHR, IM, MD, MB, MI, MS)
-#define SHRLrr(RS, RD) _ROTSHILrr(X86_SHR, RS, RD)
-#define SHRLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SHR, RS, MD, MB, MI, MS)
-
-#define SHRQir(IM, RD) _ROTSHIQir(X86_SHR, IM, RD)
-#define SHRQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SHR, IM, MD, MB, MI, MS)
-#define SHRQrr(RS, RD) _ROTSHIQrr(X86_SHR, RS, RD)
-#define SHRQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SHR, RS, MD, MB, MI, MS)
-
-/* SAL is spelled as an alias of SHL: each SAL* name is an object-like macro
- * that expands to the SHL* macro of the same size and operand form, so the
- * generated encoding is whatever SHL* produces. */
-#define SALBir SHLBir
-#define SALBim SHLBim
-#define SALBrr SHLBrr
-#define SALBrm SHLBrm
-
-#define SALWir SHLWir
-#define SALWim SHLWim
-#define SALWrr SHLWrr
-#define SALWrm SHLWrm
-
-#define SALLir SHLLir
-#define SALLim SHLLim
-#define SALLrr SHLLrr
-#define SALLrm SHLLrm
-
-#define SALQir SHLQir
-#define SALQim SHLQim
-#define SALQrr SHLQrr
-#define SALQrm SHLQrm
-
-/* SAR in B/W/L/Q sizes: each macro forwards to the size-matched _ROTSHI*
- * encoder with X86_SAR. The rr/rm forms inherit the RS == X86_CL check. */
-#define SARBir(IM, RD) _ROTSHIBir(X86_SAR, IM, RD)
-#define SARBim(IM, MD, MB, MI, MS) _ROTSHIBim(X86_SAR, IM, MD, MB, MI, MS)
-#define SARBrr(RS, RD) _ROTSHIBrr(X86_SAR, RS, RD)
-#define SARBrm(RS, MD, MB, MI, MS) _ROTSHIBrm(X86_SAR, RS, MD, MB, MI, MS)
-
-#define SARWir(IM, RD) _ROTSHIWir(X86_SAR, IM, RD)
-#define SARWim(IM, MD, MB, MI, MS) _ROTSHIWim(X86_SAR, IM, MD, MB, MI, MS)
-#define SARWrr(RS, RD) _ROTSHIWrr(X86_SAR, RS, RD)
-#define SARWrm(RS, MD, MB, MI, MS) _ROTSHIWrm(X86_SAR, RS, MD, MB, MI, MS)
-
-#define SARLir(IM, RD) _ROTSHILir(X86_SAR, IM, RD)
-#define SARLim(IM, MD, MB, MI, MS) _ROTSHILim(X86_SAR, IM, MD, MB, MI, MS)
-#define SARLrr(RS, RD) _ROTSHILrr(X86_SAR, RS, RD)
-#define SARLrm(RS, MD, MB, MI, MS) _ROTSHILrm(X86_SAR, RS, MD, MB, MI, MS)
-
-#define SARQir(IM, RD) _ROTSHIQir(X86_SAR, IM, RD)
-#define SARQim(IM, MD, MB, MI, MS) _ROTSHIQim(X86_SAR, IM, MD, MB, MI, MS)
-#define SARQrr(RS, RD) _ROTSHIQrr(X86_SAR, RS, RD)
-#define SARQrm(RS, MD, MB, MI, MS) _ROTSHIQrm(X86_SAR, RS, MD, MB, MI, MS)
-
-
-/* --- Bit test instructions ----------------------------------------------- */
-
-/* Operation selectors for the _BT* macros below. The immediate forms pass
- * the value as the "r" slot next to opcode 0x0fba; the register forms OR
- * (value << 3) into 0x0f83, which yields 0x0fa3 (BT), 0x0fab (BTS),
- * 0x0fb3 (BTR) and 0x0fbb (BTC). */
-enum {
- X86_BT = 4,
- X86_BTS = 5,
- X86_BTR = 6,
- X86_BTC = 7,
-};
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-/* Generic bit-test encoders for 16/32/64-bit operands; OP is one of
- * X86_BT, X86_BTS, X86_BTR, X86_BTC.
- *   ir/im: opcode 0x0fba with OP in the "r" slot, followed by _u8(IM).
- *   rr/rm: opcode 0x0f83|((OP)<<3) with RS in the "r" slot.
- * The W forms additionally emit _d16(); the Q forms go through _REXQ*. */
-#define _BTWir(OP, IM, RD) (_d16(), _REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r2(RD) ,_u8(IM)))
-#define _BTWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
-#define _BTWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r2(RS),_r2(RD) ))
-#define _BTWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r2(RS) ,MD,MB,MI,MS ))
-
-#define _BTLir(OP, IM, RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r4(RD) ,_u8(IM)))
-#define _BTLim(OP, IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
-#define _BTLrr(OP, RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r4(RS),_r4(RD) ))
-#define _BTLrm(OP, RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r4(RS) ,MD,MB,MI,MS ))
-
-#define _BTQir(OP, IM, RD) (_REXQrr(0, RD), _OO_Mrm_B (0x0fba ,_b11,OP ,_r8(RD) ,_u8(IM)))
-#define _BTQim(OP, IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _OO_r_X_B (0x0fba ,OP ,MD,MB,MI,MS ,_u8(IM)))
-#define _BTQrr(OP, RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0f83|((OP)<<3),_b11,_r8(RS),_r8(RD) ))
-#define _BTQrm(OP, RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f83|((OP)<<3) ,_r8(RS) ,MD,MB,MI,MS ))
-
-/* BT in W/L/Q sizes: forwards to the size-matched _BT* encoder with X86_BT. */
-#define BTWir(IM, RD) _BTWir(X86_BT, IM, RD)
-#define BTWim(IM, MD, MB, MI, MS) _BTWim(X86_BT, IM, MD, MB, MI, MS)
-#define BTWrr(RS, RD) _BTWrr(X86_BT, RS, RD)
-#define BTWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BT, RS, MD, MB, MI, MS)
-
-#define BTLir(IM, RD) _BTLir(X86_BT, IM, RD)
-#define BTLim(IM, MD, MB, MI, MS) _BTLim(X86_BT, IM, MD, MB, MI, MS)
-#define BTLrr(RS, RD) _BTLrr(X86_BT, RS, RD)
-#define BTLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BT, RS, MD, MB, MI, MS)
-
-#define BTQir(IM, RD) _BTQir(X86_BT, IM, RD)
-#define BTQim(IM, MD, MB, MI, MS) _BTQim(X86_BT, IM, MD, MB, MI, MS)
-#define BTQrr(RS, RD) _BTQrr(X86_BT, RS, RD)
-#define BTQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BT, RS, MD, MB, MI, MS)
-
-/* BTC in W/L/Q sizes: forwards to the size-matched _BT* encoder with
- * X86_BTC. */
-#define BTCWir(IM, RD) _BTWir(X86_BTC, IM, RD)
-#define BTCWim(IM, MD, MB, MI, MS) _BTWim(X86_BTC, IM, MD, MB, MI, MS)
-#define BTCWrr(RS, RD) _BTWrr(X86_BTC, RS, RD)
-#define BTCWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTC, RS, MD, MB, MI, MS)
-
-#define BTCLir(IM, RD) _BTLir(X86_BTC, IM, RD)
-#define BTCLim(IM, MD, MB, MI, MS) _BTLim(X86_BTC, IM, MD, MB, MI, MS)
-#define BTCLrr(RS, RD) _BTLrr(X86_BTC, RS, RD)
-#define BTCLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTC, RS, MD, MB, MI, MS)
-
-#define BTCQir(IM, RD) _BTQir(X86_BTC, IM, RD)
-#define BTCQim(IM, MD, MB, MI, MS) _BTQim(X86_BTC, IM, MD, MB, MI, MS)
-#define BTCQrr(RS, RD) _BTQrr(X86_BTC, RS, RD)
-#define BTCQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTC, RS, MD, MB, MI, MS)
-
-/* BTR in W/L/Q sizes: forwards to the size-matched _BT* encoder with
- * X86_BTR. */
-#define BTRWir(IM, RD) _BTWir(X86_BTR, IM, RD)
-#define BTRWim(IM, MD, MB, MI, MS) _BTWim(X86_BTR, IM, MD, MB, MI, MS)
-#define BTRWrr(RS, RD) _BTWrr(X86_BTR, RS, RD)
-#define BTRWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTR, RS, MD, MB, MI, MS)
-
-#define BTRLir(IM, RD) _BTLir(X86_BTR, IM, RD)
-#define BTRLim(IM, MD, MB, MI, MS) _BTLim(X86_BTR, IM, MD, MB, MI, MS)
-#define BTRLrr(RS, RD) _BTLrr(X86_BTR, RS, RD)
-#define BTRLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTR, RS, MD, MB, MI, MS)
-
-#define BTRQir(IM, RD) _BTQir(X86_BTR, IM, RD)
-#define BTRQim(IM, MD, MB, MI, MS) _BTQim(X86_BTR, IM, MD, MB, MI, MS)
-#define BTRQrr(RS, RD) _BTQrr(X86_BTR, RS, RD)
-#define BTRQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTR, RS, MD, MB, MI, MS)
-
-/* BTS in W/L/Q sizes: forwards to the size-matched _BT* encoder with
- * X86_BTS. */
-#define BTSWir(IM, RD) _BTWir(X86_BTS, IM, RD)
-#define BTSWim(IM, MD, MB, MI, MS) _BTWim(X86_BTS, IM, MD, MB, MI, MS)
-#define BTSWrr(RS, RD) _BTWrr(X86_BTS, RS, RD)
-#define BTSWrm(RS, MD, MB, MI, MS) _BTWrm(X86_BTS, RS, MD, MB, MI, MS)
-
-#define BTSLir(IM, RD) _BTLir(X86_BTS, IM, RD)
-#define BTSLim(IM, MD, MB, MI, MS) _BTLim(X86_BTS, IM, MD, MB, MI, MS)
-#define BTSLrr(RS, RD) _BTLrr(X86_BTS, RS, RD)
-#define BTSLrm(RS, MD, MB, MI, MS) _BTLrm(X86_BTS, RS, MD, MB, MI, MS)
-
-#define BTSQir(IM, RD) _BTQir(X86_BTS, IM, RD)
-#define BTSQim(IM, MD, MB, MI, MS) _BTQim(X86_BTS, IM, MD, MB, MI, MS)
-#define BTSQrr(RS, RD) _BTQrr(X86_BTS, RS, RD)
-#define BTSQrm(RS, MD, MB, MI, MS) _BTQrm(X86_BTS, RS, MD, MB, MI, MS)
-
-
-/* --- Move instructions --------------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-/* MOV encoders for 8/16/32/64-bit operands.
- *   rr: opcode 0x88 (byte) or 0x89, RS in the "r" slot, RD in the "m" slot.
- *   mr: opcode 0x8a (byte) or 0x8b, loads RD from the memory operand.
- *   rm: opcode 0x88 (byte) or 0x89, stores RS to the memory operand.
- *   ir: opcode 0xb0 (byte) or 0xb8 combined with the register via _Or_*.
- *   im: opcode 0xc6 (byte) or 0xc7 followed by the immediate.
- * Byte and word immediates are filtered through _su8()/_su16(); the L and Q
- * immediates are passed through as written. The W forms emit _d16() first. */
-#define MOVBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x88 ,_b11,_r1(RS),_r1(RD) ))
-#define MOVBmr(MD, MB, MI, MS, RD) (_REXBmr(MB, MI, RD), _O_r_X (0x8a ,_r1(RD) ,MD,MB,MI,MS ))
-#define MOVBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x88 ,_r1(RS) ,MD,MB,MI,MS ))
-#define MOVBir(IM, R) (_REXBrr(0, R), _Or_B (0xb0,_r1(R) ,_su8(IM)))
-#define MOVBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_X_B (0xc6 ,MD,MB,MI,MS ,_su8(IM)))
-
-#define MOVWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r2(RS),_r2(RD) ))
-#define MOVWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r2(RD) ,MD,MB,MI,MS ))
-#define MOVWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r2(RS) ,MD,MB,MI,MS ))
-#define MOVWir(IM, R) (_d16(), _REXLrr(0, R), _Or_W (0xb8,_r2(R) ,_su16(IM)))
-#define MOVWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_X_W (0xc7 ,MD,MB,MI,MS ,_su16(IM)))
-
-#define MOVLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) ))
-#define MOVLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS ))
-#define MOVLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS ))
-#define MOVLir(IM, R) (_REXLrr(0, R), _Or_L (0xb8,_r4(R) ,IM ))
-#define MOVLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ))
-
-/* MOVQir goes through _Or_Q, whereas MOVQim reuses _O_X_L, the same emitter
- * as MOVLim; presumably the store form therefore carries a 32-bit immediate
- * only -- confirm against the _O_X_L definition. */
-#define MOVQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x89 ,_b11,_r8(RS),_r8(RD) ))
-#define MOVQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS ))
-#define MOVQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS ))
-#define MOVQir(IM, R) (_REXQrr(0, R), _Or_Q (0xb8,_r8(R) ,IM ))
-#define MOVQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ))
-
-
-/* --- Unary and Multiply/Divide instructions ------------------------------ */
-
-/* Operation selectors for the _UNARY* macros below, which pass the value as
- * OP into the "r" slot next to opcode 0xf6 (byte) or 0xf7. Values 0 and 1
- * are not assigned here. */
-enum {
- X86_NOT = 2,
- X86_NEG = 3,
- X86_MUL = 4,
- X86_IMUL = 5,
- X86_DIV = 6,
- X86_IDIV = 7,
-};
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-/* Generic single-operand encoders: opcode 0xf6 for bytes, 0xf7 otherwise,
- * with OP (X86_NOT..X86_IDIV) in the "r" slot. "r" forms take a register,
- * "m" forms a memory operand MD,MB,MI,MS. The W forms emit _d16() first.
- * NOTE(review): the byte memory form builds its prefix with
- * _REXBrm(0, MB, MI) while the W/L/Q memory forms use _REX?mr(MB, MI, 0);
- * presumably equivalent for a zero register argument -- confirm against the
- * _REX* definitions. */
-#define _UNARYBr(OP, RS) (_REXBrr(0, RS), _O_Mrm (0xf6 ,_b11,OP ,_r1(RS) ))
-#define _UNARYBm(OP, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xf6 ,OP ,MD,MB,MI,MS ))
-#define _UNARYWr(OP, RS) (_d16(), _REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r2(RS) ))
-#define _UNARYWm(OP, MD, MB, MI, MS) (_d16(), _REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
-#define _UNARYLr(OP, RS) (_REXLrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r4(RS) ))
-#define _UNARYLm(OP, MD, MB, MI, MS) (_REXLmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
-#define _UNARYQr(OP, RS) (_REXQrr(0, RS), _O_Mrm (0xf7 ,_b11,OP ,_r8(RS) ))
-#define _UNARYQm(OP, MD, MB, MI, MS) (_REXQmr(MB, MI, 0), _O_r_X (0xf7 ,OP ,MD,MB,MI,MS ))
-
-/* NOT on a register (r) or memory operand (m), sizes B/W/L/Q: forwards to
- * the size-matched _UNARY* encoder with X86_NOT. */
-#define NOTBr(RS) _UNARYBr(X86_NOT, RS)
-#define NOTBm(MD, MB, MI, MS) _UNARYBm(X86_NOT, MD, MB, MI, MS)
-#define NOTWr(RS) _UNARYWr(X86_NOT, RS)
-#define NOTWm(MD, MB, MI, MS) _UNARYWm(X86_NOT, MD, MB, MI, MS)
-#define NOTLr(RS) _UNARYLr(X86_NOT, RS)
-#define NOTLm(MD, MB, MI, MS) _UNARYLm(X86_NOT, MD, MB, MI, MS)
-#define NOTQr(RS) _UNARYQr(X86_NOT, RS)
-#define NOTQm(MD, MB, MI, MS) _UNARYQm(X86_NOT, MD, MB, MI, MS)
-
-/* NEG on a register (r) or memory operand (m), sizes B/W/L/Q: forwards to
- * the size-matched _UNARY* encoder with X86_NEG. */
-#define NEGBr(RS) _UNARYBr(X86_NEG, RS)
-#define NEGBm(MD, MB, MI, MS) _UNARYBm(X86_NEG, MD, MB, MI, MS)
-#define NEGWr(RS) _UNARYWr(X86_NEG, RS)
-#define NEGWm(MD, MB, MI, MS) _UNARYWm(X86_NEG, MD, MB, MI, MS)
-#define NEGLr(RS) _UNARYLr(X86_NEG, RS)
-#define NEGLm(MD, MB, MI, MS) _UNARYLm(X86_NEG, MD, MB, MI, MS)
-#define NEGQr(RS) _UNARYQr(X86_NEG, RS)
-#define NEGQm(MD, MB, MI, MS) _UNARYQm(X86_NEG, MD, MB, MI, MS)
-
-/* Single-operand MUL on a register (r) or memory operand (m), sizes
- * B/W/L/Q: forwards to the size-matched _UNARY* encoder with X86_MUL. */
-#define MULBr(RS) _UNARYBr(X86_MUL, RS)
-#define MULBm(MD, MB, MI, MS) _UNARYBm(X86_MUL, MD, MB, MI, MS)
-#define MULWr(RS) _UNARYWr(X86_MUL, RS)
-#define MULWm(MD, MB, MI, MS) _UNARYWm(X86_MUL, MD, MB, MI, MS)
-#define MULLr(RS) _UNARYLr(X86_MUL, RS)
-#define MULLm(MD, MB, MI, MS) _UNARYLm(X86_MUL, MD, MB, MI, MS)
-#define MULQr(RS) _UNARYQr(X86_MUL, RS)
-#define MULQm(MD, MB, MI, MS) _UNARYQm(X86_MUL, MD, MB, MI, MS)
-
-/* Single-operand IMUL on a register (r) or memory operand (m), sizes
- * B/W/L/Q: forwards to the size-matched _UNARY* encoder with X86_IMUL. */
-#define IMULBr(RS) _UNARYBr(X86_IMUL, RS)
-#define IMULBm(MD, MB, MI, MS) _UNARYBm(X86_IMUL, MD, MB, MI, MS)
-#define IMULWr(RS) _UNARYWr(X86_IMUL, RS)
-#define IMULWm(MD, MB, MI, MS) _UNARYWm(X86_IMUL, MD, MB, MI, MS)
-#define IMULLr(RS) _UNARYLr(X86_IMUL, RS)
-#define IMULLm(MD, MB, MI, MS) _UNARYLm(X86_IMUL, MD, MB, MI, MS)
-#define IMULQr(RS) _UNARYQr(X86_IMUL, RS)
-#define IMULQm(MD, MB, MI, MS) _UNARYQm(X86_IMUL, MD, MB, MI, MS)
-
-/* DIV on a register (r) or memory operand (m), sizes B/W/L/Q: forwards to
- * the size-matched _UNARY* encoder with X86_DIV. */
-#define DIVBr(RS) _UNARYBr(X86_DIV, RS)
-#define DIVBm(MD, MB, MI, MS) _UNARYBm(X86_DIV, MD, MB, MI, MS)
-#define DIVWr(RS) _UNARYWr(X86_DIV, RS)
-#define DIVWm(MD, MB, MI, MS) _UNARYWm(X86_DIV, MD, MB, MI, MS)
-#define DIVLr(RS) _UNARYLr(X86_DIV, RS)
-#define DIVLm(MD, MB, MI, MS) _UNARYLm(X86_DIV, MD, MB, MI, MS)
-#define DIVQr(RS) _UNARYQr(X86_DIV, RS)
-#define DIVQm(MD, MB, MI, MS) _UNARYQm(X86_DIV, MD, MB, MI, MS)
-
-/* IDIV on a register (r) or memory operand (m), sizes B/W/L/Q: forwards to
- * the size-matched _UNARY* encoder with X86_IDIV. */
-#define IDIVBr(RS) _UNARYBr(X86_IDIV, RS)
-#define IDIVBm(MD, MB, MI, MS) _UNARYBm(X86_IDIV, MD, MB, MI, MS)
-#define IDIVWr(RS) _UNARYWr(X86_IDIV, RS)
-#define IDIVWm(MD, MB, MI, MS) _UNARYWm(X86_IDIV, MD, MB, MI, MS)
-#define IDIVLr(RS) _UNARYLr(X86_IDIV, RS)
-#define IDIVLm(MD, MB, MI, MS) _UNARYLm(X86_IDIV, MD, MB, MI, MS)
-#define IDIVQr(RS) _UNARYQr(X86_IDIV, RS)
-#define IDIVQm(MD, MB, MI, MS) _UNARYQm(X86_IDIV, MD, MB, MI, MS)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-/* Two- and three-operand IMUL encoders.
- *   rr / mr : opcode 0x0faf with RD in the "r" slot and the source (RS or
- *             the memory operand) in the "m" slot.
- *   ir      : opcode 0x69 with RD in both the "r" and "m" slots, then IM.
- *   irr/imr : opcode 0x69 followed by the immediate.
- * NOTE(review): the irr forms place RS in the "r" slot and RD in the "m"
- * slot, the opposite of rr, mr and imr, which all put RD in the "r" slot.
- * Since the "r" slot of opcode 0x69 names the register that receives the
- * product, RS and RD look swapped in IMUL{W,L,Q}irr -- confirm against the
- * callers before relying on or changing the operand order. */
-#define IMULWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r2(RD),_r2(RS) ))
-#define IMULWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r2(RD) ,MD,MB,MI,MS ))
-
-#define IMULWirr(IM,RS,RD) (_d16(), _REXLrr(RS, RD), _Os_Mrm_sW (0x69 ,_b11,_r2(RS),_r2(RD) ,_su16(IM) ))
-#define IMULWimr(IM,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _Os_r_X_sW (0x69 ,_r2(RD) ,MD,MB,MI,MS ,_su16(IM) ))
-
-#define IMULLir(IM, RD) (_REXLrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RD),_r4(RD) ,IM ))
-#define IMULLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r4(RD),_r4(RS) ))
-#define IMULLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0faf ,_r4(RD) ,MD,MB,MI,MS ))
-
-#define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RD),_r8(RD) ,IM ))
-#define IMULQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0faf ,_b11,_r8(RD),_r8(RS) ))
-#define IMULQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0faf ,_r8(RD) ,MD,MB,MI,MS ))
-
-#define IMULLirr(IM,RS,RD) (_REXLrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r4(RS),_r4(RD) ,IM ))
-#define IMULLimr(IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r4(RD) ,MD,MB,MI,MS ,IM ))
-
-#define IMULQirr(IM,RS,RD) (_REXQrr(RS, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RS),_r8(RD) ,IM ))
-#define IMULQimr(IM,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _Os_r_X_sL (0x69 ,_r8(RD) ,MD,MB,MI,MS ,IM ))
-
-
-/* --- Control Flow related instructions ----------------------------------- */
-
-/* Condition codes (0x0..0xf). The macros below OR a code into the low bits
- * of a base opcode: 0x70 (JCCS*), 0x0f80 (JCC*), 0x0f90 (SETCC*) and
- * 0x0f40 (CMOV*). Several mnemonics intentionally share one value, e.g.
- * NAE/B/C are all 0x2 and E/Z are both 0x4. */
-enum {
- X86_CC_O = 0x0,
- X86_CC_NO = 0x1,
- X86_CC_NAE = 0x2,
- X86_CC_B = 0x2,
- X86_CC_C = 0x2,
- X86_CC_AE = 0x3,
- X86_CC_NB = 0x3,
- X86_CC_NC = 0x3,
- X86_CC_E = 0x4,
- X86_CC_Z = 0x4,
- X86_CC_NE = 0x5,
- X86_CC_NZ = 0x5,
- X86_CC_BE = 0x6,
- X86_CC_NA = 0x6,
- X86_CC_A = 0x7,
- X86_CC_NBE = 0x7,
- X86_CC_S = 0x8,
- X86_CC_NS = 0x9,
- X86_CC_P = 0xa,
- X86_CC_PE = 0xa,
- X86_CC_NP = 0xb,
- X86_CC_PO = 0xb,
- X86_CC_L = 0xc,
- X86_CC_NGE = 0xc,
- X86_CC_GE = 0xd,
- X86_CC_NL = 0xd,
- X86_CC_LE = 0xe,
- X86_CC_NG = 0xe,
- X86_CC_G = 0xf,
- X86_CC_NLE = 0xf,
-};
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-// FIXME: no prefix is available to encode a 32-bit operand size in 64-bit mode
-/* CALL encoders.
- *   CALLm(M)       : opcode 0xe8 with M cast to int and handed to _O_D32.
- *   _CALLLsr/_CALLQsr: opcode 0xff with _b010 in the "r" slot and the target
- *                    register checked by _r4()/_r8(); both use _REXLrr.
- *   CALLsr(R)      : picks the Q form when X86_TARGET_64BIT is non-zero,
- *                    otherwise the L form.
- *   CALLsm(D,B,I,S): opcode 0xff, _b010, memory operand with D cast to int. */
-#define CALLm(M) _O_D32 (0xe8 ,(int)(M) )
-#define _CALLLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r4(R) ))
-#define _CALLQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b010,_r8(R) ))
-#define CALLsr(R) ( X86_TARGET_64BIT ? _CALLQsr(R) : _CALLLsr(R))
-#define CALLsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b010 ,(int)(D),B,I,S ))
-
-// FIXME: no prefix is available to encode a 32-bit operand size in 64-bit mode
-/* JMP encoders.
- *   JMPSm(M)       : opcode 0xeb with M cast to int and handed to _O_D8.
- *   JMPm(M)        : opcode 0xe9 with M cast to int and handed to _O_D32.
- *   _JMPLsr/_JMPQsr: opcode 0xff with _b100 in the "r" slot and the target
- *                    register checked by _r4()/_r8(); both use _REXLrr.
- *   JMPsr(R)       : picks the Q form when X86_TARGET_64BIT is non-zero,
- *                    otherwise the L form.
- *   JMPsm(D,B,I,S) : opcode 0xff, _b100, memory operand with D cast to int. */
-#define JMPSm(M) _O_D8 (0xeb ,(int)(M) )
-#define JMPm(M) _O_D32 (0xe9 ,(int)(M) )
-#define _JMPLsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r4(R) ))
-#define _JMPQsr(R) (_REXLrr(0, R), _O_Mrm (0xff ,_b11,_b100,_r8(R) ))
-#define JMPsr(R) ( X86_TARGET_64BIT ? _JMPQsr(R) : _JMPLsr(R))
-#define JMPsm(D,B,I,S) (_REXLrm(0, B, I), _O_r_X (0xff ,_b100 ,(int)(D),B,I,S ))
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-/* Short conditional jumps: opcode 0x70|(CC).
- *   JCCSii passes D, cast through int to _sc, to _O_B.
- *   JCCSim passes D, cast to int, to _O_D8.
- * Presumably "ii" takes a ready-made byte displacement and "im" a target
- * from which _O_D8 derives the displacement -- confirm against _O_B/_O_D8.
- * The J<cond>Sm names below are JCCSim with the matching X86_CC_* code. */
-#define JCCSii(CC, D) _O_B (0x70|(CC) ,(_sc)(int)(D) )
-#define JCCSim(CC, D) _O_D8 (0x70|(CC) ,(int)(D) )
-#define JOSm(D) JCCSim(X86_CC_O, D)
-#define JNOSm(D) JCCSim(X86_CC_NO, D)
-#define JBSm(D) JCCSim(X86_CC_B, D)
-#define JNAESm(D) JCCSim(X86_CC_NAE, D)
-#define JNBSm(D) JCCSim(X86_CC_NB, D)
-#define JAESm(D) JCCSim(X86_CC_AE, D)
-#define JESm(D) JCCSim(X86_CC_E, D)
-#define JZSm(D) JCCSim(X86_CC_Z, D)
-#define JNESm(D) JCCSim(X86_CC_NE, D)
-#define JNZSm(D) JCCSim(X86_CC_NZ, D)
-#define JBESm(D) JCCSim(X86_CC_BE, D)
-#define JNASm(D) JCCSim(X86_CC_NA, D)
-#define JNBESm(D) JCCSim(X86_CC_NBE, D)
-#define JASm(D) JCCSim(X86_CC_A, D)
-#define JSSm(D) JCCSim(X86_CC_S, D)
-#define JNSSm(D) JCCSim(X86_CC_NS, D)
-#define JPSm(D) JCCSim(X86_CC_P, D)
-#define JPESm(D) JCCSim(X86_CC_PE, D)
-#define JNPSm(D) JCCSim(X86_CC_NP, D)
-#define JPOSm(D) JCCSim(X86_CC_PO, D)
-#define JLSm(D) JCCSim(X86_CC_L, D)
-#define JNGESm(D) JCCSim(X86_CC_NGE, D)
-#define JNLSm(D) JCCSim(X86_CC_NL, D)
-#define JGESm(D) JCCSim(X86_CC_GE, D)
-#define JLESm(D) JCCSim(X86_CC_LE, D)
-#define JNGSm(D) JCCSim(X86_CC_NG, D)
-#define JNLESm(D) JCCSim(X86_CC_NLE, D)
-#define JGSm(D) JCCSim(X86_CC_G, D)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-/* Near conditional jumps: two-byte opcode 0x0f80|(CC).
- *   JCCii passes D, cast to int, to _OO_L.
- *   JCCim passes D, cast to int, to _OO_D32.
- * Presumably "ii" takes a ready-made 32-bit displacement and "im" a target
- * from which _OO_D32 derives it -- confirm against _OO_L/_OO_D32.
- * The J<cond>m names below are JCCim with the matching X86_CC_* code. */
-#define JCCii(CC, D) _OO_L (0x0f80|(CC) ,(int)(D) )
-#define JCCim(CC, D) _OO_D32 (0x0f80|(CC) ,(int)(D) )
-#define JOm(D) JCCim(X86_CC_O, D)
-#define JNOm(D) JCCim(X86_CC_NO, D)
-#define JBm(D) JCCim(X86_CC_B, D)
-#define JNAEm(D) JCCim(X86_CC_NAE, D)
-#define JNBm(D) JCCim(X86_CC_NB, D)
-#define JAEm(D) JCCim(X86_CC_AE, D)
-#define JEm(D) JCCim(X86_CC_E, D)
-#define JZm(D) JCCim(X86_CC_Z, D)
-#define JNEm(D) JCCim(X86_CC_NE, D)
-#define JNZm(D) JCCim(X86_CC_NZ, D)
-#define JBEm(D) JCCim(X86_CC_BE, D)
-#define JNAm(D) JCCim(X86_CC_NA, D)
-#define JNBEm(D) JCCim(X86_CC_NBE, D)
-#define JAm(D) JCCim(X86_CC_A, D)
-#define JSm(D) JCCim(X86_CC_S, D)
-#define JNSm(D) JCCim(X86_CC_NS, D)
-#define JPm(D) JCCim(X86_CC_P, D)
-#define JPEm(D) JCCim(X86_CC_PE, D)
-#define JNPm(D) JCCim(X86_CC_NP, D)
-#define JPOm(D) JCCim(X86_CC_PO, D)
-#define JLm(D) JCCim(X86_CC_L, D)
-#define JNGEm(D) JCCim(X86_CC_NGE, D)
-#define JNLm(D) JCCim(X86_CC_NL, D)
-#define JGEm(D) JCCim(X86_CC_GE, D)
-#define JLEm(D) JCCim(X86_CC_LE, D)
-#define JNGm(D) JCCim(X86_CC_NG, D)
-#define JNLEm(D) JCCim(X86_CC_NLE, D)
-#define JGm(D) JCCim(X86_CC_G, D)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-/* SETcc to a byte register: two-byte opcode 0x0f90|(CC) with _b000 in the
- * "r" slot and RD (checked by _r1) in the "m" slot. The SET<cond>r names
- * below are SETCCir with the matching X86_CC_* code. */
-#define SETCCir(CC, RD) (_REXBrr(0, RD), _OO_Mrm (0x0f90|(CC) ,_b11,_b000,_r1(RD) ))
-#define SETOr(RD) SETCCir(X86_CC_O, RD)
-#define SETNOr(RD) SETCCir(X86_CC_NO, RD)
-#define SETBr(RD) SETCCir(X86_CC_B, RD)
-#define SETNAEr(RD) SETCCir(X86_CC_NAE, RD)
-#define SETNBr(RD) SETCCir(X86_CC_NB, RD)
-#define SETAEr(RD) SETCCir(X86_CC_AE, RD)
-#define SETEr(RD) SETCCir(X86_CC_E, RD)
-#define SETZr(RD) SETCCir(X86_CC_Z, RD)
-#define SETNEr(RD) SETCCir(X86_CC_NE, RD)
-#define SETNZr(RD) SETCCir(X86_CC_NZ, RD)
-#define SETBEr(RD) SETCCir(X86_CC_BE, RD)
-#define SETNAr(RD) SETCCir(X86_CC_NA, RD)
-#define SETNBEr(RD) SETCCir(X86_CC_NBE, RD)
-#define SETAr(RD) SETCCir(X86_CC_A, RD)
-#define SETSr(RD) SETCCir(X86_CC_S, RD)
-#define SETNSr(RD) SETCCir(X86_CC_NS, RD)
-#define SETPr(RD) SETCCir(X86_CC_P, RD)
-#define SETPEr(RD) SETCCir(X86_CC_PE, RD)
-#define SETNPr(RD) SETCCir(X86_CC_NP, RD)
-#define SETPOr(RD) SETCCir(X86_CC_PO, RD)
-#define SETLr(RD) SETCCir(X86_CC_L, RD)
-#define SETNGEr(RD) SETCCir(X86_CC_NGE, RD)
-#define SETNLr(RD) SETCCir(X86_CC_NL, RD)
-#define SETGEr(RD) SETCCir(X86_CC_GE, RD)
-#define SETLEr(RD) SETCCir(X86_CC_LE, RD)
-#define SETNGr(RD) SETCCir(X86_CC_NG, RD)
-#define SETNLEr(RD) SETCCir(X86_CC_NLE, RD)
-#define SETGr(RD) SETCCir(X86_CC_G, RD)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-/* SETcc to a memory operand: two-byte opcode 0x0f90|(CC) with _b000 in the
- * "r" slot and the operand given as MD,MB,MI,MS. The SET<cond>m names below
- * are SETCCim with the matching X86_CC_* code; their D,B,I,S parameters map
- * positionally onto MD,MB,MI,MS. */
-#define SETCCim(CC,MD,MB,MI,MS) (_REXBrm(0, MB, MI), _OO_r_X (0x0f90|(CC) ,_b000 ,MD,MB,MI,MS ))
-#define SETOm(D, B, I, S) SETCCim(X86_CC_O, D, B, I, S)
-#define SETNOm(D, B, I, S) SETCCim(X86_CC_NO, D, B, I, S)
-#define SETBm(D, B, I, S) SETCCim(X86_CC_B, D, B, I, S)
-#define SETNAEm(D, B, I, S) SETCCim(X86_CC_NAE, D, B, I, S)
-#define SETNBm(D, B, I, S) SETCCim(X86_CC_NB, D, B, I, S)
-#define SETAEm(D, B, I, S) SETCCim(X86_CC_AE, D, B, I, S)
-#define SETEm(D, B, I, S) SETCCim(X86_CC_E, D, B, I, S)
-#define SETZm(D, B, I, S) SETCCim(X86_CC_Z, D, B, I, S)
-#define SETNEm(D, B, I, S) SETCCim(X86_CC_NE, D, B, I, S)
-#define SETNZm(D, B, I, S) SETCCim(X86_CC_NZ, D, B, I, S)
-#define SETBEm(D, B, I, S) SETCCim(X86_CC_BE, D, B, I, S)
-#define SETNAm(D, B, I, S) SETCCim(X86_CC_NA, D, B, I, S)
-#define SETNBEm(D, B, I, S) SETCCim(X86_CC_NBE, D, B, I, S)
-#define SETAm(D, B, I, S) SETCCim(X86_CC_A, D, B, I, S)
-#define SETSm(D, B, I, S) SETCCim(X86_CC_S, D, B, I, S)
-#define SETNSm(D, B, I, S) SETCCim(X86_CC_NS, D, B, I, S)
-#define SETPm(D, B, I, S) SETCCim(X86_CC_P, D, B, I, S)
-#define SETPEm(D, B, I, S) SETCCim(X86_CC_PE, D, B, I, S)
-#define SETNPm(D, B, I, S) SETCCim(X86_CC_NP, D, B, I, S)
-#define SETPOm(D, B, I, S) SETCCim(X86_CC_PO, D, B, I, S)
-#define SETLm(D, B, I, S) SETCCim(X86_CC_L, D, B, I, S)
-#define SETNGEm(D, B, I, S) SETCCim(X86_CC_NGE, D, B, I, S)
-#define SETNLm(D, B, I, S) SETCCim(X86_CC_NL, D, B, I, S)
-#define SETGEm(D, B, I, S) SETCCim(X86_CC_GE, D, B, I, S)
-#define SETLEm(D, B, I, S) SETCCim(X86_CC_LE, D, B, I, S)
-#define SETNGm(D, B, I, S) SETCCim(X86_CC_NG, D, B, I, S)
-#define SETNLEm(D, B, I, S) SETCCim(X86_CC_NLE, D, B, I, S)
-#define SETGm(D, B, I, S) SETCCim(X86_CC_G, D, B, I, S)
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-/* Conditional moves. Every form emits the two-byte opcode 0x0f40|CC with the
- * destination RD in the ModRM "r" slot; the source is either register RS
- * (Mod = _b11) or the memory operand MD(MB,MI,MS). The W forms emit _d16()
- * first; the Q forms use the _REXQ* prefix helpers instead of _REXL*. */
-#define CMOVWrr(CC,RS,RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r2(RD),_r2(RS) ))
-#define CMOVWmr(CC,MD,MB,MI,MS,RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r2(RD) ,MD,MB,MI,MS ))
-#define CMOVLrr(CC,RS,RD) (_REXLrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r4(RD),_r4(RS) ))
-#define CMOVLmr(CC,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r4(RD) ,MD,MB,MI,MS ))
-#define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS), _OO_Mrm (0x0f40|(CC) ,_b11,_r8(RD),_r8(RS) ))
-#define CMOVQmr(CC,MD,MB,MI,MS,RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f40|(CC) ,_r8(RD) ,MD,MB,MI,MS ))
-
-
-/* --- Push/Pop instructions ----------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define POPWr(RD) _m32only((_d16(), _Or (0x58,_r2(RD) )))
-#define POPWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
-
-#define POPLr(RD) _m32only( _Or (0x58,_r4(RD) ))
-#define POPLm(MD, MB, MI, MS) _m32only( _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS ))
-
-#define POPQr(RD) _m64only((_REXQr(RD), _Or (0x58,_r8(RD) )))
-#define POPQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0x8f ,_b000 ,MD,MB,MI,MS )))
-
-#define PUSHWr(RS) _m32only((_d16(), _Or (0x50,_r2(RS) )))
-/* 16-bit push from memory: _d16() followed by opcode 0xff with _b110 in the
- * ModRM "r" slot, exactly like PUSHLm. (A stray comma after 0xff used to pass
- * an empty extra argument to _O_r_X.) */
-#define PUSHWm(MD, MB, MI, MS) _m32only((_d16(), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
-#define PUSHWi(IM) _m32only((_d16(), _Os_sW (0x68 ,IM )))
-
-#define PUSHLr(RS) _m32only( _Or (0x50,_r4(RS) ))
-#define PUSHLm(MD, MB, MI, MS) _m32only( _O_r_X (0xff ,_b110 ,MD,MB,MI,MS ))
-#define PUSHLi(IM) _m32only( _Os_sL (0x68 ,IM ))
-
-#define PUSHQr(RS) _m64only((_REXQr(RS), _Or (0x50,_r8(RS) )))
-#define PUSHQm(MD, MB, MI, MS) _m64only((_REXQm(MB, MI), _O_r_X (0xff ,_b110 ,MD,MB,MI,MS )))
-#define PUSHQi(IM) _m64only( _Os_sL (0x68 ,IM ))
-
-/* Pop-all / push-all: single opcode byte, no operands. The non-"D" names
- * emit _d16() first; the "D" names emit the bare opcode.
- * NOTE(review): unlike the POPWr/PUSHLr family these are not wrapped in
- * _m32only -- confirm whether they can be reached on a 64-bit target. */
-#define POPA() (_d16(), _O (0x61 ))
-#define POPAD() _O (0x61 )
-
-#define PUSHA() (_d16(), _O (0x60 ))
-#define PUSHAD() _O (0x60 )
-
-/* Flags pop / push: single opcode byte, no operands. */
-#define POPF() _O (0x9d )
-#define PUSHF() _O (0x9c )
-
-
-/* --- Test instructions --------------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define TESTBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x84 ,_b11,_r1(RS),_r1(RD) ))
-#define TESTBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x84 ,_r1(RS) ,MD,MB,MI,MS ))
-#define TESTBir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AL) ? \
- (_REXBrr(0, RD), _O_B (0xa8 ,_u8(IM))) : \
- (_REXBrr(0, RD), _O_Mrm_B (0xf6 ,_b11,_b000 ,_r1(RD) ,_u8(IM))) )
-#define TESTBim(IM, MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X_B (0xf6 ,_b000 ,MD,MB,MI,MS ,_u8(IM)))
-
-#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ))
-#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ))
-#define TESTWir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_AX) ? \
- (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \
- (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) )
-#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)))
-
-#define TESTLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r4(RS),_r4(RD) ))
-#define TESTLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r4(RS) ,MD,MB,MI,MS ))
-#define TESTLir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_EAX) ? \
- (_REXLrr(0, RD), _O_L (0xa9 ,IM )) : \
- (_REXLrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r4(RD) ,IM )) )
-#define TESTLim(IM, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ))
-
-#define TESTQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x85 ,_b11,_r8(RS),_r8(RD) ))
-#define TESTQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x85 ,_r8(RS) ,MD,MB,MI,MS ))
-#define TESTQir(IM, RD) (X86_OPTIMIZE_ALU && ((RD) == X86_RAX) ? \
- (_REXQrr(0, RD), _O_L (0xa9 ,IM )) : \
- (_REXQrr(0, RD), _O_Mrm_L (0xf7 ,_b11,_b000 ,_r8(RD) ,IM )) )
-#define TESTQim(IM, MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X_L (0xf7 ,_b000 ,MD,MB,MI,MS ,IM ))
-
-
-/* --- Exchange instructions ----------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define CMPXCHGBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fb0 ,_b11,_r1(RS),_r1(RD) ))
-#define CMPXCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fb0 ,_r1(RS) ,MD,MB,MI,MS ))
-
-#define CMPXCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r2(RS),_r2(RD) ))
-#define CMPXCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r2(RS) ,MD,MB,MI,MS ))
-
-#define CMPXCHGLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r4(RS),_r4(RD) ))
-#define CMPXCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r4(RS) ,MD,MB,MI,MS ))
-
-#define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fb1 ,_b11,_r8(RS),_r8(RD) ))
-#define CMPXCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fb1 ,_r8(RS) ,MD,MB,MI,MS ))
-
-#define XADDBrr(RS, RD) (_REXBrr(RS, RD), _OO_Mrm (0x0fc0 ,_b11,_r1(RS),_r1(RD) ))
-#define XADDBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _OO_r_X (0x0fc0 ,_r1(RS) ,MD,MB,MI,MS ))
-
-#define XADDWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r2(RS),_r2(RD) ))
-#define XADDWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r2(RS) ,MD,MB,MI,MS ))
-
-#define XADDLrr(RS, RD) (_REXLrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r4(RS),_r4(RD) ))
-#define XADDLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r4(RS) ,MD,MB,MI,MS ))
-
-#define XADDQrr(RS, RD) (_REXQrr(RS, RD), _OO_Mrm (0x0fc1 ,_b11,_r8(RS),_r8(RD) ))
-#define XADDQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0fc1 ,_r8(RS) ,MD,MB,MI,MS ))
-
-#define XCHGBrr(RS, RD) (_REXBrr(RS, RD), _O_Mrm (0x86 ,_b11,_r1(RS),_r1(RD) ))
-#define XCHGBrm(RS, MD, MB, MI, MS) (_REXBrm(RS, MB, MI), _O_r_X (0x86 ,_r1(RS) ,MD,MB,MI,MS ))
-
-#define XCHGWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r2(RS),_r2(RD) ))
-#define XCHGWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r2(RS) ,MD,MB,MI,MS ))
-
-#define XCHGLrr(RS, RD) (_REXLrr(RS, RD), _O_Mrm (0x87 ,_b11,_r4(RS),_r4(RD) ))
-#define XCHGLrm(RS, MD, MB, MI, MS) (_REXLrm(RS, MB, MI), _O_r_X (0x87 ,_r4(RS) ,MD,MB,MI,MS ))
-
-#define XCHGQrr(RS, RD) (_REXQrr(RS, RD), _O_Mrm (0x87 ,_b11,_r8(RS),_r8(RD) ))
-#define XCHGQrm(RS, MD, MB, MI, MS) (_REXQrm(RS, MB, MI), _O_r_X (0x87 ,_r8(RS) ,MD,MB,MI,MS ))
-
-
-/* --- Increment/Decrement instructions ------------------------------------ */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define DECBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b001 ,MD,MB,MI,MS ))
-#define DECBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b001 ,_r1(RD) ))
-
-#define DECWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
-#define DECWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x48,_r2(RD) )) : \
- (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r2(RD) )))
-
-#define DECLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
-#define DECLr(RD) (! X86_TARGET_64BIT ? _Or (0x48,_r4(RD) ) : \
- (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r4(RD) )))
-
-#define DECQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b001 ,MD,MB,MI,MS ))
-#define DECQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b001 ,_r8(RD) ))
-
-#define INCBm(MD, MB, MI, MS) (_REXBrm(0, MB, MI), _O_r_X (0xfe ,_b000 ,MD,MB,MI,MS ))
-#define INCBr(RD) (_REXBrr(0, RD), _O_Mrm (0xfe ,_b11,_b000 ,_r1(RD) ))
-
-#define INCWm(MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
-#define INCWr(RD) (! X86_TARGET_64BIT ? (_d16(), _Or (0x40,_r2(RD) )) : \
- (_d16(), _REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r2(RD) )) )
-
-#define INCLm(MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
-#define INCLr(RD) (! X86_TARGET_64BIT ? _Or (0x40,_r4(RD) ) : \
- (_REXLrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r4(RD) )))
-
-#define INCQm(MD, MB, MI, MS) (_REXQrm(0, MB, MI), _O_r_X (0xff ,_b000 ,MD,MB,MI,MS ))
-#define INCQr(RD) (_REXQrr(0, RD), _O_Mrm (0xff ,_b11,_b000 ,_r8(RD) ))
-
-
-/* --- Misc instructions --------------------------------------------------- */
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define BSFWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r2(RD),_r2(RS) ))
-#define BSFWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r2(RD) ,MD,MB,MI,MS ))
-#define BSRWrr(RS, RD) (_d16(), _REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r2(RD),_r2(RS) ))
-#define BSRWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r2(RD) ,MD,MB,MI,MS ))
-
-#define BSFLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r4(RD),_r4(RS) ))
-#define BSFLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r4(RD) ,MD,MB,MI,MS ))
-#define BSRLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r4(RD),_r4(RS) ))
-#define BSRLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r4(RD) ,MD,MB,MI,MS ))
-
-#define BSFQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbc ,_b11,_r8(RD),_r8(RS) ))
-#define BSFQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbc ,_r8(RD) ,MD,MB,MI,MS ))
-#define BSRQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbd ,_b11,_r8(RD),_r8(RS) ))
-#define BSRQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbd ,_r8(RD) ,MD,MB,MI,MS ))
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define MOVSBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r2(RD),_r1(RS) ))
-#define MOVSBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r2(RD) ,MD,MB,MI,MS ))
-#define MOVZBWrr(RS, RD) (_d16(), _REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r2(RD),_r1(RS) ))
-#define MOVZBWmr(MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r2(RD) ,MD,MB,MI,MS ))
-
-#define MOVSBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r4(RD),_r1(RS) ))
-#define MOVSBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r4(RD) ,MD,MB,MI,MS ))
-#define MOVZBLrr(RS, RD) (_REXBLrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r4(RD),_r1(RS) ))
-#define MOVZBLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r4(RD) ,MD,MB,MI,MS ))
-
-#define MOVSBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fbe ,_b11,_r8(RD),_r1(RS) ))
-#define MOVSBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fbe ,_r8(RD) ,MD,MB,MI,MS ))
-#define MOVZBQrr(RS, RD) (_REXQrr(RD, RS), _OO_Mrm (0x0fb6 ,_b11,_r8(RD),_r1(RS) ))
-#define MOVZBQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _OO_r_X (0x0fb6 ,_r8(RD) ,MD,MB,MI,MS ))
-
-#define MOVSWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r4(RD),_r2(RS) ))
-#define MOVSWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r4(RD) ,MD,MB,MI,MS ))
-#define MOVZWLrr(RS, RD) (_REXLrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r4(RD),_r2(RS) ))
-#define MOVZWLmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r4(RD) ,MD,MB,MI,MS ))
-
-#define MOVSWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fbf ,_b11,_r8(RD),_r2(RS) )))
-#define MOVSWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fbf ,_r8(RD) ,MD,MB,MI,MS )))
-#define MOVZWQrr(RS, RD) _m64only((_REXQrr(RD, RS), _OO_Mrm (0x0fb7 ,_b11,_r8(RD),_r2(RS) )))
-#define MOVZWQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _OO_r_X (0x0fb7 ,_r8(RD) ,MD,MB,MI,MS )))
-
-#define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, RS), _O_Mrm (0x63 ,_b11,_r8(RD),_r4(RS) )))
-#define MOVSLQmr(MD, MB, MI, MS, RD) _m64only((_REXQmr(MB, MI, RD), _O_r_X (0x63 ,_r8(RD) ,MD,MB,MI,MS )))
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-#define LEALmr(MD, MB, MI, MS, RD) (_REXLmr(MB, MI, RD), _O_r_X (0x8d ,_r4(RD) ,MD,MB,MI,MS ))
-/* 64-bit LEA: opcode 0x8d with the _REXQmr prefix helper. RD goes through
- * _r8, as in every other Q-form macro in this file (it was _r4, the accessor
- * the 32-bit LEALmr uses). */
-#define LEAQmr(MD, MB, MI, MS, RD) (_REXQmr(MB, MI, RD), _O_r_X (0x8d ,_r8(RD) ,MD,MB,MI,MS ))
-
-#define BSWAPLr(R) (_REXLrr(0, R), _OOr (0x0fc8,_r4(R) ))
-#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) ))
-
-#define CLC() _O (0xf8 )
-#define STC() _O (0xf9 )
-#define CMC() _O (0xf5 )
-
-#define CLD() _O (0xfc )
-#define STD() _O (0xfd )
-
-/* Accumulator sign-extension, opcode 0x98: CBTW adds _d16(), CWTL is the
- * bare opcode, CLTQ adds _REXQrr(0, 0). The CLTQ sequence is wrapped in its
- * own parentheses so that _m64only receives one argument, matching every
- * other _m64only((...)) use in this file. */
-#define CBTW() (_d16(), _O (0x98 ))
-#define CWTL() _O (0x98 )
-#define CLTQ() _m64only((_REXQrr(0, 0), _O (0x98 )))
-
-/* Alternative mnemonics for the three macros above. */
-#define CBW CBTW
-#define CWDE CWTL
-#define CDQE CLTQ
-
-/* Opcode 0x99 family: CWTD adds _d16(), CLTD is the bare opcode, CQTO adds
- * _REXQrr(0, 0). The CQTO sequence is wrapped in its own parentheses so that
- * _m64only receives one argument, matching every other _m64only((...)) use
- * in this file. */
-#define CWTD() (_d16(), _O (0x99 ))
-#define CLTD() _O (0x99 )
-#define CQTO() _m64only((_REXQrr(0, 0), _O (0x99 )))
-
-/* Alternative mnemonics for the three macros above. */
-#define CWD CWTD
-#define CDQ CLTD
-#define CQO CQTO
-
-#define LAHF() _O (0x9f )
-#define SAHF() _O (0x9e )
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-/* Two-byte, operand-less instructions in the 0x0f opcode map.
- * RDTSC is 0F 31 (it was written as 0xff31, which is not RDTSC). */
-#define CPUID() _OO (0x0fa2 )
-#define RDTSC() _OO (0x0f31 )
-
-#define ENTERii(W, B) _O_W_B (0xc8 ,_su16(W),_su8(B))
-
-#define LEAVE() _O (0xc9 )
-#define RET() _O (0xc3 )
-#define RETi(IM) _O_W (0xc2 ,_su16(IM))
-
-#define NOP() _O (0x90 )
-
-
-/* --- Media 64-bit instructions ------------------------------------------- */
-
-enum {
- X86_MMX_PABSB = 0x1c, // 2P
- X86_MMX_PABSW = 0x1d, // 2P
- X86_MMX_PABSD = 0x1e, // 2P
- X86_MMX_PACKSSWB = 0x63,
- X86_MMX_PACKSSDW = 0x6b,
- X86_MMX_PACKUSWB = 0x67,
- X86_MMX_PADDB = 0xfc,
- X86_MMX_PADDW = 0xfd,
- X86_MMX_PADDD = 0xfe,
- X86_MMX_PADDQ = 0xd4,
- X86_MMX_PADDSB = 0xec,
- X86_MMX_PADDSW = 0xed,
- X86_MMX_PADDUSB = 0xdc,
- X86_MMX_PADDUSW = 0xdd,
- X86_MMX_PAND = 0xdb,
- X86_MMX_PANDN = 0xdf,
- X86_MMX_PAVGB = 0xe0,
- X86_MMX_PAVGW = 0xe3,
- X86_MMX_PCMPEQB = 0x74,
- X86_MMX_PCMPEQW = 0x75,
- X86_MMX_PCMPEQD = 0x76,
- X86_MMX_PCMPGTB = 0x64,
- X86_MMX_PCMPGTW = 0x65,
- X86_MMX_PCMPGTD = 0x66,
- X86_MMX_PEXTRW = 0xc5, // 64, /r ib
- X86_MMX_PHADDW = 0x01, // 2P
- X86_MMX_PHADDD = 0x02, // 2P
- X86_MMX_PHADDSW = 0x03, // 2P
- X86_MMX_PHSUBW = 0x05, // 2P
- X86_MMX_PHSUBD = 0x06, // 2P
- X86_MMX_PHSUBSW = 0x07, // 2P
- X86_MMX_PINSRW = 0xc4, // 64, /r ib
- X86_MMX_PMADDUBSW = 0x04, // 2P
- X86_MMX_PMADDWD = 0xf5,
- X86_MMX_PMAXSW = 0xee,
- X86_MMX_PMAXUB = 0xde,
- X86_MMX_PMINSW = 0xea,
- X86_MMX_PMINUB = 0xda,
- X86_MMX_PMOVMSKB = 0xd7, // 64
- X86_MMX_PMULHRSW = 0x0b, // 2P
- X86_MMX_PMULHUW = 0xe4,
- X86_MMX_PMULHW = 0xe5,
- X86_MMX_PMULLW = 0xd5,
- X86_MMX_PMULUDQ = 0xf4,
- X86_MMX_POR = 0xeb,
- X86_MMX_PSADBW = 0xf6,
- X86_MMX_PSHUFB = 0x00, // 2P
- X86_MMX_PSHUFW = 0x70, // /r ib
- X86_MMX_PSIGNB = 0x08, // 2P
- X86_MMX_PSIGNW = 0x09, // 2P
- X86_MMX_PSIGND = 0x0a, // 2P
- X86_MMX_PSLLW = 0xf1,
- X86_MMX_PSLLWi = 0x71, // /6 ib
- X86_MMX_PSLLD = 0xf2,
- X86_MMX_PSLLDi = 0x72, // /6 ib
- X86_MMX_PSLLQ = 0xf3,
- X86_MMX_PSLLQi = 0x73, // /6 ib
- X86_MMX_PSRAW = 0xe1,
- X86_MMX_PSRAWi = 0x71, // /4 ib
- X86_MMX_PSRAD = 0xe2,
- X86_MMX_PSRADi = 0x72, // /4 ib
- X86_MMX_PSRLW = 0xd1,
- X86_MMX_PSRLWi = 0x71, // /2 ib
- X86_MMX_PSRLD = 0xd2,
- X86_MMX_PSRLDi = 0x72, // /2 ib
- X86_MMX_PSRLQ = 0xd3,
- X86_MMX_PSRLQi = 0x73, // /2 ib
- X86_MMX_PSUBB = 0xf8,
- X86_MMX_PSUBW = 0xf9,
- X86_MMX_PSUBD = 0xfa,
- X86_MMX_PSUBQ = 0xfb,
- X86_MMX_PSUBSB = 0xe8,
- X86_MMX_PSUBSW = 0xe9,
- X86_MMX_PSUBUSB = 0xd8,
- X86_MMX_PSUBUSW = 0xd9,
- X86_MMX_PUNPCKHBW = 0x68,
- X86_MMX_PUNPCKHWD = 0x69,
- X86_MMX_PUNPCKHDQ = 0x6a,
- X86_MMX_PUNPCKLBW = 0x60,
- X86_MMX_PUNPCKLWD = 0x61,
- X86_MMX_PUNPCKLDQ = 0x62,
- X86_MMX_PXOR = 0xef,
-};
-
-#define __MMXLrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __MMXLmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __MMXLrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-#define __MMXLirr(OP,IM,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM)))
-/* imm8 + memory source + register destination. The prefix helper must be
- * given RD: this macro has no RS parameter, so the previous _REXLmr(MB, MI, RS)
- * referenced an undeclared name at every expansion. */
-#define __MMXLimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM)))
-#define __MMXQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __MMXQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __MMXQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-#define __MMXQirr(OP,IM,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ,_u8(IM)))
-/* Q-prefixed twin of __MMXLimr. The prefix helper must be given RD: this
- * macro has no RS parameter, so the previous _REXQmr(MB, MI, RS) referenced
- * an undeclared name at every expansion. */
-#define __MMXQimr(OP,IM,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ,_u8(IM)))
-#define __MMX1Lrr(PX,OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _B(0x0f),_OO_Mrm(((PX)<<8)|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __MMX1Lmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __MMX1Lrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _B(0x0f),_OO_r_X(((PX)<<8)|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-
-#define _MMXLrr(OP,RS,RD) __MMXLrr(OP,RS,_rM,RD,_rM)
-#define _MMXLmr(OP,MD,MB,MI,MS,RD) __MMXLmr(OP,MD,MB,MI,MS,RD,_rM)
-#define _MMXLrm(OP,RS,MD,MB,MI,MS) __MMXLrm(OP,RS,_rM,MD,MB,MI,MS)
-#define _MMXQrr(OP,RS,RD) __MMXQrr(OP,RS,_rM,RD,_rM)
-#define _MMXQmr(OP,MD,MB,MI,MS,RD) __MMXQmr(OP,MD,MB,MI,MS,RD,_rM)
-#define _MMXQrm(OP,RS,MD,MB,MI,MS) __MMXQrm(OP,RS,_rM,MD,MB,MI,MS)
-#define _2P_MMXLrr(OP,RS,RD) __MMX1Lrr(0x38, OP,RS,_rM,RD,_rM)
-#define _2P_MMXLmr(OP,MD,MB,MI,MS,RD) __MMX1Lmr(0x38, OP,MD,MB,MI,MS,RD,_rM)
-#define _2P_MMXLrm(OP,RS,MD,MB,MI,MS) __MMX1Lrm(0x38, OP,RS,_rM,MD,MB,MI,MS)
-
-#define MMX_MOVDMDrr(RS, RD) __MMXLrr(0x6e, RS,_r4, RD,_rM)
-#define MMX_MOVQMDrr(RS, RD) __MMXQrr(0x6e, RS,_r8, RD,_rM)
-#define MMX_MOVDMSrr(RS, RD) __MMXLrr(0x7e, RD,_r4, RS,_rM)
-#define MMX_MOVQMSrr(RS, RD) __MMXQrr(0x7e, RD,_r8, RS,_rM)
-
-#define MMX_MOVDmr(MD, MB, MI, MS, RD) _MMXLmr(0x6e, MD, MB, MI, MS, RD)
-#define MMX_MOVDrm(RS, MD, MB, MI, MS) _MMXLrm(0x7e, RS, MD, MB, MI, MS)
-#define MMX_MOVQrr(RS, RD) _MMXLrr(0x6f, RS, RD)
-#define MMX_MOVQmr(MD, MB, MI, MS, RD) _MMXLmr(0x6f, MD, MB, MI, MS, RD)
-#define MMX_MOVQrm(RS, MD, MB, MI, MS) _MMXLrm(0x7f, RS, MD, MB, MI, MS)
-
-// Original MMX instructions
-#define MMX_PACKSSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKSSWB,RS,RD)
-#define MMX_PACKSSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSWB, MD, MB, MI, MS, RD)
-#define MMX_PACKSSDWrr(RS, RD) _MMXLrr(X86_MMX_PACKSSDW,RS,RD)
-#define MMX_PACKSSDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKSSDW, MD, MB, MI, MS, RD)
-#define MMX_PACKUSWBrr(RS, RD) _MMXLrr(X86_MMX_PACKUSWB,RS,RD)
-#define MMX_PACKUSWBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PACKUSWB, MD, MB, MI, MS, RD)
-#define MMX_PADDBrr(RS, RD) _MMXLrr(X86_MMX_PADDB,RS,RD)
-#define MMX_PADDBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDB, MD, MB, MI, MS, RD)
-#define MMX_PADDWrr(RS, RD) _MMXLrr(X86_MMX_PADDW,RS,RD)
-#define MMX_PADDWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDW, MD, MB, MI, MS, RD)
-#define MMX_PADDDrr(RS, RD) _MMXLrr(X86_MMX_PADDD,RS,RD)
-#define MMX_PADDDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDD, MD, MB, MI, MS, RD)
-#define MMX_PADDQrr(RS, RD) _MMXLrr(X86_MMX_PADDQ,RS,RD)
-#define MMX_PADDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDQ, MD, MB, MI, MS, RD)
-#define MMX_PADDSBrr(RS, RD) _MMXLrr(X86_MMX_PADDSB,RS,RD)
-#define MMX_PADDSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSB, MD, MB, MI, MS, RD)
-#define MMX_PADDSWrr(RS, RD) _MMXLrr(X86_MMX_PADDSW,RS,RD)
-#define MMX_PADDSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDSW, MD, MB, MI, MS, RD)
-#define MMX_PADDUSBrr(RS, RD) _MMXLrr(X86_MMX_PADDUSB,RS,RD)
-#define MMX_PADDUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSB, MD, MB, MI, MS, RD)
-#define MMX_PADDUSWrr(RS, RD) _MMXLrr(X86_MMX_PADDUSW,RS,RD)
-#define MMX_PADDUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PADDUSW, MD, MB, MI, MS, RD)
-#define MMX_PANDrr(RS, RD) _MMXLrr(X86_MMX_PAND,RS,RD)
-#define MMX_PANDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAND, MD, MB, MI, MS, RD)
-#define MMX_PANDNrr(RS, RD) _MMXLrr(X86_MMX_PANDN,RS,RD)
-#define MMX_PANDNmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PANDN, MD, MB, MI, MS, RD)
-#define MMX_PAVGBrr(RS, RD) _MMXLrr(X86_MMX_PAVGB,RS,RD)
-#define MMX_PAVGBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGB, MD, MB, MI, MS, RD)
-#define MMX_PAVGWrr(RS, RD) _MMXLrr(X86_MMX_PAVGW,RS,RD)
-#define MMX_PAVGWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PAVGW, MD, MB, MI, MS, RD)
-#define MMX_PCMPEQBrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQB,RS,RD)
-#define MMX_PCMPEQBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQB, MD, MB, MI, MS, RD)
-#define MMX_PCMPEQWrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQW,RS,RD)
-#define MMX_PCMPEQWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQW, MD, MB, MI, MS, RD)
-#define MMX_PCMPEQDrr(RS, RD) _MMXLrr(X86_MMX_PCMPEQD,RS,RD)
-#define MMX_PCMPEQDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPEQD, MD, MB, MI, MS, RD)
-#define MMX_PCMPGTBrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTB,RS,RD)
-#define MMX_PCMPGTBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTB, MD, MB, MI, MS, RD)
-#define MMX_PCMPGTWrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTW,RS,RD)
-#define MMX_PCMPGTWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTW, MD, MB, MI, MS, RD)
-#define MMX_PCMPGTDrr(RS, RD) _MMXLrr(X86_MMX_PCMPGTD,RS,RD)
-#define MMX_PCMPGTDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PCMPGTD, MD, MB, MI, MS, RD)
-#define MMX_PMADDWDrr(RS, RD) _MMXLrr(X86_MMX_PMADDWD,RS,RD)
-#define MMX_PMADDWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMADDWD, MD, MB, MI, MS, RD)
-#define MMX_PMAXSWrr(RS, RD) _MMXLrr(X86_MMX_PMAXSW,RS,RD)
-#define MMX_PMAXSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXSW, MD, MB, MI, MS, RD)
-#define MMX_PMAXUBrr(RS, RD) _MMXLrr(X86_MMX_PMAXUB,RS,RD)
-#define MMX_PMAXUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMAXUB, MD, MB, MI, MS, RD)
-#define MMX_PMINSWrr(RS, RD) _MMXLrr(X86_MMX_PMINSW,RS,RD)
-#define MMX_PMINSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINSW, MD, MB, MI, MS, RD)
-#define MMX_PMINUBrr(RS, RD) _MMXLrr(X86_MMX_PMINUB,RS,RD)
-#define MMX_PMINUBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMINUB, MD, MB, MI, MS, RD)
-#define MMX_PMULHUWrr(RS, RD) _MMXLrr(X86_MMX_PMULHUW,RS,RD)
-#define MMX_PMULHUWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHUW, MD, MB, MI, MS, RD)
-#define MMX_PMULHWrr(RS, RD) _MMXLrr(X86_MMX_PMULHW,RS,RD)
-#define MMX_PMULHWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULHW, MD, MB, MI, MS, RD)
-#define MMX_PMULLWrr(RS, RD) _MMXLrr(X86_MMX_PMULLW,RS,RD)
-#define MMX_PMULLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULLW, MD, MB, MI, MS, RD)
-#define MMX_PMULUDQrr(RS, RD) _MMXLrr(X86_MMX_PMULUDQ,RS,RD)
-#define MMX_PMULUDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PMULUDQ, MD, MB, MI, MS, RD)
-#define MMX_PORrr(RS, RD) _MMXLrr(X86_MMX_POR,RS,RD)
-#define MMX_PORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_POR, MD, MB, MI, MS, RD)
-#define MMX_PSADBWrr(RS, RD) _MMXLrr(X86_MMX_PSADBW,RS,RD)
-#define MMX_PSADBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSADBW, MD, MB, MI, MS, RD)
-#define MMX_PSLLWir(IM, RD) __MMXLirr(X86_MMX_PSLLWi, IM, RD,_rM, _b110,_rN)
-#define MMX_PSLLWrr(RS, RD) _MMXLrr(X86_MMX_PSLLW,RS,RD)
-#define MMX_PSLLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLW, MD, MB, MI, MS, RD)
-#define MMX_PSLLDir(IM, RD) __MMXLirr(X86_MMX_PSLLDi, IM, RD,_rM, _b110,_rN)
-#define MMX_PSLLDrr(RS, RD) _MMXLrr(X86_MMX_PSLLD,RS,RD)
-#define MMX_PSLLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLD, MD, MB, MI, MS, RD)
-#define MMX_PSLLQir(IM, RD) __MMXLirr(X86_MMX_PSLLQi, IM, RD,_rM, _b110,_rN)
-#define MMX_PSLLQrr(RS, RD) _MMXLrr(X86_MMX_PSLLQ,RS,RD)
-#define MMX_PSLLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSLLQ, MD, MB, MI, MS, RD)
-#define MMX_PSRAWir(IM, RD) __MMXLirr(X86_MMX_PSRAWi, IM, RD,_rM, _b100,_rN)
-#define MMX_PSRAWrr(RS, RD) _MMXLrr(X86_MMX_PSRAW,RS,RD)
-#define MMX_PSRAWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAW, MD, MB, MI, MS, RD)
-#define MMX_PSRADir(IM, RD) __MMXLirr(X86_MMX_PSRADi, IM, RD,_rM, _b100,_rN)
-#define MMX_PSRADrr(RS, RD) _MMXLrr(X86_MMX_PSRAD,RS,RD)
-#define MMX_PSRADmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRAD, MD, MB, MI, MS, RD)
-#define MMX_PSRLWir(IM, RD) __MMXLirr(X86_MMX_PSRLWi, IM, RD,_rM, _b010,_rN)
-#define MMX_PSRLWrr(RS, RD) _MMXLrr(X86_MMX_PSRLW,RS,RD)
-#define MMX_PSRLWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLW, MD, MB, MI, MS, RD)
-#define MMX_PSRLDir(IM, RD) __MMXLirr(X86_MMX_PSRLDi, IM, RD,_rM, _b010,_rN)
-#define MMX_PSRLDrr(RS, RD) _MMXLrr(X86_MMX_PSRLD,RS,RD)
-#define MMX_PSRLDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLD, MD, MB, MI, MS, RD)
-#define MMX_PSRLQir(IM, RD) __MMXLirr(X86_MMX_PSRLQi, IM, RD,_rM, _b010,_rN)
-#define MMX_PSRLQrr(RS, RD) _MMXLrr(X86_MMX_PSRLQ,RS,RD)
-#define MMX_PSRLQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSRLQ, MD, MB, MI, MS, RD)
-#define MMX_PSUBBrr(RS, RD) _MMXLrr(X86_MMX_PSUBB,RS,RD)
-#define MMX_PSUBBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBB, MD, MB, MI, MS, RD)
-#define MMX_PSUBWrr(RS, RD) _MMXLrr(X86_MMX_PSUBW,RS,RD)
-#define MMX_PSUBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBW, MD, MB, MI, MS, RD)
-#define MMX_PSUBDrr(RS, RD) _MMXLrr(X86_MMX_PSUBD,RS,RD)
-#define MMX_PSUBDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBD, MD, MB, MI, MS, RD)
-#define MMX_PSUBQrr(RS, RD) _MMXLrr(X86_MMX_PSUBQ,RS,RD)
-#define MMX_PSUBQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBQ, MD, MB, MI, MS, RD)
-#define MMX_PSUBSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBSB,RS,RD)
-#define MMX_PSUBSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSB, MD, MB, MI, MS, RD)
-#define MMX_PSUBSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBSW,RS,RD)
-#define MMX_PSUBSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBSW, MD, MB, MI, MS, RD)
-#define MMX_PSUBUSBrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSB,RS,RD)
-#define MMX_PSUBUSBmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSB, MD, MB, MI, MS, RD)
-#define MMX_PSUBUSWrr(RS, RD) _MMXLrr(X86_MMX_PSUBUSW,RS,RD)
-#define MMX_PSUBUSWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PSUBUSW, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKHBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHBW,RS,RD)
-#define MMX_PUNPCKHBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHBW, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKHWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHWD,RS,RD)
-#define MMX_PUNPCKHWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHWD, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKHDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKHDQ,RS,RD)
-#define MMX_PUNPCKHDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKHDQ, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKLBWrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLBW,RS,RD)
-#define MMX_PUNPCKLBWmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLBW, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKLWDrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLWD,RS,RD)
-#define MMX_PUNPCKLWDmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLWD, MD, MB, MI, MS, RD)
-#define MMX_PUNPCKLDQrr(RS, RD) _MMXLrr(X86_MMX_PUNPCKLDQ,RS,RD)
-#define MMX_PUNPCKLDQmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PUNPCKLDQ, MD, MB, MI, MS, RD)
-#define MMX_PXORrr(RS, RD) _MMXLrr(X86_MMX_PXOR,RS,RD)
-#define MMX_PXORmr(MD,MB,MI,MS,RD) _MMXLmr(X86_MMX_PXOR, MD, MB, MI, MS, RD)
-
-#define MMX_PSHUFWirr(IM, RS, RD) __MMXLirr(X86_MMX_PSHUFW, IM, RS,_rM, RD,_rM)
-#define MMX_PSHUFWimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PSHUFW, IM, MD, MB, MI, MS, RD,_rM)
-#define MMX_PEXTRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r4)
-#define MMX_PEXTRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PEXTRW, IM, RS,_rM, RD,_r8)
-/* PINSRW with an MMX destination. In the rr forms RS goes through _r4 and RD
- * through _rM. The memory-source forms have the same destination, so RD is
- * passed with _rM there too (they previously used _r4 / _r8, the accessors
- * the rr forms apply to the source). */
-#define MMX_PINSRWLirr(IM, RS, RD) __MMXLirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM)
-#define MMX_PINSRWLimr(IM, MD, MB, MI, MS, RD) __MMXLimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_rM)
-#define MMX_PINSRWQirr(IM, RS, RD) __MMXQirr(X86_MMX_PINSRW, IM, RS,_r4, RD,_rM)
-#define MMX_PINSRWQimr(IM, MD, MB, MI, MS, RD) __MMXQimr(X86_MMX_PINSRW, IM, MD, MB, MI, MS, RD,_rM)
-
-// Additional MMX instructions, brought by SSSE3 ISA
-#define MMX_PABSBrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSB,RS,RD)
-#define MMX_PABSBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSB, MD, MB, MI, MS, RD)
-#define MMX_PABSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSW,RS,RD)
-#define MMX_PABSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSW, MD, MB, MI, MS, RD)
-#define MMX_PABSDrr(RS, RD) _2P_MMXLrr(X86_MMX_PABSD,RS,RD)
-#define MMX_PABSDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PABSD, MD, MB, MI, MS, RD)
-#define MMX_PHADDWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDW,RS,RD)
-#define MMX_PHADDWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDW, MD, MB, MI, MS, RD)
-#define MMX_PHADDDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDD,RS,RD)
-#define MMX_PHADDDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDD, MD, MB, MI, MS, RD)
-#define MMX_PHADDSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHADDSW,RS,RD)
-#define MMX_PHADDSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHADDSW, MD, MB, MI, MS, RD)
-#define MMX_PHSUBWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBW,RS,RD)
-#define MMX_PHSUBWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBW, MD, MB, MI, MS, RD)
-#define MMX_PHSUBDrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBD,RS,RD)
-#define MMX_PHSUBDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBD, MD, MB, MI, MS, RD)
-#define MMX_PHSUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PHSUBSW,RS,RD)
-#define MMX_PHSUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PHSUBSW, MD, MB, MI, MS, RD)
-#define MMX_PMADDUBSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMADDUBSW,RS,RD)
-#define MMX_PMADDUBSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMADDUBSW, MD, MB, MI, MS, RD)
-#define MMX_PMULHRSWrr(RS, RD) _2P_MMXLrr(X86_MMX_PMULHRSW,RS,RD)
-#define MMX_PMULHRSWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PMULHRSW, MD, MB, MI, MS, RD)
-#define MMX_PSHUFBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSHUFB,RS,RD)
-#define MMX_PSHUFBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSHUFB, MD, MB, MI, MS, RD)
-#define MMX_PSIGNBrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNB,RS,RD)
-#define MMX_PSIGNBmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNB, MD, MB, MI, MS, RD)
-#define MMX_PSIGNWrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGNW,RS,RD)
-#define MMX_PSIGNWmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGNW, MD, MB, MI, MS, RD)
-#define MMX_PSIGNDrr(RS, RD) _2P_MMXLrr(X86_MMX_PSIGND,RS,RD)
-#define MMX_PSIGNDmr(MD,MB,MI,MS,RD) _2P_MMXLmr(X86_MMX_PSIGND, MD, MB, MI, MS, RD)
-
-#define EMMS() _OO (0x0f77 )
-
-
-/* --- Media 128-bit instructions ------------------------------------------ */
-
-/* SSE compare predicate codes (values 0..7).
- * GT/GE/NGT/NGE deliberately share the numeric values of LT/LE/NLT/NLE.
- * NOTE(review): presumably callers using the "greater" names are expected to
- * swap the operands themselves -- confirm against the users of this enum. */
-enum {
- X86_SSE_CC_EQ = 0,
- X86_SSE_CC_LT = 1,
- X86_SSE_CC_GT = 1,
- X86_SSE_CC_LE = 2,
- X86_SSE_CC_GE = 2,
- X86_SSE_CC_U = 3,
- X86_SSE_CC_NEQ = 4,
- X86_SSE_CC_NLT = 5,
- X86_SSE_CC_NGT = 5,
- X86_SSE_CC_NLE = 6,
- X86_SSE_CC_NGE = 6,
- X86_SSE_CC_O = 7
-};
-
-enum {
- X86_SSE_UCOMI = 0x2e,
- X86_SSE_COMI = 0x2f,
- X86_SSE_CMP = 0xc2,
- X86_SSE_SQRT = 0x51,
- X86_SSE_RSQRT = 0x52,
- X86_SSE_RCP = 0x53,
- X86_SSE_AND = 0x54,
- X86_SSE_ANDN = 0x55,
- X86_SSE_OR = 0x56,
- X86_SSE_XOR = 0x57,
- X86_SSE_ADD = 0x58,
- X86_SSE_MUL = 0x59,
- X86_SSE_SUB = 0x5c,
- X86_SSE_MIN = 0x5d,
- X86_SSE_DIV = 0x5e,
- X86_SSE_MAX = 0x5f,
- X86_SSE_CVTDQ2PD = 0xe6,
- X86_SSE_CVTDQ2PS = 0x5b,
- X86_SSE_CVTPD2DQ = 0xe6,
- X86_SSE_CVTPD2PI = 0x2d,
- X86_SSE_CVTPD2PS = 0x5a,
- X86_SSE_CVTPI2PD = 0x2a,
- X86_SSE_CVTPI2PS = 0x2a,
- X86_SSE_CVTPS2DQ = 0x5b,
- X86_SSE_CVTPS2PD = 0x5a,
- X86_SSE_CVTPS2PI = 0x2d,
- X86_SSE_CVTSD2SI = 0x2d,
- X86_SSE_CVTSD2SS = 0x5a,
- X86_SSE_CVTSI2SD = 0x2a,
- X86_SSE_CVTSI2SS = 0x2a,
- X86_SSE_CVTSS2SD = 0x5a,
- X86_SSE_CVTSS2SI = 0x2d,
- X86_SSE_CVTTPD2PI = 0x2c,
- X86_SSE_CVTTPD2DQ = 0xe6,
- X86_SSE_CVTTPS2DQ = 0x5b,
- X86_SSE_CVTTPS2PI = 0x2c,
- X86_SSE_CVTTSD2SI = 0x2c,
- X86_SSE_CVTTSS2SI = 0x2c,
- X86_SSE_MOVMSK = 0x50,
- X86_SSE_PACKSSDW = 0x6b,
- X86_SSE_PACKSSWB = 0x63,
- X86_SSE_PACKUSWB = 0x67,
- X86_SSE_PADDB = 0xfc,
- X86_SSE_PADDD = 0xfe,
- X86_SSE_PADDQ = 0xd4,
- X86_SSE_PADDSB = 0xec,
- X86_SSE_PADDSW = 0xed,
- X86_SSE_PADDUSB = 0xdc,
- X86_SSE_PADDUSW = 0xdd,
- X86_SSE_PADDW = 0xfd,
- X86_SSE_PAND = 0xdb,
- X86_SSE_PANDN = 0xdf,
- X86_SSE_PAVGB = 0xe0,
- X86_SSE_PAVGW = 0xe3,
- X86_SSE_PCMPEQB = 0x74,
- X86_SSE_PCMPEQD = 0x76,
- X86_SSE_PCMPEQW = 0x75,
- X86_SSE_PCMPGTB = 0x64,
- X86_SSE_PCMPGTD = 0x66,
- X86_SSE_PCMPGTW = 0x65,
- X86_SSE_PMADDWD = 0xf5,
- X86_SSE_PMAXSW = 0xee,
- X86_SSE_PMAXUB = 0xde,
- X86_SSE_PMINSW = 0xea,
- X86_SSE_PMINUB = 0xda,
- X86_SSE_PMOVMSKB = 0xd7,
- X86_SSE_PMULHUW = 0xe4,
- X86_SSE_PMULHW = 0xe5,
- X86_SSE_PMULLW = 0xd5,
- X86_SSE_PMULUDQ = 0xf4,
- X86_SSE_POR = 0xeb,
- X86_SSE_PSADBW = 0xf6,
- X86_SSE_PSLLD = 0xf2,
- X86_SSE_PSLLQ = 0xf3,
- X86_SSE_PSLLW = 0xf1,
- X86_SSE_PSRAD = 0xe2,
- X86_SSE_PSRAW = 0xe1,
- X86_SSE_PSRLD = 0xd2,
- X86_SSE_PSRLQ = 0xd3,
- X86_SSE_PSRLW = 0xd1,
- X86_SSE_PSUBB = 0xf8,
- X86_SSE_PSUBD = 0xfa,
- X86_SSE_PSUBQ = 0xfb,
- X86_SSE_PSUBSB = 0xe8,
- X86_SSE_PSUBSW = 0xe9,
- X86_SSE_PSUBUSB = 0xd8,
- X86_SSE_PSUBUSW = 0xd9,
- X86_SSE_PSUBW = 0xf9,
- X86_SSE_PUNPCKHBW = 0x68,
- X86_SSE_PUNPCKHDQ = 0x6a,
- X86_SSE_PUNPCKHQDQ = 0x6d,
- X86_SSE_PUNPCKHWD = 0x69,
- X86_SSE_PUNPCKLBW = 0x60,
- X86_SSE_PUNPCKLDQ = 0x62,
- X86_SSE_PUNPCKLQDQ = 0x6c,
- X86_SSE_PUNPCKLWD = 0x61,
- X86_SSE_PXOR = 0xef,
- X86_SSSE3_PSHUFB = 0x00,
-};
-
-/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
-
-/*
- * SSSE3 emitters: byte 0x66, a REX helper, byte 0x0f, then the 16-bit
- * value (OP1<<8)|OP2 handed to the _OO_* emitter.
- *   Lrr  : register source (RS, checked by RSA) and destination (RD, RDA).
- *   Lmr  : memory source MD(MB,MI,MS), register destination.
- *   Lirr : as Lrr with a trailing 8-bit immediate; both registers use _rX.
- *   Limr : as Lmr with a trailing 8-bit immediate.
- * In the rr forms RD goes in the ModRM reg slot and RS in the r/m slot, so
- * the REX helper must receive (RD, RS) -- the same order _SSSE3Lirr and
- * __SSELrr use.  Passing RD twice made the r/m extension bit follow RD
- * instead of RS.
- */
-#define _SSSE3Lrr(OP1,OP2,RS,RSA,RD,RDA) (_B(0x66), _REXLrr(RD,RS), _B(0x0f), _OO_Mrm (((OP1)<<8)|(OP2) ,_b11,RDA(RD),RSA(RS) ))
-#define _SSSE3Lmr(OP1,OP2,MD,MB,MI,MS,RD,RDA) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X (((OP1)<<8)|(OP2) ,RDA(RD) ,MD,MB,MI,MS ))
-#define _SSSE3Lirr(OP1,OP2,IM,RS,RD) (_B(0x66), _REXLrr(RD, RS), _B(0x0f), _OO_Mrm_B (((OP1)<<8)|(OP2) ,_b11,_rX(RD),_rX(RS) ,_u8(IM)))
-#define _SSSE3Limr(OP1,OP2,IM,MD,MB,MI,MS,RD) (_B(0x66), _REXLmr(MB, MI, RD), _B(0x0f), _OO_r_X_B (((OP1)<<8)|(OP2) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM)))
-
-/*
- * Unprefixed SSE emitters.  Each one invokes a REX helper and then an
- * _OO_* emitter with the 16-bit opcode value 0x0f00|OP.
- *   ir / im   : MO is passed in the register slot, followed by an 8-bit
- *               immediate; the operand is register RD or memory MD(MB,MI,MS).
- *   rr        : RD (checked by RDA) in the register slot, RS (checked by
- *               RSA) in the r/m slot.
- *   mr        : memory source, register destination RD.
- *   rm        : register source RS, memory destination.
- *   irr / imr : as rr / mr with a trailing 8-bit immediate; registers are
- *               checked with _rX.
- * The "L" variants use the _REXL* helpers.
- */
-#define __SSELir(OP,MO,IM,RD) (_REXLrr(0, RD), _OO_Mrm_B (0x0f00|(OP) ,_b11,MO ,_rX(RD) ,_u8(IM)))
-#define __SSELim(OP,MO,IM,MD,MB,MI,MS) (_REXLrm(0, MB, MI), _OO_r_X_B (0x0f00|(OP) ,MO ,MD,MB,MI,MS ,_u8(IM)))
-#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-#define __SSELirr(OP,IM,RS,RD) (_REXLrr(RD, RS), _OO_Mrm_B (0x0f00|(OP) ,_b11,_rX(RD),_rX(RS) ,_u8(IM)))
-#define __SSELimr(OP,IM,MD,MB,MI,MS,RD) (_REXLmr(MB, MI, RD), _OO_r_X_B (0x0f00|(OP) ,_rX(RD) ,MD,MB,MI,MS ,_u8(IM)))
-
-/* Same as the rr/mr/rm forms above, but using the _REXQ* helpers
- * (presumably the 64-bit operand-size REX variants -- confirm). */
-#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
-#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
-#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
-
-/* Prefixed forms: emit the single byte PX first, then the matching
- * unprefixed "L" emitter. */
-#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSELrr(OP, RS, RSA, RD, RDA))
-#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA))
-#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS))
-#define _SSELir(PX,OP,MO,IM,RD) (_B(PX), __SSELir(OP, MO, IM, RD))
-#define _SSELim(PX,OP,MO,IM,MD,MB,MI,MS) (_B(PX), __SSELim(OP, MO, IM, MD, MB, MI, MS))
-#define _SSELirr(PX,OP,IM,RS,RD) (_B(PX), __SSELirr(OP, IM, RS, RD))
-#define _SSELimr(PX,OP,IM,MD,MB,MI,MS,RD) (_B(PX), __SSELimr(OP, IM, MD, MB, MI, MS, RD))
-
-/* Prefixed forms built on the "Q" emitters. */
-#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA))
-#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA))
-#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS))
-
-/*
- * Per-family shorthands in which every register operand is checked with
- * _rX.  The families differ only in the prefix byte they emit:
- *   _SSEPS* : no prefix byte
- *   _SSEPD* : 0x66
- *   _SSESS* : 0xf3
- *   _SSESD* : 0xf2
- * Suffixes: rr = register to register, mr = memory to register,
- * rm = register to memory, irr / imr = rr / mr with an 8-bit immediate.
- */
-#define _SSEPSrr(OP,RS,RD) __SSELrr( OP, RS,_rX, RD,_rX)
-#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr( OP, MD, MB, MI, MS, RD,_rX)
-#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm( OP, RS,_rX, MD, MB, MI, MS)
-#define _SSEPSirr(OP,IM,RS,RD) __SSELirr( OP, IM, RS, RD)
-#define _SSEPSimr(OP,IM,MD,MB,MI,MS,RD) __SSELimr( OP, IM, MD, MB, MI, MS, RD)
-
-#define _SSEPDrr(OP,RS,RD) _SSELrr(0x66, OP, RS,_rX, RD,_rX)
-#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0x66, OP, MD, MB, MI, MS, RD,_rX)
-#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0x66, OP, RS,_rX, MD, MB, MI, MS)
-#define _SSEPDirr(OP,IM,RS,RD) _SSELirr(0x66, OP, IM, RS, RD)
-#define _SSEPDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0x66, OP, IM, MD, MB, MI, MS, RD)
-
-#define _SSESSrr(OP,RS,RD) _SSELrr(0xf3, OP, RS,_rX, RD,_rX)
-#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf3, OP, MD, MB, MI, MS, RD,_rX)
-#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf3, OP, RS,_rX, MD, MB, MI, MS)
-#define _SSESSirr(OP,IM,RS,RD) _SSELirr(0xf3, OP, IM, RS, RD)
-#define _SSESSimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf3, OP, IM, MD, MB, MI, MS, RD)
-
-#define _SSESDrr(OP,RS,RD) _SSELrr(0xf2, OP, RS,_rX, RD,_rX)
-#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr(0xf2, OP, MD, MB, MI, MS, RD,_rX)
-#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm(0xf2, OP, RS,_rX, MD, MB, MI, MS)
-#define _SSESDirr(OP,IM,RS,RD) _SSELirr(0xf2, OP, IM, RS, RD)
-#define _SSESDimr(OP,IM,MD,MB,MI,MS,RD) _SSELimr(0xf2, OP, IM, MD, MB, MI, MS, RD)
-
-#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD)
-#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
-#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD)
-#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
-
-#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD)
-#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
-#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD)
-#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
-
-#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD)
-#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
-#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD)
-#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
-
-#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD)
-#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
-#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD)
-#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
-
-#define CMPPSrr(IM, RS, RD) _SSEPSirr(X86_SSE_CMP, IM, RS, RD)
-#define CMPPSmr(IM, MD, MB, MI, MS, RD) _SSEPSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
-#define CMPPDrr(IM, RS, RD) _SSEPDirr(X86_SSE_CMP, IM, RS, RD)
-#define CMPPDmr(IM, MD, MB, MI, MS, RD) _SSEPDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
-
-#define CMPSSrr(IM, RS, RD) _SSESSirr(X86_SSE_CMP, IM, RS, RD)
-#define CMPSSmr(IM, MD, MB, MI, MS, RD) _SSESSimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
-#define CMPSDrr(IM, RS, RD) _SSESDirr(X86_SSE_CMP, IM, RS, RD)
-#define CMPSDmr(IM, MD, MB, MI, MS, RD) _SSESDimr(X86_SSE_CMP, IM, MD, MB, MI, MS, RD)
-
-#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD)
-#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
-#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD)
-#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
-
-#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD)
-#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
-#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD)
-#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
-
-#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD)
-#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
-#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD)
-#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
-
-#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD)
-#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
-#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD)
-#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
-
-#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD)
-#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
-#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD)
-#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
-
-#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD)
-#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
-#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD)
-#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
-
-#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD)
-#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
-#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD)
-#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
-
-#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD)
-#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
-#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD)
-#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
-
-#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD)
-#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
-#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD)
-#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
-
-#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD)
-#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
-#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD)
-#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
-
-#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD)
-#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
-#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD)
-#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
-
-#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD)
-#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
-#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD)
-#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
-
-#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD)
-#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
-#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD)
-#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
-
-#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD)
-#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
-#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD)
-#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
-
-#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD)
-#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
-#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD)
-#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
-
-#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD)
-#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
-#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD)
-#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
-
-#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD)
-#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
-#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD)
-#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
-
-#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD)
-#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
-#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD)
-#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
-
-#define MOVAPSrr(RS, RD) _SSEPSrr(0x28, RS, RD)
-#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr(0x28, MD, MB, MI, MS, RD)
-#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPSrm(0x29, RS, MD, MB, MI, MS)
-
-#define MOVAPDrr(RS, RD) _SSEPDrr(0x28, RS, RD)
-#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr(0x28, MD, MB, MI, MS, RD)
-#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPDrm(0x29, RS, MD, MB, MI, MS)
-
-#define CVTDQ2PDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTDQ2PD, RS,_rX, RD,_rX)
-#define CVTDQ2PDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTDQ2PD, MD, MB, MI, MS, RD,_rX)
-#define CVTDQ2PSrr(RS, RD) __SSELrr( X86_SSE_CVTDQ2PS, RS,_rX, RD,_rX)
-#define CVTDQ2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTDQ2PS, MD, MB, MI, MS, RD,_rX)
-#define CVTPD2DQrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTPD2DQ, RS,_rX, RD,_rX)
-#define CVTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTPD2DQ, MD, MB, MI, MS, RD,_rX)
-#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PI, RS,_rX, RD,_rM)
-#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PI, MD, MB, MI, MS, RD,_rM)
-#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPD2PS, RS,_rX, RD,_rX)
-#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPD2PS, MD, MB, MI, MS, RD,_rX)
-#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPI2PD, RS,_rM, RD,_rX)
-#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPI2PD, MD, MB, MI, MS, RD,_rX)
-#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTPI2PS, RS,_rM, RD,_rX)
-#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPI2PS, MD, MB, MI, MS, RD,_rX)
-#define CVTPS2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTPS2DQ, RS,_rX, RD,_rX)
-#define CVTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTPS2DQ, MD, MB, MI, MS, RD,_rX)
-#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PD, RS,_rX, RD,_rX)
-#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PD, MD, MB, MI, MS, RD,_rX)
-#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTPS2PI, RS,_rX, RD,_rM)
-#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTPS2PI, MD, MB, MI, MS, RD,_rM)
-#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r4)
-#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r4)
-#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSD2SI, RS,_rX, RD,_r8)
-#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSD2SI, MD, MB, MI, MS, RD,_r8)
-#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD2SS, RS,_rX, RD,_rX)
-#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD2SS, MD, MB, MI, MS, RD,_rX)
-#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI2SD, RS,_r4, RD,_rX)
-#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX)
-#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI2SD, RS,_r8, RD,_rX)
-#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI2SD, MD, MB, MI, MS, RD,_rX)
-#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI2SS, RS,_r4, RD,_rX)
-#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX)
-#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI2SS, RS,_r8, RD,_rX)
-#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI2SS, MD, MB, MI, MS, RD,_rX)
-#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SD, RS,_rX, RD,_rX)
-#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SD, MD, MB, MI, MS, RD,_rX)
-#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r4)
-#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r4)
-#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSS2SI, RS,_rX, RD,_r8)
-#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSS2SI, MD, MB, MI, MS, RD,_r8)
-#define CVTTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2PI, RS,_rX, RD,_rM)
-#define CVTTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2PI, MD, MB, MI, MS, RD,_rM)
-#define CVTTPD2DQrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTTPD2DQ, RS,_rX, RD,_rX)
-#define CVTTPD2DQmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTTPD2DQ, MD, MB, MI, MS, RD,_rX)
-#define CVTTPS2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTPS2DQ, RS,_rX, RD,_rX)
-#define CVTTPS2DQmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTPS2DQ, MD, MB, MI, MS, RD,_rX)
-#define CVTTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTTPS2PI, RS,_rX, RD,_rM)
-#define CVTTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTTPS2PI, MD, MB, MI, MS, RD,_rM)
-#define CVTTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r4)
-#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r4)
-#define CVTTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTTSD2SI, RS,_rX, RD,_r8)
-#define CVTTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTTSD2SI, MD, MB, MI, MS, RD,_r8)
-#define CVTTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r4)
-#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r4)
-#define CVTTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTTSS2SI, RS,_rX, RD,_r8)
-#define CVTTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTTSS2SI, MD, MB, MI, MS, RD,_r8)
-
-#define MOVDXDrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX)
-#define MOVDXDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
-#define MOVQXDrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX)
-#define MOVQXDmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
-
-#define MOVDXSrr(RS, RD) _SSELrr(0x66, 0x7e, RD,_r4, RS,_rX)
-#define MOVDXSrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
-#define MOVQXSrr(RS, RD) _SSEQrr(0x66, 0x7e, RD,_r8, RS,_rX)
-#define MOVQXSrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
-
-#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM)
-#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM)
-#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM)
-#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM)
-
-/*
- * MOVD/MOVQ from an MMX register (RS) to a general register (RD) or to
- * memory, opcode 0x0f 0x7e.  This opcode takes the MMX source in the ModRM
- * reg slot and the destination in the r/m slot.  __SSELrr/__SSEQrr place
- * their "RD" argument in the reg slot and their "RS" argument in the r/m
- * slot, so the rr forms hand the operands over swapped -- exactly as
- * MOVDXSrr/MOVQXSrr do for the XMM variant.  The rm forms already put RS
- * in the reg slot.
- */
-#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RD,_r4, RS,_rM)
-#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS)
-#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RD,_r8, RS,_rM)
-#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS)
-
-#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, 0xd6, RS,_rX, RD,_rM)
-#define MOVMSKPSrr(RS, RD) __SSELrr( 0x50, RS,_rX, RD,_r4)
-#define MOVMSKPDrr(RS, RD) _SSELrr(0x66, 0x50, RS,_rX, RD,_r4)
-
-#define MOVHLPSrr(RS, RD) __SSELrr( 0x12, RS,_rX, RD,_rX)
-#define MOVLHPSrr(RS, RD) __SSELrr( 0x16, RS,_rX, RD,_rX)
-
-#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX)
-#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX)
-#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS)
-
-#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX)
-#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX)
-#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS)
-
-#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x16, MD, MB, MI, MS, RD,_rX)
-#define MOVHPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x17, RS,_rX, MD, MB, MI, MS)
-#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x16, MD, MB, MI, MS, RD,_rX)
-#define MOVHPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x17, RS,_rX, MD, MB, MI, MS)
-
-#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x12, MD, MB, MI, MS, RD,_rX)
-#define MOVLPDrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x13, RS,_rX, MD, MB, MI, MS)
-#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr( 0x12, MD, MB, MI, MS, RD,_rX)
-#define MOVLPSrm(RS, MD, MB, MI, MS) __SSELrm( 0x13, RS,_rX, MD, MB, MI, MS)
-
-
-/* --- Floating-Point instructions ----------------------------------------- */
-
-/*
- * x87 opcode constants.  Each value is 16 bits wide; how it is split up
- * depends on the emitter macro it is handed to (defined after this enum):
- *   - _FPU()  passes the value unchanged to _OO (instructions that take
- *             no operand);
- *   - _FPUr() keeps (OP)&0xfff8 and pairs it with a stack register;
- *   - _FPUm() uses (OP)>>8 as the opcode byte and (OP)&7 as the value
- *             placed in the register slot of the memory-operand emitter.
- * A constant used with both memory and register forms therefore packs
- * both encodings, e.g. X86_FCOM = 0xd8d2 yields opcode byte 0xd8 with
- * slot value 2 for memory operands and 0xd8d0 as the register-form base.
- * The trailing comments list the operand forms each constant is used with.
- */
-enum {
- X86_F2XM1 = 0xd9f0,
- X86_FABS = 0xd9e1,
- X86_FADD = 0xd8c0, // m32fp, m64fp, sti0, st0i, pst0i
- X86_FIADD = 0xda00, // m32int, m16int
- X86_FBLD = 0xdf04, // mem
- X86_FBSTP = 0xdf06, // mem
- X86_FCHS = 0xd9e0,
- X86_FCMOVB = 0xdac0, // sti0
- X86_FCMOVE = 0xdac8, // sti0
- X86_FCMOVBE = 0xdad0, // sti0
- X86_FCMOVU = 0xdad8, // sti0
- X86_FCMOVNB = 0xdbc0, // sti0
- X86_FCMOVNE = 0xdbc8, // sti0
- X86_FCMOVNBE = 0xdbd0, // sti0
- X86_FCMOVNU = 0xdbd8, // sti0
- X86_FCOM = 0xd8d2, // m32fp, m64fp, sti
- X86_FCOMP = 0xd8db, // m32fp, m64fp, sti
- X86_FCOMPP = 0xded9,
- X86_FCOMI = 0xdbf0, // sti0
- X86_FCOMIP = 0xdff0, // sti0
- X86_FUCOMI = 0xdbe8, // sti0
- X86_FUCOMIP = 0xdfe8, // sti0
- X86_FCOS = 0xd9ff,
- X86_FDECSTP = 0xd9f6,
- X86_FDIV = 0xd8f6, // m32fp, m64fp, sti0, st0i, pst0i
- X86_FIDIV = 0xda06, // m32int, m16int
- X86_FDIVR = 0xd8ff, // m32fp, m64fp, sti0, st0i, pst0i
- X86_FIDIVR = 0xda07, // m32int, m16int
- X86_FFREE = 0xddc0, // sti
- X86_FICOM = 0xda02, // m32int, m16int
- X86_FICOMP = 0xda03, // m32int, m16int
- X86_FILD = 0xdb00, // m32int, m16int
- X86_FILDQ = 0xdf05, // mem
- X86_FINCSTP = 0xd9f7,
- X86_FIST = 0xdb02, // m32int, m16int
- X86_FISTP = 0xdb03, // m32int, m16int
- X86_FISTPQ = 0xdf07, // mem
- X86_FISTTP = 0xdb01, // m32int, m16int
- X86_FISTTPQ = 0xdd01, // mem
- X86_FLD = 0xd900, // m32fp, m64fp
- X86_FLDT = 0xdb05, // mem
- X86_FLD1 = 0xd9e8,
- X86_FLDL2T = 0xd9e9,
- X86_FLDL2E = 0xd9ea,
- X86_FLDPI = 0xd9eb,
- X86_FLDLG2 = 0xd9ec,
- X86_FLDLN2 = 0xd9ed,
- X86_FLDZ = 0xd9ee,
- X86_FMUL = 0xd8c9, // m32fp, m64fp, sti0, st0i, pst0i
- X86_FIMUL = 0xda01, // m32int, m16int
- X86_FNOP = 0xd9d0,
- X86_FPATAN = 0xd9f3,
- X86_FPREM = 0xd9f8,
- X86_FPREM1 = 0xd9f5,
- X86_FPTAN = 0xd9f2,
- X86_FRNDINT = 0xd9fc,
- X86_FSCALE = 0xd9fd,
- X86_FSIN = 0xd9fe,
- X86_FSINCOS = 0xd9fb,
- X86_FSQRT = 0xd9fa,
- X86_FSTS = 0xd902, // mem
- X86_FSTD = 0xdd02, // mem
- X86_FST = 0xddd0, // sti
- X86_FSTPS = 0xd903, // mem
- X86_FSTPD = 0xdd03, // mem
- X86_FSTPT = 0xdb07, // mem
- X86_FSTP = 0xddd8, // sti
- X86_FSUB = 0xd8e4, // m32fp, m64fp, sti0, st0i, pst0i
- X86_FISUB = 0xda04, // m32int, m16int
- X86_FSUBR = 0xd8ed, // m32fp, m64fp, sti0, st0i, pst0i
- X86_FISUBR = 0xda05, // m32int, m16int
- X86_FTST = 0xd9e4,
- X86_FUCOM = 0xdde0, // sti
- X86_FUCOMP = 0xdde8, // sti
- X86_FUCOMPP = 0xdae9,
- X86_FXAM = 0xd9e5,
- X86_FXCH = 0xd9c8, // sti
- X86_FXTRACT = 0xd9f4,
- X86_FYL2X = 0xd9f1,
- X86_FYL2XP1 = 0xd9f9,
-};
-
-/*
- * x87 emitters operating on the X86_F* constants.
- *   _FPU    : OP passed unchanged to _OO.
- *   _FPUm   : memory operand MD(MB,MI,MS); (OP)>>8 is the opcode byte and
- *             (OP)&7 fills the register slot of _O_r_X.
- *   _FPUSm, _FPULm : identical to _FPUm.
- *   _FPUDm, _FPUWm : _FPUm with OP|0x400 (0xd8.. becomes 0xdc..,
- *             0xda.. becomes 0xde.., 0xdb.. becomes 0xdf..).
- *   _FPUr   : (OP)&0xfff8 together with stack register RR (checked by _rF).
- *   _FPUr0  : _FPUr with OP unchanged.
- *   _FPU0r  : _FPUr with OP|0x400.
- *   _FPUP0r : _FPU0r with OP|0x200 in addition.
- *   _FPUrr  : selects _FPU0r when RS is st0, _FPUr0 when RD is st0, and
- *             otherwise calls x86_emit_failure().
- * NOTE(review): with these rules FSUB0r/FSUBP0r start from 0xdce0/0xdee0
- * and FDIV0r/FDIVP0r from 0xdcf0/0xdef0.  The Intel SDM lists those bases
- * as FSUBR/FSUBRP and FDIVR/FDIVRP (and vice versa for the *R macros);
- * presumably this follows the AT&T assembler mnemonics -- confirm before
- * relying on the operand order of the st(i)-destination forms.
- */
-#define _FPU(OP) _OO(OP)
-#define _FPUm(OP, MD, MB, MI, MS) (_REXLrm(0, MB, MI), _O_r_X((OP)>>8, (OP)&7, MD, MB, MI, MS))
-#define _FPUSm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS)
-#define _FPUDm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS)
-#define _FPULm(OP, MD, MB, MI, MS) _FPUm(OP, MD, MB, MI, MS)
-#define _FPUWm(OP, MD, MB, MI, MS) _FPUm((OP)|0x400, MD, MB, MI, MS)
-#define _FPUr(OP, RR) _OOr((OP)&0xfff8, _rF(RR))
-#define _FPU0r(OP, RD) _FPUr((OP)|0x400, RD)
-#define _FPUr0(OP, RS) _FPUr((OP) , RS)
-#define _FPUrr(OP, RS, RD) (_rST0P(RS) ? _FPU0r(OP, RD) : (_rST0P(RD) ? _FPUr0(OP, RS) : x86_emit_failure("FPU instruction without st0")))
-#define _FPUP0r(OP, RD) _FPU0r((OP)|0x200, RD)
-
-#define F2XM1() _FPU(X86_F2XM1)
-#define FABS() _FPU(X86_FABS)
-#define FADDSm(MD, MB, MI, MS) _FPUSm(X86_FADD, MD, MB, MI, MS)
-#define FADDDm(MD, MB, MI, MS) _FPUDm(X86_FADD, MD, MB, MI, MS)
-#define FADDP0r(RD) _FPUP0r(X86_FADD, RD)
-#define FADDrr(RS, RD) _FPUrr(X86_FADD, RS, RD)
-#define FADD0r(RD) _FPU0r(X86_FADD, RD)
-#define FADDr0(RS) _FPUr0(X86_FADD, RS)
-#define FIADDWm(MD, MB, MI, MS) _FPUWm(X86_FIADD, MD, MB, MI, MS)
-#define FIADDLm(MD, MB, MI, MS) _FPULm(X86_FIADD, MD, MB, MI, MS)
-#define FBLDm(MD, MB, MI, MS) _FPUm(X86_FBLD, MD, MB, MI, MS)
-#define FBSTPm(MD, MB, MI, MS) _FPUm(X86_FBSTP, MD, MB, MI, MS)
-#define FCHS() _FPU(X86_FCHS)
-#define FCMOVBr0(RS) _FPUr0(X86_FCMOVB, RS)
-#define FCMOVEr0(RS) _FPUr0(X86_FCMOVE, RS)
-#define FCMOVBEr0(RS) _FPUr0(X86_FCMOVBE, RS)
-#define FCMOVUr0(RS) _FPUr0(X86_FCMOVU, RS)
-#define FCMOVNBr0(RS) _FPUr0(X86_FCMOVNB, RS)
-#define FCMOVNEr0(RS) _FPUr0(X86_FCMOVNE, RS)
-#define FCMOVNBEr0(RS) _FPUr0(X86_FCMOVNBE, RS)
-#define FCMOVNUr0(RS) _FPUr0(X86_FCMOVNU, RS)
-#define FCOMSm(MD, MB, MI, MS) _FPUSm(X86_FCOM, MD, MB, MI, MS)
-#define FCOMDm(MD, MB, MI, MS) _FPUDm(X86_FCOM, MD, MB, MI, MS)
-#define FCOMr(RD) _FPUr(X86_FCOM, RD)
-#define FCOMPSm(MD, MB, MI, MS) _FPUSm(X86_FCOMP, MD, MB, MI, MS)
-#define FCOMPDm(MD, MB, MI, MS) _FPUDm(X86_FCOMP, MD, MB, MI, MS)
-#define FCOMPr(RD) _FPUr(X86_FCOMP, RD)
-#define FCOMPP() _FPU(X86_FCOMPP)
-#define FCOMIr0(RS) _FPUr0(X86_FCOMI, RS)
-#define FCOMIPr0(RS) _FPUr0(X86_FCOMIP, RS)
-#define FUCOMIr0(RS) _FPUr0(X86_FUCOMI, RS)
-#define FUCOMIPr0(RS) _FPUr0(X86_FUCOMIP, RS)
-#define FCOS() _FPU(X86_FCOS)
-#define FDECSTP() _FPU(X86_FDECSTP)
-#define FDIVSm(MD, MB, MI, MS) _FPUSm(X86_FDIV, MD, MB, MI, MS)
-#define FDIVDm(MD, MB, MI, MS) _FPUDm(X86_FDIV, MD, MB, MI, MS)
-#define FDIVP0r(RD) _FPUP0r(X86_FDIV, RD)
-#define FDIVrr(RS, RD) _FPUrr(X86_FDIV, RS, RD)
-#define FDIV0r(RD) _FPU0r(X86_FDIV, RD)
-#define FDIVr0(RS) _FPUr0(X86_FDIV, RS)
-#define FIDIVWm(MD, MB, MI, MS) _FPUWm(X86_FIDIV, MD, MB, MI, MS)
-#define FIDIVLm(MD, MB, MI, MS) _FPULm(X86_FIDIV, MD, MB, MI, MS)
-#define FDIVRSm(MD, MB, MI, MS) _FPUSm(X86_FDIVR, MD, MB, MI, MS)
-#define FDIVRDm(MD, MB, MI, MS) _FPUDm(X86_FDIVR, MD, MB, MI, MS)
-#define FDIVRP0r(RD) _FPUP0r(X86_FDIVR, RD)
-#define FDIVRrr(RS, RD) _FPUrr(X86_FDIVR, RS, RD)
-#define FDIVR0r(RD) _FPU0r(X86_FDIVR, RD)
-#define FDIVRr0(RS) _FPUr0(X86_FDIVR, RS)
-#define FIDIVRWm(MD, MB, MI, MS) _FPUWm(X86_FIDIVR, MD, MB, MI, MS)
-#define FIDIVRLm(MD, MB, MI, MS) _FPULm(X86_FIDIVR, MD, MB, MI, MS)
-#define FFREEr(RD) _FPUr(X86_FFREE, RD)
-#define FICOMWm(MD, MB, MI, MS) _FPUWm(X86_FICOM, MD, MB, MI, MS)
-#define FICOMLm(MD, MB, MI, MS) _FPULm(X86_FICOM, MD, MB, MI, MS)
-#define FICOMPWm(MD, MB, MI, MS) _FPUWm(X86_FICOMP, MD, MB, MI, MS)
-#define FICOMPLm(MD, MB, MI, MS) _FPULm(X86_FICOMP, MD, MB, MI, MS)
-#define FILDWm(MD, MB, MI, MS) _FPUWm(X86_FILD, MD, MB, MI, MS)
-#define FILDLm(MD, MB, MI, MS) _FPULm(X86_FILD, MD, MB, MI, MS)
-#define FILDQm(MD, MB, MI, MS) _FPUm(X86_FILDQ, MD, MB, MI, MS)
-#define FINCSTP() _FPU(X86_FINCSTP)
-#define FISTWm(MD, MB, MI, MS) _FPUWm(X86_FIST, MD, MB, MI, MS)
-#define FISTLm(MD, MB, MI, MS) _FPULm(X86_FIST, MD, MB, MI, MS)
-#define FISTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTP, MD, MB, MI, MS)
-#define FISTPLm(MD, MB, MI, MS) _FPULm(X86_FISTP, MD, MB, MI, MS)
-#define FISTPQm(MD, MB, MI, MS) _FPUm(X86_FISTPQ, MD, MB, MI, MS)
-#define FISTTPWm(MD, MB, MI, MS) _FPUWm(X86_FISTTP, MD, MB, MI, MS)
-#define FISTTPLm(MD, MB, MI, MS) _FPULm(X86_FISTTP, MD, MB, MI, MS)
-#define FISTTPQm(MD, MB, MI, MS) _FPUm(X86_FISTTPQ, MD, MB, MI, MS)
-#define FLDSm(MD, MB, MI, MS) _FPUSm(X86_FLD, MD, MB, MI, MS)
-#define FLDDm(MD, MB, MI, MS) _FPUDm(X86_FLD, MD, MB, MI, MS)
-#define FLDTm(MD, MB, MI, MS) _FPUm(X86_FLDT, MD, MB, MI, MS)
-#define FLD1() _FPU(X86_FLD1)
-#define FLDL2T() _FPU(X86_FLDL2T)
-#define FLDL2E() _FPU(X86_FLDL2E)
-#define FLDPI() _FPU(X86_FLDPI)
-#define FLDLG2() _FPU(X86_FLDLG2)
-#define FLDLN2() _FPU(X86_FLDLN2)
-#define FLDZ() _FPU(X86_FLDZ)
-#define FMULSm(MD, MB, MI, MS) _FPUSm(X86_FMUL, MD, MB, MI, MS)
-#define FMULDm(MD, MB, MI, MS) _FPUDm(X86_FMUL, MD, MB, MI, MS)
-#define FMULP0r(RD) _FPUP0r(X86_FMUL, RD)
-#define FMULrr(RS, RD) _FPUrr(X86_FMUL, RS, RD)
-#define FMUL0r(RD) _FPU0r(X86_FMUL, RD)
-#define FMULr0(RS) _FPUr0(X86_FMUL, RS)
-#define FIMULWm(MD, MB, MI, MS) _FPUWm(X86_FIMUL, MD, MB, MI, MS)
-#define FIMULLm(MD, MB, MI, MS) _FPULm(X86_FIMUL, MD, MB, MI, MS)
-#define FNOP() _FPU(X86_FNOP)
-#define FPATAN() _FPU(X86_FPATAN)
-#define FPREM() _FPU(X86_FPREM)
-#define FPREM1() _FPU(X86_FPREM1)
-#define FPTAN() _FPU(X86_FPTAN)
-#define FRNDINT() _FPU(X86_FRNDINT)
-#define FSCALE() _FPU(X86_FSCALE)
-#define FSIN() _FPU(X86_FSIN)
-#define FSINCOS() _FPU(X86_FSINCOS)
-#define FSQRT() _FPU(X86_FSQRT)
-#define FSTSm(MD, MB, MI, MS) _FPUm(X86_FSTS, MD, MB, MI, MS)
-#define FSTDm(MD, MB, MI, MS) _FPUm(X86_FSTD, MD, MB, MI, MS)
-#define FSTr(RD) _FPUr(X86_FST, RD)
-#define FSTPSm(MD, MB, MI, MS) _FPUm(X86_FSTPS, MD, MB, MI, MS)
-#define FSTPDm(MD, MB, MI, MS) _FPUm(X86_FSTPD, MD, MB, MI, MS)
-#define FSTPTm(MD, MB, MI, MS) _FPUm(X86_FSTPT, MD, MB, MI, MS)
-#define FSTPr(RD) _FPUr(X86_FSTP, RD)
-#define FSUBSm(MD, MB, MI, MS) _FPUSm(X86_FSUB, MD, MB, MI, MS)
-#define FSUBDm(MD, MB, MI, MS) _FPUDm(X86_FSUB, MD, MB, MI, MS)
-#define FSUBP0r(RD) _FPUP0r(X86_FSUB, RD)
-#define FSUBrr(RS, RD) _FPUrr(X86_FSUB, RS, RD)
-#define FSUB0r(RD) _FPU0r(X86_FSUB, RD)
-#define FSUBr0(RS) _FPUr0(X86_FSUB, RS)
-#define FISUBWm(MD, MB, MI, MS) _FPUWm(X86_FISUB, MD, MB, MI, MS)
-#define FISUBLm(MD, MB, MI, MS) _FPULm(X86_FISUB, MD, MB, MI, MS)
-#define FSUBRSm(MD, MB, MI, MS) _FPUSm(X86_FSUBR, MD, MB, MI, MS)
-#define FSUBRDm(MD, MB, MI, MS) _FPUDm(X86_FSUBR, MD, MB, MI, MS)
-#define FSUBRP0r(RD) _FPUP0r(X86_FSUBR, RD)
-#define FSUBRrr(RS, RD) _FPUrr(X86_FSUBR, RS, RD)
-#define FSUBR0r(RD) _FPU0r(X86_FSUBR, RD)
-#define FSUBRr0(RS) _FPUr0(X86_FSUBR, RS)
-#define FISUBRWm(MD, MB, MI, MS) _FPUWm(X86_FISUBR, MD, MB, MI, MS)
-#define FISUBRLm(MD, MB, MI, MS) _FPULm(X86_FISUBR, MD, MB, MI, MS)
-#define FTST() _FPU(X86_FTST)
-#define FUCOMr(RD) _FPUr(X86_FUCOM, RD)
-#define FUCOMPr(RD) _FPUr(X86_FUCOMP, RD)
-#define FUCOMPP() _FPU(X86_FUCOMPP)
-#define FXAM() _FPU(X86_FXAM)
-#define FXCHr(RD) _FPUr(X86_FXCH, RD)
-#define FXTRACT() _FPU(X86_FXTRACT)
-#define FYL2X() _FPU(X86_FYL2X)
-#define FYL2XP1() _FPU(X86_FYL2XP1)
-
-#endif /* X86_RTASM_H */
+++ /dev/null
-/*
- * compiler/compemu.h - Public interface and definitions
- *
- * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
- *
- * Adaptation for Basilisk II and improvements, copyright 2000-2005
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef COMPEMU_H
-#define COMPEMU_H
-
-#include "newcpu.h"
-
-#if USE_JIT
-
-#if defined __i386__ || defined __x86_64__
-#include "flags_x86.h"
-#else
-#error "Unsupported JIT compiler for this architecture"
-#endif
-
-#if JIT_DEBUG
-/* dump some information (m68k block, x86 block addresses) about the compiler state */
-extern void compiler_dumpstate(void);
-#endif
-
-/* Now that we do block chaining, and also have linked lists on each tag,
- TAGMASK can be much smaller and still do its job. Saves several megs
- of memory! */
-/* TAGMASK selects the low 16 address bits; TAGSIZE is the number of
-   distinct values those bits can take. */
-#define TAGMASK 0x0000ffff
-#define TAGSIZE (TAGMASK+1)
-#define MAXRUN 1024
-/* Reduce an address to its tag index.  The argument is parenthesized
-   before the cast so that an expression such as cacheline(p + n) converts
-   the whole sum instead of only its first operand. */
-#define cacheline(x) (((uintptr)(x))&TAGMASK)
-
-extern uae_u8* start_pc_p;
-extern uae_u32 start_pc;
-
-struct blockinfo_t;
-
-/* One history record: a single pointer to uae_u16 data
-   (presumably the 68k opcode word that was executed -- confirm). */
-struct cpu_history {
- uae_u16 *location;
-};
-
-/* Element type of cache_tags[]: the same slot is read either as a
-   cpuop_func pointer or as a blockinfo_t pointer. */
-union cacheline {
- cpuop_func *handler;
- blockinfo_t *bi;
-};
-
-/* Use new spill/reload strategy when calling external functions */
-#define USE_OPTIMIZED_CALLS 0
-#if USE_OPTIMIZED_CALLS
-#error implementation in progress
-#endif
-
-/* (gb) When on, this option can save up to 30% compilation time
- * when many lazy flushes occur (e.g. apps in MacOS 8.x).
- */
-#define USE_SEPARATE_BIA 1
-
-/* Use chain of checksum_info_t to compute the block checksum */
-#define USE_CHECKSUM_INFO 1
-
-/* Use code inlining, aka follow-up of constant jumps */
-#define USE_INLINING 1
-
-/* Inlining requires the chained checksumming information */
-#if USE_INLINING
-#undef USE_CHECKSUM_INFO
-#define USE_CHECKSUM_INFO 1
-#endif
-
-/* Does flush_icache_range() only check for blocks falling in the requested range? */
-#define LAZY_FLUSH_ICACHE_RANGE 0
-
-#define USE_F_ALIAS 1
-#define USE_OFFSET 1
-#define COMP_DEBUG 1
-
-/* Dif(x) guards a debug-only statement: it expands to `if (x)` when
-   COMP_DEBUG is nonzero and to `if (0)` otherwise.  Because the expansion
-   is a bare `if`, an `else` written after the guarded statement binds to
-   it. */
-#if COMP_DEBUG
-#define Dif(x) if (x)
-#else
-#define Dif(x) if (0)
-#endif
-
-#define SCALE 2
-
-#define BYTES_PER_INST 10240 /* paranoid ;-) */
-#define LONGEST_68K_INST 16 /* The number of bytes the longest possible
- 68k instruction takes */
-#define MAX_CHECKSUM_LEN 2048 /* The maximum size we calculate checksums
- for. Anything larger will be flushed
- unconditionally even with SOFT_FLUSH */
-#define MAX_HOLD_BI 3 /* One for the current block, and up to two
- for jump targets */
-
-#define INDIVIDUAL_INST 0
-/* Bit masks for the X, N, Z, V and C flags.  The `#if 1` branch is the
-   layout in use; the `#else` branch is an alternative bit order that is
-   currently compiled out. */
-#if 1
-// gb-- my format from readcpu.cpp is not the same
-#define FLAG_X 0x0010
-#define FLAG_N 0x0008
-#define FLAG_Z 0x0004
-#define FLAG_V 0x0002
-#define FLAG_C 0x0001
-#else
-#define FLAG_C 0x0010
-#define FLAG_V 0x0008
-#define FLAG_Z 0x0004
-#define FLAG_N 0x0002
-#define FLAG_X 0x0001
-#endif
-/* Convenience unions of the masks above (every flag except X, and
-   every flag except X and C). */
-#define FLAG_CZNV (FLAG_C | FLAG_Z | FLAG_N | FLAG_V)
-#define FLAG_ZNV (FLAG_Z | FLAG_N | FLAG_V)
-
-#define KILLTHERAT 1 /* Set to 1 to avoid some partial_rat_stalls */
-
-#if defined(__x86_64__)
-#define N_REGS 16 /* really only 15, but they are numbered 0-3,5-15 */
-#else
-#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
-#endif
-#define N_FREGS 6 /* That leaves us two positions on the stack to play with */
-
-/* Functions exposed to newcpu, or to what was moved from newcpu.c to
- * compemu_support.c */
-extern void compiler_init(void);
-extern void compiler_exit(void);
-extern bool compiler_use_jit(void);
-extern void init_comp(void);
-extern void flush(int save_regs);
-extern void small_flush(int save_regs);
-extern void set_target(uae_u8* t);
-extern uae_u8* get_target(void);
-extern void freescratch(void);
-extern void build_comp(void);
-extern void set_cache_state(int enabled);
-extern int get_cache_state(void);
-extern uae_u32 get_jitted_size(void);
-extern void (*flush_icache)(int n);
-extern void alloc_cache(void);
-extern int check_for_cache_miss(void);
-
-/* JIT FPU compilation */
-extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
-extern void comp_fbcc_opp (uae_u32 opcode);
-extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
-
-extern uae_u32 needed_flags;
-extern cacheline cache_tags[];
-extern uae_u8* comp_pc_p;
-extern void* pushall_call_handler;
-
-#define VREGS 32
-#define VFREGS 16
-
-#define INMEM 1
-#define CLEAN 2
-#define DIRTY 3
-#define UNDEF 4
-#define ISCONST 5
-
-typedef struct { /* Bookkeeping for one virtual integer register; bigstate.state[] holds VREGS of these. */
- uae_u32* mem; /* NOTE(review): presumably the memory location backing this register -- confirm */
- uae_u32 val; /* NOTE(review): presumably the known value while status is ISCONST -- confirm */
- uae_u8 is_swapped;
- uae_u8 status; /* NOTE(review): presumably one of INMEM/CLEAN/DIRTY/UNDEF/ISCONST defined just above -- confirm */
- uae_s8 realreg; /* gb-- realreg can hold -1 */
- uae_u8 realind; /* The index in the holds[] array */
- uae_u8 needflush; /* NOTE(review): presumably one of the NF_* "needflush values" -- confirm */
- uae_u8 validsize;
- uae_u8 dirtysize;
- uae_u8 dummy;
-} reg_status;
-
-typedef struct { /* Bookkeeping for one virtual FP register; bigstate.fate[] holds VFREGS of these. */
- uae_u32* mem; /* NOTE(review): presumably the memory location backing this register -- confirm */
- double val;
- uae_u8 status; /* NOTE(review): presumably uses the same status codes as reg_status -- confirm */
- uae_s8 realreg; /* gb-- realreg can hold -1 */
- uae_u8 realind; /* NOTE(review): presumably an index into fn_status.holds[], mirroring reg_status.realind -- confirm */
- uae_u8 needflush;
-} freg_status;
-
-#define PC_P 16
-#define FLAGX 17
-#define FLAGTMP 18
-#define NEXT_HANDLER 19
-#define S1 20
-#define S2 21
-#define S3 22
-#define S4 23
-#define S5 24
-#define S6 25
-#define S7 26
-#define S8 27
-#define S9 28
-#define S10 29
-#define S11 30
-#define S12 31
-
-#define FP_RESULT 8
-#define FS1 9
-#define FS2 10
-#define FS3 11
-
-typedef struct { /* Bookkeeping for one native integer register; bigstate.nat[] holds N_REGS of these. */
- uae_u32 touched;
- uae_s8 holds[VREGS]; /* the array that reg_status.realind is documented to index */
- uae_u8 nholds; /* NOTE(review): presumably the number of entries in use in holds[] -- confirm */
- uae_u8 canbyte;
- uae_u8 canword;
- uae_u8 locked;
-} n_status;
-
-typedef struct { /* Bookkeeping for one native FP register; bigstate.fat[] holds N_FREGS of these. */
- uae_u32 touched;
- uae_s8 holds[VFREGS]; /* sized for every virtual FP register */
- uae_u8 nholds; /* NOTE(review): presumably the number of entries in use in holds[] -- confirm */
- uae_u8 locked;
-} fn_status;
-
-/* For flag handling */
-#define NADA 1
-#define TRASH 2
-#define VALID 3
-
-/* needflush values */
-#define NF_SCRATCH 0
-#define NF_TOMEM 1
-#define NF_HANDLER 2
-
-typedef struct { /* Complete register-allocation state; the global 'live' declared below has this type. */
- /* Integer part */
- reg_status state[VREGS]; /* one entry per virtual integer register */
- n_status nat[N_REGS]; /* one entry per native integer register */
- uae_u32 flags_on_stack; /* NOTE(review): presumably takes the NADA/TRASH/VALID values listed under "For flag handling" -- confirm */
- uae_u32 flags_in_flags; /* NOTE(review): presumably takes the NADA/TRASH/VALID values as well -- confirm */
- uae_u32 flags_are_important;
- /* FPU part */
- freg_status fate[VFREGS]; /* one entry per virtual FP register */
- fn_status fat[N_FREGS]; /* one entry per native FP register */
-
- /* x86 FPU part */
- uae_s8 spos[N_FREGS];
- uae_s8 onstack[6]; /* literal 6 here; N_FREGS is also defined as 6 */
- uae_s8 tos;
-} bigstate;
-
-typedef struct { /* Compact per-register state, one char per register; embedded in blockinfo_t as 'env'. */
- /* Integer part */
- char virt[VREGS]; /* one entry per virtual integer register */
- char nat[N_REGS]; /* one entry per native integer register */
-} smallstate;
-
-extern bigstate live;
-extern int touchcnt;
-
-
-#define IMM uae_s32
-#define R1 uae_u32
-#define R2 uae_u32
-#define R4 uae_u32
-#define W1 uae_u32
-#define W2 uae_u32
-#define W4 uae_u32
-#define RW1 uae_u32
-#define RW2 uae_u32
-#define RW4 uae_u32
-#define MEMR uae_u32
-#define MEMW uae_u32
-#define MEMRW uae_u32
-
-#define FW uae_u32
-#define FR uae_u32
-#define FRW uae_u32
-
-#define MIDFUNC(nargs,func,args) void func args
-#define MENDFUNC(nargs,func,args)
-#define COMPCALL(func) func
-
-#define LOWFUNC(flags,mem,nargs,func,args) static __inline__ void func args
-#define LENDFUNC(flags,mem,nargs,func,args)
-
-/* What we expose to the outside */
-#define DECLARE_MIDFUNC(func) extern void func
-DECLARE_MIDFUNC(bt_l_ri(R4 r, IMM i));
-DECLARE_MIDFUNC(bt_l_rr(R4 r, R4 b));
-DECLARE_MIDFUNC(btc_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(btc_l_rr(RW4 r, R4 b));
-DECLARE_MIDFUNC(bts_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(bts_l_rr(RW4 r, R4 b));
-DECLARE_MIDFUNC(btr_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(btr_l_rr(RW4 r, R4 b));
-DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s));
-DECLARE_MIDFUNC(call_r(R4 r));
-DECLARE_MIDFUNC(sub_l_mi(IMM d, IMM s));
-DECLARE_MIDFUNC(mov_l_mi(IMM d, IMM s));
-DECLARE_MIDFUNC(mov_w_mi(IMM d, IMM s));
-DECLARE_MIDFUNC(mov_b_mi(IMM d, IMM s));
-DECLARE_MIDFUNC(rol_b_ri(RW1 r, IMM i));
-DECLARE_MIDFUNC(rol_w_ri(RW2 r, IMM i));
-DECLARE_MIDFUNC(rol_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(rol_l_rr(RW4 d, R1 r));
-DECLARE_MIDFUNC(rol_w_rr(RW2 d, R1 r));
-DECLARE_MIDFUNC(rol_b_rr(RW1 d, R1 r));
-DECLARE_MIDFUNC(shll_l_rr(RW4 d, R1 r));
-DECLARE_MIDFUNC(shll_w_rr(RW2 d, R1 r));
-DECLARE_MIDFUNC(shll_b_rr(RW1 d, R1 r));
-DECLARE_MIDFUNC(ror_b_ri(R1 r, IMM i));
-DECLARE_MIDFUNC(ror_w_ri(R2 r, IMM i));
-DECLARE_MIDFUNC(ror_l_ri(R4 r, IMM i));
-DECLARE_MIDFUNC(ror_l_rr(R4 d, R1 r));
-DECLARE_MIDFUNC(ror_w_rr(R2 d, R1 r));
-DECLARE_MIDFUNC(ror_b_rr(R1 d, R1 r));
-DECLARE_MIDFUNC(shrl_l_rr(RW4 d, R1 r));
-DECLARE_MIDFUNC(shrl_w_rr(RW2 d, R1 r));
-DECLARE_MIDFUNC(shrl_b_rr(RW1 d, R1 r));
-DECLARE_MIDFUNC(shra_l_rr(RW4 d, R1 r));
-DECLARE_MIDFUNC(shra_w_rr(RW2 d, R1 r));
-DECLARE_MIDFUNC(shra_b_rr(RW1 d, R1 r));
-DECLARE_MIDFUNC(shll_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(shll_w_ri(RW2 r, IMM i));
-DECLARE_MIDFUNC(shll_b_ri(RW1 r, IMM i));
-DECLARE_MIDFUNC(shrl_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(shrl_w_ri(RW2 r, IMM i));
-DECLARE_MIDFUNC(shrl_b_ri(RW1 r, IMM i));
-DECLARE_MIDFUNC(shra_l_ri(RW4 r, IMM i));
-DECLARE_MIDFUNC(shra_w_ri(RW2 r, IMM i));
-DECLARE_MIDFUNC(shra_b_ri(RW1 r, IMM i));
-DECLARE_MIDFUNC(setcc(W1 d, IMM cc));
-DECLARE_MIDFUNC(setcc_m(IMM d, IMM cc));
-DECLARE_MIDFUNC(cmov_b_rr(RW1 d, R1 s, IMM cc));
-DECLARE_MIDFUNC(cmov_w_rr(RW2 d, R2 s, IMM cc));
-DECLARE_MIDFUNC(cmov_l_rr(RW4 d, R4 s, IMM cc));
-DECLARE_MIDFUNC(cmov_l_rm(RW4 d, IMM s, IMM cc));
-DECLARE_MIDFUNC(bsf_l_rr(W4 d, R4 s));
-DECLARE_MIDFUNC(pop_m(IMM d));
-DECLARE_MIDFUNC(push_m(IMM d));
-DECLARE_MIDFUNC(pop_l(W4 d));
-DECLARE_MIDFUNC(push_l_i(IMM i));
-DECLARE_MIDFUNC(push_l(R4 s));
-DECLARE_MIDFUNC(clear_16(RW4 r));
-DECLARE_MIDFUNC(clear_8(RW4 r));
-DECLARE_MIDFUNC(sign_extend_16_rr(W4 d, R2 s));
-DECLARE_MIDFUNC(sign_extend_8_rr(W4 d, R1 s));
-DECLARE_MIDFUNC(zero_extend_16_rr(W4 d, R2 s));
-DECLARE_MIDFUNC(zero_extend_8_rr(W4 d, R1 s));
-DECLARE_MIDFUNC(imul_64_32(RW4 d, RW4 s));
-DECLARE_MIDFUNC(mul_64_32(RW4 d, RW4 s));
-DECLARE_MIDFUNC(imul_32_32(RW4 d, R4 s));
-DECLARE_MIDFUNC(mul_32_32(RW4 d, R4 s));
-DECLARE_MIDFUNC(mov_b_rr(W1 d, R1 s));
-DECLARE_MIDFUNC(mov_w_rr(W2 d, R2 s));
-DECLARE_MIDFUNC(mov_l_rrm_indexed(W4 d,R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_w_rrm_indexed(W2 d, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_b_rrm_indexed(W1 d, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_l_mrr_indexed(R4 baser, R4 index, IMM factor, R4 s));
-DECLARE_MIDFUNC(mov_w_mrr_indexed(R4 baser, R4 index, IMM factor, R2 s));
-DECLARE_MIDFUNC(mov_b_mrr_indexed(R4 baser, R4 index, IMM factor, R1 s));
-DECLARE_MIDFUNC(mov_l_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R4 s));
-DECLARE_MIDFUNC(mov_w_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R2 s));
-DECLARE_MIDFUNC(mov_b_bmrr_indexed(IMM base, R4 baser, R4 index, IMM factor, R1 s));
-DECLARE_MIDFUNC(mov_l_brrm_indexed(W4 d, IMM base, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_w_brrm_indexed(W2 d, IMM base, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_b_brrm_indexed(W1 d, IMM base, R4 baser, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_l_rm_indexed(W4 d, IMM base, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_l_rR(W4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_w_rR(W2 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_b_rR(W1 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_l_brR(W4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_w_brR(W2 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_b_brR(W1 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_l_Ri(R4 d, IMM i, IMM offset));
-DECLARE_MIDFUNC(mov_w_Ri(R4 d, IMM i, IMM offset));
-DECLARE_MIDFUNC(mov_b_Ri(R4 d, IMM i, IMM offset));
-DECLARE_MIDFUNC(mov_l_Rr(R4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_w_Rr(R4 d, R2 s, IMM offset));
-DECLARE_MIDFUNC(mov_b_Rr(R4 d, R1 s, IMM offset));
-DECLARE_MIDFUNC(lea_l_brr(W4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, R4 s, R4 index, IMM factor, IMM offset));
-DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, R4 s, R4 index, IMM factor));
-DECLARE_MIDFUNC(mov_l_bRr(R4 d, R4 s, IMM offset));
-DECLARE_MIDFUNC(mov_w_bRr(R4 d, R2 s, IMM offset));
-DECLARE_MIDFUNC(mov_b_bRr(R4 d, R1 s, IMM offset));
-DECLARE_MIDFUNC(bswap_32(RW4 r));
-DECLARE_MIDFUNC(bswap_16(RW2 r));
-DECLARE_MIDFUNC(mov_l_rr(W4 d, R4 s));
-DECLARE_MIDFUNC(mov_l_mr(IMM d, R4 s));
-DECLARE_MIDFUNC(mov_w_mr(IMM d, R2 s));
-DECLARE_MIDFUNC(mov_w_rm(W2 d, IMM s));
-DECLARE_MIDFUNC(mov_b_mr(IMM d, R1 s));
-DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s));
-DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s));
-DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s));
-DECLARE_MIDFUNC(mov_b_ri(W1 d, IMM s));
-DECLARE_MIDFUNC(add_l_mi(IMM d, IMM s) );
-DECLARE_MIDFUNC(add_w_mi(IMM d, IMM s) );
-DECLARE_MIDFUNC(add_b_mi(IMM d, IMM s) );
-DECLARE_MIDFUNC(test_l_ri(R4 d, IMM i));
-DECLARE_MIDFUNC(test_l_rr(R4 d, R4 s));
-DECLARE_MIDFUNC(test_w_rr(R2 d, R2 s));
-DECLARE_MIDFUNC(test_b_rr(R1 d, R1 s));
-DECLARE_MIDFUNC(and_l_ri(RW4 d, IMM i));
-DECLARE_MIDFUNC(and_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(and_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(and_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(or_l_rm(RW4 d, IMM s));
-DECLARE_MIDFUNC(or_l_ri(RW4 d, IMM i));
-DECLARE_MIDFUNC(or_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(or_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(or_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(adc_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(adc_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(adc_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(add_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(add_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(add_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(sub_l_ri(RW4 d, IMM i));
-DECLARE_MIDFUNC(sub_w_ri(RW2 d, IMM i));
-DECLARE_MIDFUNC(sub_b_ri(RW1 d, IMM i));
-DECLARE_MIDFUNC(add_l_ri(RW4 d, IMM i));
-DECLARE_MIDFUNC(add_w_ri(RW2 d, IMM i));
-DECLARE_MIDFUNC(add_b_ri(RW1 d, IMM i));
-DECLARE_MIDFUNC(sbb_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(sbb_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(sbb_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(sub_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(sub_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(sub_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(cmp_l(R4 d, R4 s));
-DECLARE_MIDFUNC(cmp_l_ri(R4 r, IMM i));
-DECLARE_MIDFUNC(cmp_w(R2 d, R2 s));
-DECLARE_MIDFUNC(cmp_b(R1 d, R1 s));
-DECLARE_MIDFUNC(xor_l(RW4 d, R4 s));
-DECLARE_MIDFUNC(xor_w(RW2 d, R2 s));
-DECLARE_MIDFUNC(xor_b(RW1 d, R1 s));
-DECLARE_MIDFUNC(live_flags(void));
-DECLARE_MIDFUNC(dont_care_flags(void));
-DECLARE_MIDFUNC(duplicate_carry(void));
-DECLARE_MIDFUNC(restore_carry(void));
-DECLARE_MIDFUNC(start_needflags(void));
-DECLARE_MIDFUNC(end_needflags(void));
-DECLARE_MIDFUNC(make_flags_live(void));
-DECLARE_MIDFUNC(call_r_11(R4 r, W4 out1, R4 in1, IMM osize, IMM isize));
-DECLARE_MIDFUNC(call_r_02(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2));
-DECLARE_MIDFUNC(forget_about(W4 r));
-DECLARE_MIDFUNC(nop(void));
-
-DECLARE_MIDFUNC(f_forget_about(FW r));
-DECLARE_MIDFUNC(fmov_pi(FW r));
-DECLARE_MIDFUNC(fmov_log10_2(FW r));
-DECLARE_MIDFUNC(fmov_log2_e(FW r));
-DECLARE_MIDFUNC(fmov_loge_2(FW r));
-DECLARE_MIDFUNC(fmov_1(FW r));
-DECLARE_MIDFUNC(fmov_0(FW r));
-DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m));
-DECLARE_MIDFUNC(fmovi_rm(FW r, MEMR m));
-DECLARE_MIDFUNC(fmovi_mr(MEMW m, FR r));
-DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m));
-DECLARE_MIDFUNC(fmovs_mr(MEMW m, FR r));
-DECLARE_MIDFUNC(fmov_mr(MEMW m, FR r));
-DECLARE_MIDFUNC(fmov_ext_mr(MEMW m, FR r));
-DECLARE_MIDFUNC(fmov_ext_rm(FW r, MEMR m));
-DECLARE_MIDFUNC(fmov_rr(FW d, FR s));
-DECLARE_MIDFUNC(fldcw_m_indexed(R4 index, IMM base));
-DECLARE_MIDFUNC(ftst_r(FR r));
-DECLARE_MIDFUNC(dont_care_fflags(void));
-DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s));
-DECLARE_MIDFUNC(fabs_rr(FW d, FR s));
-DECLARE_MIDFUNC(frndint_rr(FW d, FR s));
-DECLARE_MIDFUNC(fsin_rr(FW d, FR s));
-DECLARE_MIDFUNC(fcos_rr(FW d, FR s));
-DECLARE_MIDFUNC(ftwotox_rr(FW d, FR s));
-DECLARE_MIDFUNC(fetox_rr(FW d, FR s));
-DECLARE_MIDFUNC(flog2_rr(FW d, FR s));
-DECLARE_MIDFUNC(fneg_rr(FW d, FR s));
-DECLARE_MIDFUNC(fadd_rr(FRW d, FR s));
-DECLARE_MIDFUNC(fsub_rr(FRW d, FR s));
-DECLARE_MIDFUNC(fmul_rr(FRW d, FR s));
-DECLARE_MIDFUNC(frem_rr(FRW d, FR s));
-DECLARE_MIDFUNC(frem1_rr(FRW d, FR s));
-DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s));
-DECLARE_MIDFUNC(fcmp_rr(FR d, FR s));
-DECLARE_MIDFUNC(fflags_into_flags(W2 tmp));
-#undef DECLARE_MIDFUNC
-
-extern int failure;
-#define FAIL(x) do { failure|=x; } while (0)
-
-/* Convenience functions exposed to gencomp */
-extern uae_u32 m68k_pc_offset;
-extern void readbyte(int address, int dest, int tmp);
-extern void readword(int address, int dest, int tmp);
-extern void readlong(int address, int dest, int tmp);
-extern void writebyte(int address, int source, int tmp);
-extern void writeword(int address, int source, int tmp);
-extern void writelong(int address, int source, int tmp);
-extern void writeword_clobber(int address, int source, int tmp);
-extern void writelong_clobber(int address, int source, int tmp);
-extern void get_n_addr(int address, int dest, int tmp);
-extern void get_n_addr_jmp(int address, int dest, int tmp);
-extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
-/* Set native Z flag only if register is zero */
-extern void set_zero(int r, int tmp);
-extern int kill_rodent(int r);
-extern void sync_m68k_pc(void);
-extern uae_u32 get_const(int r);
-extern int is_const(int r);
-extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond);
-
-#define comp_get_ibyte(o) do_get_mem_byte((uae_u8 *)(comp_pc_p + (o) + 1))
-#define comp_get_iword(o) do_get_mem_word((uae_u16 *)(comp_pc_p + (o)))
-#define comp_get_ilong(o) do_get_mem_long((uae_u32 *)(comp_pc_p + (o)))
-
-struct blockinfo_t;
-
-typedef struct dep_t { /* Link between two blocks; blockinfo_t embeds two of these in dep[] and chains others through deplist. */
- uae_u32* jmp_off; /* NOTE(review): presumably the location of the jump offset belonging to this link -- confirm */
- struct blockinfo_t* target; /* block on the target side of the link */
- struct blockinfo_t* source; /* block on the source side of the link */
- struct dep_t** prev_p; /* NOTE(review): presumably the address of the pointer that refers to this node, for O(1) unlinking -- confirm */
- struct dep_t* next; /* next node in the list */
-} dependency;
-
-typedef struct checksum_info_t { /* One node of the chain that USE_CHECKSUM_INFO uses to compute a block checksum; blockinfo_t::csi points at such a chain. */
- uae_u8 *start_p; /* NOTE(review): presumably the start of one checksummed memory range -- confirm */
- uae_u32 length; /* NOTE(review): presumably the length of that range in bytes -- confirm */
- struct checksum_info_t *next; /* next node in the chain */
-} checksum_info;
-
-typedef struct blockinfo_t { /* Bookkeeping record for one compiled block. */
- uae_s32 count;
- cpuop_func* direct_handler_to_use;
- cpuop_func* handler_to_use;
- /* The direct handler does not check for the correct address */
-
- cpuop_func* handler;
- cpuop_func* direct_handler;
-
- cpuop_func* direct_pen;
- cpuop_func* direct_pcc;
-
- uae_u8* pc_p; /* NOTE(review): presumably the host pointer to the 68k code this block was built from -- confirm */
-
- uae_u32 c1; /* NOTE(review): c1/c2 are presumably the stored checksum words -- confirm */
- uae_u32 c2;
-#if USE_CHECKSUM_INFO
- checksum_info *csi; /* head of the checksum_info chain for this block */
-#else
- uae_u32 len;
- uae_u32 min_pcp;
-#endif
-
- struct blockinfo_t* next_same_cl; /* NOTE(review): "cl" presumably stands for cache line, i.e. blocks sharing a cache_tags slot -- confirm */
- struct blockinfo_t** prev_same_cl_p;
- struct blockinfo_t* next;
- struct blockinfo_t** prev_p;
-
- uae_u8 optlevel;
- uae_u8 needed_flags;
- uae_u8 status; /* NOTE(review): presumably one of the BI_* constants defined right after this struct -- confirm */
- uae_u8 havestate;
-
- dependency dep[2]; /* Holds things we depend on */
- dependency* deplist; /* List of things that depend on this */
- smallstate env; /* compact register state stored with the block */
-
-#if JIT_DEBUG
- /* (gb) size of the compiled block (direct handler) */
- uae_u32 direct_handler_size;
-#endif
-} blockinfo;
-
-#define BI_INVALID 0
-#define BI_ACTIVE 1
-#define BI_NEED_RECOMP 2
-#define BI_NEED_CHECK 3
-#define BI_CHECKING 4
-#define BI_COMPILING 5
-#define BI_FINALIZING 6
-
-void execute_normal(void);
-void exec_nostats(void);
-void do_nothing(void);
-
-#else
-
-static __inline__ void flush_icache(int) { } /* Empty stand-in: ignores its argument; lives in the #else branch closed by the "!USE_JIT" #endif below. */
-static __inline__ void build_comp() { } /* Empty stand-in: does nothing; lives in the #else branch closed by the "!USE_JIT" #endif below. */
-
-#endif /* !USE_JIT */
-
-#endif /* COMPEMU_H */
+++ /dev/null
-/*
- * compiler/compemu_fpp.cpp - Dynamic translation of FPU instructions
- *
- * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
- *
- * Adaptation for Basilisk II and improvements, copyright 2000-2005
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * UAE - The Un*x Amiga Emulator
- *
- * MC68881 emulation
- *
- * Copyright 1996 Herman ten Brugge
- * Adapted for JIT compilation (c) Bernd Meyer, 2000
- */
-
-#include "sysdeps.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#include "memory.h"
-#include "readcpu.h"
-#include "newcpu.h"
-#include "main.h"
-#include "compiler/compemu.h"
-#include "fpu/fpu.h"
-#include "fpu/flags.h"
-#include "fpu/exceptions.h"
-#include "fpu/rounding.h"
-
-#define DEBUG 0
-#include "debug.h"
-
-// gb-- WARNING: get_fpcr() and set_fpcr() support is experimental
-#define HANDLE_FPCR 0
-
-// - IEEE-based fpu core must be used
-#if defined(FPU_IEEE)
-# define CAN_HANDLE_FPCR
-#endif
-
-// - Generic rounding mode and precision modes are supported if set together
-#if defined(FPU_USE_GENERIC_ROUNDING_MODE) && defined(FPU_USE_GENERIC_ROUNDING_PRECISION)
-# define CAN_HANDLE_FPCR
-#endif
-
-// - X86 rounding mode and precision modes are *not* supported but might work (?!)
-#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION)
-# define CAN_HANDLE_FPCR
-#endif
-
-#if HANDLE_FPCR && !defined(CAN_HANDLE_FPCR)
-# warning "Can't handle FPCR, will FAIL(1) at runtime"
-# undef HANDLE_FPCR
-# define HANDLE_FPCR 0
-#endif
-
-#define STATIC_INLINE static inline
-#define MAKE_FPSR(r) do { fmov_rr(FP_RESULT,r); } while (0)
-
-#define delay nop() ;nop()
-#define delay2 nop() ;nop()
-
-#define UNKNOWN_EXTRA 0xFFFFFFFF
-static void fpuop_illg(uae_u32 opcode, uae_u32 extra) /* Hand an FPU opcode to op_illg(); 'extra' is only referenced by the disabled debug printout below. */
-{
-/*
- if (extra == UNKNOWN_EXTRA)
- printf("FPU opcode %x, extra UNKNOWN_EXTRA\n",opcode & 0xFFFF);
- else
- printf("FPU opcode %x, extra %x\n",opcode & 0xFFFF,extra & 0xFFFF);
-*/
- op_illg(opcode);
-}
-
-static uae_s32 temp_fp[4]; /* To convert between FP/integer */
-
-/* Emit code that brings the source operand of an FPU instruction into an
- * FP register.
- *
- * opcode  68k opcode word: bits 5-3 are the addressing mode, bits 2-0 the
- *         register number.
- * extra   extension word: with bit 0x4000 clear the operand already lives
- *         in the FP register numbered by bits 12-10; with it set, bits
- *         12-10 are the operand size code (6 byte, 4 word, 0 long,
- *         1 single, 5 double, 2 extended, 3 packed).
- *
- * Returns the register number holding the operand (FS1 whenever code had
- * to be emitted), or -1 for failure.  Packed operands, address register
- * direct mode and mode 7 / register 3 are not handled.
- */
-STATIC_INLINE int get_fp_value (uae_u32 opcode, uae_u16 extra)
-{
-    int size;
-    int mode;
-    int reg;
-    uae_u32 ad = 0;
-    /* Operand length in bytes per size code.  sz2 is the variant used for
-       register 7 and for immediates, where a byte occupies two bytes. */
-    static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
-    static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
-
-    if ((extra & 0x4000) == 0) {
-        return ((extra >> 10) & 7);
-    }
-
-    mode = (opcode >> 3) & 7;
-    reg = opcode & 7;
-    size = (extra >> 10) & 7;
-    switch (mode) {
-    case 0:
-        /* Operand is an integer register: bounce it through temp_fp */
-        switch (size) {
-        case 6:
-            sign_extend_8_rr(S1,reg);
-            mov_l_mr((uintptr)temp_fp,S1);
-            delay2;
-            fmovi_rm(FS1,(uintptr)temp_fp);
-            return FS1;
-        case 4:
-            sign_extend_16_rr(S1,reg);
-            mov_l_mr((uintptr)temp_fp,S1);
-            delay2;
-            fmovi_rm(FS1,(uintptr)temp_fp);
-            return FS1;
-        case 0:
-            mov_l_mr((uintptr)temp_fp,reg);
-            delay2;
-            fmovi_rm(FS1,(uintptr)temp_fp);
-            return FS1;
-        case 1:
-            mov_l_mr((uintptr)temp_fp,reg);
-            delay2;
-            fmovs_rm(FS1,(uintptr)temp_fp);
-            return FS1;
-        default:
-            return -1;
-        }
-        return -1; /* Should be unreachable */
-    case 1:
-        return -1; /* Genuine invalid instruction */
-    default:
-        break;
-    }
-    /* OK, we *will* have to load something from an address. Let's make
-       sure we know how to handle that, or quit early --- i.e. *before*
-       we do any postincrement/predecrement that we may regret */
-
-    switch (size) {
-    case 3:
-        return -1;
-    case 0:
-    case 1:
-    case 2:
-    case 4:
-    case 5:
-    case 6:
-        break;
-    default:
-        return -1;
-    }
-
-    /* Compute the operand address into S1 */
-    switch (mode) {
-    case 2:
-        ad=S1; /* We will change it, anyway ;-) */
-        mov_l_rr(ad,reg+8);
-        break;
-    case 3:
-        ad=S1;
-        mov_l_rr(ad,reg+8);
-        lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size]));
-        break;
-    case 4:
-        ad=S1;
-
-        lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size]));
-        mov_l_rr(ad,reg+8);
-        break;
-    case 5:
-        {
-            uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
-            ad=S1;
-            mov_l_rr(ad,reg+8);
-            lea_l_brr(ad,ad,off);
-            break;
-        }
-    case 6:
-        {
-            uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2);
-            ad=S1;
-            calc_disp_ea_020(reg+8,dp,ad,S2);
-            break;
-        }
-    case 7:
-        switch (reg) {
-        case 0:
-            {
-                uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
-                ad=S1;
-                mov_l_ri(ad,off);
-                break;
-            }
-        case 1:
-            {
-                uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4);
-                ad=S1;
-                mov_l_ri(ad,off);
-                break;
-            }
-        case 2:
-            {
-                /* 'address' is taken before the displacement word is consumed */
-                uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
-                    m68k_pc_offset;
-                uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
-                ad=S1;
-                mov_l_ri(ad,address+PC16off);
-                break;
-            }
-        case 3:
-            /* Not handled by the compiler */
-            return -1;
-        case 4:
-            {
-                uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+ m68k_pc_offset;
-                ad=S1;
-                // Immediate addressing mode && Operation Length == Byte ->
-                // Use the low-order byte of the extension word.
-                if (size == 6) address++;
-                mov_l_ri(ad,address);
-                m68k_pc_offset+=sz2[size];
-                break;
-            }
-        default:
-            return -1;
-        }
-    }
-
-    /* Fetch the operand through temp_fp and load it into FS1 */
-    switch (size) {
-    case 0:
-        readlong(ad,S2,S3);
-        mov_l_mr((uintptr)temp_fp,S2);
-        delay2;
-        fmovi_rm(FS1,(uintptr)temp_fp);
-        break;
-    case 1:
-        readlong(ad,S2,S3);
-        mov_l_mr((uintptr)temp_fp,S2);
-        delay2;
-        fmovs_rm(FS1,(uintptr)temp_fp);
-        break;
-    case 2:
-        /* Three reads, stored into temp_fp at offsets 8, 4 and 0 */
-        readword(ad,S2,S3);
-        mov_w_mr(((uintptr)temp_fp)+8,S2);
-        add_l_ri(ad,4);
-        readlong(ad,S2,S3);
-        mov_l_mr((uintptr)(temp_fp)+4,S2);
-        add_l_ri(ad,4);
-        readlong(ad,S2,S3);
-        mov_l_mr((uintptr)(temp_fp),S2);
-        delay2;
-        fmov_ext_rm(FS1,(uintptr)(temp_fp));
-        break;
-    case 3:
-        return -1; /* Some silly "packed" stuff */
-    case 4:
-        readword(ad,S2,S3);
-        sign_extend_16_rr(S2,S2);
-        mov_l_mr((uintptr)temp_fp,S2);
-        delay2;
-        fmovi_rm(FS1,(uintptr)temp_fp);
-        break;
-    case 5:
-        /* Two longs, stored into temp_fp at offsets 4 and 0 */
-        readlong(ad,S2,S3);
-        mov_l_mr(((uintptr)temp_fp)+4,S2);
-        add_l_ri(ad,4);
-        readlong(ad,S2,S3);
-        mov_l_mr((uintptr)(temp_fp),S2);
-        delay2;
-        fmov_rm(FS1,(uintptr)(temp_fp));
-        break;
-    case 6:
-        readbyte(ad,S2,S3);
-        sign_extend_8_rr(S2,S2);
-        mov_l_mr((uintptr)temp_fp,S2);
-        delay2;
-        fmovi_rm(FS1,(uintptr)temp_fp);
-        break;
-    default:
-        return -1;
-    }
-    return FS1;
-}
-
-/* Emit code that stores FP register 'val' to the destination of an FPU
- * instruction.
- *
- * val     FP register holding the value to store.
- * opcode  68k opcode word: bits 5-3 are the addressing mode, bits 2-0 the
- *         register number.
- * extra   extension word: with bit 0x4000 clear the destination is the FP
- *         register numbered by bits 12-10 (the value is also copied to
- *         FP_RESULT through MAKE_FPSR); with it set, bits 12-10 are the
- *         operand size code (6 byte, 4 word, 0 long, 1 single, 5 double,
- *         2 extended, 3 packed).
- *
- * return of -1 means failure, >=0 means OK.  Packed operands, address
- * register direct mode and mode 7 / register 3 are not handled.
- */
-STATIC_INLINE int put_fp_value (int val, uae_u32 opcode, uae_u16 extra)
-{
-    int size;
-    int mode;
-    int reg;
-    uae_u32 ad;
-    /* Operand length in bytes per size code.  sz2 is the variant used for
-       register 7 and for mode 7 / register 4, where a byte occupies two
-       bytes. */
-    static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
-    static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
-
-    if ((extra & 0x4000) == 0) {
-        const int dest_reg = (extra >> 10) & 7;
-        fmov_rr(dest_reg, val);
-        // gb-- status register is affected
-        MAKE_FPSR(dest_reg);
-        return 0;
-    }
-
-    mode = (opcode >> 3) & 7;
-    reg = opcode & 7;
-    size = (extra >> 10) & 7;
-    ad = (uae_u32)-1;
-    switch (mode) {
-    case 0:
-        /* Destination is an integer register: bounce through temp_fp */
-        switch (size) {
-        case 6:
-            fmovi_mr((uintptr)temp_fp,val);
-            delay;
-            mov_b_rm(reg,(uintptr)temp_fp);
-            return 0;
-        case 4:
-            fmovi_mr((uintptr)temp_fp,val);
-            delay;
-            mov_w_rm(reg,(uintptr)temp_fp);
-            return 0;
-        case 0:
-            fmovi_mr((uintptr)temp_fp,val);
-            delay;
-            mov_l_rm(reg,(uintptr)temp_fp);
-            return 0;
-        case 1:
-            fmovs_mr((uintptr)temp_fp,val);
-            delay;
-            mov_l_rm(reg,(uintptr)temp_fp);
-            return 0;
-        default:
-            return -1;
-        }
-    case 1:
-        return -1; /* genuine invalid instruction */
-    default: break;
-    }
-
-    /* Let's make sure we get out *before* doing something silly if
-       we can't handle the size */
-    switch (size) {
-    case 0:
-    case 4:
-    case 5:
-    case 6:
-    case 2:
-    case 1:
-        break;
-    case 3:
-    default:
-        return -1;
-    }
-
-    /* Compute the destination address into S1 */
-    switch (mode) {
-    case 2:
-        ad=S1;
-        mov_l_rr(ad,reg+8);
-        break;
-    case 3:
-        ad=S1;
-        mov_l_rr(ad,reg+8);
-        lea_l_brr(reg+8,reg+8,(reg == 7?sz2[size]:sz1[size]));
-        break;
-    case 4:
-        ad=S1;
-        lea_l_brr(reg+8,reg+8,-(reg == 7?sz2[size]:sz1[size]));
-        mov_l_rr(ad,reg+8);
-        break;
-    case 5:
-        {
-            uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
-            ad=S1;
-            mov_l_rr(ad,reg+8);
-            add_l_ri(ad,off);
-            break;
-        }
-    case 6:
-        {
-            uae_u32 dp=comp_get_iword((m68k_pc_offset+=2)-2);
-            ad=S1;
-            calc_disp_ea_020(reg+8,dp,ad,S2);
-            break;
-        }
-    case 7:
-        switch (reg) {
-        case 0:
-            {
-                uae_u32 off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
-                ad=S1;
-                mov_l_ri(ad,off);
-                break;
-            }
-        case 1:
-            {
-                uae_u32 off=comp_get_ilong((m68k_pc_offset+=4)-4);
-                ad=S1;
-                mov_l_ri(ad,off);
-                break;
-            }
-        case 2:
-            {
-                /* 'address' is taken before the displacement word is consumed */
-                uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
-                    m68k_pc_offset;
-                uae_s32 PC16off =(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
-                ad=S1;
-                mov_l_ri(ad,address+PC16off);
-                break;
-            }
-        case 3:
-            /* Not handled by the compiler */
-            return -1;
-        case 4:
-            {
-                uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+
-                    m68k_pc_offset;
-                ad=S1;
-                mov_l_ri(ad,address);
-                m68k_pc_offset+=sz2[size];
-                break;
-            }
-        default:
-            return -1;
-        }
-    }
-
-    /* Convert through temp_fp and write the result out */
-    switch (size) {
-    case 0:
-        fmovi_mr((uintptr)temp_fp,val);
-        delay;
-        mov_l_rm(S2,(uintptr)temp_fp);
-        writelong_clobber(ad,S2,S3);
-        break;
-    case 1:
-        fmovs_mr((uintptr)temp_fp,val);
-        delay;
-        mov_l_rm(S2,(uintptr)temp_fp);
-        writelong_clobber(ad,S2,S3);
-        break;
-    case 2:
-        /* Three writes, taken from temp_fp at offsets 8, 4 and 0 */
-        fmov_ext_mr((uintptr)temp_fp,val);
-        delay;
-        mov_w_rm(S2,(uintptr)temp_fp+8);
-        writeword_clobber(ad,S2,S3);
-        add_l_ri(ad,4);
-        mov_l_rm(S2,(uintptr)temp_fp+4);
-        writelong_clobber(ad,S2,S3);
-        add_l_ri(ad,4);
-        mov_l_rm(S2,(uintptr)temp_fp);
-        writelong_clobber(ad,S2,S3);
-        break;
-    case 3: return -1; /* Packed */
-
-    case 4:
-        fmovi_mr((uintptr)temp_fp,val);
-        delay;
-        mov_l_rm(S2,(uintptr)temp_fp);
-        writeword_clobber(ad,S2,S3);
-        break;
-    case 5:
-        /* Two longs, taken from temp_fp at offsets 4 and 0 */
-        fmov_mr((uintptr)temp_fp,val);
-        delay;
-        mov_l_rm(S2,(uintptr)temp_fp+4);
-        writelong_clobber(ad,S2,S3);
-        add_l_ri(ad,4);
-        mov_l_rm(S2,(uintptr)temp_fp);
-        writelong_clobber(ad,S2,S3);
-        break;
-    case 6:
-        fmovi_mr((uintptr)temp_fp,val);
-        delay;
-        mov_l_rm(S2,(uintptr)temp_fp);
-        writebyte(ad,S2,S3);
-        break;
-    default:
-        return -1;
-    }
-    return 0;
-}
-
-/* Emit code that computes the effective address selected by 'opcode'
- * (bits 5-3 addressing mode, bits 2-0 register) into S1.
- *
- * return -1 for failure, or register number (always S1) for success.
- * Modes 0, 1 and 6 and mode 7 with register >= 2 are not handled.
- * Modes 2, 3 and 4 all yield the unmodified address register.
- *
- * 'ad' is never written: every statement that stored through it sat
- * behind an unconditional return.  It stays in the signature for the
- * benefit of existing callers.
- */
-STATIC_INLINE int get_fp_ad (uae_u32 opcode, uae_u32 * ad)
-{
-    int mode;
-    int reg;
-    uae_s32 off;
-
-    (void)ad;
-
-    mode = (opcode >> 3) & 7;
-    reg = opcode & 7;
-    switch (mode) {
-    case 0:
-    case 1:
-        return -1;
-    case 2:
-    case 3:
-    case 4:
-        mov_l_rr(S1,8+reg);
-        return S1;
-    case 5:
-        off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
-
-        mov_l_rr(S1,8+reg);
-        add_l_ri(S1,off);
-        return S1;
-    case 6:
-        return -1;
-    case 7:
-        switch (reg) {
-        case 0:
-            off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
-            mov_l_ri(S1,off);
-            return S1;
-        case 1:
-            off=comp_get_ilong((m68k_pc_offset+=4)-4);
-            mov_l_ri(S1,off);
-            return S1;
-        case 2:
-            /* A PC-relative variant used to follow an early return here
-               and was therefore never executed; still rejected. */
-            return -1;
-        case 3:
-            return -1;
-        default:
-            return -1;
-        }
-    }
-    /* Not reached: every mode value 0-7 returns above */
-    abort();
-}
-
-/* FDBcc is not compiled: flag a failure and emit nothing. */
-void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
-{
-    FAIL(1);
-}
-
-/* Compile FScc with an integer register destination: the low byte of the
- * register gets 255 when the condition holds and 0 otherwise.
- *
- * opcode  68k opcode word; bits 5-3 must be zero (any other addressing
- *         mode is rejected with FAIL(1)), bits 2-0 name the register.
- * extra   extension word; bits 3-0 select the condition, bit 0x20 set is
- *         rejected with FAIL(1).
- *
- * The former memory-destination branch was removed: it could never run
- * (non-zero mode bits return early), and it called abort(), read an
- * uninitialized condition variable and tested get_fp_ad() against 0
- * although that function reports failure as -1.
- */
-void comp_fscc_opp (uae_u32 opcode, uae_u16 extra)
-{
-    int reg;
-
-#if DEBUG_FPP
-    printf ("fscc_opp at %08lx\n", m68k_getpc ());
-    fflush (stdout);
-#endif
-
-
-    if (extra&0x20) { /* only cc from 00 to 1f are defined */
-        FAIL(1);
-        return;
-    }
-    if ((opcode & 0x38) != 0) { /* We can only do to integer register */
-        FAIL(1);
-        return;
-    }
-
-    fflags_into_flags(S2);
-    reg=(opcode&7);
-
-    /* S1 = value for "true", S4 = result, starting out as "false" */
-    mov_l_ri(S1,255);
-    mov_l_ri(S4,0);
-    switch(extra&0x0f) { /* according to fpp.c, the 0x10 bit is ignored
-                 */
-    case 0: break; /* set never */
-    case 1: mov_l_rr(S2,S4);
-        cmov_l_rr(S4,S1,4);
-        cmov_l_rr(S4,S2,10); break;
-    case 2: cmov_l_rr(S4,S1,7); break;
-    case 3: cmov_l_rr(S4,S1,3); break;
-    case 4: mov_l_rr(S2,S4);
-        cmov_l_rr(S4,S1,2);
-        cmov_l_rr(S4,S2,10); break;
-    case 5: mov_l_rr(S2,S4);
-        cmov_l_rr(S4,S1,6);
-        cmov_l_rr(S4,S2,10); break;
-    case 6: cmov_l_rr(S4,S1,5); break;
-    case 7: cmov_l_rr(S4,S1,11); break;
-    case 8: cmov_l_rr(S4,S1,10); break;
-    case 9: cmov_l_rr(S4,S1,4); break;
-    case 10: cmov_l_rr(S4,S1,10); cmov_l_rr(S4,S1,7); break;
-    case 11: cmov_l_rr(S4,S1,4); cmov_l_rr(S4,S1,3); break;
-    case 12: cmov_l_rr(S4,S1,2); break;
-    case 13: cmov_l_rr(S4,S1,6); break;
-    case 14: cmov_l_rr(S4,S1,5); cmov_l_rr(S4,S1,10); break;
-    case 15: mov_l_rr(S4,S1); break;
-    }
-
-    /* The mode bits are known to be zero here */
-    mov_b_rr(reg,S4);
-}
-
-/* FTRAPcc is not compiled: flag a failure and emit nothing.  Neither
- * parameter is examined. */
-void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc)
-{
-    FAIL(1);
-    return;
-}
-
-void comp_fbcc_opp (uae_u32 opcode) /* Compile an FPU conditional branch.
- * Bits 3-0 of opcode select the condition, bit 0x20 set is rejected
- * with FAIL(1), and bit 0x40 selects a 32-bit instead of a
- * sign-extended 16-bit displacement.  Conditions expressed with one
- * native condition code go to register_branch(); the remaining ones
- * are resolved with conditional moves into PC_P. */
-{
- uae_u32 start_68k_offset=m68k_pc_offset; /* offset at which the displacement starts */
- uae_u32 off;
- uae_u32 v1;
- uae_u32 v2;
- uae_u32 nh; /* NOTE(review): never used in this function */
- int cc;
-
- // comp_pc_p is expected to be bound to 32-bit addresses
- assert((uintptr)comp_pc_p <= 0xffffffffUL);
-
- if (opcode&0x20) { /* only cc from 00 to 1f are defined */
- FAIL(1);
- return;
- }
- if ((opcode&0x40)==0) {
- off=(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
- }
- else {
- off=comp_get_ilong((m68k_pc_offset+=4)-4);
- }
- mov_l_ri(S1,(uintptr)
- (comp_pc_p+off-(m68k_pc_offset-start_68k_offset))); /* the subtraction cancels the displacement size that the add_l_ri(S1,...) below adds back */
- mov_l_ri(PC_P,(uintptr)comp_pc_p);
-
- /* Now they are both constant. Might as well fold in m68k_pc_offset */
- add_l_ri(S1,m68k_pc_offset);
- add_l_ri(PC_P,m68k_pc_offset);
- m68k_pc_offset=0;
-
- /* according to fpp.c, the 0x10 bit is ignored
- (it handles exception handling, which we don't
- do, anyway ;-) */
- cc=opcode&0x0f;
- v1=get_const(PC_P); /* passed as register_branch()'s not_taken argument */
- v2=get_const(S1); /* passed as register_branch()'s taken argument */
- fflags_into_flags(S2);
-
- switch(cc) {
- case 0: break; /* jump never */
- case 1:
- mov_l_rr(S2,PC_P); /* keep the fall-through address so the second cmov can restore it */
- cmov_l_rr(PC_P,S1,4);
- cmov_l_rr(PC_P,S2,10); break;
- case 2: register_branch(v1,v2,7); break;
- case 3: register_branch(v1,v2,3); break;
- case 4:
- mov_l_rr(S2,PC_P);
- cmov_l_rr(PC_P,S1,2);
- cmov_l_rr(PC_P,S2,10); break;
- case 5:
- mov_l_rr(S2,PC_P);
- cmov_l_rr(PC_P,S1,6);
- cmov_l_rr(PC_P,S2,10); break;
- case 6: register_branch(v1,v2,5); break;
- case 7: register_branch(v1,v2,11); break;
- case 8: register_branch(v1,v2,10); break;
- case 9: register_branch(v1,v2,4); break;
- case 10:
- cmov_l_rr(PC_P,S1,10); /* two cmovs to the same target: taken if either condition holds */
- cmov_l_rr(PC_P,S1,7); break;
- case 11:
- cmov_l_rr(PC_P,S1,4);
- cmov_l_rr(PC_P,S1,3); break;
- case 12: register_branch(v1,v2,2); break;
- case 13: register_branch(v1,v2,6); break;
- case 14:
- cmov_l_rr(PC_P,S1,5);
- cmov_l_rr(PC_P,S1,10); break;
- case 15: mov_l_rr(PC_P,S1); break; /* always taken */
- }
-}
-
- /* Floating point conditions
- The "NotANumber" part could be problematic; However, when NaN is
- encountered, the ftst instruction sets both N and Z to 1 on the x87,
- so quite often things just fall into place. This is probably not
- accurate wrt the 68k FPU, but it is *as* accurate as this was before.
- However, some more thought should go into fixing this stuff up so
- it accurately emulates the 68k FPU.
->=<U
-0000 0x00: 0 --- Never jump
-0101 0x01: Z --- jump if zero (x86: 4)
-1000 0x02: !(NotANumber || Z || N) --- Neither Z nor N set (x86: 7)
-1101 0x03: Z || !(NotANumber || N); --- Z or !N (x86: 4 and 3)
-0010 0x04: N && !(NotANumber || Z); --- N and !Z (x86: hard!)
-0111 0x05: Z || (N && !NotANumber); --- Z or N (x86: 6)
-1010 0x06: !(NotANumber || Z); --- not Z (x86: 5)
-1110 0x07: !NotANumber; --- not NaN (x86: 11, not parity)
-0001 0x08: NotANumber; --- NaN (x86: 10)
-0101 0x09: NotANumber || Z; --- Z (x86: 4)
-1001 0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
-1101 0x0b: NotANumber || Z || !N; --- Z or !N (x86: 4 and 3)
-0011 0x0c: NotANumber || (N && !Z); --- N (x86: 2)
-0111 0x0d: NotANumber || Z || N; --- Z or N (x86: 6)
-1010 0x0e: !Z; --- not Z (x86: 5)
-1111 0x0f: 1; --- always
-
-This is not how the 68k handles things, though --- it sets Z to 0 and N
-to the NaN's sign.... ('o' and 'i' denote differences from the above
-table)
-
->=<U
-0000 0x00: 0 --- Never jump
-010o 0x01: Z --- jump if zero (x86: 4, not 10)
-1000 0x02: !(NotANumber || Z || N) --- Neither Z nor N set (x86: 7)
-110o 0x03: Z || !(NotANumber || N); --- Z or !N (x86: 3)
-0010 0x04: N && !(NotANumber || Z); --- N and !Z (x86: 2, not 10)
-011o 0x05: Z || (N && !NotANumber); --- Z or N (x86: 6, not 10)
-1010 0x06: !(NotANumber || Z); --- not Z (x86: 5)
-1110 0x07: !NotANumber; --- not NaN (x86: 11, not parity)
-0001 0x08: NotANumber; --- NaN (x86: 10)
-0101 0x09: NotANumber || Z; --- Z (x86: 4)
-1001 0x0a: NotANumber || !(N || Z); --- NaN or neither N nor Z (x86: 10 and 7)
-1101 0x0b: NotANumber || Z || !N; --- Z or !N (x86: 4 and 3)
-0011 0x0c: NotANumber || (N && !Z); --- N (x86: 2)
-0111 0x0d: NotANumber || Z || N; --- Z or N (x86: 6)
-101i 0x0e: !Z; --- not Z (x86: 5 and 10)
-1111 0x0f: 1; --- always
-
-Of course, this *still* doesn't mean that the x86 and 68k conditions are
-equivalent --- the handling of infinities is different, for one thing.
-On the 68k, +infinity minus +infinity is NotANumber (as it should be). On
-the x86, it is +infinity, and some exception is raised (which I suspect
-is promptly ignored) STUPID!
-The more I learn about their CPUs, the more I detest Intel....
-
-You can see this in action if you have "Benoit" (see Aminet) and
-set the exponent to 16. Wait for a long time, and marvel at the extra black
-areas outside the center one. That's where Benoit expects NaN, and the x86
-gives +infinity. [Ooops --- that must have been some kind of bug in my code.
-it no longer happens, and the resulting graphic looks much better, too]
-
-x86 conditions
-0011 : 2
-1100 : 3
-0101 : 4
-1010 : 5
-0111 : 6
-1000 : 7
-0001 : 10
-1110 : 11
- */
-/*
- * Compile handler for FSAVE.
- *
- * Nothing is translated here: the handler raises FAIL(1) and returns
- * immediately.  Every statement after that early return is unreachable.
- * The unreachable part writes a state frame with put_long() (a 4 byte
- * 68040 IDLE frame when CPUType == 4, a longer frame otherwise) and then
- * updates the address register for the (An)+ and -(An) addressing modes.
- */
-void comp_fsave_opp (uae_u32 opcode)
-{
-    uae_u32 ad;
-    const uae_u32 ea_mode = opcode & 0x38;
-    int incr = (ea_mode == 0x20) ? -1 : 1;
-    int i;
-
-    FAIL(1);
-    return;
-
-    /* ---- not reached ---- */
-#if DEBUG_FPP
-    printf ("fsave_opp at %08lx\n", m68k_getpc ());
-    fflush (stdout);
-#endif
-    if (get_fp_ad (opcode, &ad) == 0) {
-        m68k_setpc (m68k_getpc () - 2);
-        fpuop_illg (opcode,UNKNOWN_EXTRA);
-        return;
-    }
-
-    if (CPUType != 4) {
-        if (incr >= 0) {
-            /* Frame is written front to back. */
-            put_long (ad, 0x1f180000);
-            ad += 4;
-            for (i = 0; i < 5; i++) {
-                put_long (ad, 0x00000000);
-                ad += 4;
-            }
-            put_long (ad, 0x70000000);
-            ad += 4;
-        } else {
-            /* Predecrement: frame is written back to front. */
-            ad -= 4;
-            put_long (ad, 0x70000000);
-            for (i = 0; i < 5; i++) {
-                ad -= 4;
-                put_long (ad, 0x00000000);
-            }
-            ad -= 4;
-            put_long (ad, 0x1f180000);
-        }
-    } else {
-        /* 4 byte 68040 IDLE frame. */
-        if (incr >= 0) {
-            put_long (ad, 0x41000000);
-            ad += 4;
-        } else {
-            ad -= 4;
-            put_long (ad, 0x41000000);
-        }
-    }
-
-    /* (An)+ and -(An) both write the final address back to An. */
-    if (ea_mode == 0x18 || ea_mode == 0x20)
-        m68k_areg (regs, opcode & 7) = ad;
-}
-
-/*
- * Compile handler for FRESTORE.
- *
- * Nothing is translated here: the handler raises FAIL(1) and returns
- * immediately.  Every statement after that early return is unreachable.
- * The unreachable part reads the first long of a state frame with
- * get_long(), skips over the rest of the frame according to the byte in
- * bits 16..23, and updates the address register for (An)+ and -(An).
- */
-void comp_frestore_opp (uae_u32 opcode)
-{
-    uae_u32 ad;
-    uae_u32 d;
-    const uae_u32 ea_mode = opcode & 0x38;
-    int incr = (ea_mode == 0x20) ? -1 : 1;
-
-    FAIL(1);
-    return;
-
-    /* ---- not reached ---- */
-#if DEBUG_FPP
-    printf ("frestore_opp at %08lx\n", m68k_getpc ());
-    fflush (stdout);
-#endif
-    if (get_fp_ad (opcode, &ad) == 0) {
-        m68k_setpc (m68k_getpc () - 2);
-        fpuop_illg (opcode,UNKNOWN_EXTRA);
-        return;
-    }
-
-    /* Fetch the frame header, stepping over it in the access direction.
-       @@@ The predecrement handling may be wrong (noted in the original
-       for the 68040 case). */
-    if (incr < 0) {
-        ad -= 4;
-        d = get_long (ad);
-    } else {
-        d = get_long (ad);
-        ad += 4;
-    }
-
-    /* Number of additional bytes occupied by the frame body. */
-    uae_u32 body = 0;
-    if ((d & 0xff000000) != 0) { /* Not a NULL frame? */
-        const uae_u32 frame = d & 0x00ff0000;
-        if (CPUType == 4) {
-            /* 68040 */
-            if (frame == 0) { /* IDLE */
-            } else if (frame == 0x00300000) { /* UNIMP */
-                body = 44;
-            } else if (frame == 0x00600000) { /* BUSY */
-                body = 92;
-            }
-        } else {
-            if (frame == 0x00180000)
-                body = 6 * 4;
-            else if (frame == 0x00380000)
-                body = 14 * 4;
-            else if (frame == 0x00b40000)
-                body = 45 * 4;
-        }
-    }
-    if (incr < 0)
-        ad -= body;
-    else
-        ad += body;
-
-    /* (An)+ and -(An) both write the final address back to An. */
-    if (ea_mode == 0x18 || ea_mode == 0x20)
-        m68k_areg (regs, opcode & 7) = ad;
-}
-
-#if USE_LONG_DOUBLE
-static const fpu_register const_e = 2.7182818284590452353602874713526625L;
-static const fpu_register const_log10_e = 0.4342944819032518276511289189166051L;
-static const fpu_register const_loge_10 = 2.3025850929940456840179914546843642L;
-#else
-static const fpu_register const_e = 2.7182818284590452354;
-static const fpu_register const_log10_e = 0.43429448190325182765;
-static const fpu_register const_loge_10 = 2.30258509299404568402;
-#endif
-
-static const fpu_register power10[] = {
- 1e0, 1e1, 1e2, 1e4, 1e8, 1e16, 1e32, 1e64, 1e128, 1e256
-#if USE_LONG_DOUBLE
-, 1e512, 1e1024, 1e2048, 1e4096
-#endif
-};
-
-/* 128 words, indexed through the low byte of the 68k fpu control word */
-static uae_u16 x86_fpucw[]={
- 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p0r0 */
- 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p0r1 */
- 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p0r2 */
- 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, /* p0r3 */
-
- 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, 0x107f, /* p1r0 */
- 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, 0x1c7f, /* p1r1 */
- 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, 0x147f, /* p1r2 */
- 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, 0x187f, /* p1r3 */
-
- 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, 0x127f, /* p2r0 */
- 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, 0x1e7f, /* p2r1 */
- 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, 0x167f, /* p2r2 */
- 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, 0x1a7f, /* p2r3 */
-
- 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, 0x137f, /* p3r0 */
- 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, 0x1f7f, /* p3r1 */
- 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, 0x177f, /* p3r2 */
- 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f, 0x1b7f /* p3r3 */
-};
-
-/*
- * Generate code for a general FPU instruction.
- *
- * opcode is the 68k instruction word, extra its extension word.  Bits
- * 15..13 of extra select the instruction class:
- *   3     store of an FP register, delegated to put_fp_value()
- *   6, 7  FMOVEM of FP registers (static register lists only)
- *   4, 5  moves to/from the FPU control registers
- *   0, 2  constant-ROM loads and the arithmetic operations
- * Any other class ends up in the m68k_setpc()/fpuop_illg() sequence at the
- * bottom of the function.
- *
- * Whenever an instruction or variant is not handled, FAIL(1) is raised and
- * the function returns.  NOTE(review): presumably the caller then falls
- * back to the interpreter for this instruction; confirm against the
- * definition of FAIL.
- */
-void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
-{
-    int reg;
-    int src;
-
-    switch ((extra >> 13) & 0x7) {
-    case 3: /* 2nd most common */
-        if (put_fp_value ((extra >> 7) & 7, opcode, extra) < 0) {
-            FAIL(1);
-        }
-        return;
-
-    case 6:
-    case 7:
-        {
-            const int mode = (extra >> 11) & 3;
-            uae_u32 list;
-            uae_u32 ea_unused = 0; /* out-parameter of get_fp_ad; not used here */
-            int ad;
-
-            /* Get out early if failure: only the static register list
-               forms (0 = predecrement, 2 = postincrement) are compiled,
-               the dynamic forms (1 and 3) are not. */
-            if (mode != 0 && mode != 2) {
-                FAIL(1);
-                return;
-            }
-
-            /* The result must be kept in a signed variable: it used to be
-               stored in a uae_u32, which made the "< 0" failure test
-               always false and let a failed lookup be used as a register
-               number.  Failure is reported through FAIL(1) like every
-               other unsupported case in this function. */
-            ad = get_fp_ad (opcode, &ea_unused);
-            if (ad < 0) {
-                FAIL(1);
-                return;
-            }
-            list = extra & 0xff;
-
-            if (extra & 0x2000) {
-                /* FMOVEM FPP->memory */
-                if (mode == 0) { /* static pred */
-                    for (reg = 7; reg >= 0; reg--) {
-                        if (list & 0x80) {
-                            fmov_ext_mr((uintptr)temp_fp,reg);
-                            delay;
-                            sub_l_ri(ad,4);
-                            mov_l_rm(S2,(uintptr)temp_fp);
-                            writelong_clobber(ad,S2,S3);
-                            sub_l_ri(ad,4);
-                            mov_l_rm(S2,(uintptr)temp_fp+4);
-                            writelong_clobber(ad,S2,S3);
-                            sub_l_ri(ad,4);
-                            mov_w_rm(S2,(uintptr)temp_fp+8);
-                            writeword_clobber(ad,S2,S3);
-                        }
-                        list <<= 1;
-                    }
-                } else { /* static postinc */
-                    for (reg = 0; reg < 8; reg++) {
-                        if (list & 0x80) {
-                            fmov_ext_mr((uintptr)temp_fp,reg);
-                            delay;
-                            mov_w_rm(S2,(uintptr)temp_fp+8);
-                            writeword_clobber(ad,S2,S3);
-                            add_l_ri(ad,4);
-                            mov_l_rm(S2,(uintptr)temp_fp+4);
-                            writelong_clobber(ad,S2,S3);
-                            add_l_ri(ad,4);
-                            mov_l_rm(S2,(uintptr)temp_fp);
-                            writelong_clobber(ad,S2,S3);
-                            add_l_ri(ad,4);
-                        }
-                        list <<= 1;
-                    }
-                }
-            } else {
-                /* FMOVEM memory->FPP */
-                if (mode == 0) { /* static pred */
-                    // not reached
-                    for (reg = 7; reg >= 0; reg--) {
-                        if (list & 0x80) {
-                            sub_l_ri(ad,4);
-                            readlong(ad,S2,S3);
-                            mov_l_mr((uintptr)(temp_fp),S2);
-                            sub_l_ri(ad,4);
-                            readlong(ad,S2,S3);
-                            mov_l_mr((uintptr)(temp_fp)+4,S2);
-                            sub_l_ri(ad,4);
-                            readword(ad,S2,S3);
-                            mov_w_mr(((uintptr)temp_fp)+8,S2);
-                            delay2;
-                            fmov_ext_rm(reg,(uintptr)(temp_fp));
-                        }
-                        list <<= 1;
-                    }
-                } else { /* static postinc */
-                    for (reg = 0; reg < 8; reg++) {
-                        if (list & 0x80) {
-                            readword(ad,S2,S3);
-                            mov_w_mr(((uintptr)temp_fp)+8,S2);
-                            add_l_ri(ad,4);
-                            readlong(ad,S2,S3);
-                            mov_l_mr((uintptr)(temp_fp)+4,S2);
-                            add_l_ri(ad,4);
-                            readlong(ad,S2,S3);
-                            mov_l_mr((uintptr)(temp_fp),S2);
-                            add_l_ri(ad,4);
-                            delay2;
-                            fmov_ext_rm(reg,(uintptr)(temp_fp));
-                        }
-                        list <<= 1;
-                    }
-                }
-            }
-
-            /* (An)+ and -(An): write the final address back to An
-               (address registers are virtual registers 8..15 here). */
-            if ((opcode & 0x38) == 0x18 || (opcode & 0x38) == 0x20) {
-                mov_l_rr((opcode & 7)+8,ad);
-            }
-        }
-        return;
-
-    case 4:
-    case 5: /* rare */
-        if ((opcode & 0x30) == 0) {
-            /* Operand is a data or address register (opcode & 15). */
-            if (extra & 0x2000) {
-                /* control register -> register */
-                if (extra & 0x1000) {
-#if HANDLE_FPCR
-                    mov_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_mode);
-                    or_l_rm(opcode & 15, (uintptr)&fpu.fpcr.rounding_precision);
-#else
-                    FAIL(1);
-                    return;
-#endif
-                }
-                if (extra & 0x0800) {
-                    FAIL(1);
-                    return;
-                }
-                if (extra & 0x0400) {
-                    mov_l_rm(opcode & 15,(uintptr)&fpu.instruction_address);
-                    return;
-                }
-                /* NOTE(review): when only bit 0x1000 is set and
-                   HANDLE_FPCR is enabled, code has been emitted above and
-                   control still reaches the FAIL(1) at the end of this
-                   case.  Kept as in the original; confirm it is intended. */
-            } else {
-                /* register -> control register */
-                // gb-- moved here so that we may FAIL() without generating any code
-                if (extra & 0x0800) {
-                    // set_fpsr(m68k_dreg (regs, opcode & 15));
-                    FAIL(1);
-                    return;
-                }
-                if (extra & 0x1000) {
-#if HANDLE_FPCR
-#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION)
-                    FAIL(1);
-                    return;
-#endif
-                    mov_l_rr(S1,opcode & 15);
-                    mov_l_rr(S2,opcode & 15);
-                    and_l_ri(S1,FPCR_ROUNDING_PRECISION);
-                    and_l_ri(S2,FPCR_ROUNDING_MODE);
-                    mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1);
-                    mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2);
-#else
-                    FAIL(1);
-                    return;
-#endif
-                    // no return here; gb-- FMOVEM could also operate on fpiar
-                }
-                if (extra & 0x0400) {
-                    mov_l_mr((uintptr)&fpu.instruction_address,opcode & 15);
-                    // no return here; gb-- we have to process all FMOVEM bits before returning
-                }
-                return;
-            }
-        } else if ((opcode & 0x3f) == 0x3c) {
-            /* Immediate operand; only immediate -> control register is handled. */
-            if ((extra & 0x2000) == 0) {
-                // gb-- moved here so that we may FAIL() without generating any code
-                if (extra & 0x0800) {
-                    FAIL(1);
-                    return;
-                }
-                if (extra & 0x1000) {
-                    uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4);
-#if HANDLE_FPCR
-#if defined(FPU_USE_X86_ROUNDING_MODE) && defined(FPU_USE_X86_ROUNDING_PRECISION)
-                    FAIL(1);
-                    return;
-#endif
-//                  mov_l_mi((uintptr)&regs.fpcr,val);
-                    mov_l_ri(S1,val);
-                    mov_l_ri(S2,val);
-                    and_l_ri(S1,FPCR_ROUNDING_PRECISION);
-                    and_l_ri(S2,FPCR_ROUNDING_MODE);
-                    mov_l_mr((uintptr)&fpu.fpcr.rounding_precision,S1);
-                    mov_l_mr((uintptr)&fpu.fpcr.rounding_mode,S2);
-#else
-                    FAIL(1);
-                    return;
-#endif
-                    // no return here; gb-- FMOVEM could also operate on fpiar
-                }
-                if (extra & 0x0400) {
-                    uae_u32 val=comp_get_ilong((m68k_pc_offset+=4)-4);
-                    mov_l_mi((uintptr)&fpu.instruction_address,val);
-                    // no return here; gb-- we have to process all FMOVEM bits before returning
-                }
-                return;
-            }
-        }
-        /* Every other operand form (memory operands in either direction,
-           control register -> immediate) is not compiled. */
-        FAIL(1);
-        return;
-
-    case 0:
-    case 2: /* Extremely common */
-        reg = (extra >> 7) & 7;
-        if ((extra & 0xfc00) == 0x5c00) {
-            /* Load of a built-in constant selected by the low 7 bits. */
-            switch (extra & 0x7f) {
-            case 0x00:
-                fmov_pi(reg);
-                break;
-            case 0x0b:
-                fmov_log10_2(reg);
-                break;
-            case 0x0c:
-#if USE_LONG_DOUBLE
-                fmov_ext_rm(reg,(uintptr)&const_e);
-#else
-                fmov_rm(reg,(uintptr)&const_e);
-#endif
-                break;
-            case 0x0d:
-                fmov_log2_e(reg);
-                break;
-            case 0x0e:
-#if USE_LONG_DOUBLE
-                fmov_ext_rm(reg,(uintptr)&const_log10_e);
-#else
-                fmov_rm(reg,(uintptr)&const_log10_e);
-#endif
-                break;
-            case 0x0f:
-                fmov_0(reg);
-                break;
-            case 0x30:
-                fmov_loge_2(reg);
-                break;
-            case 0x31:
-#if USE_LONG_DOUBLE
-                fmov_ext_rm(reg,(uintptr)&const_loge_10);
-#else
-                fmov_rm(reg,(uintptr)&const_loge_10);
-#endif
-                break;
-            case 0x32:
-                fmov_1(reg);
-                break;
-            /* Powers of ten: offset 0x33 maps to power10[1], and so on. */
-            case 0x33:
-            case 0x34:
-            case 0x35:
-            case 0x36:
-            case 0x37:
-            case 0x38:
-            case 0x39:
-            case 0x3a:
-            case 0x3b:
-#if USE_LONG_DOUBLE
-            case 0x3c:
-            case 0x3d:
-            case 0x3e:
-            case 0x3f:
-                fmov_ext_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32));
-#else
-                fmov_rm(reg,(uintptr)(power10+(extra & 0x7f)-0x32));
-#endif
-                break;
-            default:
-                /* This is not valid, so we fail */
-                FAIL(1);
-                return;
-            }
-            return;
-        }
-
-        switch (extra & 0x7f) {
-        case 0x00: /* FMOVE */
-        case 0x40: /* Explicit rounding. This is just a quick fix. Same
-                    * for all other cases that have three choices */
-        case 0x44:
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fmov_rr(reg,src);
-            MAKE_FPSR (src);
-            break;
-
-        /* Operations that are not compiled at all. */
-        case 0x01: /* FINT */
-        case 0x02: /* FSINH */
-        case 0x06: /* FLOGNP1 */
-        case 0x08: /* FETOXM1 */
-        case 0x09: /* FTANH */
-        case 0x0a: /* FATAN */
-        case 0x0c: /* FASIN */
-        case 0x0d: /* FATANH */
-        case 0x0f: /* FTAN */
-        case 0x12: /* FTENTOX */
-        case 0x14: /* FLOGN */
-        case 0x15: /* FLOG10 */
-        case 0x19: /* FCOSH */
-        case 0x1c: /* FACOS */
-        case 0x1e: /* FGETEXP */
-        case 0x1f: /* FGETMAN */
-        case 0x30: /* FSINCOS */
-        case 0x31:
-        case 0x32:
-        case 0x33:
-        case 0x34:
-        case 0x35:
-        case 0x36:
-        case 0x37:
-            FAIL(1);
-            return;
-
-        case 0x25: /* FREM */
-            // gb-- disabled because the quotient byte must be computed
-            // otherwise, free rotation in ClarisWorks doesn't work.
-            // (The disabled translation used frem1_rr(reg,src).)
-            FAIL(1);
-            return;
-
-        case 0x26: /* FSCALE */
-            dont_care_fflags();
-            FAIL(1);
-            return;
-
-        case 0x03: /* FINTRZ */
-#if USE_X86_FPUCW
-            /* If we have control over the CW, we can do this */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            mov_l_ri(S1,16); /* Switch to "round to zero" mode */
-            /* Pointer-sized cast: a uae_u32 cast would truncate the table
-               address on 64-bit hosts. */
-            fldcw_m_indexed(S1,(uintptr)x86_fpucw);
-
-            frndint_rr(reg,src);
-
-            /* restore control word */
-            mov_l_rm(S1,(uintptr)&regs.fpcr);
-            and_l_ri(S1,0x000000f0);
-            fldcw_m_indexed(S1,(uintptr)x86_fpucw);
-
-            MAKE_FPSR (reg);
-            break;
-#else
-            FAIL(1);
-            return;
-#endif
-
-        case 0x04: /* FSQRT */
-        case 0x41:
-        case 0x45:
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fsqrt_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x0e: /* FSIN */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fsin_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x10: /* FETOX */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fetox_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x11: /* FTWOTOX */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            ftwotox_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x16: /* FLOG2 */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            flog2_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x18: /* FABS */
-        case 0x58:
-        case 0x5c:
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fabs_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x1a: /* FNEG */
-        case 0x5a:
-        case 0x5e:
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fneg_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x1d: /* FCOS */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fcos_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x20: /* FDIV */
-        case 0x60:
-        case 0x64:
-        case 0x24: /* FSGLDIV (translated exactly like FDIV) */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fdiv_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x21: /* FMOD */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            frem_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x22: /* FADD */
-        case 0x62:
-        case 0x66:
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fadd_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x23: /* FMUL */
-        case 0x63:
-        case 0x67:
-        case 0x27: /* FSGLMUL (translated exactly like FMUL) */
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fmul_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x28: /* FSUB */
-        case 0x68:
-        case 0x6c:
-            dont_care_fflags();
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fsub_rr(reg,src);
-            MAKE_FPSR (reg);
-            break;
-        case 0x38: /* FCMP */
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            /* The destination register is left untouched: the difference
-               is formed in FP_RESULT only. */
-            fmov_rr(FP_RESULT,reg);
-            fsub_rr(FP_RESULT,src); /* Right way? */
-            break;
-        case 0x3a: /* FTST */
-            src=get_fp_value (opcode, extra);
-            if (src < 0) {
-                FAIL(1); /* Illegal instruction */
-                return;
-            }
-            fmov_rr(FP_RESULT,src);
-            break;
-        default:
-            FAIL(1);
-            return;
-        }
-        return;
-    }
-
-    /* Instruction class 1 is the only one that reaches this point. */
-    m68k_setpc (m68k_getpc () - 4);
-    fpuop_illg (opcode,extra);
-}
+++ /dev/null
-/*
- * compiler/compemu_support.cpp - Core dynamic translation engine
- *
- * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
- *
- * Adaptation for Basilisk II and improvements, copyright 2000-2005
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#if !REAL_ADDRESSING && !DIRECT_ADDRESSING
-#error "Only Real or Direct Addressing is supported with the JIT Compiler"
-#endif
-
-#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
-#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
-#endif
-
-/* NOTE: support for AMD64 assumes translation cache and other code
- * buffers are allocated into a 32-bit address space because (i) B2/JIT
- * code is not 64-bit clean and (ii) it's faster to resolve branches
- * that way.
- */
-#if !defined(__i386__) && !defined(__x86_64__)
-#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
-#endif
-
-#define USE_MATCH 0
-
-/* kludge for Brian, so he can compile under MSVC++ */
-#define USE_NORMAL_CALLING_CONVENTION 0
-
-#ifndef WIN32
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-#endif
-
-#include <stdlib.h>
-#include <fcntl.h>
-#include <errno.h>
-
-#include "sysdeps.h"
-#include "cpu_emulation.h"
-#include "main.h"
-#include "prefs.h"
-#include "user_strings.h"
-#include "vm_alloc.h"
-
-#include "m68k.h"
-#include "memory.h"
-#include "readcpu.h"
-#include "newcpu.h"
-#include "comptbl.h"
-#include "compiler/compemu.h"
-#include "fpu/fpu.h"
-#include "fpu/flags.h"
-
-#define DEBUG 1
-#include "debug.h"
-
-#ifdef ENABLE_MON
-#include "mon.h"
-#endif
-
-#ifndef WIN32
-#define PROFILE_COMPILE_TIME 1
-#define PROFILE_UNTRANSLATED_INSNS 1
-#endif
-
-#if defined(__x86_64__) && 0
-#define RECORD_REGISTER_USAGE 1
-#endif
-
-#ifdef WIN32
-#undef write_log
-#define write_log dummy_write_log
-static void dummy_write_log(const char *, ...) { }
-#endif
-
-#if JIT_DEBUG
-#undef abort
-#define abort() do { \
- fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
- exit(EXIT_FAILURE); \
-} while (0)
-#endif
-
-#if RECORD_REGISTER_USAGE
-static uint64 reg_count[16];
-static int reg_count_local[16];
-
-/*
- * qsort() comparator for arrays of register indices (int).  Orders the
- * indices by descending reg_count[] value.
- *
- * The counters are 64-bit.  Returning their difference converted to int
- * (as was done before) can yield the wrong sign, or zero for different
- * counts, so the values are compared explicitly instead.
- */
-static int reg_count_compare(const void *ap, const void *bp)
-{
-    const int a = *((const int *)ap);
-    const int b = *((const int *)bp);
-
-    if (reg_count[a] < reg_count[b])
-        return 1;
-    if (reg_count[a] > reg_count[b])
-        return -1;
-    return 0;
-}
-#endif
-
-#if PROFILE_COMPILE_TIME
-#include <time.h>
-static uae_u32 compile_count = 0;
-static clock_t compile_time = 0;
-static clock_t emul_start_time = 0;
-static clock_t emul_end_time = 0;
-#endif
-
-#if PROFILE_UNTRANSLATED_INSNS
-const int untranslated_top_ten = 20;
-static uae_u32 raw_cputbl_count[65536] = { 0, };
-static uae_u16 opcode_nums[65536];
-
-/*
- * qsort() comparator for arrays of opcodes (uae_u16).  Orders the opcodes
- * by descending raw_cputbl_count[] value.
- *
- * A comparator has to return a negative, zero or positive value.  The
- * previous version returned only 0 or 1 (the result of "<"), which reports
- * "equal" whenever the first element has the larger count and so gives
- * qsort() an inconsistent ordering.
- */
-static int untranslated_compfn(const void *e1, const void *e2)
-{
-    const uae_u32 count1 = raw_cputbl_count[*(const uae_u16 *)e1];
-    const uae_u32 count2 = raw_cputbl_count[*(const uae_u16 *)e2];
-
-    if (count1 < count2)
-        return 1;
-    if (count1 > count2)
-        return -1;
-    return 0;
-}
-#endif
-
-static compop_func *compfunctbl[65536];
-static compop_func *nfcompfunctbl[65536];
-static cpuop_func *nfcpufunctbl[65536];
-uae_u8* comp_pc_p;
-
-// From newcpu.cpp
-extern bool quit_program;
-
-// gb-- Extra data for Basilisk II/JIT
-#if JIT_DEBUG
-static bool JITDebug = false; // Enable runtime disassemblers through mon?
-#else
-const bool JITDebug = false; // Don't use JIT debug mode at all
-#endif
-#if USE_INLINING
-static bool follow_const_jumps = true; // Flag: translation through constant jumps
-#else
-const bool follow_const_jumps = false;
-#endif
-
-const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
-static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
-static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
-static bool lazy_flush = true; // Flag: lazy translation cache invalidation
-static bool avoid_fpu = true; // Flag: compile FPU instructions ?
-static bool have_cmov = false; // target has CMOV instructions ?
-static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
-static bool have_rat_stall = true; // target has partial register stalls ?
-const bool tune_alignment = true; // Tune code alignments for running CPU ?
-const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
-static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
-static int align_loops = 32; // Align the start of loops
-static int align_jumps = 32; // Align the start of jumps
-static int optcount[10] = {
- 10, // How often a block has to be executed before it is translated
- 0, // How often to use naive translation
- 0, 0, 0, 0,
- -1, -1, -1, -1
-};
-
-struct op_properties {
- uae_u8 use_flags;
- uae_u8 set_flags;
- uae_u8 is_addx;
- uae_u8 cflow;
-};
-static op_properties prop[65536];
-
-/* Returns the fl_end_block bits of the opcode's control-flow properties
-   (non-zero when they are set). */
-static inline int end_block(uae_u32 opcode)
-{
-    const op_properties &p = prop[opcode];
-    return p.cflow & fl_end_block;
-}
-
-/* True when the opcode's control-flow property is exactly fl_const_jump. */
-static inline bool is_const_jump(uae_u32 opcode)
-{
-    const op_properties &p = prop[opcode];
-    return p.cflow == fl_const_jump;
-}
-
-/* True when the opcode's control-flow properties include fl_trap. */
-static inline bool may_trap(uae_u32 opcode)
-{
-    const op_properties &p = prop[opcode];
-    return (p.cflow & fl_trap) != 0;
-}
-
-/*
- * Maps a table index: when HAVE_GET_WORD_UNSWAPPED is defined the two low
- * bytes of f are exchanged (higher bits are dropped), otherwise f is
- * returned unchanged.
- */
-static inline unsigned int cft_map (unsigned int f)
-{
-#ifdef HAVE_GET_WORD_UNSWAPPED
-    const unsigned int low_byte = f & 255;
-    const unsigned int high_byte = (f >> 8) & 255;
-    return high_byte | (low_byte << 8);
-#else
-    return f;
-#endif
-}
-
-uae_u8* start_pc_p;
-uae_u32 start_pc;
-uae_u32 current_block_pc_p;
-static uintptr current_block_start_target;
-uae_u32 needed_flags;
-static uintptr next_pc_p;
-static uintptr taken_pc_p;
-static int branch_cc;
-static int redo_current_block;
-
-int segvcount=0;
-int soft_flush_count=0;
-int hard_flush_count=0;
-int checksum_count=0;
-static uae_u8* current_compile_p=NULL;
-static uae_u8* max_compile_start;
-static uae_u8* compiled_code=NULL;
-static uae_s32 reg_alloc_run;
-const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
-static uae_u8* popallspace=NULL;
-
-void* pushall_call_handler=NULL;
-static void* popall_do_nothing=NULL;
-static void* popall_exec_nostats=NULL;
-static void* popall_execute_normal=NULL;
-static void* popall_cache_miss=NULL;
-static void* popall_recompile_block=NULL;
-static void* popall_check_checksum=NULL;
-
-/* The 68k only ever executes from even addresses. So right now, we
- * waste half the entries in this array
- * UPDATE: We now use those entries to store the start of the linked
- * lists that we maintain for each hash result.
- */
-cacheline cache_tags[TAGSIZE];
-int letit=0;
-blockinfo* hold_bi[MAX_HOLD_BI];
-blockinfo* active;
-blockinfo* dormant;
-
-/* 68040 */
-extern struct cputbl op_smalltbl_0_nf[];
-extern struct comptbl op_smalltbl_0_comp_nf[];
-extern struct comptbl op_smalltbl_0_comp_ff[];
-
-/* 68020 + 68881 */
-extern struct cputbl op_smalltbl_1_nf[];
-
-/* 68020 */
-extern struct cputbl op_smalltbl_2_nf[];
-
-/* 68010 */
-extern struct cputbl op_smalltbl_3_nf[];
-
-/* 68000 */
-extern struct cputbl op_smalltbl_4_nf[];
-
-/* 68000 slow but compatible. */
-extern struct cputbl op_smalltbl_5_nf[];
-
-static void flush_icache_hard(int n);
-static void flush_icache_lazy(int n);
-static void flush_icache_none(int n);
-void (*flush_icache)(int n) = flush_icache_none;
-
-
-
-bigstate live;
-smallstate empty_ss;
-smallstate default_ss;
-static int optlev;
-
-static int writereg(int r, int size);
-static void unlock2(int r);
-static void setlock(int r);
-static int readreg_specific(int r, int size, int spec);
-static int writereg_specific(int r, int size, int spec);
-static void prepare_for_call_1(void);
-static void prepare_for_call_2(void);
-static void align_target(uae_u32 a);
-
-static uae_s32 nextused[VREGS];
-
-uae_u32 m68k_pc_offset;
-
-/* Some arithmetic operations can be optimized away if the operands
- * are known to be constant. But that's only a good idea when the
- * side effects they would have on the flags are not important. This
- * variable indicates whether we need the side effects or not
- */
-uae_u32 needflags=0;
-
-/* Flag handling is complicated.
- *
- * x86 instructions create flags, which quite often are exactly what we
- * want. So at times, the "68k" flags are actually in the x86 flags.
- *
- * Then again, sometimes we do x86 instructions that clobber the x86
- * flags, but don't represent a corresponding m68k instruction. In that
- * case, we have to save them.
- *
- * We used to save them to the stack, but now store them back directly
- * into the regflags.cznv of the traditional emulation. Thus some odd
- * names.
- *
- * So flags can be in either of two places (used to be three; boy were
- * things complicated back then!); And either place can contain either
- * valid flags or invalid trash (and on the stack, there was also the
- * option of "nothing at all", now gone). A couple of variables keep
- * track of the respective states.
- *
- * To make things worse, we might or might not be interested in the flags.
- * by default, we are, but a call to dont_care_flags can change that
- * until the next call to live_flags. If we are not, pretty much whatever
- * is in the register and/or the native flags is seen as valid.
- */
-
-/* Head of the block list for cache line cl; the list heads live in the
-   odd-numbered cache_tags entries (cl+1). */
-static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
-{
-    blockinfo* const head = cache_tags[cl+1].bi;
-    return head;
-}
-
-/* Searches the block list of addr's cache line for the block whose pc_p
-   equals addr.  Returns NULL when there is none. */
-static __inline__ blockinfo* get_blockinfo_addr(void* addr)
-{
-    blockinfo* cur;
-
-    for (cur = get_blockinfo(cacheline(addr)); cur; cur = cur->next_same_cl) {
-        if (cur->pc_p == addr)
-            return cur;
-    }
-    return NULL;
-}
-
-
-/*******************************************************************
- * All sorts of list related functions for all of the lists *
- *******************************************************************/
-
-/* Unlinks bi from the list of its cache line, then points the line's
-   handler at the new list head, or at popall_execute_normal when the
-   list has become empty. */
-static __inline__ void remove_from_cl_list(blockinfo* bi)
-{
-    const uae_u32 line = cacheline(bi->pc_p);
-    blockinfo* const successor = bi->next_same_cl;
-
-    if (bi->prev_same_cl_p)
-        *(bi->prev_same_cl_p) = successor;
-    if (successor)
-        successor->prev_same_cl_p = bi->prev_same_cl_p;
-
-    blockinfo* const head = cache_tags[line+1].bi;
-    if (head)
-        cache_tags[line].handler = head->handler_to_use;
-    else
-        cache_tags[line].handler = (cpuop_func *)popall_execute_normal;
-}
-
-/* Unlinks bi from the next/prev_p list it is currently on. */
-static __inline__ void remove_from_list(blockinfo* bi)
-{
-    blockinfo* const successor = bi->next;
-
-    if (bi->prev_p)
-        *(bi->prev_p) = successor;
-    if (successor)
-        successor->prev_p = bi->prev_p;
-}
-
-/* Unlinks bi from both lists it is kept on: first the next/prev_p list,
-   then the list of its cache line. */
-static __inline__ void remove_from_lists(blockinfo* bi)
-{
-    remove_from_list(bi);
-    remove_from_cl_list(bi);
-}
-
-/* Inserts bi at the head of its cache line's list and makes bi's
-   handler_to_use the handler of that cache line. */
-static __inline__ void add_to_cl_list(blockinfo* bi)
-{
-    const uae_u32 line = cacheline(bi->pc_p);
-    blockinfo* const old_head = cache_tags[line+1].bi;
-
-    if (old_head)
-        old_head->prev_same_cl_p = &(bi->next_same_cl);
-    bi->next_same_cl = old_head;
-
-    cache_tags[line+1].bi = bi;
-    bi->prev_same_cl_p = &(cache_tags[line+1].bi);
-
-    cache_tags[line].handler = bi->handler_to_use;
-}
-
-/* Moves bi to the head of its cache line's list by unlinking it and
-   inserting it again. */
-static __inline__ void raise_in_cl_list(blockinfo* bi)
-{
-    remove_from_cl_list(bi);
-    add_to_cl_list(bi);
-}
-
-/* Inserts bi at the head of the "active" list. */
-static __inline__ void add_to_active(blockinfo* bi)
-{
-    blockinfo* const old_head = active;
-
-    if (old_head)
-        old_head->prev_p = &(bi->next);
-    bi->next = old_head;
-
-    active = bi;
-    bi->prev_p = &active;
-}
-
-static __inline__ void add_to_dormant(blockinfo* bi)
-{
- if (dormant)
- dormant->prev_p=&(bi->next);
- bi->next=dormant;
-
- dormant=bi;
- bi->prev_p=&dormant;
-}
-
-static __inline__ void remove_dep(dependency* d)
-{
- if (d->prev_p)
- *(d->prev_p)=d->next;
- if (d->next)
- d->next->prev_p=d->prev_p;
- d->prev_p=NULL;
- d->next=NULL;
-}
-
-/* This block's code is about to be thrown away, so it no longer
- depends on anything else */
-static __inline__ void remove_deps(blockinfo* bi)
-{
- remove_dep(&(bi->dep[0]));
- remove_dep(&(bi->dep[1]));
-}
-
-static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
-{
- *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
-}
-
-/********************************************************************
- * Soft flush handling support functions *
- ********************************************************************/
-
-static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
-{
- //write_log("bi is %p\n",bi);
- if (dh!=bi->direct_handler_to_use) {
- dependency* x=bi->deplist;
- //write_log("bi->deplist=%p\n",bi->deplist);
- while (x) {
- //write_log("x is %p\n",x);
- //write_log("x->next is %p\n",x->next);
- //write_log("x->prev_p is %p\n",x->prev_p);
-
- if (x->jmp_off) {
- adjust_jmpdep(x,dh);
- }
- x=x->next;
- }
- bi->direct_handler_to_use=dh;
- }
-}
-
-static __inline__ void invalidate_block(blockinfo* bi)
-{
- int i;
-
- bi->optlevel=0;
- bi->count=optcount[0]-1;
- bi->handler=NULL;
- bi->handler_to_use=(cpuop_func *)popall_execute_normal;
- bi->direct_handler=NULL;
- set_dhtu(bi,bi->direct_pen);
- bi->needed_flags=0xff;
- bi->status=BI_INVALID;
- for (i=0;i<2;i++) {
- bi->dep[i].jmp_off=NULL;
- bi->dep[i].target=NULL;
- }
- remove_deps(bi);
-}
-
-static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
-{
- blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
-
- Dif(!tbi) {
- write_log("Could not create jmpdep!\n");
- abort();
- }
- bi->dep[i].jmp_off=jmpaddr;
- bi->dep[i].source=bi;
- bi->dep[i].target=tbi;
- bi->dep[i].next=tbi->deplist;
- if (bi->dep[i].next)
- bi->dep[i].next->prev_p=&(bi->dep[i].next);
- bi->dep[i].prev_p=&(tbi->deplist);
- tbi->deplist=&(bi->dep[i]);
-}
-
-static __inline__ void block_need_recompile(blockinfo * bi)
-{
- uae_u32 cl = cacheline(bi->pc_p);
-
- set_dhtu(bi, bi->direct_pen);
- bi->direct_handler = bi->direct_pen;
-
- bi->handler_to_use = (cpuop_func *)popall_execute_normal;
- bi->handler = (cpuop_func *)popall_execute_normal;
- if (bi == cache_tags[cl + 1].bi)
- cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
- bi->status = BI_NEED_RECOMP;
-}
-
-static __inline__ void mark_callers_recompile(blockinfo * bi)
-{
- dependency *x = bi->deplist;
-
- while (x) {
- dependency *next = x->next; /* This disappears when we mark for
- * recompilation and thus remove the
- * blocks from the lists */
- if (x->jmp_off) {
- blockinfo *cbi = x->source;
-
- Dif(cbi->status == BI_INVALID) {
- // write_log("invalid block in dependency list\n"); // FIXME?
- // abort();
- }
- if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
- block_need_recompile(cbi);
- mark_callers_recompile(cbi);
- }
- else if (cbi->status == BI_COMPILING) {
- redo_current_block = 1;
- }
- else if (cbi->status == BI_NEED_RECOMP) {
- /* nothing */
- }
- else {
- //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
- }
- }
- x = next;
- }
-}
-
-static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
-{
- blockinfo* bi=get_blockinfo_addr(addr);
- int i;
-
- if (!bi) {
- for (i=0;i<MAX_HOLD_BI && !bi;i++) {
- if (hold_bi[i]) {
- uae_u32 cl=cacheline(addr);
-
- bi=hold_bi[i];
- hold_bi[i]=NULL;
- bi->pc_p=(uae_u8 *)addr;
- invalidate_block(bi);
- add_to_active(bi);
- add_to_cl_list(bi);
-
- }
- }
- }
- if (!bi) {
- write_log("Looking for blockinfo, can't find free one\n");
- abort();
- }
- return bi;
-}
-
-static void prepare_block(blockinfo* bi);
-
-/* Managment of blockinfos.
-
- A blockinfo struct is allocated whenever a new block has to be
- compiled. If the list of free blockinfos is empty, we allocate a new
- pool of blockinfos and link the newly created blockinfos altogether
- into the list of free blockinfos. Otherwise, we simply pop a structure
- off the free list.
-
- Blockinfo are lazily deallocated, i.e. chained altogether in the
- list of free blockinfos whenvever a translation cache flush (hard or
- soft) request occurs.
-*/
-
-template< class T >
-class LazyBlockAllocator
-{
- enum {
- kPoolSize = 1 + 4096 / sizeof(T)
- };
- struct Pool {
- T chunk[kPoolSize];
- Pool * next;
- };
- Pool * mPools;
- T * mChunks;
-public:
- LazyBlockAllocator() : mPools(0), mChunks(0) { }
- ~LazyBlockAllocator();
- T * acquire();
- void release(T * const);
-};
-
-template< class T >
-LazyBlockAllocator<T>::~LazyBlockAllocator()
-{
- Pool * currentPool = mPools;
- while (currentPool) {
- Pool * deadPool = currentPool;
- currentPool = currentPool->next;
- free(deadPool);
- }
-}
-
-template< class T >
-T * LazyBlockAllocator<T>::acquire()
-{
- if (!mChunks) {
- // There is no chunk left, allocate a new pool and link the
- // chunks into the free list
- Pool * newPool = (Pool *)malloc(sizeof(Pool));
- for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
- chunk->next = mChunks;
- mChunks = chunk;
- }
- newPool->next = mPools;
- mPools = newPool;
- }
- T * chunk = mChunks;
- mChunks = chunk->next;
- return chunk;
-}
-
-template< class T >
-void LazyBlockAllocator<T>::release(T * const chunk)
-{
- chunk->next = mChunks;
- mChunks = chunk;
-}
-
-template< class T >
-class HardBlockAllocator
-{
-public:
- T * acquire() {
- T * data = (T *)current_compile_p;
- current_compile_p += sizeof(T);
- return data;
- }
-
- void release(T * const chunk) {
- // Deallocated on invalidation
- }
-};
-
-#if USE_SEPARATE_BIA
-static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
-static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
-#else
-static HardBlockAllocator<blockinfo> BlockInfoAllocator;
-static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
-#endif
-
-static __inline__ checksum_info *alloc_checksum_info(void)
-{
- checksum_info *csi = ChecksumInfoAllocator.acquire();
- csi->next = NULL;
- return csi;
-}
-
-static __inline__ void free_checksum_info(checksum_info *csi)
-{
- csi->next = NULL;
- ChecksumInfoAllocator.release(csi);
-}
-
-static __inline__ void free_checksum_info_chain(checksum_info *csi)
-{
- while (csi != NULL) {
- checksum_info *csi2 = csi->next;
- free_checksum_info(csi);
- csi = csi2;
- }
-}
-
-static __inline__ blockinfo *alloc_blockinfo(void)
-{
- blockinfo *bi = BlockInfoAllocator.acquire();
-#if USE_CHECKSUM_INFO
- bi->csi = NULL;
-#endif
- return bi;
-}
-
-static __inline__ void free_blockinfo(blockinfo *bi)
-{
-#if USE_CHECKSUM_INFO
- free_checksum_info_chain(bi->csi);
- bi->csi = NULL;
-#endif
- BlockInfoAllocator.release(bi);
-}
-
-static __inline__ void alloc_blockinfos(void)
-{
- int i;
- blockinfo* bi;
-
- for (i=0;i<MAX_HOLD_BI;i++) {
- if (hold_bi[i])
- return;
- bi=hold_bi[i]=alloc_blockinfo();
- prepare_block(bi);
- }
-}
-
-/********************************************************************
- * Functions to emit data into memory, and other general support *
- ********************************************************************/
-
-static uae_u8* target;
-
-static void emit_init(void)
-{
-}
-
-static __inline__ void emit_byte(uae_u8 x)
-{
- *target++=x;
-}
-
-static __inline__ void emit_word(uae_u16 x)
-{
- *((uae_u16*)target)=x;
- target+=2;
-}
-
-static __inline__ void emit_long(uae_u32 x)
-{
- *((uae_u32*)target)=x;
- target+=4;
-}
-
-static __inline__ void emit_quad(uae_u64 x)
-{
- *((uae_u64*)target)=x;
- target+=8;
-}
-
-static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
-{
- memcpy((uae_u8 *)target,block,blocklen);
- target+=blocklen;
-}
-
-static __inline__ uae_u32 reverse32(uae_u32 v)
-{
-#if 1
- // gb-- We have specialized byteswapping functions, just use them
- return do_byteswap_32(v);
-#else
- return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
-#endif
-}
-
-/********************************************************************
- * Getting the information about the target CPU *
- ********************************************************************/
-
-#include "codegen_x86.cpp"
-
-void set_target(uae_u8* t)
-{
- target=t;
-}
-
-static __inline__ uae_u8* get_target_noopt(void)
-{
- return target;
-}
-
-__inline__ uae_u8* get_target(void)
-{
- return get_target_noopt();
-}
-
-
-/********************************************************************
- * Flags status handling. EMIT TIME! *
- ********************************************************************/
-
-static void bt_l_ri_noclobber(R4 r, IMM i);
-
-static void make_flags_live_internal(void)
-{
- if (live.flags_in_flags==VALID)
- return;
- Dif (live.flags_on_stack==TRASH) {
- write_log("Want flags, got something on stack, but it is TRASH\n");
- abort();
- }
- if (live.flags_on_stack==VALID) {
- int tmp;
- tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
- raw_reg_to_flags(tmp);
- unlock2(tmp);
-
- live.flags_in_flags=VALID;
- return;
- }
- write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
- live.flags_in_flags,live.flags_on_stack);
- abort();
-}
-
-static void flags_to_stack(void)
-{
- if (live.flags_on_stack==VALID)
- return;
- if (!live.flags_are_important) {
- live.flags_on_stack=VALID;
- return;
- }
- Dif (live.flags_in_flags!=VALID)
- abort();
- else {
- int tmp;
- tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
- raw_flags_to_reg(tmp);
- unlock2(tmp);
- }
- live.flags_on_stack=VALID;
-}
-
-static __inline__ void clobber_flags(void)
-{
- if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
- flags_to_stack();
- live.flags_in_flags=TRASH;
-}
-
-/* Prepare for leaving the compiled stuff */
-static __inline__ void flush_flags(void)
-{
- flags_to_stack();
- return;
-}
-
-int touchcnt;
-
-/********************************************************************
- * Partial register flushing for optimized calls *
- ********************************************************************/
-
-struct regusage {
- uae_u16 rmask;
- uae_u16 wmask;
-};
-
-static inline void ru_set(uae_u16 *mask, int reg)
-{
-#if USE_OPTIMIZED_CALLS
- *mask |= 1 << reg;
-#endif
-}
-
-static inline bool ru_get(const uae_u16 *mask, int reg)
-{
-#if USE_OPTIMIZED_CALLS
- return (*mask & (1 << reg));
-#else
- /* Default: instruction reads & write to register */
- return true;
-#endif
-}
-
-static inline void ru_set_read(regusage *ru, int reg)
-{
- ru_set(&ru->rmask, reg);
-}
-
-static inline void ru_set_write(regusage *ru, int reg)
-{
- ru_set(&ru->wmask, reg);
-}
-
-static inline bool ru_read_p(const regusage *ru, int reg)
-{
- return ru_get(&ru->rmask, reg);
-}
-
-static inline bool ru_write_p(const regusage *ru, int reg)
-{
- return ru_get(&ru->wmask, reg);
-}
-
-static void ru_fill_ea(regusage *ru, int reg, amodes mode,
- wordsizes size, int write_mode)
-{
- switch (mode) {
- case Areg:
- reg += 8;
- /* fall through */
- case Dreg:
- ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
- break;
- case Ad16:
- /* skip displacment */
- m68k_pc_offset += 2;
- case Aind:
- case Aipi:
- case Apdi:
- ru_set_read(ru, reg+8);
- break;
- case Ad8r:
- ru_set_read(ru, reg+8);
- /* fall through */
- case PC8r: {
- uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
- reg = (dp >> 12) & 15;
- ru_set_read(ru, reg);
- if (dp & 0x100)
- m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
- break;
- }
- case PC16:
- case absw:
- case imm0:
- case imm1:
- m68k_pc_offset += 2;
- break;
- case absl:
- case imm2:
- m68k_pc_offset += 4;
- break;
- case immi:
- m68k_pc_offset += (size == sz_long) ? 4 : 2;
- break;
- }
-}
-
-/* TODO: split into a static initialization part and a dynamic one
- (instructions depending on extension words) */
-static void ru_fill(regusage *ru, uae_u32 opcode)
-{
- m68k_pc_offset += 2;
-
- /* Default: no register is used or written to */
- ru->rmask = 0;
- ru->wmask = 0;
-
- uae_u32 real_opcode = cft_map(opcode);
- struct instr *dp = &table68k[real_opcode];
-
- bool rw_dest = true;
- bool handled = false;
-
- /* Handle some instructions specifically */
- uae_u16 reg, ext;
- switch (dp->mnemo) {
- case i_BFCHG:
- case i_BFCLR:
- case i_BFEXTS:
- case i_BFEXTU:
- case i_BFFFO:
- case i_BFINS:
- case i_BFSET:
- case i_BFTST:
- ext = comp_get_iword((m68k_pc_offset+=2)-2);
- if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
- if (ext & 0x020) ru_set_read(ru, ext & 7);
- ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
- if (dp->dmode == Dreg)
- ru_set_read(ru, dp->dreg);
- switch (dp->mnemo) {
- case i_BFEXTS:
- case i_BFEXTU:
- case i_BFFFO:
- ru_set_write(ru, (ext >> 12) & 7);
- break;
- case i_BFINS:
- ru_set_read(ru, (ext >> 12) & 7);
- /* fall through */
- case i_BFCHG:
- case i_BFCLR:
- case i_BSET:
- if (dp->dmode == Dreg)
- ru_set_write(ru, dp->dreg);
- break;
- }
- handled = true;
- rw_dest = false;
- break;
-
- case i_BTST:
- rw_dest = false;
- break;
-
- case i_CAS:
- {
- ext = comp_get_iword((m68k_pc_offset+=2)-2);
- int Du = ext & 7;
- ru_set_read(ru, Du);
- int Dc = (ext >> 6) & 7;
- ru_set_read(ru, Dc);
- ru_set_write(ru, Dc);
- break;
- }
- case i_CAS2:
- {
- int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
- ext = comp_get_iword((m68k_pc_offset+=2)-2);
- Rn1 = (ext >> 12) & 15;
- Du1 = (ext >> 6) & 7;
- Dc1 = ext & 7;
- ru_set_read(ru, Rn1);
- ru_set_read(ru, Du1);
- ru_set_read(ru, Dc1);
- ru_set_write(ru, Dc1);
- ext = comp_get_iword((m68k_pc_offset+=2)-2);
- Rn2 = (ext >> 12) & 15;
- Du2 = (ext >> 6) & 7;
- Dc2 = ext & 7;
- ru_set_read(ru, Rn2);
- ru_set_read(ru, Du2);
- ru_set_write(ru, Dc2);
- break;
- }
- case i_DIVL: case i_MULL:
- m68k_pc_offset += 2;
- break;
- case i_LEA:
- case i_MOVE: case i_MOVEA: case i_MOVE16:
- rw_dest = false;
- break;
- case i_PACK: case i_UNPK:
- rw_dest = false;
- m68k_pc_offset += 2;
- break;
- case i_TRAPcc:
- m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
- break;
- case i_RTR:
- /* do nothing, just for coverage debugging */
- break;
- /* TODO: handle EXG instruction */
- }
-
- /* Handle A-Traps better */
- if ((real_opcode & 0xf000) == 0xa000) {
- handled = true;
- }
-
- /* Handle EmulOps better */
- if ((real_opcode & 0xff00) == 0x7100) {
- handled = true;
- ru->rmask = 0xffff;
- ru->wmask = 0;
- }
-
- if (dp->suse && !handled)
- ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
-
- if (dp->duse && !handled)
- ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
-
- if (rw_dest)
- ru->rmask |= ru->wmask;
-
- handled = handled || dp->suse || dp->duse;
-
- /* Mark all registers as used/written if the instruction may trap */
- if (may_trap(opcode)) {
- handled = true;
- ru->rmask = 0xffff;
- ru->wmask = 0xffff;
- }
-
- if (!handled) {
- write_log("ru_fill: %04x = { %04x, %04x }\n",
- real_opcode, ru->rmask, ru->wmask);
- abort();
- }
-}
-
-/********************************************************************
- * register allocation per block logging *
- ********************************************************************/
-
-static uae_s8 vstate[VREGS];
-static uae_s8 vwritten[VREGS];
-static uae_s8 nstate[N_REGS];
-
-#define L_UNKNOWN -127
-#define L_UNAVAIL -1
-#define L_NEEDED -2
-#define L_UNNEEDED -3
-
-static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
-{
- int i;
-
- for (i = 0; i < VREGS; i++)
- s->virt[i] = vstate[i];
- for (i = 0; i < N_REGS; i++)
- s->nat[i] = nstate[i];
-}
-
-static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
-{
- int i;
- int reverse = 0;
-
- for (i = 0; i < VREGS; i++) {
- if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
- return 1;
- if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
- reverse++;
- }
- for (i = 0; i < N_REGS; i++) {
- if (nstate[i] >= 0 && nstate[i] != s->nat[i])
- return 1;
- if (nstate[i] < 0 && s->nat[i] >= 0)
- reverse++;
- }
- if (reverse >= 2 && USE_MATCH)
- return 1; /* In this case, it might be worth recompiling the
- * callers */
- return 0;
-}
-
-static __inline__ void log_startblock(void)
-{
- int i;
-
- for (i = 0; i < VREGS; i++) {
- vstate[i] = L_UNKNOWN;
- vwritten[i] = 0;
- }
- for (i = 0; i < N_REGS; i++)
- nstate[i] = L_UNKNOWN;
-}
-
-/* Using an n-reg for a temp variable */
-static __inline__ void log_isused(int n)
-{
- if (nstate[n] == L_UNKNOWN)
- nstate[n] = L_UNAVAIL;
-}
-
-static __inline__ void log_visused(int r)
-{
- if (vstate[r] == L_UNKNOWN)
- vstate[r] = L_NEEDED;
-}
-
-static __inline__ void do_load_reg(int n, int r)
-{
- if (r == FLAGTMP)
- raw_load_flagreg(n, r);
- else if (r == FLAGX)
- raw_load_flagx(n, r);
- else
- raw_mov_l_rm(n, (uintptr) live.state[r].mem);
-}
-
-static __inline__ void check_load_reg(int n, int r)
-{
- raw_mov_l_rm(n, (uintptr) live.state[r].mem);
-}
-
-static __inline__ void log_vwrite(int r)
-{
- vwritten[r] = 1;
-}
-
-/* Using an n-reg to hold a v-reg */
-static __inline__ void log_isreg(int n, int r)
-{
- static int count = 0;
-
- if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
- nstate[n] = r;
- else {
- do_load_reg(n, r);
- if (nstate[n] == L_UNKNOWN)
- nstate[n] = L_UNAVAIL;
- }
- if (vstate[r] == L_UNKNOWN)
- vstate[r] = L_NEEDED;
-}
-
-static __inline__ void log_clobberreg(int r)
-{
- if (vstate[r] == L_UNKNOWN)
- vstate[r] = L_UNNEEDED;
-}
-
-/* This ends all possibility of clever register allocation */
-
-static __inline__ void log_flush(void)
-{
- int i;
-
- for (i = 0; i < VREGS; i++)
- if (vstate[i] == L_UNKNOWN)
- vstate[i] = L_NEEDED;
- for (i = 0; i < N_REGS; i++)
- if (nstate[i] == L_UNKNOWN)
- nstate[i] = L_UNAVAIL;
-}
-
-static __inline__ void log_dump(void)
-{
- int i;
-
- return;
-
- write_log("----------------------\n");
- for (i = 0; i < N_REGS; i++) {
- switch (nstate[i]) {
- case L_UNKNOWN:
- write_log("Nat %d : UNKNOWN\n", i);
- break;
- case L_UNAVAIL:
- write_log("Nat %d : UNAVAIL\n", i);
- break;
- default:
- write_log("Nat %d : %d\n", i, nstate[i]);
- break;
- }
- }
- for (i = 0; i < VREGS; i++) {
- if (vstate[i] == L_UNNEEDED)
- write_log("Virt %d: UNNEEDED\n", i);
- }
-}
-
-/********************************************************************
- * register status handling. EMIT TIME! *
- ********************************************************************/
-
-static __inline__ void set_status(int r, int status)
-{
- if (status == ISCONST)
- log_clobberreg(r);
- live.state[r].status=status;
-}
-
-static __inline__ int isinreg(int r)
-{
- return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
-}
-
-static __inline__ void adjust_nreg(int r, uae_u32 val)
-{
- if (!val)
- return;
- raw_lea_l_brr(r,r,val);
-}
-
-static void tomem(int r)
-{
- int rr=live.state[r].realreg;
-
- if (isinreg(r)) {
- if (live.state[r].val && live.nat[rr].nholds==1
- && !live.nat[rr].locked) {
- // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
- // live.state[r].val,r,rr,target);
- adjust_nreg(rr,live.state[r].val);
- live.state[r].val=0;
- live.state[r].dirtysize=4;
- set_status(r,DIRTY);
- }
- }
-
- if (live.state[r].status==DIRTY) {
- switch (live.state[r].dirtysize) {
- case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
- case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
- case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
- default: abort();
- }
- log_vwrite(r);
- set_status(r,CLEAN);
- live.state[r].dirtysize=0;
- }
-}
-
-static __inline__ int isconst(int r)
-{
- return live.state[r].status==ISCONST;
-}
-
-int is_const(int r)
-{
- return isconst(r);
-}
-
-static __inline__ void writeback_const(int r)
-{
- if (!isconst(r))
- return;
- Dif (live.state[r].needflush==NF_HANDLER) {
- write_log("Trying to write back constant NF_HANDLER!\n");
- abort();
- }
-
- raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
- log_vwrite(r);
- live.state[r].val=0;
- set_status(r,INMEM);
-}
-
-static __inline__ void tomem_c(int r)
-{
- if (isconst(r)) {
- writeback_const(r);
- }
- else
- tomem(r);
-}
-
-static void evict(int r)
-{
- int rr;
-
- if (!isinreg(r))
- return;
- tomem(r);
- rr=live.state[r].realreg;
-
- Dif (live.nat[rr].locked &&
- live.nat[rr].nholds==1) {
- write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
- abort();
- }
-
- live.nat[rr].nholds--;
- if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
- int topreg=live.nat[rr].holds[live.nat[rr].nholds];
- int thisind=live.state[r].realind;
-
- live.nat[rr].holds[thisind]=topreg;
- live.state[topreg].realind=thisind;
- }
- live.state[r].realreg=-1;
- set_status(r,INMEM);
-}
-
-static __inline__ void free_nreg(int r)
-{
- int i=live.nat[r].nholds;
-
- while (i) {
- int vr;
-
- --i;
- vr=live.nat[r].holds[i];
- evict(vr);
- }
- Dif (live.nat[r].nholds!=0) {
- write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
- abort();
- }
-}
-
-/* Use with care! */
-static __inline__ void isclean(int r)
-{
- if (!isinreg(r))
- return;
- live.state[r].validsize=4;
- live.state[r].dirtysize=0;
- live.state[r].val=0;
- set_status(r,CLEAN);
-}
-
-static __inline__ void disassociate(int r)
-{
- isclean(r);
- evict(r);
-}
-
-static __inline__ void set_const(int r, uae_u32 val)
-{
- disassociate(r);
- live.state[r].val=val;
- set_status(r,ISCONST);
-}
-
-static __inline__ uae_u32 get_offset(int r)
-{
- return live.state[r].val;
-}
-
-static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
-{
- int bestreg;
- uae_s32 when;
- int i;
- uae_s32 badness=0; /* to shut up gcc */
- bestreg=-1;
- when=2000000000;
-
- /* XXX use a regalloc_order table? */
- for (i=0;i<N_REGS;i++) {
- badness=live.nat[i].touched;
- if (live.nat[i].nholds==0)
- badness=0;
- if (i==hint)
- badness-=200000000;
- if (!live.nat[i].locked && badness<when) {
- if ((size==1 && live.nat[i].canbyte) ||
- (size==2 && live.nat[i].canword) ||
- (size==4)) {
- bestreg=i;
- when=badness;
- if (live.nat[i].nholds==0 && hint<0)
- break;
- if (i==hint)
- break;
- }
- }
- }
- Dif (bestreg==-1)
- abort();
-
- if (live.nat[bestreg].nholds>0) {
- free_nreg(bestreg);
- }
- if (isinreg(r)) {
- int rr=live.state[r].realreg;
- /* This will happen if we read a partially dirty register at a
- bigger size */
- Dif (willclobber || live.state[r].validsize>=size)
- abort();
- Dif (live.nat[rr].nholds!=1)
- abort();
- if (size==4 && live.state[r].validsize==2) {
- log_isused(bestreg);
- log_visused(r);
- raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
- raw_bswap_32(bestreg);
- raw_zero_extend_16_rr(rr,rr);
- raw_zero_extend_16_rr(bestreg,bestreg);
- raw_bswap_32(bestreg);
- raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
- live.state[r].validsize=4;
- live.nat[rr].touched=touchcnt++;
- return rr;
- }
- if (live.state[r].validsize==1) {
- /* Nothing yet */
- }
- evict(r);
- }
-
- if (!willclobber) {
- if (live.state[r].status!=UNDEF) {
- if (isconst(r)) {
- raw_mov_l_ri(bestreg,live.state[r].val);
- live.state[r].val=0;
- live.state[r].dirtysize=4;
- set_status(r,DIRTY);
- log_isused(bestreg);
- }
- else {
- log_isreg(bestreg, r); /* This will also load it! */
- live.state[r].dirtysize=0;
- set_status(r,CLEAN);
- }
- }
- else {
- live.state[r].val=0;
- live.state[r].dirtysize=0;
- set_status(r,CLEAN);
- log_isused(bestreg);
- }
- live.state[r].validsize=4;
- }
- else { /* this is the easiest way, but not optimal. FIXME! */
- /* Now it's trickier, but hopefully still OK */
- if (!isconst(r) || size==4) {
- live.state[r].validsize=size;
- live.state[r].dirtysize=size;
- live.state[r].val=0;
- set_status(r,DIRTY);
- if (size == 4) {
- log_clobberreg(r);
- log_isused(bestreg);
- }
- else {
- log_visused(r);
- log_isused(bestreg);
- }
- }
- else {
- if (live.state[r].status!=UNDEF)
- raw_mov_l_ri(bestreg,live.state[r].val);
- live.state[r].val=0;
- live.state[r].validsize=4;
- live.state[r].dirtysize=4;
- set_status(r,DIRTY);
- log_isused(bestreg);
- }
- }
- live.state[r].realreg=bestreg;
- live.state[r].realind=live.nat[bestreg].nholds;
- live.nat[bestreg].touched=touchcnt++;
- live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
- live.nat[bestreg].nholds++;
-
- return bestreg;
-}
-
-static int alloc_reg(int r, int size, int willclobber)
-{
- return alloc_reg_hinted(r,size,willclobber,-1);
-}
-
-static void unlock2(int r)
-{
- Dif (!live.nat[r].locked)
- abort();
- live.nat[r].locked--;
-}
-
-static void setlock(int r)
-{
- live.nat[r].locked++;
-}
-
-
-static void mov_nregs(int d, int s)
-{
- int ns=live.nat[s].nholds;
- int nd=live.nat[d].nholds;
- int i;
-
- if (s==d)
- return;
-
- if (nd>0)
- free_nreg(d);
-
- log_isused(d);
- raw_mov_l_rr(d,s);
-
- for (i=0;i<live.nat[s].nholds;i++) {
- int vs=live.nat[s].holds[i];
-
- live.state[vs].realreg=d;
- live.state[vs].realind=i;
- live.nat[d].holds[i]=vs;
- }
- live.nat[d].nholds=live.nat[s].nholds;
-
- live.nat[s].nholds=0;
-}
-
-
-static __inline__ void make_exclusive(int r, int size, int spec)
-{
- int clobber;
- reg_status oldstate;
- int rr=live.state[r].realreg;
- int nr;
- int nind;
- int ndirt=0;
- int i;
-
- if (!isinreg(r))
- return;
- if (live.nat[rr].nholds==1)
- return;
- for (i=0;i<live.nat[rr].nholds;i++) {
- int vr=live.nat[rr].holds[i];
- if (vr!=r &&
- (live.state[vr].status==DIRTY || live.state[vr].val))
- ndirt++;
- }
- if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
- /* Everything else is clean, so let's keep this register */
- for (i=0;i<live.nat[rr].nholds;i++) {
- int vr=live.nat[rr].holds[i];
- if (vr!=r) {
- evict(vr);
- i--; /* Try that index again! */
- }
- }
- Dif (live.nat[rr].nholds!=1) {
- write_log("natreg %d holds %d vregs, %d not exclusive\n",
- rr,live.nat[rr].nholds,r);
- abort();
- }
- return;
- }
-
- /* We have to split the register */
- oldstate=live.state[r];
-
- setlock(rr); /* Make sure this doesn't go away */
- /* Forget about r being in the register rr */
- disassociate(r);
- /* Get a new register, that we will clobber completely */
- if (oldstate.status==DIRTY) {
- /* If dirtysize is <4, we need a register that can handle the
- eventual smaller memory store! Thanks to Quake68k for exposing
- this detail ;-) */
- nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
- }
- else {
- nr=alloc_reg_hinted(r,4,1,spec);
- }
- nind=live.state[r].realind;
- live.state[r]=oldstate; /* Keep all the old state info */
- live.state[r].realreg=nr;
- live.state[r].realind=nind;
-
- if (size<live.state[r].validsize) {
- if (live.state[r].val) {
- /* Might as well compensate for the offset now */
- raw_lea_l_brr(nr,rr,oldstate.val);
- live.state[r].val=0;
- live.state[r].dirtysize=4;
- set_status(r,DIRTY);
- }
- else
- raw_mov_l_rr(nr,rr); /* Make another copy */
- }
- unlock2(rr);
-}
-
-static __inline__ void add_offset(int r, uae_u32 off)
-{
- live.state[r].val+=off;
-}
-
-static __inline__ void remove_offset(int r, int spec)
-{
- reg_status oldstate;
- int rr;
-
- if (isconst(r))
- return;
- if (live.state[r].val==0)
- return;
- if (isinreg(r) && live.state[r].validsize<4)
- evict(r);
-
- if (!isinreg(r))
- alloc_reg_hinted(r,4,0,spec);
-
- Dif (live.state[r].validsize!=4) {
- write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
- abort();
- }
- make_exclusive(r,0,-1);
- /* make_exclusive might have done the job already */
- if (live.state[r].val==0)
- return;
-
- rr=live.state[r].realreg;
-
- if (live.nat[rr].nholds==1) {
- //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
- // live.state[r].val,r,rr,target);
- adjust_nreg(rr,live.state[r].val);
- live.state[r].dirtysize=4;
- live.state[r].val=0;
- set_status(r,DIRTY);
- return;
- }
- write_log("Failed in remove_offset\n");
- abort();
-}
-
-static __inline__ void remove_all_offsets(void)
-{
- int i;
-
- for (i=0;i<VREGS;i++)
- remove_offset(i,-1);
-}
-
-static inline void flush_reg_count(void)
-{
-#if RECORD_REGISTER_USAGE
- for (int r = 0; r < 16; r++)
- if (reg_count_local[r])
- ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
-#endif
-}
-
-static inline void record_register(int r)
-{
-#if RECORD_REGISTER_USAGE
- if (r < 16)
- reg_count_local[r]++;
-#endif
-}
-
-static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
-{
- int n;
- int answer=-1;
-
- record_register(r);
- if (live.state[r].status==UNDEF) {
- write_log("WARNING: Unexpected read of undefined register %d\n",r);
- }
- if (!can_offset)
- remove_offset(r,spec);
-
- if (isinreg(r) && live.state[r].validsize>=size) {
- n=live.state[r].realreg;
- switch(size) {
- case 1:
- if (live.nat[n].canbyte || spec>=0) {
- answer=n;
- }
- break;
- case 2:
- if (live.nat[n].canword || spec>=0) {
- answer=n;
- }
- break;
- case 4:
- answer=n;
- break;
- default: abort();
- }
- if (answer<0)
- evict(r);
- }
- /* either the value was in memory to start with, or it was evicted and
- is in memory now */
- if (answer<0) {
- answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
- }
-
- if (spec>=0 && spec!=answer) {
- /* Too bad */
- mov_nregs(spec,answer);
- answer=spec;
- }
- live.nat[answer].locked++;
- live.nat[answer].touched=touchcnt++;
- return answer;
-}
-
-
-
-static int readreg(int r, int size)
-{
- return readreg_general(r,size,-1,0);
-}
-
-static int readreg_specific(int r, int size, int spec)
-{
- return readreg_general(r,size,spec,0);
-}
-
-static int readreg_offset(int r, int size)
-{
- return readreg_general(r,size,-1,1);
-}
-
-/* writereg_general(r, size, spec)
- *
- * INPUT
- * - r : mid-layer register
- * - size : requested size (1/2/4)
- * - spec : -1 if find or make a register free, otherwise specifies
- * the physical register to use in any case
- *
- * OUTPUT
- * - hard (physical, x86 here) register allocated to virtual register r
- */
-static __inline__ int writereg_general(int r, int size, int spec)
-{
- int n;
- int answer=-1;
-
- record_register(r);
- if (size<4) {
- remove_offset(r,spec);
- }
-
- make_exclusive(r,size,spec);
- if (isinreg(r)) {
- int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
- int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
- n=live.state[r].realreg;
-
- Dif (live.nat[n].nholds!=1)
- abort();
- switch(size) {
- case 1:
- if (live.nat[n].canbyte || spec>=0) {
- live.state[r].dirtysize=ndsize;
- live.state[r].validsize=nvsize;
- answer=n;
- }
- break;
- case 2:
- if (live.nat[n].canword || spec>=0) {
- live.state[r].dirtysize=ndsize;
- live.state[r].validsize=nvsize;
- answer=n;
- }
- break;
- case 4:
- live.state[r].dirtysize=ndsize;
- live.state[r].validsize=nvsize;
- answer=n;
- break;
- default: abort();
- }
- if (answer<0)
- evict(r);
- }
- /* either the value was in memory to start with, or it was evicted and
- is in memory now */
- if (answer<0) {
- answer=alloc_reg_hinted(r,size,1,spec);
- }
- if (spec>=0 && spec!=answer) {
- mov_nregs(spec,answer);
- answer=spec;
- }
- if (live.state[r].status==UNDEF)
- live.state[r].validsize=4;
- live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
- live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
-
- live.nat[answer].locked++;
- live.nat[answer].touched=touchcnt++;
- if (size==4) {
- live.state[r].val=0;
- }
- else {
- Dif (live.state[r].val) {
- write_log("Problem with val\n");
- abort();
- }
- }
- set_status(r,DIRTY);
- return answer;
-}
-
-static int writereg(int r, int size)
-{
- return writereg_general(r,size,-1);
-}
-
-static int writereg_specific(int r, int size, int spec)
-{
- return writereg_general(r,size,spec);
-}
-
-static __inline__ int rmw_general(int r, int wsize, int rsize, int spec) /* Lock virtual register r for a
- * read-modify-write access: rsize bytes are read, wsize bytes are written.
- * spec>=0 requests native register spec; a negative spec means any register.
- * Returns the native register, whose lock count has been incremented. */
-{
- int n;
- int answer=-1;
-
- record_register(r);
- if (live.state[r].status==UNDEF) {
- write_log("WARNING: Unexpected read of undefined register %d\n",r);
- }
- remove_offset(r,spec);
- make_exclusive(r,0,spec);
-
- Dif (wsize<rsize) {
- write_log("Cannot handle wsize<rsize in rmw_general()\n");
- abort();
- }
- if (isinreg(r) && live.state[r].validsize>=rsize) { /* already in a native register with enough valid bytes */
- n=live.state[r].realreg;
- Dif (live.nat[n].nholds!=1)
- abort();
-
- switch(rsize) { /* keep n only if it can be accessed at this width (or we move to spec anyway) */
- case 1:
- if (live.nat[n].canbyte || spec>=0) {
- answer=n;
- }
- break;
- case 2:
- if (live.nat[n].canword || spec>=0) {
- answer=n;
- }
- break;
- case 4:
- answer=n;
- break;
- default: abort();
- }
- if (answer<0) /* current native register is unsuitable for this width */
- evict(r);
- }
- /* either the value was in memory to start with, or it was evicted and
- is in memory now */
- if (answer<0) {
- answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec); /* third argument 0: presumably "will not clobber", i.e. load the old value -- confirm against alloc_reg_hinted */
- }
-
- if (spec>=0 && spec!=answer) {
- /* Too bad */
- mov_nregs(spec,answer);
- answer=spec;
- }
- if (wsize>live.state[r].dirtysize) /* dirtysize and validsize only ever grow here */
- live.state[r].dirtysize=wsize;
- if (wsize>live.state[r].validsize)
- live.state[r].validsize=wsize;
- set_status(r,DIRTY);
-
- live.nat[answer].locked++; /* caller is expected to undo this (the MIDFUNCs below use unlock2) */
- live.nat[answer].touched=touchcnt++;
-
- Dif (live.state[r].val) {
- write_log("Problem with val(rmw)\n");
- abort();
- }
- return answer;
-}
-
-/* Read-modify-write lock of virtual register r (read rsize bytes, write
-   wsize bytes) without requesting a particular native register. */
-static int rmw(int r, int wsize, int rsize)
-{
-    const int no_preference = -1;
-
-    return rmw_general(r, wsize, rsize, no_preference);
-}
-
-/* Like rmw(), but passes the wanted native register `spec` through to
-   rmw_general(). */
-static int rmw_specific(int r, int wsize, int rsize, int spec)
-{
-    int nreg = rmw_general(r, wsize, rsize, spec);
-
-    return nreg;
-}
-
-
-/* Bit test of bit i in r that does not go through CLOBBER_BT;
-   needed for restoring the carry flag on non-P6 cores */
-static void bt_l_ri_noclobber(R4 r, IMM i)
-{
-    /* For bit numbers below 16 only the low word is requested */
-    const int width = (i < 16) ? 2 : 4;
-    const int nreg = readreg(r, width);
-
-    raw_bt_l_ri(nreg, i);
-    unlock2(nreg);
-}
-
-/********************************************************************
- * FPU register status handling. EMIT TIME! *
- ********************************************************************/
-
-/* If FPU virtual register r is DIRTY, store it to its memory slot and
-   mark it CLEAN.  Nothing happens for any other status. */
-static void f_tomem(int r)
-{
-    if (live.fate[r].status != DIRTY)
-        return;
-
-#if USE_LONG_DOUBLE
-    raw_fmov_ext_mr((uintptr)live.fate[r].mem, live.fate[r].realreg);
-#else
-    raw_fmov_mr((uintptr)live.fate[r].mem, live.fate[r].realreg);
-#endif
-    live.fate[r].status = CLEAN;
-}
-
-/* If FPU virtual register r is DIRTY, store it to its memory slot with
-   the "_drop" store variant and mark it INMEM.  Nothing happens for any
-   other status. */
-static void f_tomem_drop(int r)
-{
-    if (live.fate[r].status != DIRTY)
-        return;
-
-#if USE_LONG_DOUBLE
-    raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem, live.fate[r].realreg);
-#else
-    raw_fmov_mr_drop((uintptr)live.fate[r].mem, live.fate[r].realreg);
-#endif
-    live.fate[r].status = INMEM;
-}
-
-
-/* Nonzero when FPU virtual register r has status CLEAN or DIRTY. */
-static __inline__ int f_isinreg(int r)
-{
-    if (live.fate[r].status == CLEAN)
-        return 1;
-    if (live.fate[r].status == DIRTY)
-        return 1;
-    return 0;
-}
-
-static void f_evict(int r) /* Remove FPU virtual register r from its native register: write it back
- * if dirty, unlink it from the native register's holds[] list and mark it INMEM. */
-{
- int rr;
-
- if (!f_isinreg(r)) /* nothing to do unless r is CLEAN or DIRTY */
- return;
- rr=live.fate[r].realreg;
- if (live.fat[rr].nholds==1) /* r is the only occupant: use the _drop store */
- f_tomem_drop(r);
- else
- f_tomem(r);
-
- Dif (live.fat[rr].locked &&
- live.fat[rr].nholds==1) {
- write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
- abort();
- }
-
- live.fat[rr].nholds--;
- if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
- int topreg=live.fat[rr].holds[live.fat[rr].nholds]; /* former last entry of holds[] ... */
- int thisind=live.fate[r].realind;
- live.fat[rr].holds[thisind]=topreg; /* ... is moved into the slot r occupied */
- live.fate[topreg].realind=thisind;
- }
- live.fate[r].status=INMEM;
- live.fate[r].realreg=-1;
-}
-
-/* Evict every FPU virtual register currently held in native FPU
-   register r. */
-static __inline__ void f_free_nreg(int r)
-{
-    int idx;
-
-    /* Walk holds[] from the top entry downwards */
-    for (idx = live.fat[r].nholds; idx != 0; ) {
-        const int vreg = live.fat[r].holds[--idx];
-
-        f_evict(vreg);
-    }
-
-    Dif (live.fat[r].nholds!=0) {
-        write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
-        abort();
-    }
-}
-
-
-/* Use with care!  Declares FPU virtual register r CLEAN without
-   storing anything; registers that are not in a native register are
-   left untouched. */
-static __inline__ void f_isclean(int r)
-{
-    if (f_isinreg(r))
-        live.fate[r].status = CLEAN;
-}
-
-/* Detach FPU virtual register r from its native register without
-   writing the value back: it is marked CLEAN first, so the eviction has
-   nothing to store. */
-static __inline__ void f_disassociate(int r)
-{
-    if (f_isinreg(r))
-        live.fate[r].status = CLEAN;
-    f_evict(r);
-}
-
-
-
-static int f_alloc_reg(int r, int willclobber) /* Pick a native FPU register for virtual register r and bind r to it.
- * willclobber!=0: the old value is not loaded and r becomes DIRTY.
- * willclobber==0: the value is loaded from memory (unless UNDEF) and r becomes CLEAN.
- * Returns the chosen native register; the lock count is NOT changed here. */
-{
- int bestreg;
- uae_s32 when;
- int i;
- uae_s32 badness;
- bestreg=-1;
- when=2000000000;
- for (i=N_FREGS;i--;) { /* scan native registers from the highest index down */
- badness=live.fat[i].touched; /* lower touched stamp = touched longer ago */
- if (live.fat[i].nholds==0) /* an empty register is the best possible choice */
- badness=0;
-
- if (!live.fat[i].locked && badness<when) {
- bestreg=i;
- when=badness;
- if (live.fat[i].nholds==0)
- break;
- }
- }
- Dif (bestreg==-1)
- abort();
-
- if (live.fat[bestreg].nholds>0) { /* push out whatever lives in the victim */
- f_free_nreg(bestreg);
- }
- if (f_isinreg(r)) { /* r itself may still sit in some native register */
- f_evict(r);
- }
-
- if (!willclobber) {
- if (live.fate[r].status!=UNDEF) {
-#if USE_LONG_DOUBLE
- raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
-#else
- raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
-#endif
- }
- live.fate[r].status=CLEAN;
- }
- else {
- live.fate[r].status=DIRTY;
- }
- live.fate[r].realreg=bestreg;
- live.fate[r].realind=live.fat[bestreg].nholds; /* r is appended at the end of holds[] */
- live.fat[bestreg].touched=touchcnt++;
- live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
- live.fat[bestreg].nholds++;
-
- return bestreg;
-}
-
-/* Drop one lock from native FPU register r.  Debug builds abort when
-   the register is not locked at all. */
-static void f_unlock(int r)
-{
-    Dif (live.fat[r].locked == 0)
-        abort();
-    live.fat[r].locked -= 1;
-}
-
-/* Add one lock to native FPU register r. */
-static void f_setlock(int r)
-{
-    live.fat[r].locked += 1;
-}
-
-/* Make FPU virtual register r available for reading and return the
-   native register holding it.  The native register's lock count is
-   incremented and its "touched" stamp refreshed. */
-static __inline__ int f_readreg(int r)
-{
-    int nreg = f_isinreg(r) ? live.fate[r].realreg : -1;
-
-    /* Not in a native register: allocate one, keeping the old value */
-    if (nreg < 0)
-        nreg = f_alloc_reg(r, 0);
-
-    live.fat[nreg].locked++;
-    live.fat[nreg].touched = touchcnt++;
-    return nreg;
-}
-
-static __inline__ void f_make_exclusive(int r, int clobber) /* Ensure FPU virtual register r is the only
- * occupant of its native register (if it is in one at all).
- * clobber!=0: the current value need not be preserved when r is moved to a new native register. */
-{
- freg_status oldstate;
- int rr=live.fate[r].realreg;
- int nr;
- int nind;
- int ndirt=0;
- int i;
-
- if (!f_isinreg(r))
- return;
- if (live.fat[rr].nholds==1) /* already exclusive */
- return;
- for (i=0;i<live.fat[rr].nholds;i++) { /* count the other occupants that are dirty */
- int vr=live.fat[rr].holds[i];
- if (vr!=r && live.fate[vr].status==DIRTY)
- ndirt++;
- }
- if (!ndirt && !live.fat[rr].locked) {
- /* Everything else is clean, so let's keep this register */
- for (i=0;i<live.fat[rr].nholds;i++) {
- int vr=live.fat[rr].holds[i];
- if (vr!=r) {
- f_evict(vr); /* f_evict moves the last holds[] entry into slot i */
- i--; /* Try that index again! */
- }
- }
- Dif (live.fat[rr].nholds!=1) {
- write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
- for (i=0;i<live.fat[rr].nholds;i++) {
- write_log(" %d(%d,%d)",live.fat[rr].holds[i],
- live.fate[live.fat[rr].holds[i]].realreg,
- live.fate[live.fat[rr].holds[i]].realind);
- }
- write_log("\n");
- abort();
- }
- return;
- }
-
- /* We have to split the register */
- oldstate=live.fate[r]; /* snapshot, restored after the reallocation below */
-
- f_setlock(rr); /* Make sure this doesn't go away */
- /* Forget about r being in the register rr */
- f_disassociate(r);
- /* Get a new register, that we will clobber completely */
- nr=f_alloc_reg(r,1);
- nind=live.fate[r].realind;
- if (!clobber)
- raw_fmov_rr(nr,rr); /* Make another copy */
- live.fate[r]=oldstate; /* Keep all the old state info */
- live.fate[r].realreg=nr; /* ... except the new location */
- live.fate[r].realind=nind;
- f_unlock(rr);
-}
-
-
-/* Prepare FPU virtual register r for being completely overwritten and
-   return the native register to write to.  r is marked DIRTY and the
-   native register's lock count is incremented. */
-static __inline__ int f_writereg(int r)
-{
-    int nreg;
-
-    f_make_exclusive(r, 1);
-    nreg = f_isinreg(r) ? live.fate[r].realreg : -1;
-    if (nreg < 0)
-        nreg = f_alloc_reg(r, 1);
-
-    live.fate[r].status = DIRTY;
-    live.fat[nreg].locked++;
-    live.fat[nreg].touched = touchcnt++;
-    return nreg;
-}
-
-/* Prepare FPU virtual register r for a read-modify-write access and
-   return its native register.  r is marked DIRTY and the native
-   register's lock count is incremented. */
-static int f_rmw(int r)
-{
-    int nreg;
-
-    f_make_exclusive(r, 0);
-    nreg = f_isinreg(r) ? live.fate[r].realreg : f_alloc_reg(r, 0);
-
-    live.fate[r].status = DIRTY;
-    live.fat[nreg].locked++;
-    live.fat[nreg].touched = touchcnt++;
-    return nreg;
-}
-
-/* Emit the transfer of the FPU condition of FP_RESULT into the CPU
-   flags.  When FFLAG_NREG_CLOBBER_CONDITION holds, virtual register
-   `tmp` is first bound to FFLAG_NREG for the duration of the raw
-   operation and forgotten afterwards. */
-static void fflags_into_flags_internal(uae_u32 tmp)
-{
-    int fpreg;
-
-    clobber_flags();
-    fpreg = f_readreg(FP_RESULT);
-    if (!(FFLAG_NREG_CLOBBER_CONDITION)) {
-        raw_fflags_into_flags(fpreg);
-    }
-    else {
-        const int scratch_vreg = tmp;
-        const int scratch_nreg = writereg_specific(tmp, 4, FFLAG_NREG);
-
-        raw_fflags_into_flags(fpreg);
-        unlock2(scratch_nreg);
-        forget_about(scratch_vreg);
-    }
-    f_unlock(fpreg);
-    live_flags();
-}
-
-
-
-
-/********************************************************************
- * CPU functions exposed to gencomp. Both CREATE and EMIT time *
- ********************************************************************/
-
-/*
- * RULES FOR HANDLING REGISTERS:
- *
- * * In the function headers, order the parameters
- * - 1st registers written to
- * - 2nd read/modify/write registers
- * - 3rd registers read from
- * * Before calling raw_*, you must call readreg, writereg or rmw for
- * each register
- * * The order for this is
- * - 1st call remove_offset for all registers written to with size<4
- * - 2nd call readreg for all registers read without offset
- * - 3rd call rmw for all rmw registers
- * - 4th call readreg_offset for all registers that can handle offsets
- * - 5th call get_offset for all the registers from the previous step
- * - 6th call writereg for all written-to registers
- * - 7th call raw_*
- * - 8th unlock2 all registers that were locked
- */
-
-/* Record that the flags now live in the host flags register (VALID),
-   that the copy on the stack is stale (TRASH), and that they matter. */
-MIDFUNC(0,live_flags,(void))
-{
-    live.flags_are_important = 1;
-    live.flags_in_flags = VALID;
-    live.flags_on_stack = TRASH;
-}
-MENDFUNC(0,live_flags,(void))
-
-/* Record that the current flag values are not important. */
-MIDFUNC(0,dont_care_flags,(void))
-{
-    live.flags_are_important = 0;
-}
-MENDFUNC(0,dont_care_flags,(void))
-
-
-/* Store a condition derived from the live flags directly into FLAGX's
-   memory slot (setcc with condition code 2 -- presumably carry, going
-   by this function's name).  FLAGX is evicted first so that no native
-   register holds a competing copy. */
-MIDFUNC(0,duplicate_carry,(void))
-{
-    uintptr flagx_slot;
-
-    evict(FLAGX);
-    make_flags_live_internal();
-    flagx_slot = (uintptr)live.state[FLAGX].mem;
-    COMPCALL(setcc_m)(flagx_slot, 2);
-    log_vwrite(FLAGX);
-}
-MENDFUNC(0,duplicate_carry,(void))
-
-/* Reload the host carry flag from FLAGX, choosing the instruction
-   sequence according to have_rat_stall. */
-MIDFUNC(0,restore_carry,(void))
-{
-    if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
-        bt_l_ri_noclobber(FLAGX,0);
-    }
-    else { /* Avoid the stall the above creates.
-              This is slow on non-P6, though.
-            */
-        /* Written as COMPCALL(name)(args), like every other call site in
-           this file, so the call does not depend on COMPCALL being an
-           identity macro. */
-        COMPCALL(rol_b_ri)(FLAGX,8);
-        /* rol_b_ri goes through rmw(), which marks FLAGX dirty; a byte
-           rotated by 8 should be unchanged, so declare it clean again */
-        isclean(FLAGX);
-    }
-}
-MENDFUNC(0,restore_carry,(void))
-
-/* From here on, emitted operations must produce valid flags
-   (sets the global needflags). */
-MIDFUNC(0,start_needflags,(void))
-{
-    needflags = 1;
-}
-MENDFUNC(0,start_needflags,(void))
-
-/* Counterpart of start_needflags: clears the global needflags. */
-MIDFUNC(0,end_needflags,(void))
-{
-    needflags = 0;
-}
-MENDFUNC(0,end_needflags,(void))
-
-/* Exported wrapper around make_flags_live_internal(). */
-MIDFUNC(0,make_flags_live,(void))
-{
-    make_flags_live_internal();
-}
-MENDFUNC(0,make_flags_live,(void))
-
-/* Exported entry point: clobber the CPU flags, then let
-   fflags_into_flags_internal() do the transfer, using virtual register
-   tmp as scratch. */
-MIDFUNC(1,fflags_into_flags,(W2 tmp))
-{
-    clobber_flags();
-    fflags_into_flags_internal(tmp);
-}
-MENDFUNC(1,fflags_into_flags,(W2 tmp))
-
-
-/* Bit test of bit i in r.  For bit numbers below 16 only the low word
-   of r is requested from the register allocator. */
-MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
-{
-    const int width = (i < 16) ? 2 : 4;
-    int nreg;
-
-    CLOBBER_BT;
-    nreg = readreg(r, width);
-    raw_bt_l_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
-
-/* Bit test of r, with the bit number taken from register b. */
-MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
-{
-    int val_nreg;
-    int bit_nreg;
-
-    CLOBBER_BT;
-    val_nreg = readreg(r, 4);
-    bit_nreg = readreg(b, 4);
-    raw_bt_l_rr(val_nreg, bit_nreg);
-    unlock2(val_nreg);
-    unlock2(bit_nreg);
-}
-MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
-
-/* raw_btc_l_ri on bit i of r (read-modify-write).  For bit numbers
-   below 16 only the low word of r is involved. */
-MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
-{
-    const int width = (i < 16) ? 2 : 4;
-    int nreg;
-
-    CLOBBER_BT;
-    nreg = rmw(r, width, width);
-    raw_btc_l_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
-
-/* raw_btc_l_rr on r (read-modify-write), bit number taken from b. */
-MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
-{
-    int bit_nreg;
-    int val_nreg;
-
-    CLOBBER_BT;
-    bit_nreg = readreg(b, 4);
-    val_nreg = rmw(r, 4, 4);
-    raw_btc_l_rr(val_nreg, bit_nreg);
-    unlock2(val_nreg);
-    unlock2(bit_nreg);
-}
-MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
-
-
-/* raw_btr_l_ri on bit i of r (read-modify-write).  For bit numbers
-   below 16 only the low word of r is involved. */
-MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
-{
-    const int width = (i < 16) ? 2 : 4;
-    int nreg;
-
-    CLOBBER_BT;
-    nreg = rmw(r, width, width);
-    raw_btr_l_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
-
-/* raw_btr_l_rr on r (read-modify-write), bit number taken from b. */
-MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
-{
-    int bit_nreg;
-    int val_nreg;
-
-    CLOBBER_BT;
-    bit_nreg = readreg(b, 4);
-    val_nreg = rmw(r, 4, 4);
-    raw_btr_l_rr(val_nreg, bit_nreg);
-    unlock2(val_nreg);
-    unlock2(bit_nreg);
-}
-MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
-
-
-/* raw_bts_l_ri on bit i of r (read-modify-write).  For bit numbers
-   below 16 only the low word of r is involved. */
-MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
-{
-    const int width = (i < 16) ? 2 : 4;
-    int nreg;
-
-    CLOBBER_BT;
-    nreg = rmw(r, width, width);
-    raw_bts_l_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
-
-/* raw_bts_l_rr on r (read-modify-write), bit number taken from b. */
-MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
-{
-    int bit_nreg;
-    int val_nreg;
-
-    CLOBBER_BT;
-    bit_nreg = readreg(b, 4);
-    val_nreg = rmw(r, 4, 4);
-    raw_bts_l_rr(val_nreg, bit_nreg);
-    unlock2(val_nreg);
-    unlock2(bit_nreg);
-}
-MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
-
-/* Load a long from the absolute address s into d. */
-MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
-{
-    int dst_nreg;
-
-    CLOBBER_MOV;
-    dst_nreg = writereg(d, 4);
-    raw_mov_l_rm(dst_nreg, s);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
-
-
-/* Emit a call through the address held in r. */
-MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
-{
-    const int target_nreg = readreg(r, 4);
-
-    raw_call_r(target_nreg);
-    unlock2(target_nreg);
-}
-MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
-
-/* Memory-destination subtract with an immediate: d is an address, s the
-   immediate.  No virtual registers are involved. */
-MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
-{
-    CLOBBER_SUB;
-    raw_sub_l_mi(d, s);
-}
-MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
-
-/* Store the immediate long s at address d.  No virtual registers are
-   involved. */
-MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
-{
-    CLOBBER_MOV;
-    raw_mov_l_mi(d, s);
-}
-MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
-
-/* Store the immediate word s at address d.  No virtual registers are
-   involved. */
-MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
-{
-    CLOBBER_MOV;
-    raw_mov_w_mi(d, s);
-}
-MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
-
-/* Store the immediate byte s at address d.  No virtual registers are
-   involved. */
-MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
-{
-    CLOBBER_MOV;
-    raw_mov_b_mi(d, s);
-}
-MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
-
-/* Rotate the low byte of r left by the immediate count i. */
-MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_ROL;
-    nreg = rmw(r, 1, 1);
-    raw_rol_b_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
-
-/* Rotate the low word of r left by the immediate count i. */
-MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_ROL;
-    nreg = rmw(r, 2, 2);
-    raw_rol_w_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
-
-/* Rotate the long in r left by the immediate count i. */
-MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_ROL;
-    nreg = rmw(r, 4, 4);
-    raw_rol_l_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
-
-/* Rotate the long in d left by the count held in r.
-   A constant count is forwarded to rol_l_ri; otherwise the count is
-   forced into SHIFTCOUNT_NREG before the raw rotate is emitted. */
-MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-    if (isconst(r)) {
-        COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
-        return;
-    }
-    CLOBBER_ROL;
-    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
-    d=rmw(d,4,4);
-    Dif (r!=1) {
-        /* Name the raw operation actually used here (was "raw_rol_b") */
-        write_log("Illegal register %d in raw_rol_l\n",r);
-        abort();
-    }
-    raw_rol_l_rr(d,r) ;
-    unlock2(r);
-    unlock2(d);
-}
-MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
-
-/* Rotate the low word of d left by the count held in r.
-   A constant count is forwarded to rol_w_ri; otherwise the count is
-   forced into SHIFTCOUNT_NREG before the raw rotate is emitted. */
-MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-
-    if (isconst(r)) {
-        COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
-        return;
-    }
-    CLOBBER_ROL;
-    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
-    d=rmw(d,2,2);
-    Dif (r!=1) {
-        /* Name the raw operation actually used here (was "raw_rol_b") */
-        write_log("Illegal register %d in raw_rol_w\n",r);
-        abort();
-    }
-    raw_rol_w_rr(d,r) ;
-    unlock2(r);
-    unlock2(d);
-}
-MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
-
-/* Rotate the low byte of d left by the count held in r.
-   A constant count is forwarded to rol_b_ri. */
-MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-    int cnt_nreg;
-    int dst_nreg;
-
-    if (isconst(r)) {
-        COMPCALL(rol_b_ri)(d, (uae_u8)live.state[r].val);
-        return;
-    }
-
-    CLOBBER_ROL;
-    cnt_nreg = readreg_specific(r, 1, SHIFTCOUNT_NREG);
-    dst_nreg = rmw(d, 1, 1);
-    Dif (cnt_nreg != 1) {
-        write_log("Illegal register %d in raw_rol_b\n", cnt_nreg);
-        abort();
-    }
-    raw_rol_b_rr(dst_nreg, cnt_nreg);
-    unlock2(cnt_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
-
-
-/* Shift the long in d left by the count held in r.
-   A constant count is forwarded to shll_l_ri; otherwise the count is
-   forced into SHIFTCOUNT_NREG before the raw shift is emitted. */
-MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-    if (isconst(r)) {
-        COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
-        return;
-    }
-    CLOBBER_SHLL;
-    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
-    d=rmw(d,4,4);
-    Dif (r!=1) {
-        /* Name the raw operation actually used here (was "raw_rol_b") */
-        write_log("Illegal register %d in raw_shll_l\n",r);
-        abort();
-    }
-    raw_shll_l_rr(d,r) ;
-    unlock2(r);
-    unlock2(d);
-}
-MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
-
-/* Shift the low word of d left by the count held in r.
-   A constant count is forwarded to shll_w_ri; otherwise the count is
-   forced into SHIFTCOUNT_NREG before the raw shift is emitted. */
-MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-
-    if (isconst(r)) {
-        COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
-        return;
-    }
-    CLOBBER_SHLL;
-    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
-    d=rmw(d,2,2);
-    Dif (r!=1) {
-        /* Name the raw operation actually used here (was "raw_shll_b") */
-        write_log("Illegal register %d in raw_shll_w\n",r);
-        abort();
-    }
-    raw_shll_w_rr(d,r) ;
-    unlock2(r);
-    unlock2(d);
-}
-MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
-
-/* Shift the low byte of d left by the count held in r.
-   A constant count is forwarded to shll_b_ri. */
-MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-    int cnt_nreg;
-    int dst_nreg;
-
-    if (isconst(r)) {
-        COMPCALL(shll_b_ri)(d, (uae_u8)live.state[r].val);
-        return;
-    }
-
-    CLOBBER_SHLL;
-    cnt_nreg = readreg_specific(r, 1, SHIFTCOUNT_NREG);
-    dst_nreg = rmw(d, 1, 1);
-    Dif (cnt_nreg != 1) {
-        write_log("Illegal register %d in raw_shll_b\n", cnt_nreg);
-        abort();
-    }
-    raw_shll_b_rr(dst_nreg, cnt_nreg);
-    unlock2(cnt_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
-
-
-/* Rotate the low byte of r right by the immediate count i.
-   NOTE(review): r is annotated R1 but is locked with rmw(), i.e. it is
-   modified -- the annotation probably ought to be RW1. */
-MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_ROR;
-    nreg = rmw(r, 1, 1);
-    raw_ror_b_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
-
-/* Rotate the low word of r right by the immediate count i.
-   NOTE(review): r is annotated R2 but is locked with rmw(), i.e. it is
-   modified -- the annotation probably ought to be RW2. */
-MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_ROR;
-    nreg = rmw(r, 2, 2);
-    raw_ror_w_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
-
-/* Rotate the long in r right by the immediate count i.
-   NOTE(review): r is annotated R4 but is locked with rmw(), i.e. it is
-   modified -- the annotation probably ought to be RW4. */
-MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_ROR;
-    nreg = rmw(r, 4, 4);
-    raw_ror_l_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
-
-/* Rotate the long in d right by the count held in r.
-   A constant count is forwarded to ror_l_ri.
-   NOTE(review): d is annotated R4 although it is locked with rmw(). */
-MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
-{
-    int cnt_nreg;
-    int dst_nreg;
-
-    if (isconst(r)) {
-        COMPCALL(ror_l_ri)(d, (uae_u8)live.state[r].val);
-        return;
-    }
-
-    CLOBBER_ROR;
-    cnt_nreg = readreg_specific(r, 1, SHIFTCOUNT_NREG);
-    dst_nreg = rmw(d, 4, 4);
-    raw_ror_l_rr(dst_nreg, cnt_nreg);
-    unlock2(cnt_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
-
-/* Rotate the low word of d right by the count held in r.
-   A constant count is forwarded to ror_w_ri.
-   NOTE(review): d is annotated R2 although it is locked with rmw(). */
-MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
-{
-    int cnt_nreg;
-    int dst_nreg;
-
-    if (isconst(r)) {
-        COMPCALL(ror_w_ri)(d, (uae_u8)live.state[r].val);
-        return;
-    }
-
-    CLOBBER_ROR;
-    cnt_nreg = readreg_specific(r, 1, SHIFTCOUNT_NREG);
-    dst_nreg = rmw(d, 2, 2);
-    raw_ror_w_rr(dst_nreg, cnt_nreg);
-    unlock2(cnt_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
-
-/* Rotate the low byte of d right by the count held in r.
-   A constant count is forwarded to ror_b_ri.
-   NOTE(review): d is annotated R1 although it is locked with rmw(). */
-MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
-{
-    int cnt_nreg;
-    int dst_nreg;
-
-    if (isconst(r)) {
-        COMPCALL(ror_b_ri)(d, (uae_u8)live.state[r].val);
-        return;
-    }
-
-    CLOBBER_ROR;
-    cnt_nreg = readreg_specific(r, 1, SHIFTCOUNT_NREG);
-    dst_nreg = rmw(d, 1, 1);
-    raw_ror_b_rr(dst_nreg, cnt_nreg);
-    unlock2(cnt_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
-
-/* Logical right shift of the long in d by the count held in r.
-   A constant count is forwarded to shrl_l_ri; otherwise the count is
-   forced into SHIFTCOUNT_NREG before the raw shift is emitted. */
-MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-    if (isconst(r)) {
-        COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
-        return;
-    }
-    CLOBBER_SHRL;
-    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
-    d=rmw(d,4,4);
-    Dif (r!=1) {
-        /* Name the raw operation actually used here (was "raw_rol_b") */
-        write_log("Illegal register %d in raw_shrl_l\n",r);
-        abort();
-    }
-    raw_shrl_l_rr(d,r) ;
-    unlock2(r);
-    unlock2(d);
-}
-MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
-
-/* Logical right shift of the low word of d by the count held in r.
-   A constant count is forwarded to shrl_w_ri; otherwise the count is
-   forced into SHIFTCOUNT_NREG before the raw shift is emitted. */
-MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-
-    if (isconst(r)) {
-        COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
-        return;
-    }
-    CLOBBER_SHRL;
-    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
-    d=rmw(d,2,2);
-    Dif (r!=1) {
-        /* Name the raw operation actually used here (was "raw_shrl_b") */
-        write_log("Illegal register %d in raw_shrl_w\n",r);
-        abort();
-    }
-    raw_shrl_w_rr(d,r) ;
-    unlock2(r);
-    unlock2(d);
-}
-MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
-
-/* Logical right shift of the low byte of d by the count held in r.
-   A constant count is forwarded to shrl_b_ri. */
-MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-    int cnt_nreg;
-    int dst_nreg;
-
-    if (isconst(r)) {
-        COMPCALL(shrl_b_ri)(d, (uae_u8)live.state[r].val);
-        return;
-    }
-
-    CLOBBER_SHRL;
-    cnt_nreg = readreg_specific(r, 1, SHIFTCOUNT_NREG);
-    dst_nreg = rmw(d, 1, 1);
-    Dif (cnt_nreg != 1) {
-        write_log("Illegal register %d in raw_shrl_b\n", cnt_nreg);
-        abort();
-    }
-    raw_shrl_b_rr(dst_nreg, cnt_nreg);
-    unlock2(cnt_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
-
-
-
-/* Shift the long in r left by the immediate count i.
-   When r currently holds a known constant and the flags are not needed,
-   the shift is folded into that constant and no code is emitted. */
-MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
-{
-    if (!i && !needflags)
-        return;
-    if (isconst(r) && !needflags) {
-        /* Mask the count to 5 bits.  A C shift by the operand width or
-           more is undefined, and the emitted x86 shift only honours the
-           low 5 bits of its count, so masking keeps the folded result in
-           step with the non-constant path.  (Assumes val is a 32-bit
-           quantity -- confirm against the declaration of live.state.) */
-        live.state[r].val<<=(i&31);
-        return;
-    }
-    CLOBBER_SHLL;
-    r=rmw(r,4,4);
-    raw_shll_l_ri(r,i);
-    unlock2(r);
-}
-MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
-
-/* Shift the low word of r left by the immediate count i. */
-MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_SHLL;
-    nreg = rmw(r, 2, 2);
-    raw_shll_w_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
-
-/* Shift the low byte of r left by the immediate count i. */
-MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_SHLL;
-    nreg = rmw(r, 1, 1);
-    raw_shll_b_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
-
-/* Logical right shift of the long in r by the immediate count i.
-   When r currently holds a known constant and the flags are not needed,
-   the shift is folded into that constant and no code is emitted. */
-MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
-{
-    if (!i && !needflags)
-        return;
-    if (isconst(r) && !needflags) {
-        /* Mask the count to 5 bits.  A C shift by the operand width or
-           more is undefined, and the emitted x86 shift only honours the
-           low 5 bits of its count, so masking keeps the folded result in
-           step with the non-constant path.  (Assumes val is an unsigned
-           32-bit quantity -- confirm against the declaration of
-           live.state.) */
-        live.state[r].val>>=(i&31);
-        return;
-    }
-    CLOBBER_SHRL;
-    r=rmw(r,4,4);
-    raw_shrl_l_ri(r,i);
-    unlock2(r);
-}
-MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
-
-/* Logical right shift of the low word of r by the immediate count i. */
-MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_SHRL;
-    nreg = rmw(r, 2, 2);
-    raw_shrl_w_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
-
-/* Logical right shift of the low byte of r by the immediate count i. */
-MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_SHRL;
-    nreg = rmw(r, 1, 1);
-    raw_shrl_b_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
-
-/* Arithmetic right shift of the long in r by the immediate count i. */
-MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_SHRA;
-    nreg = rmw(r, 4, 4);
-    raw_shra_l_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
-
-/* Arithmetic right shift of the low word of r by the immediate count i. */
-MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_SHRA;
-    nreg = rmw(r, 2, 2);
-    raw_shra_w_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
-
-/* Arithmetic right shift of the low byte of r by the immediate count i. */
-MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
-{
-    int nreg;
-
-    /* A zero count only matters when the flags are wanted */
-    if (i == 0 && !needflags)
-        return;
-
-    CLOBBER_SHRA;
-    nreg = rmw(r, 1, 1);
-    raw_shra_b_ri(nreg, i);
-    unlock2(nreg);
-}
-MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
-
-/* Arithmetic right shift of the long in d by the count held in r.
-   A constant count is forwarded to shra_l_ri; otherwise the count is
-   forced into SHIFTCOUNT_NREG before the raw shift is emitted. */
-MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-    if (isconst(r)) {
-        COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
-        return;
-    }
-    CLOBBER_SHRA;
-    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
-    d=rmw(d,4,4);
-    Dif (r!=1) {
-        /* Name the raw operation actually used here (was "raw_rol_b") */
-        write_log("Illegal register %d in raw_shra_l\n",r);
-        abort();
-    }
-    raw_shra_l_rr(d,r) ;
-    unlock2(r);
-    unlock2(d);
-}
-MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
-
-/* Arithmetic right shift of the low word of d by the count held in r.
-   A constant count is forwarded to shra_w_ri; otherwise the count is
-   forced into SHIFTCOUNT_NREG before the raw shift is emitted. */
-MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-
-    if (isconst(r)) {
-        COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
-        return;
-    }
-    CLOBBER_SHRA;
-    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
-    d=rmw(d,2,2);
-    Dif (r!=1) {
-        /* Name the raw operation actually used here (was "raw_shra_b") */
-        write_log("Illegal register %d in raw_shra_w\n",r);
-        abort();
-    }
-    raw_shra_w_rr(d,r) ;
-    unlock2(r);
-    unlock2(d);
-}
-MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
-
-/* Arithmetic right shift of the low byte of d by the count held in r.
-   A constant count is forwarded to shra_b_ri. */
-MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
-{ /* Can only do this with r==1, i.e. cl */
-    int cnt_nreg;
-    int dst_nreg;
-
-    if (isconst(r)) {
-        COMPCALL(shra_b_ri)(d, (uae_u8)live.state[r].val);
-        return;
-    }
-
-    CLOBBER_SHRA;
-    cnt_nreg = readreg_specific(r, 1, SHIFTCOUNT_NREG);
-    dst_nreg = rmw(d, 1, 1);
-    Dif (cnt_nreg != 1) {
-        write_log("Illegal register %d in raw_shra_b\n", cnt_nreg);
-        abort();
-    }
-    raw_shra_b_rr(dst_nreg, cnt_nreg);
-    unlock2(cnt_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
-
-
-/* Set the low byte of d according to condition code cc. */
-MIDFUNC(2,setcc,(W1 d, IMM cc))
-{
-    int dst_nreg;
-
-    CLOBBER_SETCC;
-    dst_nreg = writereg(d, 1);
-    raw_setcc(dst_nreg, cc);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,setcc,(W1 d, IMM cc))
-
-/* Memory form of setcc: d is an address, cc the condition code.  No
-   virtual registers are involved. */
-MIDFUNC(2,setcc_m,(IMM d, IMM cc))
-{
-    CLOBBER_SETCC;
-    raw_setcc_m(d, cc);
-}
-MENDFUNC(2,setcc_m,(IMM d, IMM cc))
-
-/* Conditional byte move of s into d under condition cc.  Moving a
-   register onto itself emits nothing. */
-MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
-{
-    int src_nreg;
-    int dst_nreg;
-
-    if (s == d)
-        return;
-
-    CLOBBER_CMOV;
-    src_nreg = readreg(s, 1);
-    dst_nreg = rmw(d, 1, 1);
-    raw_cmov_b_rr(dst_nreg, src_nreg, cc);
-    unlock2(src_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
-
-/* Conditional word move of s into d under condition cc.  Moving a
-   register onto itself emits nothing. */
-MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
-{
-    int src_nreg;
-    int dst_nreg;
-
-    if (s == d)
-        return;
-
-    CLOBBER_CMOV;
-    src_nreg = readreg(s, 2);
-    dst_nreg = rmw(d, 2, 2);
-    raw_cmov_w_rr(dst_nreg, src_nreg, cc);
-    unlock2(src_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
-
-/* Conditional long move of s into d under condition cc.  Moving a
-   register onto itself emits nothing. */
-MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
-{
-    int src_nreg;
-    int dst_nreg;
-
-    if (s == d)
-        return;
-
-    CLOBBER_CMOV;
-    src_nreg = readreg(s, 4);
-    dst_nreg = rmw(d, 4, 4);
-    raw_cmov_l_rr(dst_nreg, src_nreg, cc);
-    unlock2(src_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
-
-/* Conditional long load from the absolute address s into d under
-   condition cc. */
-MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
-{
-    int dst_nreg;
-
-    CLOBBER_CMOV;
-    dst_nreg = rmw(d, 4, 4);
-    raw_cmov_l_rm(dst_nreg, s, cc);
-    unlock2(dst_nreg);
-}
-MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
-
-/* raw_bsf_l_rr from s into d.
-   NOTE(review): s is annotated W4 but is only read here (readreg). */
-MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
-{
-    int src_nreg;
-    int dst_nreg;
-
-    CLOBBER_BSF;
-    src_nreg = readreg(s, 4);
-    dst_nreg = writereg(d, 4);
-    raw_bsf_l_rr(dst_nreg, src_nreg);
-    unlock2(src_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
-
-/* Set the Z flag depending on the value in s. Note that the
-   value has to be 0 or -1 (or, more precisely, for non-zero
-   values, bit 14 must be set)!
-   s is forced into FLAG_NREG3; tmp is a scratch register. */
-MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
-{
-    int val_nreg;
-    int scratch_nreg;
-
-    CLOBBER_BSF;
-    val_nreg = rmw_specific(s, 4, 4, FLAG_NREG3);
-    scratch_nreg = writereg(tmp, 4);
-    raw_flags_set_zero(val_nreg, scratch_nreg);
-    unlock2(scratch_nreg);
-    unlock2(val_nreg);
-}
-MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
-
-/* raw_imul_32_32: d is read-modify-write, s is read. */
-MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
-{
-    int src_nreg;
-    int dst_nreg;
-
-    CLOBBER_MUL;
-    src_nreg = readreg(s, 4);
-    dst_nreg = rmw(d, 4, 4);
-    raw_imul_32_32(dst_nreg, src_nreg);
-    unlock2(src_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
-
-/* raw_imul_64_32: both operands are read-modify-write and are forced
-   into fixed native registers (d in MUL_NREG1, s in MUL_NREG2). */
-MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
-{
-    int s_nreg;
-    int d_nreg;
-
-    CLOBBER_MUL;
-    s_nreg = rmw_specific(s, 4, 4, MUL_NREG2);
-    d_nreg = rmw_specific(d, 4, 4, MUL_NREG1);
-    raw_imul_64_32(d_nreg, s_nreg);
-    unlock2(s_nreg);
-    unlock2(d_nreg);
-}
-MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
-
-/* raw_mul_64_32: both operands are read-modify-write and are forced
-   into fixed native registers (d in MUL_NREG1, s in MUL_NREG2). */
-MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
-{
-    int s_nreg;
-    int d_nreg;
-
-    CLOBBER_MUL;
-    s_nreg = rmw_specific(s, 4, 4, MUL_NREG2);
-    d_nreg = rmw_specific(d, 4, 4, MUL_NREG1);
-    raw_mul_64_32(d_nreg, s_nreg);
-    unlock2(s_nreg);
-    unlock2(d_nreg);
-}
-MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
-
-/* raw_mul_32_32: d is read-modify-write, s is read. */
-MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
-{
-    int src_nreg;
-    int dst_nreg;
-
-    CLOBBER_MUL;
-    src_nreg = readreg(s, 4);
-    dst_nreg = rmw(d, 4, 4);
-    raw_mul_32_32(dst_nreg, src_nreg);
-    unlock2(src_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
-
-#if SIZEOF_VOID_P == 8
-/* Sign-extend the 32-bit value in s into d (only built when pointers
-   are 8 bytes).  A constant source becomes a constant destination. */
-MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
-{
-    int in_place;
-    int src_nreg;
-    int dst_nreg;
-
-    if (isconst(s)) {
-        set_const(d, (uae_s32)live.state[s].val);
-        return;
-    }
-
-    CLOBBER_SE32;
-    in_place = (s == d);
-    if (in_place) {
-        /* If we try to lock this twice, with different sizes, we
-           are in trouble! */
-        src_nreg = dst_nreg = rmw(s, 4, 4);
-    }
-    else {
-        src_nreg = readreg(s, 4);
-        dst_nreg = writereg(d, 4);
-    }
-    raw_sign_extend_32_rr(dst_nreg, src_nreg);
-    if (!in_place)
-        unlock2(dst_nreg);
-    unlock2(src_nreg);
-}
-MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
-#endif
-
-/* Sign-extend the low word of s into the long d.  A constant source
-   becomes a constant destination. */
-MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
-{
-    int in_place;
-    int src_nreg;
-    int dst_nreg;
-
-    if (isconst(s)) {
-        set_const(d, (uae_s32)(uae_s16)live.state[s].val);
-        return;
-    }
-
-    CLOBBER_SE16;
-    in_place = (s == d);
-    if (in_place) {
-        /* If we try to lock this twice, with different sizes, we
-           are in trouble! */
-        src_nreg = dst_nreg = rmw(s, 4, 2);
-    }
-    else {
-        src_nreg = readreg(s, 2);
-        dst_nreg = writereg(d, 4);
-    }
-    raw_sign_extend_16_rr(dst_nreg, src_nreg);
-    if (!in_place)
-        unlock2(dst_nreg);
-    unlock2(src_nreg);
-}
-MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
-
-/* Sign-extend the low byte of s into the long d.  A constant source
-   becomes a constant destination. */
-MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
-{
-    int in_place;
-    int src_nreg;
-    int dst_nreg;
-
-    if (isconst(s)) {
-        set_const(d, (uae_s32)(uae_s8)live.state[s].val);
-        return;
-    }
-
-    in_place = (s == d);
-    CLOBBER_SE8;
-    if (in_place) {
-        /* If we try to lock this twice, with different sizes, we
-           are in trouble! */
-        src_nreg = dst_nreg = rmw(s, 4, 1);
-    }
-    else {
-        src_nreg = readreg(s, 1);
-        dst_nreg = writereg(d, 4);
-    }
-
-    raw_sign_extend_8_rr(dst_nreg, src_nreg);
-
-    if (!in_place)
-        unlock2(dst_nreg);
-    unlock2(src_nreg);
-}
-MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
-
-
-/* Zero-extend the low word of s into the long d.  A constant source
-   becomes a constant destination. */
-MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
-{
-    int in_place;
-    int src_nreg;
-    int dst_nreg;
-
-    if (isconst(s)) {
-        set_const(d, (uae_u32)(uae_u16)live.state[s].val);
-        return;
-    }
-
-    in_place = (s == d);
-    CLOBBER_ZE16;
-    if (in_place) {
-        /* If we try to lock this twice, with different sizes, we
-           are in trouble! */
-        src_nreg = dst_nreg = rmw(s, 4, 2);
-    }
-    else {
-        src_nreg = readreg(s, 2);
-        dst_nreg = writereg(d, 4);
-    }
-    raw_zero_extend_16_rr(dst_nreg, src_nreg);
-    if (!in_place)
-        unlock2(dst_nreg);
-    unlock2(src_nreg);
-}
-MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
-
-/* Zero-extend the low byte of s into the long d.  A constant source
-   becomes a constant destination. */
-MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
-{
-    int in_place;
-    int src_nreg;
-    int dst_nreg;
-
-    if (isconst(s)) {
-        set_const(d, (uae_u32)(uae_u8)live.state[s].val);
-        return;
-    }
-
-    in_place = (s == d);
-    CLOBBER_ZE8;
-    if (in_place) {
-        /* If we try to lock this twice, with different sizes, we
-           are in trouble! */
-        src_nreg = dst_nreg = rmw(s, 4, 1);
-    }
-    else {
-        src_nreg = readreg(s, 1);
-        dst_nreg = writereg(d, 4);
-    }
-
-    raw_zero_extend_8_rr(dst_nreg, src_nreg);
-
-    if (!in_place)
-        unlock2(dst_nreg);
-    unlock2(src_nreg);
-}
-MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
-
-/* Copy the low byte of s into d.  A self-move emits nothing; a
-   constant source is turned into mov_b_ri. */
-MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
-{
-    int src_nreg;
-    int dst_nreg;
-
-    if (s == d)
-        return;
-    if (isconst(s)) {
-        COMPCALL(mov_b_ri)(d, (uae_u8)live.state[s].val);
-        return;
-    }
-
-    CLOBBER_MOV;
-    src_nreg = readreg(s, 1);
-    dst_nreg = writereg(d, 1);
-    raw_mov_b_rr(dst_nreg, src_nreg);
-    unlock2(dst_nreg);
-    unlock2(src_nreg);
-}
-MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
-
-/* Copy the low word of s into d.  A self-move emits nothing; a
-   constant source is turned into mov_w_ri. */
-MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
-{
-    int src_nreg;
-    int dst_nreg;
-
-    if (s == d)
-        return;
-    if (isconst(s)) {
-        COMPCALL(mov_w_ri)(d, (uae_u16)live.state[s].val);
-        return;
-    }
-
-    CLOBBER_MOV;
-    src_nreg = readreg(s, 2);
-    dst_nreg = writereg(d, 2);
-    raw_mov_w_rr(dst_nreg, src_nreg);
-    unlock2(dst_nreg);
-    unlock2(src_nreg);
-}
-MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
-
-
-/* Load a long into d, addressed by baser and index scaled by factor. */
-MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-{
-    int base_nreg;
-    int index_nreg;
-    int dst_nreg;
-
-    CLOBBER_MOV;
-    base_nreg = readreg(baser, 4);
-    index_nreg = readreg(index, 4);
-    dst_nreg = writereg(d, 4);
-
-    raw_mov_l_rrm_indexed(dst_nreg, base_nreg, index_nreg, factor);
-    unlock2(dst_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
-
-/* Load a word into d, addressed by baser and index scaled by factor. */
-MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-{
-    int base_nreg;
-    int index_nreg;
-    int dst_nreg;
-
-    CLOBBER_MOV;
-    base_nreg = readreg(baser, 4);
-    index_nreg = readreg(index, 4);
-    dst_nreg = writereg(d, 2);
-
-    raw_mov_w_rrm_indexed(dst_nreg, base_nreg, index_nreg, factor);
-    unlock2(dst_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
-
-/* Load a byte into d, addressed by baser and index scaled by factor. */
-MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-{
-    int base_nreg;
-    int index_nreg;
-    int dst_nreg;
-
-    CLOBBER_MOV;
-    base_nreg = readreg(baser, 4);
-    index_nreg = readreg(index, 4);
-    dst_nreg = writereg(d, 1);
-
-    raw_mov_b_rrm_indexed(dst_nreg, base_nreg, index_nreg, factor);
-
-    unlock2(dst_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
-
-
-/* Store the long in s to the address formed from baser and index
-   scaled by factor.  Debug builds abort if s shares a native register
-   with either address register. */
-MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-{
-    int base_nreg;
-    int index_nreg;
-    int src_nreg;
-
-    CLOBBER_MOV;
-    base_nreg = readreg(baser, 4);
-    index_nreg = readreg(index, 4);
-    src_nreg = readreg(s, 4);
-
-    Dif (base_nreg==src_nreg || index_nreg==src_nreg)
-        abort();
-
-    raw_mov_l_mrr_indexed(base_nreg, index_nreg, factor, src_nreg);
-    unlock2(src_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
-
-/* Store the low word of s to the address formed from baser and index
-   scaled by factor. */
-MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-{
-    int base_nreg;
-    int index_nreg;
-    int src_nreg;
-
-    CLOBBER_MOV;
-    base_nreg = readreg(baser, 4);
-    index_nreg = readreg(index, 4);
-    src_nreg = readreg(s, 2);
-
-    raw_mov_w_mrr_indexed(base_nreg, index_nreg, factor, src_nreg);
-    unlock2(src_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
-
-/* Store the low byte of s to the address formed from baser and index
-   scaled by factor.  The byte source is locked before the address
-   registers. */
-MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-{
-    int src_nreg;
-    int base_nreg;
-    int index_nreg;
-
-    CLOBBER_MOV;
-    src_nreg = readreg(s, 1);
-    base_nreg = readreg(baser, 4);
-    index_nreg = readreg(index, 4);
-
-    raw_mov_b_mrr_indexed(base_nreg, index_nreg, factor, src_nreg);
-    unlock2(src_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
-
-
-/* Store the long in s to base+baser+factor*index.  Pending offsets of
-   baser and index are folded into the displacement. */
-MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-{
-    int src_nreg;
-    int base_nreg;
-    int index_nreg;
-
-    CLOBBER_MOV;
-    src_nreg = readreg(s, 4);
-    base_nreg = readreg_offset(baser, 4);
-    index_nreg = readreg_offset(index, 4);
-
-    base += get_offset(baser);
-    base += factor*get_offset(index);
-
-    raw_mov_l_bmrr_indexed(base, base_nreg, index_nreg, factor, src_nreg);
-    unlock2(src_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
-
-/* Store the low word of s to base+baser+factor*index.  Pending offsets
-   of baser and index are folded into the displacement. */
-MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-{
-    int src_nreg;
-    int base_nreg;
-    int index_nreg;
-
-    CLOBBER_MOV;
-    src_nreg = readreg(s, 2);
-    base_nreg = readreg_offset(baser, 4);
-    index_nreg = readreg_offset(index, 4);
-
-    base += get_offset(baser);
-    base += factor*get_offset(index);
-
-    raw_mov_w_bmrr_indexed(base, base_nreg, index_nreg, factor, src_nreg);
-    unlock2(src_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
-
-/* Store the low byte of s to base+baser+factor*index.  Pending offsets
-   of baser and index are folded into the displacement. */
-MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-{
-    int src_nreg;
-    int base_nreg;
-    int index_nreg;
-
-    CLOBBER_MOV;
-    src_nreg = readreg(s, 1);
-    base_nreg = readreg_offset(baser, 4);
-    index_nreg = readreg_offset(index, 4);
-
-    base += get_offset(baser);
-    base += factor*get_offset(index);
-
-    raw_mov_b_bmrr_indexed(base, base_nreg, index_nreg, factor, src_nreg);
-    unlock2(src_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
-
-
-
-/* Read a long from base+baser+factor*index into d.  Pending offsets of
-   baser and index are folded into the displacement. */
-MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-{
-    int base_nreg;
-    int index_nreg;
-    int dst_nreg;
-
-    CLOBBER_MOV;
-    base_nreg = readreg_offset(baser, 4);
-    index_nreg = readreg_offset(index, 4);
-    base += get_offset(baser);
-    base += factor*get_offset(index);
-    dst_nreg = writereg(d, 4);
-    raw_mov_l_brrm_indexed(dst_nreg, base, base_nreg, index_nreg, factor);
-    unlock2(dst_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
-
-
-/* Read a word from base+baser+factor*index into d.  d's pending offset
-   is removed first because only part of d is written. */
-MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-{
-    int base_nreg;
-    int index_nreg;
-    int dst_nreg;
-
-    CLOBBER_MOV;
-    remove_offset(d, -1);
-    base_nreg = readreg_offset(baser, 4);
-    index_nreg = readreg_offset(index, 4);
-    base += get_offset(baser);
-    base += factor*get_offset(index);
-    dst_nreg = writereg(d, 2);
-    raw_mov_w_brrm_indexed(dst_nreg, base, base_nreg, index_nreg, factor);
-    unlock2(dst_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
-
-
-/* Read a byte from base+baser+factor*index into d.  d's pending offset
-   is removed first because only part of d is written. */
-MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-{
-    int base_nreg;
-    int index_nreg;
-    int dst_nreg;
-
-    CLOBBER_MOV;
-    remove_offset(d, -1);
-    base_nreg = readreg_offset(baser, 4);
-    index_nreg = readreg_offset(index, 4);
-    base += get_offset(baser);
-    base += factor*get_offset(index);
-    dst_nreg = writereg(d, 1);
-    raw_mov_b_brrm_indexed(dst_nreg, base, base_nreg, index_nreg, factor);
-    unlock2(dst_nreg);
-    unlock2(base_nreg);
-    unlock2(index_nreg);
-}
-MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
-
-/* Read a long from base+factor*index into d.  A constant index turns
-   this into a plain absolute load (mov_l_rm). */
-MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-{
-    int index_nreg;
-    int dst_nreg;
-
-    if (isconst(index)) {
-        COMPCALL(mov_l_rm)(d, base+factor*live.state[index].val);
-        return;
-    }
-
-    CLOBBER_MOV;
-    index_nreg = readreg_offset(index, 4);
-    base += get_offset(index)*factor;
-    dst_nreg = writereg(d, 4);
-
-    raw_mov_l_rm_indexed(dst_nreg, base, index_nreg, factor);
-    unlock2(index_nreg);
-    unlock2(dst_nreg);
-}
-MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
-
-
-/* read the long at the address contained in s+offset and store in d;
-   a constant s turns this into an absolute load (mov_l_rm) */
-MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
-{
-    int addr_nreg;
-    int dst_nreg;
-
-    if (isconst(s)) {
-        COMPCALL(mov_l_rm)(d, live.state[s].val+offset);
-        return;
-    }
-
-    CLOBBER_MOV;
-    addr_nreg = readreg(s, 4);
-    dst_nreg = writereg(d, 4);
-
-    raw_mov_l_rR(dst_nreg, addr_nreg, offset);
-    unlock2(dst_nreg);
-    unlock2(addr_nreg);
-}
-MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
-
-/* read the word at the address contained in s+offset and store in d */
-MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
-{
- if (isconst(s)) {
- COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
- return;
- }
- CLOBBER_MOV;
- s=readreg(s,4);
- d=writereg(d,2);
-
- raw_mov_w_rR(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
-
-/* read the word at the address contained in s+offset and store in d */
-MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
-{
- if (isconst(s)) {
- COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
- return;
- }
- CLOBBER_MOV;
- s=readreg(s,4);
- d=writereg(d,1);
-
- raw_mov_b_rR(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
-
-/* read the long at the address contained in s+offset and store in d */
-MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
-{
- int sreg=s;
- if (isconst(s)) {
- COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
- return;
- }
- CLOBBER_MOV;
- s=readreg_offset(s,4);
- offset+=get_offset(sreg);
- d=writereg(d,4);
-
- raw_mov_l_brR(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
-
-/* read the word at the address contained in s+offset and store in d */
-MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
-{
- int sreg=s;
- if (isconst(s)) {
- COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
- return;
- }
- CLOBBER_MOV;
- remove_offset(d,-1);
- s=readreg_offset(s,4);
- offset+=get_offset(sreg);
- d=writereg(d,2);
-
- raw_mov_w_brR(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
-
-/* read the word at the address contained in s+offset and store in d */
-MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
-{
- int sreg=s;
- if (isconst(s)) {
- COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
- return;
- }
- CLOBBER_MOV;
- remove_offset(d,-1);
- s=readreg_offset(s,4);
- offset+=get_offset(sreg);
- d=writereg(d,1);
-
- raw_mov_b_brR(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
-
-MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
-{
- int dreg=d;
- if (isconst(d)) {
- COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
- return;
- }
-
- CLOBBER_MOV;
- d=readreg_offset(d,4);
- offset+=get_offset(dreg);
- raw_mov_l_Ri(d,i,offset);
- unlock2(d);
-}
-MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
-
-MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
-{
- int dreg=d;
- if (isconst(d)) {
- COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
- return;
- }
-
- CLOBBER_MOV;
- d=readreg_offset(d,4);
- offset+=get_offset(dreg);
- raw_mov_w_Ri(d,i,offset);
- unlock2(d);
-}
-MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
-
-MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
-{
- int dreg=d;
- if (isconst(d)) {
- COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
- return;
- }
-
- CLOBBER_MOV;
- d=readreg_offset(d,4);
- offset+=get_offset(dreg);
- raw_mov_b_Ri(d,i,offset);
- unlock2(d);
-}
-MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
-
- /* Warning! OFFSET is byte sized only! */
-MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
-{
- if (isconst(d)) {
- COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
- return;
- }
- if (isconst(s)) {
- COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
- return;
- }
-
- CLOBBER_MOV;
- s=readreg(s,4);
- d=readreg(d,4);
-
- raw_mov_l_Rr(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
-
-MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
-{
- if (isconst(d)) {
- COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
- return;
- }
- if (isconst(s)) {
- COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
- return;
- }
-
- CLOBBER_MOV;
- s=readreg(s,2);
- d=readreg(d,4);
- raw_mov_w_Rr(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
-
-MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
-{
- if (isconst(d)) {
- COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
- return;
- }
- if (isconst(s)) {
- COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
- return;
- }
-
- CLOBBER_MOV;
- s=readreg(s,1);
- d=readreg(d,4);
- raw_mov_b_Rr(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
-
-MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
-{
- if (isconst(s)) {
- COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
- return;
- }
-#if USE_OFFSET
- if (d==s) {
- add_offset(d,offset);
- return;
- }
-#endif
- CLOBBER_LEA;
- s=readreg(s,4);
- d=writereg(d,4);
- raw_lea_l_brr(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
-
-MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-{
- if (!offset) {
- COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
- return;
- }
- CLOBBER_LEA;
- s=readreg(s,4);
- index=readreg(index,4);
- d=writereg(d,4);
-
- raw_lea_l_brr_indexed(d,s,index,factor,offset);
- unlock2(d);
- unlock2(index);
- unlock2(s);
-}
-MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
-
-MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-{
- CLOBBER_LEA;
- s=readreg(s,4);
- index=readreg(index,4);
- d=writereg(d,4);
-
- raw_lea_l_rr_indexed(d,s,index,factor);
- unlock2(d);
- unlock2(index);
- unlock2(s);
-}
-MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
-
-/* write d to the long at the address contained in s+offset */
-MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
-{
- int dreg=d;
- if (isconst(d)) {
- COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
- return;
- }
-
- CLOBBER_MOV;
- s=readreg(s,4);
- d=readreg_offset(d,4);
- offset+=get_offset(dreg);
-
- raw_mov_l_bRr(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
-
-/* write the word at the address contained in s+offset and store in d */
-MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
-{
- int dreg=d;
-
- if (isconst(d)) {
- COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
- return;
- }
-
- CLOBBER_MOV;
- s=readreg(s,2);
- d=readreg_offset(d,4);
- offset+=get_offset(dreg);
- raw_mov_w_bRr(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
-
-MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
-{
- int dreg=d;
- if (isconst(d)) {
- COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
- return;
- }
-
- CLOBBER_MOV;
- s=readreg(s,1);
- d=readreg_offset(d,4);
- offset+=get_offset(dreg);
- raw_mov_b_bRr(d,s,offset);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
-
-MIDFUNC(1,bswap_32,(RW4 r))
-{
- int reg=r;
-
- if (isconst(r)) {
- uae_u32 oldv=live.state[r].val;
- live.state[r].val=reverse32(oldv);
- return;
- }
-
- CLOBBER_SW32;
- r=rmw(r,4,4);
- raw_bswap_32(r);
- unlock2(r);
-}
-MENDFUNC(1,bswap_32,(RW4 r))
-
-MIDFUNC(1,bswap_16,(RW2 r))
-{
- if (isconst(r)) {
- uae_u32 oldv=live.state[r].val;
- live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
- (oldv&0xffff0000);
- return;
- }
-
- CLOBBER_SW16;
- r=rmw(r,2,2);
-
- raw_bswap_16(r);
- unlock2(r);
-}
-MENDFUNC(1,bswap_16,(RW2 r))
-
-
-
-MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
-{
- int olds;
-
- if (d==s) { /* How pointless! */
- return;
- }
- if (isconst(s)) {
- COMPCALL(mov_l_ri)(d,live.state[s].val);
- return;
- }
- olds=s;
- disassociate(d);
- s=readreg_offset(s,4);
- live.state[d].realreg=s;
- live.state[d].realind=live.nat[s].nholds;
- live.state[d].val=live.state[olds].val;
- live.state[d].validsize=4;
- live.state[d].dirtysize=4;
- set_status(d,DIRTY);
-
- live.nat[s].holds[live.nat[s].nholds]=d;
- live.nat[s].nholds++;
- log_clobberreg(d);
- /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
- d,s,live.state[d].realind,live.nat[s].nholds); */
- unlock2(s);
-}
-MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
-
-MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
-{
- if (isconst(s)) {
- COMPCALL(mov_l_mi)(d,live.state[s].val);
- return;
- }
- CLOBBER_MOV;
- s=readreg(s,4);
-
- raw_mov_l_mr(d,s);
- unlock2(s);
-}
-MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
-
-
-MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
-{
- if (isconst(s)) {
- COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
- return;
- }
- CLOBBER_MOV;
- s=readreg(s,2);
-
- raw_mov_w_mr(d,s);
- unlock2(s);
-}
-MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
-
-MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
-{
- CLOBBER_MOV;
- d=writereg(d,2);
-
- raw_mov_w_rm(d,s);
- unlock2(d);
-}
-MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
-
-MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
-{
- if (isconst(s)) {
- COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
- return;
- }
-
- CLOBBER_MOV;
- s=readreg(s,1);
-
- raw_mov_b_mr(d,s);
- unlock2(s);
-}
-MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
-
-MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
-{
- CLOBBER_MOV;
- d=writereg(d,1);
-
- raw_mov_b_rm(d,s);
- unlock2(d);
-}
-MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
-
-MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
-{
- set_const(d,s);
- return;
-}
-MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
-
-MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
-{
- CLOBBER_MOV;
- d=writereg(d,2);
-
- raw_mov_w_ri(d,s);
- unlock2(d);
-}
-MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
-
-MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
-{
- CLOBBER_MOV;
- d=writereg(d,1);
-
- raw_mov_b_ri(d,s);
- unlock2(d);
-}
-MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
-
-
-MIDFUNC(2,add_l_mi,(IMM d, IMM s))
-{
- CLOBBER_ADD;
- raw_add_l_mi(d,s) ;
-}
-MENDFUNC(2,add_l_mi,(IMM d, IMM s))
-
-MIDFUNC(2,add_w_mi,(IMM d, IMM s))
-{
- CLOBBER_ADD;
- raw_add_w_mi(d,s) ;
-}
-MENDFUNC(2,add_w_mi,(IMM d, IMM s))
-
-MIDFUNC(2,add_b_mi,(IMM d, IMM s))
-{
- CLOBBER_ADD;
- raw_add_b_mi(d,s) ;
-}
-MENDFUNC(2,add_b_mi,(IMM d, IMM s))
-
-
-MIDFUNC(2,test_l_ri,(R4 d, IMM i))
-{
- CLOBBER_TEST;
- d=readreg(d,4);
-
- raw_test_l_ri(d,i);
- unlock2(d);
-}
-MENDFUNC(2,test_l_ri,(R4 d, IMM i))
-
-MIDFUNC(2,test_l_rr,(R4 d, R4 s))
-{
- CLOBBER_TEST;
- d=readreg(d,4);
- s=readreg(s,4);
-
- raw_test_l_rr(d,s);;
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,test_l_rr,(R4 d, R4 s))
-
-MIDFUNC(2,test_w_rr,(R2 d, R2 s))
-{
- CLOBBER_TEST;
- d=readreg(d,2);
- s=readreg(s,2);
-
- raw_test_w_rr(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,test_w_rr,(R2 d, R2 s))
-
-MIDFUNC(2,test_b_rr,(R1 d, R1 s))
-{
- CLOBBER_TEST;
- d=readreg(d,1);
- s=readreg(s,1);
-
- raw_test_b_rr(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,test_b_rr,(R1 d, R1 s))
-
-
-MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
-{
- if (isconst(d) && !needflags) {
- live.state[d].val &= i;
- return;
- }
-
- CLOBBER_AND;
- d=rmw(d,4,4);
-
- raw_and_l_ri(d,i);
- unlock2(d);
-}
-MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
-
-MIDFUNC(2,and_l,(RW4 d, R4 s))
-{
- CLOBBER_AND;
- s=readreg(s,4);
- d=rmw(d,4,4);
-
- raw_and_l(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,and_l,(RW4 d, R4 s))
-
-MIDFUNC(2,and_w,(RW2 d, R2 s))
-{
- CLOBBER_AND;
- s=readreg(s,2);
- d=rmw(d,2,2);
-
- raw_and_w(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,and_w,(RW2 d, R2 s))
-
-MIDFUNC(2,and_b,(RW1 d, R1 s))
-{
- CLOBBER_AND;
- s=readreg(s,1);
- d=rmw(d,1,1);
-
- raw_and_b(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,and_b,(RW1 d, R1 s))
-
-// gb-- used for making an fpcr value in compemu_fpp.cpp
-MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
-{
- CLOBBER_OR;
- d=rmw(d,4,4);
-
- raw_or_l_rm(d,s);
- unlock2(d);
-}
-MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
-
-MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
-{
- if (isconst(d) && !needflags) {
- live.state[d].val|=i;
- return;
- }
- CLOBBER_OR;
- d=rmw(d,4,4);
-
- raw_or_l_ri(d,i);
- unlock2(d);
-}
-MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
-
-MIDFUNC(2,or_l,(RW4 d, R4 s))
-{
- if (isconst(d) && isconst(s) && !needflags) {
- live.state[d].val|=live.state[s].val;
- return;
- }
- CLOBBER_OR;
- s=readreg(s,4);
- d=rmw(d,4,4);
-
- raw_or_l(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,or_l,(RW4 d, R4 s))
-
-MIDFUNC(2,or_w,(RW2 d, R2 s))
-{
- CLOBBER_OR;
- s=readreg(s,2);
- d=rmw(d,2,2);
-
- raw_or_w(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,or_w,(RW2 d, R2 s))
-
-MIDFUNC(2,or_b,(RW1 d, R1 s))
-{
- CLOBBER_OR;
- s=readreg(s,1);
- d=rmw(d,1,1);
-
- raw_or_b(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,or_b,(RW1 d, R1 s))
-
-MIDFUNC(2,adc_l,(RW4 d, R4 s))
-{
- CLOBBER_ADC;
- s=readreg(s,4);
- d=rmw(d,4,4);
-
- raw_adc_l(d,s);
-
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,adc_l,(RW4 d, R4 s))
-
-MIDFUNC(2,adc_w,(RW2 d, R2 s))
-{
- CLOBBER_ADC;
- s=readreg(s,2);
- d=rmw(d,2,2);
-
- raw_adc_w(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,adc_w,(RW2 d, R2 s))
-
-MIDFUNC(2,adc_b,(RW1 d, R1 s))
-{
- CLOBBER_ADC;
- s=readreg(s,1);
- d=rmw(d,1,1);
-
- raw_adc_b(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,adc_b,(RW1 d, R1 s))
-
-MIDFUNC(2,add_l,(RW4 d, R4 s))
-{
- if (isconst(s)) {
- COMPCALL(add_l_ri)(d,live.state[s].val);
- return;
- }
-
- CLOBBER_ADD;
- s=readreg(s,4);
- d=rmw(d,4,4);
-
- raw_add_l(d,s);
-
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,add_l,(RW4 d, R4 s))
-
-MIDFUNC(2,add_w,(RW2 d, R2 s))
-{
- if (isconst(s)) {
- COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
- return;
- }
-
- CLOBBER_ADD;
- s=readreg(s,2);
- d=rmw(d,2,2);
-
- raw_add_w(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,add_w,(RW2 d, R2 s))
-
-MIDFUNC(2,add_b,(RW1 d, R1 s))
-{
- if (isconst(s)) {
- COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
- return;
- }
-
- CLOBBER_ADD;
- s=readreg(s,1);
- d=rmw(d,1,1);
-
- raw_add_b(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,add_b,(RW1 d, R1 s))
-
-MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
-{
- if (!i && !needflags)
- return;
- if (isconst(d) && !needflags) {
- live.state[d].val-=i;
- return;
- }
-#if USE_OFFSET
- if (!needflags) {
- add_offset(d,-i);
- return;
- }
-#endif
-
- CLOBBER_SUB;
- d=rmw(d,4,4);
-
- raw_sub_l_ri(d,i);
- unlock2(d);
-}
-MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
-
-MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
-{
- if (!i && !needflags)
- return;
-
- CLOBBER_SUB;
- d=rmw(d,2,2);
-
- raw_sub_w_ri(d,i);
- unlock2(d);
-}
-MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
-
-MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
-{
- if (!i && !needflags)
- return;
-
- CLOBBER_SUB;
- d=rmw(d,1,1);
-
- raw_sub_b_ri(d,i);
-
- unlock2(d);
-}
-MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
-
-MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
-{
- if (!i && !needflags)
- return;
- if (isconst(d) && !needflags) {
- live.state[d].val+=i;
- return;
- }
-#if USE_OFFSET
- if (!needflags) {
- add_offset(d,i);
- return;
- }
-#endif
- CLOBBER_ADD;
- d=rmw(d,4,4);
- raw_add_l_ri(d,i);
- unlock2(d);
-}
-MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
-
-MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
-{
- if (!i && !needflags)
- return;
-
- CLOBBER_ADD;
- d=rmw(d,2,2);
-
- raw_add_w_ri(d,i);
- unlock2(d);
-}
-MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
-
-MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
-{
- if (!i && !needflags)
- return;
-
- CLOBBER_ADD;
- d=rmw(d,1,1);
-
- raw_add_b_ri(d,i);
-
- unlock2(d);
-}
-MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
-
-MIDFUNC(2,sbb_l,(RW4 d, R4 s))
-{
- CLOBBER_SBB;
- s=readreg(s,4);
- d=rmw(d,4,4);
-
- raw_sbb_l(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,sbb_l,(RW4 d, R4 s))
-
-MIDFUNC(2,sbb_w,(RW2 d, R2 s))
-{
- CLOBBER_SBB;
- s=readreg(s,2);
- d=rmw(d,2,2);
-
- raw_sbb_w(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,sbb_w,(RW2 d, R2 s))
-
-MIDFUNC(2,sbb_b,(RW1 d, R1 s))
-{
- CLOBBER_SBB;
- s=readreg(s,1);
- d=rmw(d,1,1);
-
- raw_sbb_b(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,sbb_b,(RW1 d, R1 s))
-
-MIDFUNC(2,sub_l,(RW4 d, R4 s))
-{
- if (isconst(s)) {
- COMPCALL(sub_l_ri)(d,live.state[s].val);
- return;
- }
-
- CLOBBER_SUB;
- s=readreg(s,4);
- d=rmw(d,4,4);
-
- raw_sub_l(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,sub_l,(RW4 d, R4 s))
-
-MIDFUNC(2,sub_w,(RW2 d, R2 s))
-{
- if (isconst(s)) {
- COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
- return;
- }
-
- CLOBBER_SUB;
- s=readreg(s,2);
- d=rmw(d,2,2);
-
- raw_sub_w(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,sub_w,(RW2 d, R2 s))
-
-MIDFUNC(2,sub_b,(RW1 d, R1 s))
-{
- if (isconst(s)) {
- COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
- return;
- }
-
- CLOBBER_SUB;
- s=readreg(s,1);
- d=rmw(d,1,1);
-
- raw_sub_b(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,sub_b,(RW1 d, R1 s))
-
-MIDFUNC(2,cmp_l,(R4 d, R4 s))
-{
- CLOBBER_CMP;
- s=readreg(s,4);
- d=readreg(d,4);
-
- raw_cmp_l(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,cmp_l,(R4 d, R4 s))
-
-MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
-{
- CLOBBER_CMP;
- r=readreg(r,4);
-
- raw_cmp_l_ri(r,i);
- unlock2(r);
-}
-MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
-
-MIDFUNC(2,cmp_w,(R2 d, R2 s))
-{
- CLOBBER_CMP;
- s=readreg(s,2);
- d=readreg(d,2);
-
- raw_cmp_w(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,cmp_w,(R2 d, R2 s))
-
-MIDFUNC(2,cmp_b,(R1 d, R1 s))
-{
- CLOBBER_CMP;
- s=readreg(s,1);
- d=readreg(d,1);
-
- raw_cmp_b(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,cmp_b,(R1 d, R1 s))
-
-
-MIDFUNC(2,xor_l,(RW4 d, R4 s))
-{
- CLOBBER_XOR;
- s=readreg(s,4);
- d=rmw(d,4,4);
-
- raw_xor_l(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,xor_l,(RW4 d, R4 s))
-
-MIDFUNC(2,xor_w,(RW2 d, R2 s))
-{
- CLOBBER_XOR;
- s=readreg(s,2);
- d=rmw(d,2,2);
-
- raw_xor_w(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,xor_w,(RW2 d, R2 s))
-
-MIDFUNC(2,xor_b,(RW1 d, R1 s))
-{
- CLOBBER_XOR;
- s=readreg(s,1);
- d=rmw(d,1,1);
-
- raw_xor_b(d,s);
- unlock2(d);
- unlock2(s);
-}
-MENDFUNC(2,xor_b,(RW1 d, R1 s))
-
-MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
-{
- clobber_flags();
- remove_all_offsets();
- if (osize==4) {
- if (out1!=in1 && out1!=r) {
- COMPCALL(forget_about)(out1);
- }
- }
- else {
- tomem_c(out1);
- }
-
- in1=readreg_specific(in1,isize,REG_PAR1);
- r=readreg(r,4);
- prepare_for_call_1(); /* This should ensure that there won't be
- any need for swapping nregs in prepare_for_call_2
- */
-#if USE_NORMAL_CALLING_CONVENTION
- raw_push_l_r(in1);
-#endif
- unlock2(in1);
- unlock2(r);
-
- prepare_for_call_2();
- raw_call_r(r);
-
-#if USE_NORMAL_CALLING_CONVENTION
- raw_inc_sp(4);
-#endif
-
-
- live.nat[REG_RESULT].holds[0]=out1;
- live.nat[REG_RESULT].nholds=1;
- live.nat[REG_RESULT].touched=touchcnt++;
-
- live.state[out1].realreg=REG_RESULT;
- live.state[out1].realind=0;
- live.state[out1].val=0;
- live.state[out1].validsize=osize;
- live.state[out1].dirtysize=osize;
- set_status(out1,DIRTY);
-}
-MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
-
-MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
-{
- clobber_flags();
- remove_all_offsets();
- in1=readreg_specific(in1,isize1,REG_PAR1);
- in2=readreg_specific(in2,isize2,REG_PAR2);
- r=readreg(r,4);
- prepare_for_call_1(); /* This should ensure that there won't be
- any need for swapping nregs in prepare_for_call_2
- */
-#if USE_NORMAL_CALLING_CONVENTION
- raw_push_l_r(in2);
- raw_push_l_r(in1);
-#endif
- unlock2(r);
- unlock2(in1);
- unlock2(in2);
- prepare_for_call_2();
- raw_call_r(r);
-#if USE_NORMAL_CALLING_CONVENTION
- raw_inc_sp(8);
-#endif
-}
-MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
-
-/* forget_about() takes a mid-layer register */
-MIDFUNC(1,forget_about,(W4 r))
-{
- if (isinreg(r))
- disassociate(r);
- live.state[r].val=0;
- set_status(r,UNDEF);
-}
-MENDFUNC(1,forget_about,(W4 r))
-
-MIDFUNC(0,nop,(void))
-{
- raw_nop();
-}
-MENDFUNC(0,nop,(void))
-
-
-MIDFUNC(1,f_forget_about,(FW r))
-{
- if (f_isinreg(r))
- f_disassociate(r);
- live.fate[r].status=UNDEF;
-}
-MENDFUNC(1,f_forget_about,(FW r))
-
-MIDFUNC(1,fmov_pi,(FW r))
-{
- r=f_writereg(r);
- raw_fmov_pi(r);
- f_unlock(r);
-}
-MENDFUNC(1,fmov_pi,(FW r))
-
-MIDFUNC(1,fmov_log10_2,(FW r))
-{
- r=f_writereg(r);
- raw_fmov_log10_2(r);
- f_unlock(r);
-}
-MENDFUNC(1,fmov_log10_2,(FW r))
-
-MIDFUNC(1,fmov_log2_e,(FW r))
-{
- r=f_writereg(r);
- raw_fmov_log2_e(r);
- f_unlock(r);
-}
-MENDFUNC(1,fmov_log2_e,(FW r))
-
-MIDFUNC(1,fmov_loge_2,(FW r))
-{
- r=f_writereg(r);
- raw_fmov_loge_2(r);
- f_unlock(r);
-}
-MENDFUNC(1,fmov_loge_2,(FW r))
-
-MIDFUNC(1,fmov_1,(FW r))
-{
- r=f_writereg(r);
- raw_fmov_1(r);
- f_unlock(r);
-}
-MENDFUNC(1,fmov_1,(FW r))
-
-MIDFUNC(1,fmov_0,(FW r))
-{
- r=f_writereg(r);
- raw_fmov_0(r);
- f_unlock(r);
-}
-MENDFUNC(1,fmov_0,(FW r))
-
-MIDFUNC(2,fmov_rm,(FW r, MEMR m))
-{
- r=f_writereg(r);
- raw_fmov_rm(r,m);
- f_unlock(r);
-}
-MENDFUNC(2,fmov_rm,(FW r, MEMR m))
-
-MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
-{
- r=f_writereg(r);
- raw_fmovi_rm(r,m);
- f_unlock(r);
-}
-MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
-
-MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
-{
- r=f_readreg(r);
- raw_fmovi_mr(m,r);
- f_unlock(r);
-}
-MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
-
-MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
-{
- r=f_writereg(r);
- raw_fmovs_rm(r,m);
- f_unlock(r);
-}
-MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
-
-MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
-{
- r=f_readreg(r);
- raw_fmovs_mr(m,r);
- f_unlock(r);
-}
-MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
-
-MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
-{
- r=f_readreg(r);
- raw_fmov_ext_mr(m,r);
- f_unlock(r);
-}
-MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
-
-MIDFUNC(2,fmov_mr,(MEMW m, FR r))
-{
- r=f_readreg(r);
- raw_fmov_mr(m,r);
- f_unlock(r);
-}
-MENDFUNC(2,fmov_mr,(MEMW m, FR r))
-
-MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
-{
- r=f_writereg(r);
- raw_fmov_ext_rm(r,m);
- f_unlock(r);
-}
-MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
-
-MIDFUNC(2,fmov_rr,(FW d, FR s))
-{
- if (d==s) { /* How pointless! */
- return;
- }
-#if USE_F_ALIAS
- f_disassociate(d);
- s=f_readreg(s);
- live.fate[d].realreg=s;
- live.fate[d].realind=live.fat[s].nholds;
- live.fate[d].status=DIRTY;
- live.fat[s].holds[live.fat[s].nholds]=d;
- live.fat[s].nholds++;
- f_unlock(s);
-#else
- s=f_readreg(s);
- d=f_writereg(d);
- raw_fmov_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-#endif
-}
-MENDFUNC(2,fmov_rr,(FW d, FR s))
-
-MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
-{
- index=readreg(index,4);
-
- raw_fldcw_m_indexed(index,base);
- unlock2(index);
-}
-MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
-
-MIDFUNC(1,ftst_r,(FR r))
-{
- r=f_readreg(r);
- raw_ftst_r(r);
- f_unlock(r);
-}
-MENDFUNC(1,ftst_r,(FR r))
-
-MIDFUNC(0,dont_care_fflags,(void))
-{
- f_disassociate(FP_RESULT);
-}
-MENDFUNC(0,dont_care_fflags,(void))
-
-MIDFUNC(2,fsqrt_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_fsqrt_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fsqrt_rr,(FW d, FR s))
-
-MIDFUNC(2,fabs_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_fabs_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fabs_rr,(FW d, FR s))
-
-MIDFUNC(2,fsin_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_fsin_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fsin_rr,(FW d, FR s))
-
-MIDFUNC(2,fcos_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_fcos_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fcos_rr,(FW d, FR s))
-
-MIDFUNC(2,ftwotox_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_ftwotox_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,ftwotox_rr,(FW d, FR s))
-
-MIDFUNC(2,fetox_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_fetox_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fetox_rr,(FW d, FR s))
-
-MIDFUNC(2,frndint_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_frndint_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,frndint_rr,(FW d, FR s))
-
-MIDFUNC(2,flog2_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_flog2_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,flog2_rr,(FW d, FR s))
-
-MIDFUNC(2,fneg_rr,(FW d, FR s))
-{
- s=f_readreg(s);
- d=f_writereg(d);
- raw_fneg_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fneg_rr,(FW d, FR s))
-
-MIDFUNC(2,fadd_rr,(FRW d, FR s))
-{
- s=f_readreg(s);
- d=f_rmw(d);
- raw_fadd_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fadd_rr,(FRW d, FR s))
-
-MIDFUNC(2,fsub_rr,(FRW d, FR s))
-{
- s=f_readreg(s);
- d=f_rmw(d);
- raw_fsub_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fsub_rr,(FRW d, FR s))
-
-MIDFUNC(2,fcmp_rr,(FR d, FR s))
-{
- d=f_readreg(d);
- s=f_readreg(s);
- raw_fcmp_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fcmp_rr,(FR d, FR s))
-
-MIDFUNC(2,fdiv_rr,(FRW d, FR s))
-{
- s=f_readreg(s);
- d=f_rmw(d);
- raw_fdiv_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fdiv_rr,(FRW d, FR s))
-
-MIDFUNC(2,frem_rr,(FRW d, FR s))
-{
- s=f_readreg(s);
- d=f_rmw(d);
- raw_frem_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,frem_rr,(FRW d, FR s))
-
-MIDFUNC(2,frem1_rr,(FRW d, FR s))
-{
- s=f_readreg(s);
- d=f_rmw(d);
- raw_frem1_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,frem1_rr,(FRW d, FR s))
-
-MIDFUNC(2,fmul_rr,(FRW d, FR s))
-{
- s=f_readreg(s);
- d=f_rmw(d);
- raw_fmul_rr(d,s);
- f_unlock(s);
- f_unlock(d);
-}
-MENDFUNC(2,fmul_rr,(FRW d, FR s))
-
-/********************************************************************
- * Support functions exposed to gencomp. CREATE time *
- ********************************************************************/
-
-void set_zero(int r, int tmp)
-{
- if (setzflg_uses_bsf)
- bsf_l_rr(r,r);
- else
- simulate_bsf(tmp,r);
-}
-
-int kill_rodent(int r)
-{
- return KILLTHERAT &&
- have_rat_stall &&
- (live.state[r].status==INMEM ||
- live.state[r].status==CLEAN ||
- live.state[r].status==ISCONST ||
- live.state[r].dirtysize==4);
-}
-
-uae_u32 get_const(int r)
-{
- Dif (!isconst(r)) {
- write_log("Register %d should be constant, but isn't\n",r);
- abort();
- }
- return live.state[r].val;
-}
-
-void sync_m68k_pc(void)
-{
- if (m68k_pc_offset) {
- add_l_ri(PC_P,m68k_pc_offset);
- comp_pc_p+=m68k_pc_offset;
- m68k_pc_offset=0;
- }
-}
-
-/********************************************************************
- * Scratch registers management *
- ********************************************************************/
-
-struct scratch_t {
- uae_u32 regs[VREGS];
- fpu_register fregs[VFREGS];
-};
-
-static scratch_t scratch;
-
-/********************************************************************
- * Support functions exposed to newcpu *
- ********************************************************************/
-
-static inline const char *str_on_off(bool b)
-{
- return b ? "on" : "off";
-}
-
-void compiler_init(void)
-{
- static bool initialized = false;
- if (initialized)
- return;
-
-#if JIT_DEBUG
- // JIT debug mode ?
- JITDebug = PrefsFindBool("jitdebug");
-#endif
- write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
-
-#ifdef USE_JIT_FPU
- // Use JIT compiler for FPU instructions ?
- avoid_fpu = !PrefsFindBool("jitfpu");
-#else
- // JIT FPU is always disabled
- avoid_fpu = true;
-#endif
- write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
-
- // Get size of the translation cache (in KB)
- cache_size = PrefsFindInt32("jitcachesize");
- write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
-
- // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
- raw_init_cpu();
- setzflg_uses_bsf = target_check_bsf();
- write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
- write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
- write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
-
- // Translation cache flush mechanism
- lazy_flush = PrefsFindBool("jitlazyflush");
- write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
- flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
-
- // Compiler features
- write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
- write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
- write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
-#if USE_INLINING
- follow_const_jumps = PrefsFindBool("jitinline");
-#endif
- write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
- write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
-
- // Build compiler tables
- build_comp();
-
- initialized = true;
-
-#if PROFILE_UNTRANSLATED_INSNS
- write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
-#endif
-
-#if PROFILE_COMPILE_TIME
- write_log("<JIT compiler> : gather statistics on translation time\n");
- emul_start_time = clock();
-#endif
-}
-
-void compiler_exit(void)
-{
-#if PROFILE_COMPILE_TIME
- emul_end_time = clock();
-#endif
-
- // Deallocate translation cache
- if (compiled_code) {
- vm_release(compiled_code, cache_size * 1024);
- compiled_code = 0;
- }
-
- // Deallocate popallspace
- if (popallspace) {
- vm_release(popallspace, POPALLSPACE_SIZE);
- popallspace = 0;
- }
-
-#if PROFILE_COMPILE_TIME
- write_log("### Compile Block statistics\n");
- write_log("Number of calls to compile_block : %d\n", compile_count);
- uae_u32 emul_time = emul_end_time - emul_start_time;
- write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
- write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
- 100.0*double(compile_time)/double(emul_time));
- write_log("\n");
-#endif
-
-#if PROFILE_UNTRANSLATED_INSNS
- uae_u64 untranslated_count = 0;
- for (int i = 0; i < 65536; i++) {
- opcode_nums[i] = i;
- untranslated_count += raw_cputbl_count[i];
- }
- write_log("Sorting out untranslated instructions count...\n");
- qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
- write_log("\nRank Opc Count Name\n");
- for (int i = 0; i < untranslated_top_ten; i++) {
- uae_u32 count = raw_cputbl_count[opcode_nums[i]];
- struct instr *dp;
- struct mnemolookup *lookup;
- if (!count)
- break;
- dp = table68k + opcode_nums[i];
- for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
- ;
- write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
- }
-#endif
-
-#if RECORD_REGISTER_USAGE
- int reg_count_ids[16];
- uint64 tot_reg_count = 0;
- for (int i = 0; i < 16; i++) {
- reg_count_ids[i] = i;
- tot_reg_count += reg_count[i];
- }
- qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
- uint64 cum_reg_count = 0;
- for (int i = 0; i < 16; i++) {
- int r = reg_count_ids[i];
- cum_reg_count += reg_count[r];
- printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
- reg_count[r],
- 100.0*double(reg_count[r])/double(tot_reg_count),
- 100.0*double(cum_reg_count)/double(tot_reg_count));
- }
-#endif
-}
-
-bool compiler_use_jit(void)
-{
- // Check for the "jit" prefs item
- if (!PrefsFindBool("jit"))
- return false;
-
- // Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
- if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
- write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
- return false;
- }
-
- // Enable JIT for 68020+ emulation only
- if (CPUType < 2) {
- write_log("<JIT compiler> : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType);
- return false;
- }
-
- return true;
-}
-
-/*
- * Reset the register allocator's bookkeeping (the global `live` state)
- * before a block is compiled.
- *
- * - every virtual register starts as an UNDEF scratch register;
- * - the first 16 integer vregs are then bound to consecutive 32-bit slots
- *   starting at &regs (assumes the 68k D/A registers are the first 16
- *   uae_u32 words of `regs` -- TODO confirm against the struct layout);
- * - PC_P, FLAGX and FLAGTMP are bound to their own memory locations;
- * - the first 8 FPU vregs and FP_RESULT are bound to FPU memory;
- * - native register capabilities are taken from can_byte, can_word and
- *   always_used (the walk below assumes these lists are sorted in
- *   ascending register order -- TODO confirm).
- */
-void init_comp(void)
-{
-    int i;
-    uae_s8* cb=can_byte;
-    uae_s8* cw=can_word;
-    uae_s8* au=always_used;
-
-#if RECORD_REGISTER_USAGE
-    for (i=0;i<16;i++)
-        reg_count_local[i] = 0;
-#endif
-
-    /* Start from a blank slate: nothing lives in a native register */
-    for (i=0;i<VREGS;i++) {
-        live.state[i].realreg=-1;
-        live.state[i].needflush=NF_SCRATCH;
-        live.state[i].val=0;
-        set_status(i,UNDEF);
-    }
-
-    for (i=0;i<VFREGS;i++) {
-        live.fate[i].status=UNDEF;
-        live.fate[i].realreg=-1;
-        live.fate[i].needflush=NF_SCRATCH;
-    }
-
-    for (i=0;i<VREGS;i++) {
-        if (i<16) { /* First 16 registers map to 68k registers */
-            live.state[i].mem=((uae_u32*)&regs)+i;
-            live.state[i].needflush=NF_TOMEM;
-            set_status(i,INMEM);
-        }
-        else
-            live.state[i].mem=scratch.regs+i;
-    }
-    /* The PC pointer is known at compile time, so it starts as a constant */
-    live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
-    live.state[PC_P].needflush=NF_TOMEM;
-    set_const(PC_P,(uintptr)comp_pc_p);
-
-    live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
-    live.state[FLAGX].needflush=NF_TOMEM;
-    set_status(FLAGX,INMEM);
-
-    live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
-    live.state[FLAGTMP].needflush=NF_TOMEM;
-    set_status(FLAGTMP,INMEM);
-
-    live.state[NEXT_HANDLER].needflush=NF_HANDLER;
-    set_status(NEXT_HANDLER,UNDEF);
-
-    for (i=0;i<VFREGS;i++) {
-        if (i<8) { /* First 8 registers map to 68k FPU registers */
-            live.fate[i].mem=(uae_u32*)fpu_register_address(i);
-            live.fate[i].needflush=NF_TOMEM;
-            live.fate[i].status=INMEM;
-        }
-        else if (i==FP_RESULT) {
-            live.fate[i].mem=(uae_u32*)(&fpu.result);
-            live.fate[i].needflush=NF_TOMEM;
-            live.fate[i].status=INMEM;
-        }
-        else
-            live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
-    }
-
-    /* Each capability list is consumed in step with i: its cursor only
-       advances when the current entry names native register i */
-    for (i=0;i<N_REGS;i++) {
-        live.nat[i].touched=0;
-        live.nat[i].nholds=0;
-        live.nat[i].locked=0;
-        if (*cb==i) {
-            live.nat[i].canbyte=1; cb++;
-        } else live.nat[i].canbyte=0;
-        if (*cw==i) {
-            live.nat[i].canword=1; cw++;
-        } else live.nat[i].canword=0;
-        if (*au==i) {
-            live.nat[i].locked=1; au++;
-        }
-    }
-
-    for (i=0;i<N_FREGS;i++) {
-        live.fat[i].touched=0;
-        live.fat[i].nholds=0;
-        live.fat[i].locked=0;
-    }
-
-    touchcnt=1;
-    m68k_pc_offset=0;
-    live.flags_in_flags=TRASH;
-    live.flags_on_stack=VALID;
-    live.flags_are_important=1;
-
-    raw_fp_init();
-}
-
-/* Only do this if you really mean it! The next call should be to init!
- *
- * Emits the code that brings memory up to date with the compiler's view
- * of the virtual registers. The flags and the 68k PC offset are always
- * synced (flush_flags / sync_m68k_pc). When save_regs is non-zero, every
- * integer vreg marked NF_TOMEM is written back as well and dirty FPU
- * vregs marked NF_TOMEM are evicted. A warning is logged when needflags
- * is still set at the end.
- */
-void flush(int save_regs)
-{
- int fi,i; /* NOTE(review): fi is never used in this function */
-
- log_flush();
- flush_flags(); /* low level */
- sync_m68k_pc(); /* mid level */
-
- if (save_regs) {
- for (i=0;i<VFREGS;i++) {
- if (live.fate[i].needflush==NF_SCRATCH ||
- live.fate[i].status==CLEAN) {
- f_disassociate(i);
- }
- }
- for (i=0;i<VREGS;i++) {
- if (live.state[i].needflush==NF_TOMEM) {
- switch(live.state[i].status) {
- case INMEM:
- if (live.state[i].val) { /* a pending offset is added to the memory copy */
- raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
- log_vwrite(i);
- live.state[i].val=0;
- }
- break;
- case CLEAN:
- case DIRTY:
- remove_offset(i,-1); tomem(i); break;
- case ISCONST:
- if (i!=PC_P) /* a constant PC_P is deliberately not written back here */
- writeback_const(i);
- break;
- default: break;
- }
- Dif (live.state[i].val && i!=PC_P) { /* debug check: no offset may survive the write-back */
- write_log("Register %d still has val %x\n",
- i,live.state[i].val);
- }
- }
- }
- for (i=0;i<VFREGS;i++) {
- if (live.fate[i].needflush==NF_TOMEM &&
- live.fate[i].status==DIRTY) {
- f_evict(i);
- }
- }
- raw_fp_cleanup_drop();
- }
- if (needflags) {
- write_log("Warning! flush with needflags=1!\n");
- }
-}
-
-static void flush_keepflags(void)
-{ /* Writes NF_TOMEM integer vregs back and evicts dirty NF_TOMEM FPU
-   * vregs like flush() does with save_regs set, but does not call
-   * flush_flags() or sync_m68k_pc() and leaves pending offsets of
-   * INMEM registers alone. */
- int fi,i; /* NOTE(review): fi is never used in this function */
-
- for (i=0;i<VFREGS;i++) {
- if (live.fate[i].needflush==NF_SCRATCH ||
- live.fate[i].status==CLEAN) {
- f_disassociate(i);
- }
- }
- for (i=0;i<VREGS;i++) {
- if (live.state[i].needflush==NF_TOMEM) {
- switch(live.state[i].status) {
- case INMEM:
- /* Can't adjust the offset here --- that needs "add" */
- break;
- case CLEAN:
- case DIRTY:
- remove_offset(i,-1); tomem(i); break;
- case ISCONST:
- if (i!=PC_P) /* a constant PC_P is not written back */
- writeback_const(i);
- break;
- default: break;
- }
- }
- }
- for (i=0;i<VFREGS;i++) {
- if (live.fate[i].needflush==NF_TOMEM &&
- live.fate[i].status==DIRTY) {
- f_evict(i);
- }
- }
- raw_fp_cleanup_drop();
-}
-
-/*
- * Forget every scratch virtual register (integer and FPU).
- * Beforehand, log a warning for each native register that is still
- * locked; native register 4 is exempt from that check (presumably the
- * stack pointer -- TODO confirm).
- */
-void freescratch(void)
-{
-    int n;
-
-    for (n=0; n<N_REGS; n++) {
-        if (n==4)
-            continue;
-        if (live.nat[n].locked)
-            write_log("Warning! %d is locked\n",n);
-    }
-
-    for (n=0; n<VREGS; n++) {
-        if (live.state[n].needflush!=NF_SCRATCH)
-            continue;
-        forget_about(n);
-    }
-
-    for (n=0; n<VFREGS; n++) {
-        if (live.fate[n].needflush!=NF_SCRATCH)
-            continue;
-        f_forget_about(n);
-    }
-}
-
-/********************************************************************
- * Support functions, internal *
- ********************************************************************/
-
-
-/*
- * Pad the code emission pointer `target` up to a multiple of a bytes.
- * a == 0 disables alignment. The mask arithmetic assumes a is a power
- * of two.
- *
- * Nothing is emitted when target is already aligned. Previously the
- * tune_nop_fillers path emitted a full a bytes of filler in that case,
- * unlike the NOP loop, which emitted none.
- */
-static void align_target(uae_u32 a)
-{
-    if (!a)
-        return;
-
-    const uae_u32 misalign = (uae_u32)(((uintptr)target) & (a - 1));
-    if (!misalign)
-        return;
-
-    if (tune_nop_fillers)
-        raw_emit_nop_filler(a - misalign);
-    else {
-        /* Fill with NOPs --- makes debugging with gdb easier */
-        while ((uintptr)target&(a-1))
-            *target++=0x90;
-    }
-}
-
-/* Return non-zero when the host address addr lies inside
-   [ROMBaseHost, ROMBaseHost + ROMSize). */
-static __inline__ int isinrom(uintptr addr)
-{
-    if (addr < (uintptr)ROMBaseHost)
-        return 0;
-    return addr < (uintptr)ROMBaseHost + ROMSize;
-}
-
-/*
- * Write every DIRTY integer vreg that sits in a native register not
- * listed in call_saved back to memory, then evict every FPU vreg that is
- * currently in a register.
- */
-static void flush_all(void)
-{
-    int v;
-
-    log_flush();
-    for (v=0; v<VREGS; v++) {
-        if (live.state[v].status==DIRTY && !call_saved[live.state[v].realreg])
-            tomem(v);
-    }
-    for (v=0; v<VFREGS; v++) {
-        if (!f_isinreg(v))
-            continue;
-        f_evict(v);
-    }
-    raw_fp_cleanup_drop();
-}
-
-/* First step before calling out: make sure every register that the call
-   will clobber is safe and sound in memory. */
-static void prepare_for_call_1(void)
-{
-    /* If there are registers that don't get clobbered, we should be a
-     * bit more selective here */
-    flush_all();
-}
-
-/* Second step before calling out: a C routine is about to run and will
-   clobber all registers, so everything has to be disassociated. Native
-   registers listed in call_saved keep their contents. */
-static void prepare_for_call_2(void)
-{
-    int n;
-
-    for (n=0; n<N_REGS; n++) {
-        if (call_saved[n])
-            continue;
-        if (live.nat[n].nholds>0)
-            free_nreg(n);
-    }
-
-    for (n=0; n<N_FREGS; n++) {
-        if (live.fat[n].nholds>0)
-            f_free_nreg(n);
-    }
-
-    /* Note: We assume we already rescued the flags at the very start of
-       the call_r functions! */
-    live.flags_in_flags=TRASH;
-}
-
-/********************************************************************
- * Memory access and related functions, CREATE time *
- ********************************************************************/
-
-/* Record the two successors and the condition code of the branch that
-   ends the block being compiled (stored in branch_cc, taken_pc_p and
-   next_pc_p). */
-void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
-{
-    branch_cc=cond;
-    taken_pc_p=taken;
-    next_pc_p=not_taken;
-}
-
-
-/* Return the address of the direct_handler_to_use field of the blockinfo
-   for addr (looked up through get_blockinfo_addr_new), narrowed to
-   uae_u32. */
-static uae_u32 get_handler_address(uae_u32 addr)
-{
-    uae_u32 line=cacheline(addr); /* computed but not used below */
-    void* key=(void*)(uintptr)addr;
-    blockinfo* info=get_blockinfo_addr_new(key,0);
-
-    return (uintptr)&(info->direct_handler_to_use);
-}
-
-/* Return the current value of direct_handler_to_use of the blockinfo for
-   addr (looked up through get_blockinfo_addr_new), narrowed to uae_u32. */
-static uae_u32 get_handler(uae_u32 addr)
-{
-    uae_u32 line=cacheline(addr); /* computed but not used below */
-    void* key=(void*)(uintptr)addr;
-    blockinfo* info=get_blockinfo_addr_new(key,0);
-
-    return (uintptr)info->direct_handler_to_use;
-}
-
-/* Emit a 32-bit load into reg from the location returned by
-   get_handler_address(addr). */
-static void load_handler(int reg, uae_u32 addr)
-{
-    const uae_u32 slot=get_handler_address(addr);
-    mov_l_rm(reg,slot);
-}
-
-/* Emit a store of size bytes (1, 2 or 4) from vreg source to the 68k
- * address held in vreg address, using MEMBaseDiff as displacement.
- *
- * This version assumes that it is writing *real* memory, and *will* fail
- * if that assumption is wrong! No branches, no second chances, just
- * straight go-for-it attitude.
- *
- * Word and long values are byte-swapped in a work register first: tmp
- * normally, or source itself when clobber is non-zero. Both tmp and the
- * work register are forgotten afterwards. */
-static void writemem_real(int address, int source, int size, int tmp, int clobber)
-{
-    const int work = clobber ? source : tmp;
-
-    if (size==1) {
-        mov_b_bRr(address,source,MEMBaseDiff);
-    }
-    else if (size==2) {
-        mov_w_rr(work,source);
-        bswap_16(work);
-        mov_w_bRr(address,work,MEMBaseDiff);
-    }
-    else if (size==4) {
-        mov_l_rr(work,source);
-        bswap_32(work);
-        mov_l_bRr(address,work,MEMBaseDiff);
-    }
-    forget_about(tmp);
-    forget_about(work);
-}
-
-/* Emit a byte store of source to the address in vreg address; source is
-   not clobbered. */
-void writebyte(int address, int source, int tmp)
-{
-    const int size_bytes=1;
-    const int keep_source=0;
-    writemem_real(address,source,size_bytes,tmp,keep_source);
-}
-
-/* Emit a 16-bit store; clobber is handed through to writemem_real(). */
-static __inline__ void writeword_general(int address, int source, int tmp,
-                                         int clobber)
-{
-    const int size_bytes=2;
-    writemem_real(address,source,size_bytes,tmp,clobber);
-}
-
-/* Emit a 16-bit store that is allowed to clobber source. */
-void writeword_clobber(int address, int source, int tmp)
-{
-    const int may_clobber=1;
-    writeword_general(address,source,tmp,may_clobber);
-}
-
-/* Emit a 16-bit store that works on tmp instead of source. */
-void writeword(int address, int source, int tmp)
-{
-    const int may_clobber=0;
-    writeword_general(address,source,tmp,may_clobber);
-}
-
-/* Emit a 32-bit store; clobber is handed through to writemem_real(). */
-static __inline__ void writelong_general(int address, int source, int tmp,
-                                         int clobber)
-{
-    const int size_bytes=4;
-    writemem_real(address,source,size_bytes,tmp,clobber);
-}
-
-/* Emit a 32-bit store that is allowed to clobber source. */
-void writelong_clobber(int address, int source, int tmp)
-{
-    const int may_clobber=1;
-    writelong_general(address,source,tmp,may_clobber);
-}
-
-/* Emit a 32-bit store that works on tmp instead of source. */
-void writelong(int address, int source, int tmp)
-{
-    const int may_clobber=0;
-    writelong_general(address,source,tmp,may_clobber);
-}
-
-
-
-/* Emit a load of size bytes (1, 2 or 4) from the 68k address held in
- * vreg address into vreg dest, using MEMBaseDiff as displacement. Word
- * and long values are byte-swapped in dest after the load.
- *
- * This version assumes that it is reading *real* memory, and *will* fail
- * if that assumption is wrong! No branches, no second chances, just
- * straight go-for-it attitude.
- *
- * tmp is not needed by the emitted code; it is only forgotten on exit.
- * (The former local `f` was computed but never read and has been
- * removed.) */
-static void readmem_real(int address, int dest, int size, int tmp)
-{
-    switch(size) {
-    case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
-    case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
-    case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
-    }
-    forget_about(tmp);
-}
-
-/* Emit a byte load from the address in vreg address into dest. */
-void readbyte(int address, int dest, int tmp)
-{
-    const int size_bytes=1;
-    readmem_real(address,dest,size_bytes,tmp);
-}
-
-/* Emit a 16-bit load (byte-swapped) from the address in vreg address
-   into dest. */
-void readword(int address, int dest, int tmp)
-{
-    const int size_bytes=2;
-    readmem_real(address,dest,size_bytes,tmp);
-}
-
-/* Emit a 32-bit load (byte-swapped) from the address in vreg address
-   into dest. */
-void readlong(int address, int dest, int tmp)
-{
-    const int size_bytes=4;
-    readmem_real(address,dest,size_bytes,tmp);
-}
-
-/*
- * Emit code that converts the 68k address in vreg address into a host
- * address in vreg dest:
- *   REAL_ADDRESSING   - plain register copy;
- *   DIRECT_ADDRESSING - address + MEMBaseDiff via lea.
- * If neither macro is set, no conversion code is emitted.
- *
- * tmp is not needed by the emitted code; it is only forgotten on exit.
- * (The former locals `a` and `f` were assigned but never read and have
- * been removed.)
- */
-void get_n_addr(int address, int dest, int tmp)
-{
-#if REAL_ADDRESSING
-    mov_l_rr(dest, address);
-#elif DIRECT_ADDRESSING
-    lea_l_brr(dest,address,MEMBaseDiff);
-#endif
-    forget_about(tmp);
-}
-
-/* Jump-target flavour of get_n_addr(). It has to produce the same
-   address as the rest of UAE would --- otherwise we end up translating
-   everything twice --- so it simply forwards to get_n_addr(). */
-void get_n_addr_jmp(int address, int dest, int tmp)
-{
-    get_n_addr(address,dest,tmp);
-}
-
-
-/* base is a register, but dp is an actual value.
- target is a register, as is tmp
-
- Emits code that leaves the effective address described by the
- extension word dp in target. Bit 8 of dp selects between the full
- format (base and outer displacements are fetched from the instruction
- stream, advancing m68k_pc_offset, with an optional memory indirection
- through readlong) and the brief format (index register plus a signed
- 8-bit displacement). tmp is scratch and is forgotten on exit. */
-void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
-{
- int reg = (dp >> 12) & 15; /* index register number: bits 12-15 */
- int regd_shift=(dp >> 9) & 3; /* index scale as a shift count: bits 9-10 */
-
- if (dp & 0x100) {
- int ignorebase=(dp&0x80); /* bit 7: leave the base register out */
- int ignorereg=(dp&0x40); /* bit 6: leave the index register out */
- int addbase=0;
- int outer=0;
-
- if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); /* sign-extended word base displacement */
- if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4); /* long base displacement */
-
- if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); /* sign-extended word outer displacement */
- if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4); /* long outer displacement */
-
- if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
- if (!ignorereg) {
- if ((dp & 0x800) == 0) /* bit 11 clear: the index is a sign-extended 16-bit value */
- sign_extend_16_rr(target,reg);
- else
- mov_l_rr(target,reg);
- shll_l_ri(target,regd_shift);
- }
- else
- mov_l_ri(target,0);
-
- /* target is now regd */
- if (!ignorebase)
- add_l(target,base);
- add_l_ri(target,addbase);
- if (dp&0x03) readlong(target,target,tmp); /* memory indirection only when bits 0-1 are non-zero */
- } else { /* do the getlong first, then add regd */
- if (!ignorebase) {
- mov_l_rr(target,base);
- add_l_ri(target,addbase);
- }
- else
- mov_l_ri(target,addbase);
- if (dp&0x03) readlong(target,target,tmp);
-
- if (!ignorereg) {
- if ((dp & 0x800) == 0)
- sign_extend_16_rr(tmp,reg);
- else
- mov_l_rr(tmp,reg);
- shll_l_ri(tmp,regd_shift);
- /* tmp is now regd */
- add_l(target,tmp);
- }
- }
- add_l_ri(target,outer);
- }
- else { /* 68000 version */
- if ((dp & 0x800) == 0) { /* Sign extend */
- sign_extend_16_rr(target,reg);
- lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp)); /* low byte of dp is the signed displacement */
- }
- else {
- lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
- }
- }
- forget_about(tmp);
-}
-
-
-
-
-
-/* Store the new cache-enable state in letit. When the state actually
-   changes, all translated code is discarded first through
-   flush_icache_hard(). */
-void set_cache_state(int enabled)
-{
-    const bool state_changes = (enabled!=letit);
-
-    if (state_changes)
-        flush_icache_hard(77);
-    letit=enabled;
-}
-
-/* Return the cache-enable state last stored by set_cache_state(). */
-int get_cache_state(void)
-{
-    const int state=letit;
-    return state;
-}
-
-/* Return the number of bytes between the start of the translation cache
-   and the current emission pointer, or 0 when no cache is allocated. */
-uae_u32 get_jitted_size(void)
-{
-    if (!compiled_code)
-        return 0;
-    return current_compile_p-compiled_code;
-}
-
-const int CODE_ALLOC_MAX_ATTEMPTS = 10; // retry limit; only referenced by the disabled Linux path below
-const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
-
-static uint8 *do_alloc_code(uint32 size, int depth)
-{ /* Reserve size bytes for generated code; returns NULL on failure.
-   * Only the #else branch is compiled: the Linux-specific path is
-   * switched off by the "&& 0" in its #if, so depth is unused in the
-   * active build. */
-#if defined(__linux__) && 0
- /*
- This is a really awful hack that is known to work on Linux at
- least.
-
- The trick here is to make sure the allocated cache is nearby
- code segment, and more precisely in the positive half of a
- 32-bit address space. i.e. addr < 0x80000000. Actually, it
- turned out that a 32-bit binary run on AMD64 yields a cache
- allocated around 0xa0000000, thus causing some troubles when
- translating addresses from m68k to x86.
- */
- static uint8 * code_base = NULL;
- if (code_base == NULL) {
- uintptr page_size = getpagesize();
- uintptr boundaries = CODE_ALLOC_BOUNDARIES;
- if (boundaries < page_size)
- boundaries = page_size;
- code_base = (uint8 *)sbrk(0);
- for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
- if (vm_acquire_fixed(code_base, size) == 0) {
- uint8 *code = code_base;
- code_base += size;
- return code;
- }
- code_base += boundaries;
- }
- return NULL;
- }
-
- if (vm_acquire_fixed(code_base, size) == 0) {
- uint8 *code = code_base;
- code_base += size;
- return code;
- }
-
- if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
- return NULL;
-
- return do_alloc_code(size, depth + 1);
-#else
- uint8 *code = (uint8 *)vm_acquire(size);
- return code == VM_MAP_FAILED ? NULL : code; /* map the vm layer's failure value to NULL */
-#endif
-}
-
-/* Allocate size bytes for generated code through do_alloc_code().
-   Returns NULL on failure. The result is asserted to lie within the low
-   4 GB of the address space. */
-static inline uint8 *alloc_code(uint32 size)
-{
-    uint8 * const code = do_alloc_code(size, 0);
-
-    /* allocated code must fit in 32-bit boundaries */
-    assert((uintptr)code <= 0xffffffff);
-    return code;
-}
-
-/*
- * (Re)allocate the translation cache.
- *
- * An existing cache is flushed and released first. Then cache_size KB
- * are requested; on failure the size is halved and the request retried
- * until it succeeds or cache_size reaches 0. On success the cache is
- * made readable, writable and executable and the emission pointers are
- * reset. If every attempt fails, compiled_code stays NULL and nothing
- * else is touched (previously vm_protect() was still called on the NULL
- * pointer).
- */
-void alloc_cache(void)
-{
-    if (compiled_code) {
-        flush_icache_hard(6);
-        vm_release(compiled_code, cache_size * 1024);
-        compiled_code = 0;
-    }
-
-    if (cache_size == 0)
-        return;
-
-    while (!compiled_code && cache_size) {
-        if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
-            compiled_code = 0;
-            cache_size /= 2;
-        }
-    }
-    if (!compiled_code)
-        return;
-
-    vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
-
-    /* alloc_code() asserts that the address fits in 32 bits, so the
-       narrowing cast keeps the %08X conversion well-defined */
-    write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, (unsigned int)(uintptr)compiled_code);
-    max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
-    current_compile_p = compiled_code;
-    current_cache_size = 0;
-}
-
-
-
-extern void op_illg_1 (uae_u32 opcode) REGPARAM; /* defined elsewhere; build_comp() installs it as the default nfcpufunctbl entry */
-
-static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
-{ /* Compute two checksums over the source bytes of block bi: *c1
-   * receives the 32-bit sum and *c2 the XOR of all 32-bit words read.
-   * With USE_CHECKSUM_INFO every region of the bi->csi list is covered,
-   * otherwise the single region bi->min_pcp / bi->len. */
- uae_u32 k1 = 0;
- uae_u32 k2 = 0;
-
-#if USE_CHECKSUM_INFO
- checksum_info *csi = bi->csi;
- Dif(!csi) abort();
- while (csi) {
- uae_s32 len = csi->length;
- uintptr tmp = (uintptr)csi->start_p;
-#else
- uae_s32 len = bi->len;
- uintptr tmp = (uintptr)bi->min_pcp;
-#endif
- uae_u32*pos;
-
- len += (tmp & 3); /* grow the length by the bytes gained from aligning the start down */
- tmp &= ~((uintptr)3); /* align the start down to a 4-byte boundary */
- pos = (uae_u32 *)tmp;
-
- if (len >= 0 && len <= MAX_CHECKSUM_LEN) { /* regions outside this range contribute nothing */
- while (len > 0) {
- k1 += *pos;
- k2 ^= *pos;
- pos++;
- len -= 4;
- }
- }
-
-#if USE_CHECKSUM_INFO
- csi = csi->next;
- }
-#endif
-
- *c1 = k1;
- *c2 = k2;
-}
-
-#if 0 /* disabled debugging aid: logs the words calc_checksum() would read for one region */
-static void show_checksum(CSI_TYPE* csi)
-{
- uae_u32 k1=0;
- uae_u32 k2=0;
- uae_s32 len=CSI_LENGTH(csi);
- uae_u32 tmp=(uintptr)CSI_START_P(csi); /* NOTE(review): narrows the pointer to 32 bits, unlike calc_checksum() */
- uae_u32* pos;
-
- len+=(tmp&3);
- tmp&=(~3);
- pos=(uae_u32*)tmp;
-
- if (len<0 || len>MAX_CHECKSUM_LEN) {
- return;
- }
- else {
- while (len>0) {
- write_log("%08x ",*pos);
- pos++;
- len-=4;
- }
- write_log(" bla\n");
- }
-}
-#endif
-
-
-/* Return 1 when a blockinfo exists for regs.pc_p but is not the one
-   currently registered in its cache line; that block is then raised in
-   the cache-line list. Return 0 otherwise. */
-int check_for_cache_miss(void)
-{
-    blockinfo* found=get_blockinfo_addr(regs.pc_p);
-    if (!found)
-        return 0;
-
-    int line=cacheline(regs.pc_p);
-    if (found==cache_tags[line+1].bi)
-        return 0;
-
-    raise_in_cl_list(found);
-    return 1;
-}
-
-
-/* Entry point used when an existing block's countdown has expired. The
-   block is raised in its cache-line list first so that execute_normal
-   doesn't refuse to recompile due to a perceived cache miss. */
-static void recompile_block(void)
-{
-    blockinfo* expired=get_blockinfo_addr(regs.pc_p);
-
-    Dif (!expired) {
-        abort();
-    }
-    raise_in_cl_list(expired);
-    execute_normal();
-}
-/* Handle a cache-line miss at regs.pc_p: when no blockinfo exists for
-   that address the block is compiled now, otherwise the existing block
-   is raised in its cache-line list. */
-static void cache_miss(void)
-{
-    blockinfo* wanted=get_blockinfo_addr(regs.pc_p);
-    uae_u32 line=cacheline(regs.pc_p);
-    blockinfo* resident=get_blockinfo(line);
-
-    if (wanted) {
-        Dif (!resident || wanted==resident) {
-            write_log("Unexplained cache miss %p %p\n",wanted,resident);
-            abort();
-        }
-        raise_in_cl_list(wanted);
-    }
-    else {
-        execute_normal(); /* Compile this block now */
-    }
-}
-
-static int called_check_checksum(blockinfo* bi); /* forward declaration: the two functions call each other */
-
-static inline int block_check_checksum(blockinfo* bi)
-{ /* Re-validate a block that is in state BI_NEED_CHECK. Returns non-zero
-   * when the block is in a checked state or its checksums (and those of
-   * the blocks it jumps to) still match; returns 0 after invalidating a
-   * block whose checksums differ. */
- uae_u32 c1,c2;
- bool isgood;
-
- if (bi->status!=BI_NEED_CHECK)
- return 1; /* This block is in a checked state */
-
- checksum_count++;
-
- if (bi->c1 || bi->c2)
- calc_checksum(bi,&c1,&c2);
- else {
- c1=c2=1; /* Make sure it doesn't match */
- }
-
- isgood=(c1==bi->c1 && c2==bi->c2);
-
- if (isgood) {
- /* This block is still OK. So we reactivate. Of course, that
- means we have to move it into the needs-to-be-flushed list */
- bi->handler_to_use=bi->handler;
- set_dhtu(bi,bi->direct_handler);
- bi->status=BI_CHECKING; /* no longer BI_NEED_CHECK, so a recursive visit returns early above */
- isgood=called_check_checksum(bi);
- }
- if (isgood) {
- /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
- c1,c2,bi->c1,bi->c2);*/
- remove_from_list(bi);
- add_to_active(bi);
- raise_in_cl_list(bi);
- bi->status=BI_ACTIVE;
- }
- else {
- /* This block actually changed. We need to invalidate it,
- and set it up to be recompiled */
- /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
- c1,c2,bi->c1,bi->c2); */
- invalidate_block(bi);
- raise_in_cl_list(bi);
- }
- return isgood;
-}
-
-/* Check the (at most two) blocks that bi jumps to directly. Returns
-   non-zero when every dependency with a jmp_off passes
-   block_check_checksum(); stops at the first failure. */
-static int called_check_checksum(blockinfo* bi)
-{
-    int ok=1;
-    int slot;
-
-    for (slot=0; ok && slot<2; slot++) {
-        if (bi->dep[slot].jmp_off)
-            ok=block_check_checksum(bi->dep[slot].target);
-    }
-    return ok;
-}
-
-/* Entry point reached for a block that needs its checksum verified
-   before it may run again (regs.pc_p identifies the block). */
-static void check_checksum(void)
-{
-    blockinfo* wanted=get_blockinfo_addr(regs.pc_p);
-    uae_u32 line=cacheline(regs.pc_p);
-    blockinfo* resident=get_blockinfo(line);
-
-    if (!wanted) {
-        /* These are not the droids you are looking for...
-           Whoever is the primary target is in a dormant state, but
-           calling it was accidental, and we should just compile this
-           new block */
-        execute_normal();
-    }
-    else if (wanted!=resident) {
-        /* The block was hit accidentally, but it does exist. Cache miss */
-        cache_miss();
-    }
-    else if (!block_check_checksum(wanted)) {
-        execute_normal();
-    }
-}
-
-static __inline__ void match_states(blockinfo* bi)
-{ /* Bring the current register state in line with what block bi
-   * recorded in bi->env before control is transferred to it: drop vregs
-   * the block declares unneeded, flush everything, then load the vregs
-   * the block expects into the native registers it names. */
- int i;
- smallstate* s=&(bi->env);
-
- if (bi->status==BI_NEED_CHECK) {
- block_check_checksum(bi);
- }
- if (bi->status==BI_ACTIVE ||
- bi->status==BI_FINALIZING) { /* Deal with the *promises* the
- block makes (about not using
- certain vregs) */
- for (i=0;i<16;i++) {
- if (s->virt[i]==L_UNNEEDED) {
- // write_log("unneeded reg %d at %p\n",i,target);
- COMPCALL(forget_about)(i); // FIXME
- }
- }
- }
- flush(1);
-
- /* And now deal with the *demands* the block makes */
- for (i=0;i<N_REGS;i++) {
- int v=s->nat[i];
- if (v>=0) {
- // printf("Loading reg %d into %d at %p\n",v,i,target);
- readreg_specific(v,4,i);
- // do_load_reg(i,v);
- // setlock(i);
- }
- }
- for (i=0;i<N_REGS;i++) { /* second pass: unlock only after all demanded registers are loaded */
- int v=s->nat[i];
- if (v>=0) {
- unlock2(i);
- }
- }
-}
-
-/* Emit the shared tail of a popall exit stub: release the stack padding,
-   pop the preserved registers in ascending order (the reverse of the
-   push order used on entry) and jump to handler. */
-static __inline__ void emit_popall_exit(int stack_space, uintptr handler)
-{
-    int i;
-
-    raw_inc_sp(stack_space);
-    for (i=0;i<N_REGS;i++) {
-        if (need_to_preserve[i])
-            raw_pop_l_r(i);
-    }
-    raw_jmp(handler);
-}
-
-/*
- * Allocate popallspace and emit the JIT entry stub
- * (pushall_call_handler) together with one exit stub per C-side
- * continuation (popall_*). The area is writable while it is being
- * filled and is switched to read/execute at the end. Aborts when the
- * area cannot be allocated.
- */
-static __inline__ void create_popalls(void)
-{
-    int i,r;
-
-    if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
-        write_log("FATAL: Could not allocate popallspace!\n");
-        abort();
-    }
-    vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
-
-    /* Padding that brings the stack back to a STACK_ALIGN boundary once
-       the preserved registers have been pushed */
-    int stack_space = STACK_OFFSET;
-    for (i=0;i<N_REGS;i++) {
-        if (need_to_preserve[i])
-            stack_space += sizeof(void *);
-    }
-    stack_space %= STACK_ALIGN;
-    if (stack_space)
-        stack_space = STACK_ALIGN - stack_space;
-
-    current_compile_p=popallspace;
-    set_target(current_compile_p);
-
-    /* We need to guarantee 16-byte stack alignment on x86 at any point
-       within the JIT generated code. We have multiple exit points
-       possible but a single entry. A "jmp" is used so that we don't
-       have to generate stack alignment in generated code that has to
-       call external functions (e.g. a generic instruction handler).
-
-       In summary, JIT generated code is not leaf so we have to deal
-       with it here to maintain correct stack alignment. */
-    align_target(align_jumps);
-    current_compile_p=get_target();
-    pushall_call_handler=get_target();
-    for (i=N_REGS;i--;) {
-        if (need_to_preserve[i])
-            raw_push_l_r(i);
-    }
-    raw_dec_sp(stack_space);
-    r=REG_PC_TMP;
-    /* Dispatch through cache_tags, indexed by the masked regs.pc_p */
-    raw_mov_l_rm(r,(uintptr)&regs.pc_p);
-    raw_and_l_ri(r,TAGMASK);
-    raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
-
-    /* now the exit points */
-    align_target(align_jumps);
-    popall_do_nothing=get_target();
-    emit_popall_exit(stack_space,(uintptr)do_nothing);
-
-    align_target(align_jumps);
-    popall_execute_normal=get_target();
-    emit_popall_exit(stack_space,(uintptr)execute_normal);
-
-    align_target(align_jumps);
-    popall_cache_miss=get_target();
-    emit_popall_exit(stack_space,(uintptr)cache_miss);
-
-    align_target(align_jumps);
-    popall_recompile_block=get_target();
-    emit_popall_exit(stack_space,(uintptr)recompile_block);
-
-    align_target(align_jumps);
-    popall_exec_nostats=get_target();
-    emit_popall_exit(stack_space,(uintptr)exec_nostats);
-
-    align_target(align_jumps);
-    popall_check_checksum=get_target();
-    emit_popall_exit(stack_space,(uintptr)check_checksum);
-
-    // no need to further write into popallspace
-    vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
-}
-
-/* Empty the active and dormant block lists and clear every hold_bi
-   slot. The blockinfos themselves are not freed here. */
-static __inline__ void reset_lists(void)
-{
-    int slot;
-
-    for (slot=0; slot<MAX_HOLD_BI; slot++) {
-        hold_bi[slot]=NULL;
-    }
-    active=NULL;
-    dormant=NULL;
-}
-
-/*
- * Initialise a fresh blockinfo. Two small stubs are emitted into the
- * translation cache; each stores bi->pc_p into regs.pc_p and then jumps
- * to a popall exit:
- *   bi->direct_pen -> popall_execute_normal
- *   bi->direct_pcc -> popall_check_checksum
- * The dependency links, environment and status are then reset.
- */
-static void prepare_block(blockinfo* bi)
-{
-    int i;
-
-    set_target(current_compile_p);
-    align_target(align_jumps);
-    bi->direct_pen=(cpuop_func *)get_target();
-    raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
-    raw_mov_l_mr((uintptr)&regs.pc_p,0);
-    raw_jmp((uintptr)popall_execute_normal);
-
-    align_target(align_jumps);
-    bi->direct_pcc=(cpuop_func *)get_target();
-    raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
-    raw_mov_l_mr((uintptr)&regs.pc_p,0);
-    raw_jmp((uintptr)popall_check_checksum);
-    current_compile_p=get_target();
-
-    bi->deplist=NULL;
-    for (i=0;i<2;i++) {
-        bi->dep[i].prev_p=NULL;
-        bi->dep[i].next=NULL;
-    }
-    bi->env=default_ss;
-    bi->status=BI_INVALID;
-    bi->havestate=0;
-    //bi->env=empty_ss;
-}
-
-// Remove the compile handlers (flag-generating and no-flags variant) of
-// one opcode. OPCODE is in big endian format, use cft_map() beforehand,
-// if needed.
-static inline void reset_compop(int opcode)
-{
-    nfcompfunctbl[opcode] = NULL;
-    compfunctbl[opcode] = NULL;
-}
-
-/* Parse exactly four hexadecimal digits (either case) at p and return
-   their value, or -1 if any of the four characters is not a hex digit.
-   Scanning stops at the first offending character, so a shorter string
-   is rejected at its terminating NUL. */
-static int read_opcode(const char *p)
-{
-    int value = 0;
-
-    for (int pos = 0; pos < 4; pos++) {
-        const int ch = p[pos];
-        int digit;
-
-        if (ch >= '0' && ch <= '9')
-            digit = ch - '0';
-        else if (ch >= 'a' && ch <= 'f')
-            digit = (ch - 'a') + 10;
-        else if (ch >= 'A' && ch <= 'F')
-            digit = (ch - 'A') + 10;
-        else
-            return -1;
-
-        value = (value << 4) | digit;
-    }
-    return value;
-}
-
-/*
- * Apply the "jitblacklist" prefs string: every listed opcode loses its
- * compile handlers (reset_compop), so it will not be JIT-compiled.
- *
- * Syntax: entries separated by ',' or ';', each entry either XXXX or
- * XXXX-YYYY (inclusive range), where XXXX/YYYY are four hex digits.
- *
- * Returns true when the whole string was parsed (or no blacklist is
- * set), false on the first syntax error; entries seen before the error
- * have already been applied.
- *
- * The separator is now skipped for ',' as well as ';'. Previously only
- * ';' advanced the cursor, so every comma-separated list was rejected
- * after its first entry.
- */
-static bool merge_blacklist()
-{
-    const char *blacklist = PrefsFindString("jitblacklist");
-    if (!blacklist)
-        return true;
-
-    const char *p = blacklist;
-    for (;;) {
-        if (*p == 0)
-            return true;
-
-        int opcode1 = read_opcode(p);
-        if (opcode1 < 0)
-            return false;
-        p += 4;
-
-        int opcode2 = opcode1;
-        if (*p == '-') {
-            p++;
-            opcode2 = read_opcode(p);
-            if (opcode2 < 0)
-                return false;
-            p += 4;
-        }
-
-        /* An entry must be followed by a separator or the end of string */
-        if (*p != 0 && *p != ',' && *p != ';')
-            return false;
-
-        write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
-        for (int opcode = opcode1; opcode <= opcode2; opcode++)
-            reset_compop(cft_map(opcode));
-
-        if (*p == 0)
-            return true;
-        p++; /* step over ',' or ';' */
-    }
-}
-
-/*
- * Build the per-opcode tables used by the JIT (compfunctbl,
- * nfcompfunctbl, nfcpufunctbl and prop[]), apply the user blacklist and
- * then set up the runtime state: popall stubs, translation cache, block
- * lists, cache tags and the default register-state descriptors.
- *
- * Fixes over the previous version:
- *  - the FPU test in the no-flags loop reads nftbl[i].specific, the
- *    entry actually being installed, instead of tbl[i].specific;
- *  - the final "specific" pass indexes nfcpufunctbl by nfctbl[i].opcode,
- *    the table being iterated, instead of tbl[i].opcode;
- *  - the unused local jumpcount is gone.
- */
-void build_comp(void)
-{
-    int i;
-    unsigned long opcode;
-    struct comptbl* tbl=op_smalltbl_0_comp_ff;
-    struct comptbl* nftbl=op_smalltbl_0_comp_nf;
-    int count;
-    int cpu_level = 0; // 68000 (default)
-    if (CPUType == 4)
-        cpu_level = 4; // 68040 with FPU
-    else {
-        if (FPUType)
-            cpu_level = 3; // 68020 with FPU
-        else if (CPUType >= 2)
-            cpu_level = 2; // 68020
-        else if (CPUType == 1)
-            cpu_level = 1;
-    }
-    struct cputbl *nfctbl = (
-        cpu_level == 4 ? op_smalltbl_0_nf
-        : cpu_level == 3 ? op_smalltbl_1_nf
-        : cpu_level == 2 ? op_smalltbl_2_nf
-        : cpu_level == 1 ? op_smalltbl_3_nf
-        : op_smalltbl_4_nf);
-
-    write_log ("<JIT compiler> : building compiler function tables\n");
-
-    /* Default every opcode to "illegal, not compilable" */
-    for (opcode = 0; opcode < 65536; opcode++) {
-        reset_compop(opcode);
-        nfcpufunctbl[opcode] = op_illg_1;
-        prop[opcode].use_flags = 0x1f;
-        prop[opcode].set_flags = 0x1f;
-        prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
-    }
-
-    /* Flag-generating compile handlers and control-flow properties */
-    for (i = 0; tbl[i].opcode < 65536; i++) {
-        int cflow = table68k[tbl[i].opcode].cflow;
-        if (follow_const_jumps && (tbl[i].specific & 16))
-            cflow = fl_const_jump;
-        else
-            cflow &= ~fl_const_jump;
-        prop[cft_map(tbl[i].opcode)].cflow = cflow;
-
-        int uses_fpu = tbl[i].specific & 32;
-        if (uses_fpu && avoid_fpu)
-            compfunctbl[cft_map(tbl[i].opcode)] = NULL;
-        else
-            compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
-    }
-
-    /* No-flags compile handlers */
-    for (i = 0; nftbl[i].opcode < 65536; i++) {
-        int uses_fpu = nftbl[i].specific & 32;
-        if (uses_fpu && avoid_fpu)
-            nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
-        else
-            nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
-
-        /* NOTE(review): nfctbl is indexed here with nftbl's index; this
-           is only meaningful if both tables are parallel -- confirm. The
-           loop that follows re-installs every nfctbl handler under its
-           own opcode. */
-        nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
-    }
-
-    for (i = 0; nfctbl[i].handler; i++) {
-        nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
-    }
-
-    /* Propagate handlers and properties from each opcode's generic
-       handler opcode to the opcode itself */
-    for (opcode = 0; opcode < 65536; opcode++) {
-        compop_func *f;
-        compop_func *nff;
-        cpuop_func *nfcf;
-        int isaddx,cflow;
-
-        if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
-            continue;
-
-        if (table68k[opcode].handler != -1) {
-            f = compfunctbl[cft_map(table68k[opcode].handler)];
-            nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
-            nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
-            cflow = prop[cft_map(table68k[opcode].handler)].cflow;
-            isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
-            prop[cft_map(opcode)].cflow = cflow;
-            prop[cft_map(opcode)].is_addx = isaddx;
-            compfunctbl[cft_map(opcode)] = f;
-            nfcompfunctbl[cft_map(opcode)] = nff;
-            Dif (nfcf == op_illg_1)
-                abort();
-            nfcpufunctbl[cft_map(opcode)] = nfcf;
-        }
-        prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
-        prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
-        /* Unconditional jumps don't evaluate condition codes, so they
-         * don't actually use any flags themselves */
-        if (prop[cft_map(opcode)].cflow & fl_const_jump)
-            prop[cft_map(opcode)].use_flags = 0;
-    }
-    /* Entries flagged "specific" take precedence over propagated handlers */
-    for (i = 0; nfctbl[i].handler != NULL; i++) {
-        if (nfctbl[i].specific)
-            nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
-    }
-
-    /* Merge in blacklist */
-    if (!merge_blacklist())
-        write_log("<JIT compiler> : blacklist merge failure!\n");
-
-    count=0;
-    for (opcode = 0; opcode < 65536; opcode++) {
-        if (compfunctbl[cft_map(opcode)])
-            count++;
-    }
-    write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
-
-    /* Initialise state */
-    create_popalls();
-    alloc_cache();
-    reset_lists();
-
-    /* cache_tags holds pairs: even slot = handler, odd slot = blockinfo */
-    for (i=0;i<TAGSIZE;i+=2) {
-        cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
-        cache_tags[i+1].bi=NULL;
-    }
-
-#if 0
-    for (i=0;i<N_REGS;i++) {
-        empty_ss.nat[i].holds=-1;
-        empty_ss.nat[i].validsize=0;
-        empty_ss.nat[i].dirtysize=0;
-    }
-#endif
-    for (i=0;i<VREGS;i++) {
-        empty_ss.virt[i]=L_NEEDED;
-    }
-    for (i=0;i<N_REGS;i++) {
-        empty_ss.nat[i]=L_UNKNOWN;
-    }
-    default_ss=empty_ss;
-}
-
-
-/* Flush variant that deliberately leaves all translated code in place;
-   n is ignored. */
-static void flush_icache_none(int n)
-{
-    /* Intentionally empty. */
-}
-
-/* Throw away every translated block: each block on the active and
-   dormant lists has its cache-tag pair reset and is freed, the lists
-   are emptied and, if a cache is allocated, emission restarts at its
-   beginning and SPCFLAG_JIT_EXEC_RETURN is raised. n is only used by
-   the disabled log statement. */
-static void flush_icache_hard(int n)
-{
-    uae_u32 i;
-    blockinfo* cur;
-    blockinfo* doomed;
-
-    hard_flush_count++;
-#if 0
-    write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
-        n,regs.pc,regs.pc_p,current_cache_size/1024);
-    current_cache_size = 0;
-#endif
-    for (cur=active; cur; ) {
-        cache_tags[cacheline(cur->pc_p)].handler=(cpuop_func *)popall_execute_normal;
-        cache_tags[cacheline(cur->pc_p)+1].bi=NULL;
-        doomed=cur;
-        cur=cur->next; /* fetch the successor before the node is freed */
-        free_blockinfo(doomed);
-    }
-    for (cur=dormant; cur; ) {
-        cache_tags[cacheline(cur->pc_p)].handler=(cpuop_func *)popall_execute_normal;
-        cache_tags[cacheline(cur->pc_p)+1].bi=NULL;
-        doomed=cur;
-        cur=cur->next;
-        free_blockinfo(doomed);
-    }
-
-    reset_lists();
-    if (compiled_code) {
-        current_compile_p=compiled_code;
-        SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
-    }
-}
-
-
-/* "Soft flushing" --- instead of actually throwing everything away,
- we simply mark everything as "needs to be checked".
-
- Every block on the active list is redirected either to
- popall_execute_normal (blocks that were BI_INVALID or BI_NEED_RECOMP)
- or to popall_check_checksum (all others, which become BI_NEED_CHECK).
- The whole active list is then spliced in front of the dormant list.
-*/
-
-static inline void flush_icache_lazy(int n)
-{
- uae_u32 i; /* NOTE(review): i is never used in this function */
- blockinfo* bi;
- blockinfo* bi2;
-
- soft_flush_count++;
- if (!active)
- return;
-
- bi=active;
- while (bi) {
- uae_u32 cl=cacheline(bi->pc_p);
- if (bi->status==BI_INVALID ||
- bi->status==BI_NEED_RECOMP) {
- if (bi==cache_tags[cl+1].bi) /* only touch the tag when this block owns the cache line */
- cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
- bi->handler_to_use=(cpuop_func *)popall_execute_normal;
- set_dhtu(bi,bi->direct_pen);
- bi->status=BI_INVALID;
- }
- else {
- if (bi==cache_tags[cl+1].bi)
- cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
- bi->handler_to_use=(cpuop_func *)popall_check_checksum;
- set_dhtu(bi,bi->direct_pcc);
- bi->status=BI_NEED_CHECK;
- }
- bi2=bi;
- bi=bi->next;
- }
- /* bi2 is now the last entry in the active list */
- bi2->next=dormant;
- if (dormant)
- dormant->prev_p=&(bi2->next);
-
- dormant=active;
- active->prev_p=&dormant;
- active=NULL;
-}
-
-void flush_icache_range(uae_u8 *start_p, uae_u32 length)
-{ /* Invalidate translated code for the host range [start_p,
-   * start_p+length). With LAZY_FLUSH_ICACHE_RANGE only the active blocks
-   * selected by the candidate test are downgraded and moved to the
-   * dormant list; otherwise everything is flushed via flush_icache(-1). */
- if (!active)
- return;
-
-#if LAZY_FLUSH_ICACHE_RANGE
- blockinfo *bi = active;
- while (bi) {
-#if USE_CHECKSUM_INFO
- bool candidate = false;
- for (checksum_info *csi = bi->csi; csi; csi = csi->next) {
- if (((start_p - csi->start_p) < csi->length) || /* NOTE(review): the outcome for a negative pointer difference depends on operand types not visible here -- confirm the overlap test */
- ((csi->start_p - start_p) < length)) {
- candidate = true;
- break;
- }
- }
-#else
- // Assume system is consistent and would invalidate the right range
- const bool candidate = (bi->pc_p - start_p) < length;
-#endif
- blockinfo *dbi = bi;
- bi = bi->next; /* advance first: dbi may be moved to another list below */
- if (candidate) {
- uae_u32 cl = cacheline(dbi->pc_p);
- if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) {
- if (dbi == cache_tags[cl+1].bi)
- cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
- dbi->handler_to_use = (cpuop_func *)popall_execute_normal;
- set_dhtu(dbi, dbi->direct_pen);
- dbi->status = BI_INVALID;
- }
- else {
- if (dbi == cache_tags[cl+1].bi)
- cache_tags[cl].handler = (cpuop_func *)popall_check_checksum;
- dbi->handler_to_use = (cpuop_func *)popall_check_checksum;
- set_dhtu(dbi, dbi->direct_pcc);
- dbi->status = BI_NEED_CHECK;
- }
- remove_from_list(dbi);
- add_to_dormant(dbi);
- }
- }
- return;
-#endif
- flush_icache(-1);
-}
-
-/* Last-resort handler: terminate the process through abort(). */
-static void catastrophe(void)
-{
-    abort(); /* does not return */
-}
-
-int failure; /* NOTE(review): not referenced in the visible part of this file; purpose unclear */
-
-#define TARGET_M68K 0 /* disassembly target ids passed to disasm_block() */
-#define TARGET_POWERPC 1
-#define TARGET_X86 2
-#define TARGET_X86_64 3
-#if defined(i386) || defined(__i386__) /* TARGET_NATIVE: id of the host architecture */
-#define TARGET_NATIVE TARGET_X86
-#endif
-#if defined(powerpc) || defined(__powerpc__)
-#define TARGET_NATIVE TARGET_POWERPC
-#endif
-#if defined(x86_64) || defined(__x86_64__) /* NOTE(review): TARGET_NATIVE stays undefined on any other host */
-#define TARGET_NATIVE TARGET_X86_64
-#endif
-
-#ifdef ENABLE_MON
-/* Monitor read hook: fetch one byte straight from host address addr. */
-static uae_u32 mon_read_byte_jit(uintptr addr)
-{
-    return (uintptr)(*(uae_u8 *)addr);
-}
-
-/* Monitor write hook: store b (narrowed to a byte) straight at host
-   address addr. */
-static void mon_write_byte_jit(uintptr addr, uae_u32 b)
-{
-    *(uae_u8 *)addr = b;
-}
-#endif
-
-/*
- * Disassemble length bytes of host memory starting at start through the
- * built-in monitor, using the disassembler command that matches target
- * (one of the TARGET_* ids). Does nothing unless JITDebug is set, and
- * compiles to an empty body unless both JIT_DEBUG and ENABLE_MON are
- * defined.
- *
- * While the monitor runs, its byte accessors are temporarily replaced
- * by the direct host-memory versions and restored afterwards.
- *
- * The addresses are cast to unsigned long and printed with %lx; the
- * previous code passed pointers to %x. The command is built with a
- * bounded snprintf().
- */
-void disasm_block(int target, uint8 * start, size_t length)
-{
-    if (!JITDebug)
-        return;
-
-#if defined(JIT_DEBUG) && defined(ENABLE_MON)
-    char disasm_str[200];
-    snprintf(disasm_str, sizeof(disasm_str), "%s $%lx $%lx",
-        target == TARGET_M68K ? "d68" :
-        target == TARGET_X86 ? "d86" :
-        target == TARGET_X86_64 ? "d8664" :
-        target == TARGET_POWERPC ? "d" : "x",
-        (unsigned long)(uintptr)start,
-        (unsigned long)(uintptr)(start + length - 1));
-
-    uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
-    void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
-
-    mon_read_byte = mon_read_byte_jit;
-    mon_write_byte = mon_write_byte_jit;
-
-    char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
-    mon(4, arg);
-
-    mon_read_byte = old_mon_read_byte;
-    mon_write_byte = old_mon_write_byte;
-#endif
-}
-
-/* Disassemble a block of generated host code (TARGET_NATIVE). */
-static void disasm_native_block(uint8 *start, size_t length)
-{
-    const int host_target = TARGET_NATIVE;
-    disasm_block(host_target, start, length);
-}
-
-/* Disassemble a block of 68k code (TARGET_M68K). */
-static void disasm_m68k_block(uint8 *start, size_t length)
-{
-    const int guest_target = TARGET_M68K;
-    disasm_block(guest_target, start, length);
-}
-
-#ifdef HAVE_GET_WORD_UNSWAPPED /* DO_GET_OPCODE(a): read the 16-bit opcode word at host address a */
-# define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
-#else
-# define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
-#endif
-
-#if JIT_DEBUG
-static uae_u8 *last_regs_pc_p = 0;
-static uae_u8 *last_compiled_block_addr = 0;
-
-/*
- * Debug aid: log the host addresses of the emulated CPU state, the 68k
- * register dump and the location of the most recently recorded block.
- * Does nothing unless JITDebug is set.
- *
- * The native block size is reported as 0 when no blockinfo exists for
- * last_regs_pc_p; the lookup result used to be dereferenced unchecked.
- */
-void compiler_dumpstate(void)
-{
-    if (!JITDebug)
-        return;
-
-    write_log("### Host addresses\n");
-    write_log("MEM_BASE : %x\n", MEMBaseDiff);
-    write_log("PC_P : %p\n", &regs.pc_p);
-    write_log("SPCFLAGS : %p\n", &regs.spcflags);
-    write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
-    write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
-    write_log("\n");
-
-    write_log("### M68k processor state\n");
-    m68k_dumpstate(0);
-    write_log("\n");
-
-    blockinfo *last_bi = get_blockinfo_addr(last_regs_pc_p);
-
-    write_log("### Block in Mac address space\n");
-    write_log("M68K block : %p\n",
-        (void *)(uintptr)get_virtual_address(last_regs_pc_p));
-    write_log("Native block : %p (%d bytes)\n",
-        (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
-        last_bi ? last_bi->direct_handler_size : 0);
-    write_log("\n");
-}
-#endif
-
-/*
- * Translate the trace recorded in pc_hist[0..blocklen-1] into a native
- * code block.  The work is only done when both 'letit' and
- * 'compiled_code' are set; cpu_do_check_ticks() is called in either case
- * to account for the time spent here.
- *
- * Outline: obtain the blockinfo for the first instruction, compute the
- * live flags backwards over the trace, emit the direct handler (countdown
- * code, then either a jump to popall_exec_nostats at optlev 0 or the
- * per-instruction translation), emit the block exit, register the block
- * as active or dormant, and finally emit the non-direct handler that
- * checks regs.pc_p before jumping to the direct handler.
- */
-static void compile_block(cpu_history* pc_hist, int blocklen)
-{
- if (letit && compiled_code) {
-#if PROFILE_COMPILE_TIME
- compile_count++;
- clock_t start_time = clock();
-#endif
-#if JIT_DEBUG
- bool disasm_block = false;
-#endif
-
- /* OK, here we need to 'compile' a block */
- int i;
- int r;
- int was_comp=0;
- uae_u8 liveflags[MAXRUN+1];
-#if USE_CHECKSUM_INFO
- bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
- uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
- uintptr min_pcp=max_pcp;
-#else
- uintptr max_pcp=(uintptr)pc_hist[0].location;
- uintptr min_pcp=max_pcp;
-#endif
- uae_u32 cl=cacheline(pc_hist[0].location);
- void* specflags=(void*)&regs.spcflags;
- blockinfo* bi=NULL;
- blockinfo* bi2;
- int extra_len=0;
-
- redo_current_block=0;
- if (current_compile_p>=max_compile_start)
- flush_icache_hard(7);
-
- alloc_blockinfos();
-
- bi=get_blockinfo_addr_new(pc_hist[0].location,0);
- bi2=get_blockinfo(cl);
-
- optlev=bi->optlevel;
- if (bi->status!=BI_INVALID) {
- Dif (bi!=bi2) {
- /* I don't think it can happen anymore. Shouldn't, in
- any case. So let's make sure... */
- write_log("WOOOWOO count=%d, ol=%d %p %p\n",
- bi->count,bi->optlevel,bi->handler_to_use,
- cache_tags[cl].handler);
- abort();
- }
-
- Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
- write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
- /* What the heck? We are not supposed to be here! */
- abort();
- }
- }
- if (bi->count==-1) {
- optlev++;
- while (!optcount[optlev])
- optlev++;
- bi->count=optcount[optlev]-1;
- }
- current_block_pc_p=(uintptr)pc_hist[0].location;
-
- remove_deps(bi); /* We are about to create new code */
- bi->optlevel=optlev;
- bi->pc_p=(uae_u8*)pc_hist[0].location;
-#if USE_CHECKSUM_INFO
- free_checksum_info_chain(bi->csi);
- bi->csi = NULL;
-#endif
-
- /* Walk the trace backwards to find which flags are live before
- each instruction. */
- liveflags[blocklen]=0x1f; /* All flags needed afterwards */
- i=blocklen;
- while (i--) {
- uae_u16* currpcp=pc_hist[i].location;
- uae_u32 op=DO_GET_OPCODE(currpcp);
-
-#if USE_CHECKSUM_INFO
- trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
- if (follow_const_jumps && is_const_jump(op)) {
- checksum_info *csi = alloc_checksum_info();
- csi->start_p = (uae_u8 *)min_pcp;
- csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
- csi->next = bi->csi;
- bi->csi = csi;
- max_pcp = (uintptr)currpcp;
- }
- min_pcp = (uintptr)currpcp;
-#else
- if ((uintptr)currpcp<min_pcp)
- min_pcp=(uintptr)currpcp;
- if ((uintptr)currpcp>max_pcp)
- max_pcp=(uintptr)currpcp;
-#endif
-
- liveflags[i]=((liveflags[i+1]&
- (~prop[op].set_flags))|
- prop[op].use_flags);
- if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
- liveflags[i]&= ~FLAG_Z;
- }
-
-#if USE_CHECKSUM_INFO
- checksum_info *csi = alloc_checksum_info();
- csi->start_p = (uae_u8 *)min_pcp;
- csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
- csi->next = bi->csi;
- bi->csi = csi;
-#endif
-
- bi->needed_flags=liveflags[0];
-
- align_target(align_loops);
- was_comp=0;
-
- bi->direct_handler=(cpuop_func *)get_target();
- set_dhtu(bi,bi->direct_handler);
- bi->status=BI_COMPILING;
- current_block_start_target=(uintptr)get_target();
-
- log_startblock();
-
- if (bi->count>=0) { /* Need to generate countdown code */
- raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
- raw_sub_l_mi((uintptr)&(bi->count),1);
- raw_jl((uintptr)popall_recompile_block);
- }
- if (optlev==0) { /* No need to actually translate */
- /* Execute normally without keeping stats */
- raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
- raw_jmp((uintptr)popall_exec_nostats);
- }
- else {
- reg_alloc_run=0;
- next_pc_p=0;
- taken_pc_p=0;
- branch_cc=0;
-
- comp_pc_p=(uae_u8*)pc_hist[0].location;
- init_comp();
- was_comp=1;
-
-#ifdef USE_CPU_EMUL_SERVICES
- raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
- raw_jcc_b_oponly(NATIVE_CC_GT);
- uae_s8 *branchadd=(uae_s8*)get_target();
- emit_byte(0);
- raw_call((uintptr)cpu_do_check_ticks);
- *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
-#endif
-
-#if JIT_DEBUG
- if (JITDebug) {
- raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
- raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
- }
-#endif
-
- for (i=0;i<blocklen &&
- get_target_noopt()<max_compile_start;i++) {
- cpuop_func **cputbl;
- compop_func **comptbl;
- uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
- needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
- if (!needed_flags) {
- cputbl=nfcpufunctbl;
- comptbl=nfcompfunctbl;
- }
- else {
- cputbl=cpufunctbl;
- comptbl=compfunctbl;
- }
-
-#if FLIGHT_RECORDER
- {
- mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
- clobber_flags();
- remove_all_offsets();
- int arg = readreg_specific(S1,4,REG_PAR1);
- prepare_for_call_1();
- unlock2(arg);
- prepare_for_call_2();
- raw_call((uintptr)m68k_record_step);
- }
-#endif
-
- failure = 1; // gb-- defaults to failure state
- if (comptbl[opcode] && optlev>1) {
- failure=0;
- if (!was_comp) {
- comp_pc_p=(uae_u8*)pc_hist[i].location;
- init_comp();
- }
- was_comp=1;
-
- comptbl[opcode](opcode);
- freescratch();
- if (!(liveflags[i+1] & FLAG_CZNV)) {
- /* We can forget about flags */
- dont_care_flags();
- }
-#if INDIVIDUAL_INST
- flush(1);
- nop();
- flush(1);
- was_comp=0;
-#endif
- }
-
- /* No compiled version was used: call the interpreter routine. */
- if (failure) {
- if (was_comp) {
- flush(1);
- was_comp=0;
- }
- raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
-#if USE_NORMAL_CALLING_CONVENTION
- raw_push_l_r(REG_PAR1);
-#endif
- raw_mov_l_mi((uintptr)&regs.pc_p,
- (uintptr)pc_hist[i].location);
- raw_call((uintptr)cputbl[opcode]);
-#if PROFILE_UNTRANSLATED_INSNS
- // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
- raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
-#endif
-#if USE_NORMAL_CALLING_CONVENTION
- raw_inc_sp(4);
-#endif
-
- if (i < blocklen - 1) {
- uae_s8* branchadd;
-
- raw_mov_l_rm(0,(uintptr)specflags);
- raw_test_l_rr(0,0);
- raw_jz_b_oponly();
- branchadd=(uae_s8 *)get_target();
- emit_byte(0);
- raw_jmp((uintptr)popall_do_nothing);
- *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
- }
- }
- }
-#if 1 /* This isn't completely kosher yet; It really needs to be
- integrated into a general inter-block-dependency scheme */
- if (next_pc_p && taken_pc_p &&
- was_comp && taken_pc_p==current_block_pc_p) {
- blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
- blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
- uae_u8 x=bi1->needed_flags;
-
- if (x==0xff || 1) { /* To be on the safe side */
- uae_u16* next=(uae_u16*)next_pc_p;
- uae_u32 op=DO_GET_OPCODE(next);
-
- x=0x1f;
- x&=(~prop[op].set_flags);
- x|=prop[op].use_flags;
- }
-
- x|=bi2->needed_flags;
- if (!(x & FLAG_CZNV)) {
- /* We can forget about flags */
- dont_care_flags();
- extra_len+=2; /* The next instruction now is part of this
- block */
- }
-
- }
-#endif
- log_flush();
-
- if (next_pc_p) { /* A branch was registered */
- uintptr t1=next_pc_p;
- uintptr t2=taken_pc_p;
- int cc=branch_cc;
-
- uae_u32* branchadd;
- uae_u32* tba;
- bigstate tmp;
- blockinfo* tbi;
-
- if (taken_pc_p<next_pc_p) {
- /* backward branch. Optimize for the "taken" case ---
- which means the raw_jcc should fall through when
- the 68k branch is taken. */
- t1=taken_pc_p;
- t2=next_pc_p;
- cc=branch_cc^1;
- }
-
- tmp=live; /* ouch! This is big... */
- raw_jcc_l_oponly(cc);
- branchadd=(uae_u32*)get_target();
- emit_long(0);
-
- /* predicted outcome */
- tbi=get_blockinfo_addr_new((void*)t1,1);
- match_states(tbi);
- raw_cmp_l_mi((uintptr)specflags,0);
- raw_jcc_l_oponly(4);
- tba=(uae_u32*)get_target();
- emit_long(get_handler(t1)-((uintptr)tba+4));
- raw_mov_l_mi((uintptr)&regs.pc_p,t1);
- flush_reg_count();
- raw_jmp((uintptr)popall_do_nothing);
- create_jmpdep(bi,0,tba,t1);
-
- align_target(align_jumps);
- /* not-predicted outcome */
- *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
- live=tmp; /* Ouch again */
- tbi=get_blockinfo_addr_new((void*)t2,1);
- match_states(tbi);
-
- //flush(1); /* Can only get here if was_comp==1 */
- raw_cmp_l_mi((uintptr)specflags,0);
- raw_jcc_l_oponly(4);
- tba=(uae_u32*)get_target();
- emit_long(get_handler(t2)-((uintptr)tba+4));
- raw_mov_l_mi((uintptr)&regs.pc_p,t2);
- flush_reg_count();
- raw_jmp((uintptr)popall_do_nothing);
- create_jmpdep(bi,1,tba,t2);
- }
- else
- {
- if (was_comp) {
- flush(1);
- }
- flush_reg_count();
-
- /* Let's find out where next_handler is... */
- if (was_comp && isinreg(PC_P)) {
- r=live.state[PC_P].realreg;
- raw_and_l_ri(r,TAGMASK);
- int r2 = (r==0) ? 1 : 0;
- raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
- raw_cmp_l_mi((uintptr)specflags,0);
- raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
- raw_jmp_r(r2);
- }
- else if (was_comp && isconst(PC_P)) {
- uae_u32 v=live.state[PC_P].val;
- uae_u32* tba;
- blockinfo* tbi;
-
- tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
- match_states(tbi);
-
- raw_cmp_l_mi((uintptr)specflags,0);
- raw_jcc_l_oponly(4);
- tba=(uae_u32*)get_target();
- emit_long(get_handler(v)-((uintptr)tba+4));
- raw_mov_l_mi((uintptr)&regs.pc_p,v);
- raw_jmp((uintptr)popall_do_nothing);
- create_jmpdep(bi,0,tba,v);
- }
- else {
- r=REG_PC_TMP;
- raw_mov_l_rm(r,(uintptr)&regs.pc_p);
- raw_and_l_ri(r,TAGMASK);
- int r2 = (r==0) ? 1 : 0;
- raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
- raw_cmp_l_mi((uintptr)specflags,0);
- raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
- raw_jmp_r(r2);
- }
- }
- }
-
-#if USE_MATCH
- if (callers_need_recompile(&live,&(bi->env))) {
- mark_callers_recompile(bi);
- }
-
- big_to_small_state(&live,&(bi->env));
-#endif
-
-#if USE_CHECKSUM_INFO
- remove_from_list(bi);
- if (trace_in_rom) {
- // No need to checksum that block trace on cache invalidation
- free_checksum_info_chain(bi->csi);
- bi->csi = NULL;
- add_to_dormant(bi);
- }
- else {
- calc_checksum(bi,&(bi->c1),&(bi->c2));
- add_to_active(bi);
- }
-#else
- if (next_pc_p+extra_len>=max_pcp &&
- next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
- max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
- else
- max_pcp+=LONGEST_68K_INST;
-
- bi->len=max_pcp-min_pcp;
- bi->min_pcp=min_pcp;
-
- remove_from_list(bi);
- if (isinrom(min_pcp) && isinrom(max_pcp)) {
- add_to_dormant(bi); /* No need to checksum it on cache flush.
- Please don't start changing ROMs in
- flight! */
- }
- else {
- calc_checksum(bi,&(bi->c1),&(bi->c2));
- add_to_active(bi);
- }
-#endif
-
- current_cache_size += get_target() - (uae_u8 *)current_compile_p;
-
-#if JIT_DEBUG
- if (JITDebug)
- bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
-
- if (JITDebug && disasm_block) {
- uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
- D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
- uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
- disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
- D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
- disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
- getchar();
- }
-#endif
-
- log_dump();
- align_target(align_jumps);
-
- /* This is the non-direct handler */
- bi->handler=
- bi->handler_to_use=(cpuop_func *)get_target();
- raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
- raw_jnz((uintptr)popall_cache_miss);
- comp_pc_p=(uae_u8*)pc_hist[0].location;
-
- bi->status=BI_FINALIZING;
- init_comp();
- match_states(bi);
- flush(1);
-
- raw_jmp((uintptr)bi->direct_handler);
-
- current_compile_p=get_target();
- raise_in_cl_list(bi);
-
- /* We will flush soon, anyway, so let's do it now */
- if (current_compile_p>=max_compile_start)
- flush_icache_hard(7);
-
- bi->status=BI_ACTIVE;
- if (redo_current_block)
- block_need_recompile(bi);
-
-#if PROFILE_COMPILE_TIME
- compile_time += (clock() - start_time);
-#endif
- }
-
- /* Account for compilation time */
- cpu_do_check_ticks();
-}
-
-void do_nothing(void) /* Deliberately empty function. */
-{
- /* What did you expect this to do? */
-}
-
-/*
- * Interpret instructions one at a time, without recording a trace, until
- * an instruction ends the block or any special flag is raised.  The
- * caller is left to deal with the special flags.
- */
-void exec_nostats(void)
-{
-	uae_u32 opcode;
-
-	do {
-		opcode = GET_OPCODE;
-#if FLIGHT_RECORDER
-		m68k_record_step(m68k_getpc());
-#endif
-		cpufunctbl[opcode](opcode);
-		cpu_check_ticks();
-	} while (!(end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)));
-}
-
-/*
- * Interpret instructions while recording their addresses, then hand the
- * recorded trace to compile_block().  Recording stops when an instruction
- * ends the block, a special flag is raised or MAXRUN entries have been
- * collected.  Nothing is done when check_for_cache_miss() reports true.
- * The caller is left to deal with the special flags.
- */
-void execute_normal(void)
-{
-	if (check_for_cache_miss())
-		return;
-
-	cpu_history trace[MAXRUN];
-	int count = 0;
-
-#if REAL_ADDRESSING || DIRECT_ADDRESSING
-	start_pc_p = regs.pc_p;
-	start_pc = get_virtual_address(regs.pc_p);
-#else
-	start_pc_p = regs.pc_oldp;
-	start_pc = regs.pc;
-#endif
-
-	/* Take note: This is the do-it-normal loop */
-	for (;;) {
-		trace[count].location = (uae_u16 *)regs.pc_p;
-		count++;
-
-		uae_u32 opcode = GET_OPCODE;
-#if FLIGHT_RECORDER
-		m68k_record_step(m68k_getpc());
-#endif
-		cpufunctbl[opcode](opcode);
-		cpu_check_ticks();
-
-		/* regs.spcflags needs no separate check: if it were set, this
-		   test would already have ended the loop */
-		if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || count>=MAXRUN)
-			break;
-	}
-
-	compile_block(trace, count);
-}
-
-/* Signature used to call into generated code. */
-typedef void (*compiled_handler)(void);
-
-/*
- * Keep entering generated code through pushall_call_handler.  Each time
- * it returns with a special flag pending, m68k_do_specialties() is run;
- * the loop ends as soon as that call returns non-zero.
- */
-static void m68k_do_compile_execute(void)
-{
-	do {
-		((compiled_handler)(pushall_call_handler))();
-		/* Whenever we return from that, we should check spcflags */
-	} while (!(SPCFLAGS_TEST(SPCFLAG_ALL) && m68k_do_specialties ()));
-}
-
-/* Run m68k_do_compile_execute() repeatedly until quit_program is set. */
-void m68k_compile_execute (void)
-{
-	while (!quit_program)
-		m68k_do_compile_execute();
-}
+++ /dev/null
-/*
- * compiler/flags_x86.h - Native flags definitions for IA-32
- *
- * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
- *
- * Adaptation for Basilisk II and improvements, copyright 2000-2005
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2008 Christian Bauer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef NATIVE_FLAGS_X86_H
-#define NATIVE_FLAGS_X86_H
-
-/*
- * Native integer code conditions, listed in ascending numeric order.
- * Each named pair (CS/CC, EQ/NE, ...) occupies two adjacent values that
- * differ only in bit 0.
- */
-enum {
-	NATIVE_CC_CS = 2,
-	NATIVE_CC_CC = 3,
-	NATIVE_CC_EQ = 4,
-	NATIVE_CC_NE = 5,
-	NATIVE_CC_LS = 6,
-	NATIVE_CC_HI = 7,
-	NATIVE_CC_MI = 8,
-	NATIVE_CC_PL = 9,
-	NATIVE_CC_VS = 10,
-	NATIVE_CC_VC = 11,
-	NATIVE_CC_LT = 12,
-	NATIVE_CC_GE = 13,
-	NATIVE_CC_LE = 14,
-	NATIVE_CC_GT = 15
-};
-
-#endif /* NATIVE_FLAGS_X86_H */
+++ /dev/null
-/*
- * compiler/gencomp.c - MC680x0 compilation generator
- *
- * Based on work Copyright 1995, 1996 Bernd Schmidt
- * Changes for UAE-JIT Copyright 2000 Bernd Meyer
- *
- * Adaptation for Basilisk II and improvements, copyright 2000-2005
- * Gwenole Beauchesne
- *
- * Basilisk II (C) 1997-2005 Christian Bauer
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <ctype.h>
-#include "sysdeps.h"
-#include "readcpu.h"
-
-#define BOOL_TYPE "int"
-#define failure global_failure=1 /* The macros below are written as bare statements in the generator; each one sets the matching global_* flag declared further down. */
-#define FAILURE global_failure=1
-#define isjump global_isjump=1
-#define is_const_jump global_iscjump=1; /* NOTE(review): the only macro here with a trailing ';' - looks unintentional; confirm against its use sites before removing. */
-#define isaddx global_isaddx=1
-#define uses_cmov global_cmov=1
-#define mayfail global_mayfail=1
-#define uses_fpu global_fpu=1
-
-int hack_opcode;
-
-static int global_failure; /* Flags set through the macros above. */
-static int global_isjump;
-static int global_iscjump;
-static int global_isaddx;
-static int global_cmov;
-static int long_opcode; /* Set to 1 by the gen_nexti*() helpers. */
-static int global_mayfail;
-static int global_fpu;
-
-static char endstr[1000];
-static char lines[100000]; /* Pending generated text, filled by comprintf(). */
-static int comp_index=0; /* Number of characters currently held in lines[]. */
-
-static int cond_codes_x86[]={-1,-1,7,6,3,2,5,4,-1,-1,9,8,13,12,15,14}; /* 16-entry lookup table; -1 marks entries without a code. Presumably indexed by 68k condition number - TODO confirm. */
-
-/*
- * printf-style append to the pending output buffer 'lines'.  The text
- * stays there until com_flush() prints it or com_discard() drops it.
- * Aborts if formatting fails or the text does not fit into what is left
- * of the buffer, instead of writing past its end.
- */
-static void comprintf(const char* format, ...)
-{
-	va_list args;
-	size_t room = sizeof(lines) - (size_t)comp_index;
-	int written;
-
-	va_start(args,format);
-	written = vsnprintf(lines+comp_index,room,format,args);
-	va_end(args);	/* every va_start needs a matching va_end */
-
-	/* A result >= room means the output was truncated. */
-	if (written < 0 || (size_t)written >= room)
-		abort();
-	comp_index+=written;
-}
-
-static void com_discard(void) /* Drop the pending text by resetting the fill index of lines[] to 0. */
-{
- comp_index=0;
-}
-
-/* Write the pending text to standard output, then empty the buffer. */
-static void com_flush(void)
-{
-	const char *cursor = lines;
-	const char *const stop = lines + comp_index;
-
-	while (cursor < stop)
-		putchar(*cursor++);
-	com_discard();
-}
-
-
-static FILE *headerfile; /* Generator output streams and settings; their use lies outside this part of the file. */
-static FILE *stblfile;
-
-static int using_prefetch;
-static int using_exception_3;
-static int cpu_level;
-static int noflags; /* When non-zero, genflags() takes its flag-free code path. */
-
-/* For the current opcode, the next lower level that will have different code.
- * Initialized to -1 for each opcode. If it remains unchanged, indicates we
- * are done with that opcode. */
-static int next_cpu_level;
-
-static int *opcode_map; /* Filled by read_counts(): slot number -> opcode. */
-static int *opcode_next_clev; /* read_counts() sets every used slot to 4. */
-static int *opcode_last_postfix; /* read_counts() sets every used slot to -1. */
-static unsigned long *counts; /* Per-opcode counts; read_counts() clears 65536 entries. */
-
-/*
- * Fill opcode_map[] with every opcode to generate code for and record
- * per-opcode frequencies in counts[].
- *
- * Opcodes listed in the optional profile file "frequent.68k" come first,
- * in file order; every remaining opcode whose table68k entry has
- * handler == -1 and is not i_ILLG is appended afterwards.  Aborts when a
- * profile line names an opcode outside the 16-bit range, or when the
- * final number of entries differs from nr_cpuop_funcs.
- */
-static void
-read_counts (void)
-{
-	FILE *file;
-	/* count starts at 0 so the fill loop below never reads an
-	 * indeterminate value when the profile is missing or empty. */
-	unsigned long opcode, count = 0, total;
-	char name[20];
-	int nr = 0;
-	memset (counts, 0, 65536 * sizeof *counts);
-
-	file = fopen ("frequent.68k", "r");
-	if (file)
-	{
-		fscanf (file, "Total: %lu\n", &total);
-		/* %19s keeps the mnemonic within name[20]. */
-		while (fscanf (file, "%lx: %lu %19s\n", &opcode, &count, name) == 3)
-		{
-			/* counts[] holds 65536 entries; a larger value would
-			 * index past its end. */
-			if (opcode >= 0x10000)
-				abort ();
-			opcode_next_clev[nr] = 4;
-			opcode_last_postfix[nr] = -1;
-			opcode_map[nr++] = opcode;
-			counts[opcode] = count;
-		}
-		fclose (file);
-	}
-	if (nr == nr_cpuop_funcs)
-		return;
-	for (opcode = 0; opcode < 0x10000; opcode++)
-	{
-		if (table68k[opcode].handler == -1 && table68k[opcode].mnemo != i_ILLG
-		    && counts[opcode] == 0)
-		{
-			opcode_next_clev[nr] = 4;
-			opcode_last_postfix[nr] = -1;
-			opcode_map[nr++] = opcode;
-			/* count still holds the last value read from the
-			 * profile, or 0 if nothing was read. */
-			counts[opcode] = count;
-		}
-	}
-	if (nr != nr_cpuop_funcs)
-		abort ();
-}
-
-static int n_braces = 0; /* Number of '{' emitted by start_brace() and not yet closed. */
-static int insn_n_cycles; /* Running total that the gen_* helpers in this file add to. */
-
-static void
-start_brace (void) /* Emit "{" into the generated code and count it as open. */
-{
- n_braces++;
- comprintf ("{");
-}
-
-static void
-close_brace (void) /* Emit "}" for the most recently opened brace; asserts that at least one brace is open. */
-{
- assert (n_braces > 0);
- n_braces--;
- comprintf ("}");
-}
-
-static void
-finish_braces (void) /* Close every brace that is still open. */
-{
- while (n_braces > 0)
- close_brace ();
-}
-
-static void
-pop_braces (int to) /* Close braces until only 'to' of them remain open. */
-{
- while (n_braces > to)
- close_brace ();
-}
-
-/* Width in bits of an operand size (8, 16 or 32); aborts on any other
- * size value. */
-static int
-bit_size (int size)
-{
-	if (size == sz_byte)
-		return 8;
-	if (size == sz_word)
-		return 16;
-	if (size == sz_long)
-		return 32;
-
-	abort ();
-	return 0;
-}
-
-/* C source text of the all-ones mask for an operand size; aborts on any
- * size other than byte, word or long. */
-static const char *
-bit_mask (int size)
-{
-	if (size == sz_byte)
-		return "0xff";
-	if (size == sz_word)
-		return "0xffff";
-	if (size == sz_long)
-		return "0xffffffff";
-
-	abort ();
-	return 0;
-}
-
-static __inline__ void gen_update_next_handler(void) /* Placeholder: currently emits nothing. */
-{
- return; /* Can anything clever be done here? */
-}
-
-static void gen_writebyte(char* address, char* source) /* Emit a writebyte(address,source,scratchie) call into the generated code. */
-{
- comprintf("\twritebyte(%s,%s,scratchie);\n",address,source);
-}
-
-static void gen_writeword(char* address, char* source) /* Emit a writeword(address,source,scratchie) call into the generated code. */
-{
- comprintf("\twriteword(%s,%s,scratchie);\n",address,source);
-}
-
-static void gen_writelong(char* address, char* source) /* Emit a writelong(address,source,scratchie) call into the generated code. */
-{
- comprintf("\twritelong(%s,%s,scratchie);\n",address,source);
-}
-
-static void gen_readbyte(char* address, char* dest) /* Emit a readbyte(address,dest,scratchie) call into the generated code. */
-{
- comprintf("\treadbyte(%s,%s,scratchie);\n",address,dest);
-}
-
-static void gen_readword(char* address, char* dest) /* Emit a readword(address,dest,scratchie) call into the generated code. */
-{
- comprintf("\treadword(%s,%s,scratchie);\n",address,dest);
-}
-
-static void gen_readlong(char* address, char* dest) /* Emit a readlong(address,dest,scratchie) call into the generated code. */
-{
- comprintf("\treadlong(%s,%s,scratchie);\n",address,dest);
-}
-
-
-
-/*
- * Return the expression text that fetches the next 32-bit extension value
- * in generated code and advances m68k_pc_offset by 4.  Adds 4 to
- * insn_n_cycles and sets long_opcode.  The result lives in a static
- * buffer that the next call overwrites.
- */
-static const char *
-gen_nextilong (void)
-{
-	static char buffer[80];
-
-	long_opcode=1;
-	insn_n_cycles += 4;
-	strcpy (buffer, "comp_get_ilong((m68k_pc_offset+=4)-4)");
-	return buffer;
-}
-
-/*
- * Return the expression text that fetches the next 16-bit extension word
- * in generated code and advances m68k_pc_offset by 2.  Adds 2 to
- * insn_n_cycles and sets long_opcode.  The result lives in a static
- * buffer that the next call overwrites.
- */
-static const char *
-gen_nextiword (void)
-{
-	static char buffer[80];
-
-	long_opcode=1;
-	insn_n_cycles += 2;
-	strcpy (buffer, "comp_get_iword((m68k_pc_offset+=2)-2)");
-	return buffer;
-}
-
-/*
- * Return the expression text that fetches the next immediate byte in
- * generated code; m68k_pc_offset still advances by 2.  Adds 2 to
- * insn_n_cycles and sets long_opcode.  The result lives in a static
- * buffer that the next call overwrites.
- */
-static const char *
-gen_nextibyte (void)
-{
-	static char buffer[80];
-
-	long_opcode=1;
-	insn_n_cycles += 2;
-	strcpy (buffer, "comp_get_ibyte((m68k_pc_offset+=2)-2)");
-	return buffer;
-}
-
-/* Emit code that byte-swaps 'opcode', guarded in the generated source by
- * HAVE_GET_WORD_UNSWAPPED. */
-static void
-swap_opcode (void)
-{
-	comprintf("#ifdef HAVE_GET_WORD_UNSWAPPED\n"
-		"\topcode = do_byteswap_16(opcode);\n"
-		"#endif\n");
-}
-
-static void
-sync_m68k_pc (void) /* Emit a check that calls sync_m68k_pc() in generated code once m68k_pc_offset exceeds 100. */
-{
- comprintf("\t if (m68k_pc_offset>100) sync_m68k_pc();\n");
-}
-
-
-/*
- * Emit the code that sets up operand 'name' for addressing mode 'mode'
- * on register expression 'reg' with operand size 'size'.
- *
- * For memory modes the effective address ends up in a generated variable
- * called "<name>a"; when getv == 1 the operand is also read into a
- * variable called "<name>".  Register and immediate modes declare
- * "<name>" directly and return early.  Every call opens at least one
- * brace through start_brace(); closing them is left to the caller.
- *
- * getv == 1: fetch data; getv != 0: check for odd address. If movem != 0,
- * the calling routine handles Apdi and Aipi modes.
- * gb-- movem == 2 means the same thing but for a MOVE16 instruction */
-static void
-genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem)
-{
- start_brace ();
- switch (mode)
- {
- case Dreg: /* Do we need to check dodgy here? */
- if (movem)
- abort ();
- if (getv == 1 || getv==2) {
- /* We generate the variable even for getv==2, so we can use
- it as a destination for MOVE */
- comprintf ("\tint %s=%s;\n",name,reg);
- }
- return;
-
- case Areg:
- if (movem)
- abort ();
- if (getv == 1 || getv==2) {
- /* see above */
- comprintf ("\tint %s=dodgy?scratchie++:%s+8;\n",name,reg);
- if (getv==1) {
- comprintf ("\tif (dodgy) \n");
- comprintf ("\t\tmov_l_rr(%s,%s+8);\n",name, reg);
- }
- }
- return;
-
- case Aind:
- comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
- comprintf ("\tif (dodgy) \n");
- comprintf ("\t\tmov_l_rr(%sa,%s+8);\n",name, reg);
- break;
- case Aipi:
- /* The format takes the name only. */
- comprintf ("\tint %sa=scratchie++;\n",name);
- comprintf ("\tmov_l_rr(%sa,%s+8);\n",name, reg);
- break;
- case Apdi:
- switch (size)
- {
- case sz_byte:
- if (movem) {
- comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
- comprintf ("\tif (dodgy) \n");
- comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
- }
- else {
- start_brace();
- comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
- comprintf("\tlea_l_brr(%s+8,%s+8,(uae_s32)-areg_byteinc[%s]);\n",reg,reg,reg);
- comprintf ("\tif (dodgy) \n");
- comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
- }
- break;
- case sz_word:
- if (movem) {
- comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
- comprintf ("\tif (dodgy) \n");
- comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
- }
- else {
- start_brace();
- comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
- comprintf("\tlea_l_brr(%s+8,%s+8,-2);\n",reg,reg);
- comprintf ("\tif (dodgy) \n");
- comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
- }
- break;
- case sz_long:
- if (movem) {
- comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
- comprintf ("\tif (dodgy) \n");
- comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
- }
- else {
- start_brace();
- comprintf ("\tint %sa=dodgy?scratchie++:%s+8;\n",name,reg);
- comprintf("\tlea_l_brr(%s+8,%s+8,-4);\n",reg,reg);
- comprintf ("\tif (dodgy) \n");
- comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
- }
- break;
- default:
- abort ();
- }
- break;
- case Ad16:
- comprintf("\tint %sa=scratchie++;\n",name);
- comprintf("\tmov_l_rr(%sa,8+%s);\n",name,reg);
- comprintf("\tlea_l_brr(%sa,%sa,(uae_s32)(uae_s16)%s);\n",name,name,gen_nextiword());
- break;
- case Ad8r:
- comprintf("\tint %sa=scratchie++;\n",name);
- comprintf("\tcalc_disp_ea_020(%s+8,%s,%sa,scratchie);\n",
- reg,gen_nextiword(),name);
- break;
-
- case PC16:
- comprintf("\tint %sa=scratchie++;\n",name);
- comprintf("\tuae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
- comprintf ("\tuae_s32 PC16off = (uae_s32)(uae_s16)%s;\n", gen_nextiword ());
- comprintf("\tmov_l_ri(%sa,address+PC16off);\n",name);
- break;
-
- case PC8r:
- comprintf("\tint pctmp=scratchie++;\n");
- comprintf("\tint %sa=scratchie++;\n",name);
- comprintf("\tuae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
- start_brace();
- comprintf("\tmov_l_ri(pctmp,address);\n");
-
- comprintf("\tcalc_disp_ea_020(pctmp,%s,%sa,scratchie);\n",
- gen_nextiword(),name);
- break;
- case absw:
- comprintf ("\tint %sa = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%sa,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ());
- break;
- case absl:
- comprintf ("\tint %sa = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%sa,%s); /* absl */\n", name, gen_nextilong ());
- break;
- case imm:
- if (getv != 1)
- abort ();
- switch (size)
- {
- case sz_byte:
- comprintf ("\tint %s = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s8)%s);\n", name, gen_nextibyte ());
- break;
- case sz_word:
- comprintf ("\tint %s = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ());
- break;
- case sz_long:
- comprintf ("\tint %s = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%s,%s);\n", name, gen_nextilong ());
- break;
- default:
- abort ();
- }
- return;
- case imm0:
- if (getv != 1)
- abort ();
- comprintf ("\tint %s = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s8)%s);\n", name, gen_nextibyte ());
- return;
- case imm1:
- if (getv != 1)
- abort ();
- comprintf ("\tint %s = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%s,(uae_s32)(uae_s16)%s);\n", name, gen_nextiword ());
- return;
- case imm2:
- if (getv != 1)
- abort ();
- comprintf ("\tint %s = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%s,%s);\n", name, gen_nextilong ());
- return;
- case immi:
- if (getv != 1)
- abort ();
- comprintf ("\tint %s = scratchie++;\n",name);
- comprintf ("\tmov_l_ri(%s,%s);\n", name, reg);
- return;
- default:
- abort ();
- }
-
- /* We get here for all non-reg non-immediate addressing modes to
- * actually fetch the value. */
- if (getv == 1)
- {
- char astring[80];
- sprintf(astring,"%sa",name);
- switch (size)
- {
- case sz_byte:
- insn_n_cycles += 2;
- break;
- case sz_word:
- insn_n_cycles += 2;
- break;
- case sz_long:
- insn_n_cycles += 4;
- break;
- default:
- abort ();
- }
- start_brace ();
- comprintf("\tint %s=scratchie++;\n",name);
- switch (size)
- {
- case sz_byte:
- gen_readbyte(astring,name);
- break;
- case sz_word:
- gen_readword(astring,name);
- break;
- case sz_long:
- gen_readlong(astring,name);
- break;
- default:
- abort ();
- }
- }
-
- /* We now might have to fix up the register for pre-dec or post-inc
- * addressing modes. */
- if (!movem) {
- switch (mode)
- {
- case Aipi:
- switch (size)
- {
- case sz_byte:
- comprintf("\tlea_l_brr(%s+8,%s+8,areg_byteinc[%s]);\n",reg,reg,reg);
- break;
- case sz_word:
- /* Two %s conversions, two arguments. */
- comprintf("\tlea_l_brr(%s+8,%s+8,2);\n",reg,reg);
- break;
- case sz_long:
- comprintf("\tlea_l_brr(%s+8,%s+8,4);\n",reg,reg);
- break;
- default:
- abort ();
- }
- break;
- case Apdi:
- break;
- default:
- break;
- }
- }
-}
-
-/*
- * Emit the code that stores generated variable 'from' into the operand
- * described by 'mode', 'reg' and 'size'.
- *
- * Register modes emit a move that is skipped when source and destination
- * are the same; memory modes emit a write through the address variable
- * "<to>a" and add 2 (byte, word) or 4 (long) to insn_n_cycles.
- * Aborts on an unknown size, a byte store to an address register, an
- * immediate destination or an unknown mode.
- */
-static void
-genastore (char *from, amodes mode, char *reg, wordsizes size, char *to)
-{
-	const char *dreg_mov;
-	const char *areg_mov;	/* NULL: no address-register store at this size */
-	void (*emit_store) (char *, char *);
-	int cycles;
-	char astring[80];
-
-	/* Decode the size once; every mode aborts on an unknown one. */
-	switch (size)
-	{
-	case sz_byte:
-		dreg_mov = "\t\tmov_b_rr(%s,%s);\n";
-		areg_mov = NULL;
-		emit_store = gen_writebyte;
-		cycles = 2;
-		break;
-	case sz_word:
-		dreg_mov = "\t\tmov_w_rr(%s,%s);\n";
-		areg_mov = "\t\tmov_w_rr(%s+8,%s);\n";
-		emit_store = gen_writeword;
-		cycles = 2;
-		break;
-	case sz_long:
-		dreg_mov = "\t\tmov_l_rr(%s,%s);\n";
-		areg_mov = "\t\tmov_l_rr(%s+8,%s);\n";
-		emit_store = gen_writelong;
-		cycles = 4;
-		break;
-	default:
-		abort ();
-		return;
-	}
-
-	switch (mode)
-	{
-	case Dreg:
-		comprintf("\tif(%s!=%s)\n",reg,from);
-		comprintf (dreg_mov, reg, from);
-		break;
-
-	case Areg:
-		if (areg_mov == NULL)
-			abort ();
-		comprintf("\tif(%s+8!=%s)\n",reg,from);
-		comprintf (areg_mov, reg, from);
-		break;
-
-	case Apdi:
-	case absw:
-	case PC16:
-	case PC8r:
-	case Ad16:
-	case Ad8r:
-	case Aipi:
-	case Aind:
-	case absl:
-		sprintf(astring,"%sa",to);
-		insn_n_cycles += cycles;
-		emit_store (astring, from);
-		break;
-
-	default:
-		/* imm, imm0, imm1, imm2, immi and anything unknown */
-		abort ();
-	}
-}
-
-/*
- * Emit the code for a MOVE16 instruction: work out both addresses, align
- * them to 16 bytes, apply the post-increment that the opcode variant asks
- * for, then copy four longwords from source to destination.
- */
-static void genmov16(uae_u32 opcode, struct instr *curi)
-{
-	const uae_u32 variant = opcode & 0xfff8;
-
-	comprintf("\tint src=scratchie++;\n");
-	comprintf("\tint dst=scratchie++;\n");
-
-	if (variant == 0xf620) {
-		/* MOVE16 (Ax)+,(Ay)+ */
-		comprintf("\tuae_u16 dstreg=((%s)>>12)&0x07;\n", gen_nextiword());
-		comprintf("\tmov_l_rr(src,8+srcreg);\n");
-		comprintf("\tmov_l_rr(dst,8+dstreg);\n");
-	}
-	else {
-		/* Other variants */
-		genamode (curi->smode, "srcreg", curi->size, "src", 0, 2);
-		genamode (curi->dmode, "dstreg", curi->size, "dst", 0, 2);
-		comprintf("\tmov_l_rr(src,srca);\n");
-		comprintf("\tmov_l_rr(dst,dsta);\n");
-	}
-
-	/* Align on 16-byte boundaries */
-	comprintf("\tand_l_ri(src,~15);\n");
-	comprintf("\tand_l_ri(dst,~15);\n");
-
-	/* Post-increment whichever registers this variant updates */
-	switch (variant) {
-	case 0xf620:
-		comprintf("\tif (srcreg != dstreg)\n");
-		comprintf("\tadd_l_ri(srcreg+8,16);\n");
-		comprintf("\tadd_l_ri(dstreg+8,16);\n");
-		break;
-	case 0xf600:
-		comprintf("\tadd_l_ri(srcreg+8,16);\n");
-		break;
-	case 0xf608:
-		comprintf("\tadd_l_ri(dstreg+8,16);\n");
-		break;
-	default:
-		break;
-	}
-
-	comprintf("\tint tmp=scratchie;\n");
-	comprintf("\tscratchie+=4;\n");
-
-	comprintf("\tget_n_addr(src,src,scratchie);\n"
-		"\tget_n_addr(dst,dst,scratchie);\n"
-		"\tmov_l_rR(tmp+0,src,0);\n"
-		"\tmov_l_rR(tmp+1,src,4);\n"
-		"\tmov_l_rR(tmp+2,src,8);\n"
-		"\tmov_l_rR(tmp+3,src,12);\n"
-		"\tmov_l_Rr(dst,tmp+0,0);\n"
-		"\tforget_about(tmp+0);\n"
-		"\tmov_l_Rr(dst,tmp+1,4);\n"
-		"\tforget_about(tmp+1);\n"
-		"\tmov_l_Rr(dst,tmp+2,8);\n"
-		"\tforget_about(tmp+2);\n"
-		"\tmov_l_Rr(dst,tmp+3,12);\n");
-}
-
-/*
- * Emit the code for a memory-to-registers MOVEM: a loop over the 16 mask
- * bits that loads each selected register from consecutive memory,
- * byte-swapping it (and sign-extending in the word case).  For the
- * post-increment mode the address register is updated afterwards.
- * Aborts on sizes other than word and long.
- */
-static void
-genmovemel (uae_u16 opcode)
-{
-	const amodes dmode = table68k[opcode].dmode;
-	const wordsizes size = table68k[opcode].size;
-
-	comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ());
-	comprintf ("\tint native=scratchie++;\n");
-	comprintf ("\tint i;\n");
-	comprintf ("\tsigned char offset=0;\n");
-	genamode (dmode, "dstreg", size, "src", 2, 1);
-	comprintf("\tget_n_addr(srca,native,scratchie);\n");
-
-	comprintf("\tfor (i=0;i<16;i++) {\n"
-		"\t\tif ((mask>>i)&1) {\n");
-	if (size == sz_long) {
-		comprintf("\t\t\tmov_l_rR(i,native,offset);\n"
-			"\t\t\tbswap_32(i);\n"
-			"\t\t\toffset+=4;\n");
-	}
-	else if (size == sz_word) {
-		comprintf("\t\t\tmov_w_rR(i,native,offset);\n"
-			"\t\t\tbswap_16(i);\n"
-			"\t\t\tsign_extend_16_rr(i,i);\n"
-			"\t\t\toffset+=2;\n");
-	}
-	else {
-		abort();
-	}
-	comprintf("\t\t}\n"
-		"\t}");
-
-	if (dmode == Aipi)
-		comprintf("\t\t\tlea_l_brr(8+dstreg,srca,offset);\n");
-}
-
-
-/*
- * Emit the code for a registers-to-memory MOVEM: a loop over the 16 mask
- * bits that byte-swaps each selected register into a temporary and stores
- * it.  In pre-decrement mode the offset runs downwards, mask bit i selects
- * register 15-i, and the address register is updated afterwards.
- * Aborts on sizes other than word and long.
- */
-static void
-genmovemle (uae_u16 opcode)
-{
-	const amodes dmode = table68k[opcode].dmode;
-	const wordsizes size = table68k[opcode].size;
-	const int predec = (dmode == Apdi);
-
-	comprintf ("\tuae_u16 mask = %s;\n", gen_nextiword ());
-	comprintf ("\tint native=scratchie++;\n");
-	comprintf ("\tint i;\n");
-	comprintf ("\tint tmp=scratchie++;\n");
-	comprintf ("\tsigned char offset=0;\n");
-	genamode (dmode, "dstreg", size, "src", 2, 1);
-
-	comprintf("\tget_n_addr(srca,native,scratchie);\n");
-
-	/* The loop header is the same for both directions. */
-	comprintf("\tfor (i=0;i<16;i++) {\n"
-		"\t\tif ((mask>>i)&1) {\n");
-
-	if (!predec) {
-		if (size == sz_long) {
-			comprintf("\t\t\tmov_l_rr(tmp,i);\n"
-				"\t\t\tbswap_32(tmp);\n"
-				"\t\t\tmov_l_Rr(native,tmp,offset);\n"
-				"\t\t\toffset+=4;\n");
-		}
-		else if (size == sz_word) {
-			comprintf("\t\t\tmov_l_rr(tmp,i);\n"
-				"\t\t\tbswap_16(tmp);\n"
-				"\t\t\tmov_w_Rr(native,tmp,offset);\n"
-				"\t\t\toffset+=2;\n");
-		}
-		else {
-			abort();
-		}
-	}
-	else { /* Pre-decrement */
-		if (size == sz_long) {
-			comprintf("\t\t\toffset-=4;\n"
-				"\t\t\tmov_l_rr(tmp,15-i);\n"
-				"\t\t\tbswap_32(tmp);\n"
-				"\t\t\tmov_l_Rr(native,tmp,offset);\n"
-				);
-		}
-		else if (size == sz_word) {
-			comprintf("\t\t\toffset-=2;\n"
-				"\t\t\tmov_l_rr(tmp,15-i);\n"
-				"\t\t\tbswap_16(tmp);\n"
-				"\t\t\tmov_w_Rr(native,tmp,offset);\n"
-				);
-		}
-		else {
-			abort();
-		}
-	}
-
-	comprintf("\t\t}\n"
-		"\t}");
-	if (predec)
-		comprintf("\t\t\tlea_l_brr(8+dstreg,srca,(uae_s32)offset);\n");
-}
-
-/*
- * Emit one line of generated code that calls duplicate_carry() only when
- * FLAG_X is set in needed_flags.  The duplicate_carry() named inside the
- * emitted text is resolved where the generated code is compiled; it is
- * presumably a different function from this generator-side helper of the
- * same name.
- */
-static void
-duplicate_carry (void)
-{
- comprintf ("\tif (needed_flags&FLAG_X) duplicate_carry();\n");
-}
-/*
- * Operation selector passed as the first argument of genflags().
- * genflags() has explicit cases for flag_logical_noclobber, flag_logical,
- * flag_add, flag_sub, flag_cmp, flag_addx, flag_subx, flag_and, flag_or,
- * flag_eor and flag_mov; flag_zn, flag_av and flag_sv have no explicit case
- * there and are handled by its default branches.
- */
-typedef enum
-{
- flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp,
- flag_addx, flag_subx, flag_zn, flag_av, flag_sv, flag_and, flag_or,
- flag_eor, flag_mov
-}
-flagtypes;
-
-
-/*
- * genflags - emit the compiler-side code for one flag-affecting operation.
- *
- *   type   which operation to emit (see flagtypes)
- *   size   operand size: sz_byte, sz_word or sz_long
- *   value  operand name used by flag_logical (emitted as test(value,value))
- *   src    source operand name substituted into the emitted text
- *   dst    destination operand name substituted into the emitted text
- *
- * The function has two halves:
- *   1. When `noflags` (defined elsewhere) is set, a flag-less variant of the
- *      operation is emitted and the function returns.  Types without an
- *      explicit case in that half emit nothing.
- *   2. Otherwise the operation is emitted between start_needflags() and
- *      end_needflags() so that the flags it produces are kept.
- * Types that are not supported mark the opcode with `failure` and emit
- * nothing further.
- */
-static void
-genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst)
-{
-    if (noflags) {
-        /* Flags are not wanted: emit the bare operation only. */
-        switch(type) {
-        case flag_cmp:
-            /* A compare without flags has no effect at all. */
-            comprintf("\tdont_care_flags();\n");
-            comprintf("/* Weird --- CMP with noflags ;-) */\n");
-            return;
-        case flag_add:
-        case flag_sub:
-            comprintf("\tdont_care_flags();\n");
-            {
-                char* op;
-                switch(type) {
-                case flag_add: op="add"; break;
-                case flag_sub: op="sub"; break;
-                default: abort();
-                }
-                switch (size)
-                {
-                case sz_byte:
-                    comprintf("\t%s_b(%s,%s);\n",op,dst,src);
-                    break;
-                case sz_word:
-                    comprintf("\t%s_w(%s,%s);\n",op,dst,src);
-                    break;
-                case sz_long:
-                    comprintf("\t%s_l(%s,%s);\n",op,dst,src);
-                    break;
-                }
-                return;
-            }
-            break;
-
-        case flag_and:
-            comprintf("\tdont_care_flags();\n");
-            switch (size)
-            {
-            case sz_byte:
-                /* If kill_rodent() says so, do a 32-bit AND against the
-                   source widened with all-ones upper bits, which leaves the
-                   upper bits of the destination unchanged. */
-                comprintf("if (kill_rodent(%s)) {\n",dst);
-                comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src);
-                comprintf("\tor_l_ri(scratchie,0xffffff00);\n");
-                comprintf("\tand_l(%s,scratchie);\n",dst);
-                comprintf("\tforget_about(scratchie);\n");
-                comprintf("\t} else \n"
-                          "\tand_b(%s,%s);\n",dst,src);
-                break;
-            case sz_word:
-                comprintf("if (kill_rodent(%s)) {\n",dst);
-                comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src);
-                comprintf("\tor_l_ri(scratchie,0xffff0000);\n");
-                comprintf("\tand_l(%s,scratchie);\n",dst);
-                comprintf("\tforget_about(scratchie);\n");
-                comprintf("\t} else \n"
-                          "\tand_w(%s,%s);\n",dst,src);
-                break;
-            case sz_long:
-                comprintf("\tand_l(%s,%s);\n",dst,src);
-                break;
-            }
-            return;
-
-        case flag_mov:
-            comprintf("\tdont_care_flags();\n");
-            switch (size)
-            {
-            case sz_byte:
-                /* 32-bit variant: clear the low byte of the destination and
-                   OR in the zero-extended source. */
-                comprintf("if (kill_rodent(%s)) {\n",dst);
-                comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src);
-                comprintf("\tand_l_ri(%s,0xffffff00);\n",dst);
-                comprintf("\tor_l(%s,scratchie);\n",dst);
-                comprintf("\tforget_about(scratchie);\n");
-                comprintf("\t} else \n"
-                          "\tmov_b_rr(%s,%s);\n",dst,src);
-                break;
-            case sz_word:
-                comprintf("if (kill_rodent(%s)) {\n",dst);
-                comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src);
-                comprintf("\tand_l_ri(%s,0xffff0000);\n",dst);
-                comprintf("\tor_l(%s,scratchie);\n",dst);
-                comprintf("\tforget_about(scratchie);\n");
-                comprintf("\t} else \n"
-                          "\tmov_w_rr(%s,%s);\n",dst,src);
-                break;
-            case sz_long:
-                comprintf("\tmov_l_rr(%s,%s);\n",dst,src);
-                break;
-            }
-            return;
-
-        case flag_or:
-        case flag_eor:
-            comprintf("\tdont_care_flags();\n");
-            start_brace();
-            {
-                char* op;
-                switch(type) {
-                case flag_or: op="or"; break;
-                case flag_eor: op="xor"; break;
-                default: abort();
-                }
-                switch (size)
-                {
-                case sz_byte:
-                    /* OR/XOR with a zero-extended source leaves the upper
-                       bits of the destination unchanged. */
-                    comprintf("if (kill_rodent(%s)) {\n",dst);
-                    comprintf("\tzero_extend_8_rr(scratchie,%s);\n",src);
-                    comprintf("\t%s_l(%s,scratchie);\n",op,dst);
-                    comprintf("\tforget_about(scratchie);\n");
-                    comprintf("\t} else \n"
-                              "\t%s_b(%s,%s);\n",op,dst,src);
-                    break;
-                case sz_word:
-                    comprintf("if (kill_rodent(%s)) {\n",dst);
-                    comprintf("\tzero_extend_16_rr(scratchie,%s);\n",src);
-                    comprintf("\t%s_l(%s,scratchie);\n",op,dst);
-                    comprintf("\tforget_about(scratchie);\n");
-                    comprintf("\t} else \n"
-                              "\t%s_w(%s,%s);\n",op,dst,src);
-                    break;
-                case sz_long:
-                    comprintf("\t%s_l(%s,%s);\n",op,dst,src);
-                    break;
-                }
-                close_brace();
-                return;
-            }
-
-
-        case flag_addx:
-        case flag_subx:
-            comprintf("\tdont_care_flags();\n");
-            {
-                char* op;
-                switch(type) {
-                case flag_addx: op="adc"; break;
-                case flag_subx: op="sbb"; break;
-                default: abort();
-                }
-                comprintf("\trestore_carry();\n"); /* Reload the X flag into C */
-                switch (size)
-                {
-                case sz_byte:
-                    comprintf("\t%s_b(%s,%s);\n",op,dst,src);
-                    break;
-                case sz_word:
-                    comprintf("\t%s_w(%s,%s);\n",op,dst,src);
-                    break;
-                case sz_long:
-                    comprintf("\t%s_l(%s,%s);\n",op,dst,src);
-                    break;
-                }
-                return;
-            }
-            break;
-        default: return; /* nothing to emit when the flags are not wanted */
-        }
-    }
-
-    /* Need the flags, but possibly not all of them */
-    switch (type)
-    {
-    case flag_logical_noclobber:
-        /* Not supported: mark the failure and leave, exactly like the
-           default branch below.  Without the break control would run into
-           the and/or/eor case, whose inner switch abort()s for this type. */
-        failure;
-        break;
-
-    case flag_and:
-    case flag_or:
-    case flag_eor:
-        comprintf("\tdont_care_flags();\n");
-        start_brace();
-        {
-            char* op;
-            switch(type) {
-            case flag_and: op="and"; break;
-            case flag_or: op="or"; break;
-            case flag_eor: op="xor"; break;
-            default: abort();
-            }
-            switch (size)
-            {
-            case sz_byte:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_b(%s,%s);\n",op,dst,src);
-                break;
-            case sz_word:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_w(%s,%s);\n",op,dst,src);
-                break;
-            case sz_long:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_l(%s,%s);\n",op,dst,src);
-                break;
-            }
-            comprintf("\tlive_flags();\n");
-            comprintf("\tend_needflags();\n");
-            close_brace();
-            return;
-        }
-
-    case flag_mov:
-        comprintf("\tdont_care_flags();\n");
-        start_brace();
-        {
-            /* When src and dst differ, the move is emitted as "clear dst,
-               then OR src into it" so that the OR produces the flags.
-               NOTE(review): the emitted else branch (src == dst) contains
-               no start_needflags() before its test -- confirm that this is
-               intended. */
-            switch (size)
-            {
-            case sz_byte:
-                comprintf("\tif (%s!=%s) {\n",src,dst);
-                comprintf("\tmov_b_ri(%s,0);\n"
-                          "\tstart_needflags();\n",dst);
-                comprintf("\tor_b(%s,%s);\n",dst,src);
-                comprintf("\t} else {\n");
-                comprintf("\tmov_b_rr(%s,%s);\n",dst,src);
-                comprintf("\ttest_b_rr(%s,%s);\n",dst,dst);
-                comprintf("\t}\n");
-                break;
-            case sz_word:
-                comprintf("\tif (%s!=%s) {\n",src,dst);
-                comprintf("\tmov_w_ri(%s,0);\n"
-                          "\tstart_needflags();\n",dst);
-                comprintf("\tor_w(%s,%s);\n",dst,src);
-                comprintf("\t} else {\n");
-                comprintf("\tmov_w_rr(%s,%s);\n",dst,src);
-                comprintf("\ttest_w_rr(%s,%s);\n",dst,dst);
-                comprintf("\t}\n");
-                break;
-            case sz_long:
-                comprintf("\tif (%s!=%s) {\n",src,dst);
-                comprintf("\tmov_l_ri(%s,0);\n"
-                          "\tstart_needflags();\n",dst);
-                comprintf("\tor_l(%s,%s);\n",dst,src);
-                comprintf("\t} else {\n");
-                comprintf("\tmov_l_rr(%s,%s);\n",dst,src);
-                comprintf("\ttest_l_rr(%s,%s);\n",dst,dst);
-                comprintf("\t}\n");
-                break;
-            }
-            comprintf("\tlive_flags();\n");
-            comprintf("\tend_needflags();\n");
-            close_brace();
-            return;
-        }
-
-    case flag_logical:
-        /* Flags come from testing `value` against itself. */
-        comprintf("\tdont_care_flags();\n");
-        start_brace();
-        switch (size)
-        {
-        case sz_byte:
-            comprintf("\tstart_needflags();\n"
-                      "\ttest_b_rr(%s,%s);\n",value,value);
-            break;
-        case sz_word:
-            comprintf("\tstart_needflags();\n"
-                      "\ttest_w_rr(%s,%s);\n",value,value);
-            break;
-        case sz_long:
-            comprintf("\tstart_needflags();\n"
-                      "\ttest_l_rr(%s,%s);\n",value,value);
-            break;
-        }
-        comprintf("\tlive_flags();\n");
-        comprintf("\tend_needflags();\n");
-        close_brace();
-        return;
-
-
-    case flag_add:
-    case flag_sub:
-    case flag_cmp:
-        comprintf("\tdont_care_flags();\n");
-        {
-            char* op;
-            switch(type) {
-            case flag_add: op="add"; break;
-            case flag_sub: op="sub"; break;
-            case flag_cmp: op="cmp"; break;
-            default: abort();
-            }
-            switch (size)
-            {
-            case sz_byte:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_b(%s,%s);\n",op,dst,src);
-                break;
-            case sz_word:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_w(%s,%s);\n",op,dst,src);
-                break;
-            case sz_long:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_l(%s,%s);\n",op,dst,src);
-                break;
-            }
-            comprintf("\tlive_flags();\n");
-            comprintf("\tend_needflags();\n");
-            /* add and sub also emit the X-flag update; cmp does not */
-            if (type!=flag_cmp) {
-                duplicate_carry();
-            }
-            comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
-
-            return;
-        }
-
-    case flag_addx:
-    case flag_subx:
-        uses_cmov;
-        comprintf("\tdont_care_flags();\n");
-        {
-            char* op;
-            switch(type) {
-            case flag_addx: op="adc"; break;
-            case flag_subx: op="sbb"; break;
-            default: abort();
-            }
-            start_brace();
-            /* When FLAG_Z is needed, the emitted code captures the incoming
-               flags in `zero` with a conditional move before the operation
-               and merges that with the operation's own result afterwards
-               through set_zero().  (Condition number 5 is presumably the
-               x86 "not equal" code -- confirm.) */
-            comprintf("\tint zero=scratchie++;\n"
-                      "\tint one=scratchie++;\n"
-                      "\tif (needed_flags&FLAG_Z) {\n"
-                      "\tmov_l_ri(zero,0);\n"
-                      "\tmov_l_ri(one,-1);\n"
-                      "\tmake_flags_live();\n"
-                      "\tcmov_l_rr(zero,one,5);\n"
-                      "\t}\n");
-            comprintf("\trestore_carry();\n"); /* Reload the X flag into C */
-            switch (size)
-            {
-            case sz_byte:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_b(%s,%s);\n",op,dst,src);
-                break;
-            case sz_word:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_w(%s,%s);\n",op,dst,src);
-                break;
-            case sz_long:
-                comprintf("\tstart_needflags();\n"
-                          "\t%s_l(%s,%s);\n",op,dst,src);
-                break;
-            }
-            comprintf("\tlive_flags();\n");
-            comprintf("\tif (needed_flags&FLAG_Z) {\n"
-                      "\tcmov_l_rr(zero,one,5);\n"
-                      "\tset_zero(zero, one);\n" /* No longer need one */
-                      "\tlive_flags();\n"
-                      "\t}\n");
-            comprintf("\tend_needflags();\n");
-            duplicate_carry();
-            comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
-            return;
-        }
-    default:
-        /* flag_zn, flag_av, flag_sv: not implemented */
-        failure;
-        break;
-    }
-}
-
-/*
- * Emit conditional subtractions that fold the rotate count named by `var`
- * into the range used for a rotate of the given operand size.  For counts
- * below 66 (long), 68 (word) and 72 (byte) the emitted steps give the same
- * result as var % 33, var % 17 and var % 9 respectively.  An explicit
- * modulo would also work; which is faster was left open by the original
- * author.  Sizes other than long, word and byte emit nothing.
- */
-static void
-force_range_for_rox (const char *var, wordsizes size)
-{
-    if (size == sz_long)
-      {
-        comprintf ("\tif (%s >= 33) %s -= 33;\n", var, var);
-      }
-    else if (size == sz_word)
-      {
-        comprintf ("\tif (%s >= 34) %s -= 34;\n", var, var);
-        comprintf ("\tif (%s >= 17) %s -= 17;\n", var, var);
-      }
-    else if (size == sz_byte)
-      {
-        comprintf ("\tif (%s >= 36) %s -= 36;\n", var, var);
-        comprintf ("\tif (%s >= 18) %s -= 18;\n", var, var);
-        comprintf ("\tif (%s >= 9) %s -= 9;\n", var, var);
-      }
-}
-
-/*
- * Return, as the text of a C hexadecimal literal, the mask that selects
- * the most significant bit of an operand of the given size.  Any size
- * other than byte, word or long aborts the generator.
- */
-static const char *
-cmask (wordsizes size)
-{
-    if (size == sz_byte)
-        return "0x80";
-    if (size == sz_word)
-        return "0x8000";
-    if (size == sz_long)
-        return "0x80000000";
-
-    abort ();
-}
-
-/*
- * Return 1 when the instruction's stype field equals 3, otherwise 0.
- * (Going by the function name, stype 3 presumably marks an immediate
- * source in the range 1..8 -- confirm against the table68k builder.)
- */
-static int
-source_is_imm1_8 (struct instr *i)
-{
-    if (i->stype != 3)
-        return 0;
-
-    return 1;
-}
-
-static int /* returns zero for success, non-zero for failure */
-gen_opcode (unsigned long int opcode)
-{
- struct instr *curi = table68k + opcode;
- char* ssize=NULL;
-
- insn_n_cycles = 2;
- global_failure=0;
- long_opcode=0;
- global_isjump=0;
- global_iscjump=0;
- global_isaddx=0;
- global_cmov=0;
- global_fpu=0;
- global_mayfail=0;
- hack_opcode=opcode;
- endstr[0]=0;
-
- start_brace ();
- comprintf("\tuae_u8 scratchie=S1;\n");
- switch (curi->plev)
- {
- case 0: /* not privileged */
- break;
- case 1: /* unprivileged only on 68000 */
- if (cpu_level == 0)
- break;
- if (next_cpu_level < 0)
- next_cpu_level = 0;
-
- /* fall through */
- case 2: /* priviledged */
- failure; /* Easy ones first */
- break;
- case 3: /* privileged if size == word */
- if (curi->size == sz_byte)
- break;
- failure;
- break;
- }
- switch (curi->size) {
- case sz_byte: ssize="b"; break;
- case sz_word: ssize="w"; break;
- case sz_long: ssize="l"; break;
- default: abort();
- }
-
- switch (curi->mnemo)
- {
- case i_OR:
- case i_AND:
- case i_EOR:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- switch(curi->mnemo) {
- case i_OR: genflags (flag_or, curi->size, "", "src", "dst"); break;
- case i_AND: genflags (flag_and, curi->size, "", "src", "dst"); break;
- case i_EOR: genflags (flag_eor, curi->size, "", "src", "dst"); break;
- }
- genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
- break;
-
- case i_ORSR:
- case i_EORSR:
- failure;
- isjump;
- break;
- case i_ANDSR:
- failure;
- isjump;
- break;
- case i_SUB:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- genflags (flag_sub, curi->size, "", "src", "dst");
- genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
- break;
- case i_SUBA:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
- start_brace();
- comprintf("\tint tmp=scratchie++;\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break;
- case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break;
- case sz_long: comprintf("\ttmp=src;\n"); break;
- default: abort();
- }
- comprintf("\tsub_l(dst,tmp);\n");
- genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
- break;
- case i_SUBX:
- isaddx;
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- genflags (flag_subx, curi->size, "", "src", "dst");
- genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
- break;
- case i_SBCD:
- failure;
- /* I don't think so! */
- break;
- case i_ADD:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- genflags (flag_add, curi->size, "", "src", "dst");
- genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
- break;
- case i_ADDA:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
- start_brace();
- comprintf("\tint tmp=scratchie++;\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tsign_extend_8_rr(tmp,src);\n"); break;
- case sz_word: comprintf("\tsign_extend_16_rr(tmp,src);\n"); break;
- case sz_long: comprintf("\ttmp=src;\n"); break;
- default: abort();
- }
- comprintf("\tadd_l(dst,tmp);\n");
- genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
- break;
- case i_ADDX:
- isaddx;
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- start_brace();
- genflags (flag_addx, curi->size, "", "src", "dst");
- genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
- break;
- case i_ABCD:
- failure;
- /* No BCD maths for me.... */
- break;
- case i_NEG:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- start_brace ();
- comprintf("\tint dst=scratchie++;\n");
- comprintf("\tmov_l_ri(dst,0);\n");
- genflags (flag_sub, curi->size, "", "src", "dst");
- genastore ("dst", curi->smode, "srcreg", curi->size, "src");
- break;
- case i_NEGX:
- isaddx;
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- start_brace ();
- comprintf("\tint dst=scratchie++;\n");
- comprintf("\tmov_l_ri(dst,0);\n");
- genflags (flag_subx, curi->size, "", "src", "dst");
- genastore ("dst", curi->smode, "srcreg", curi->size, "src");
- break;
-
- case i_NBCD:
- failure;
- /* Nope! */
- break;
- case i_CLR:
- genamode (curi->smode, "srcreg", curi->size, "src", 2, 0);
- start_brace();
- comprintf("\tint dst=scratchie++;\n");
- comprintf("\tmov_l_ri(dst,0);\n");
- genflags (flag_logical, curi->size, "dst", "", "");
- genastore ("dst", curi->smode, "srcreg", curi->size, "src");
- break;
- case i_NOT:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- start_brace ();
- comprintf("\tint dst=scratchie++;\n");
- comprintf("\tmov_l_ri(dst,0xffffffff);\n");
- genflags (flag_eor, curi->size, "", "src", "dst");
- genastore ("dst", curi->smode, "srcreg", curi->size, "src");
- break;
- case i_TST:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genflags (flag_logical, curi->size, "src", "", "");
- break;
- case i_BCHG:
- case i_BCLR:
- case i_BSET:
- case i_BTST:
-/* failure; /* NEW: from "Ipswitch Town" release */
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- start_brace();
- comprintf("\tint s=scratchie++;\n"
- "\tint tmp=scratchie++;\n"
- "\tmov_l_rr(s,src);\n");
- if (curi->size == sz_byte)
- comprintf("\tand_l_ri(s,7);\n");
- else
- comprintf("\tand_l_ri(s,31);\n");
-
- {
- char* op;
- int need_write=1;
-
- switch(curi->mnemo) {
- case i_BCHG: op="btc"; break;
- case i_BCLR: op="btr"; break;
- case i_BSET: op="bts"; break;
- case i_BTST: op="bt"; need_write=0; break;
- default: abort();
- }
- comprintf("\t%s_l_rr(dst,s);\n" /* Answer now in C */
- "\tsbb_l(s,s);\n" /* s is 0 if bit was 0, -1 otherwise */
- "\tmake_flags_live();\n" /* Get the flags back */
- "\tdont_care_flags();\n",op);
- if (!noflags) {
- comprintf("\tstart_needflags();\n"
- "\tset_zero(s,tmp);\n"
- "\tlive_flags();\n"
- "\tend_needflags();\n");
- }
- if (need_write)
- genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
- }
- break;
-
- case i_CMPM:
- case i_CMP:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- start_brace ();
- genflags (flag_cmp, curi->size, "", "src", "dst");
- break;
- case i_CMPA:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
- start_brace();
- comprintf("\tint tmps=scratchie++;\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tsign_extend_8_rr(tmps,src);\n"); break;
- case sz_word: comprintf("\tsign_extend_16_rr(tmps,src);\n"); break;
- case sz_long: comprintf("tmps=src;\n"); break;
- default: abort();
- }
- genflags (flag_cmp, sz_long, "", "tmps", "dst");
- break;
- /* The next two are coded a little unconventional, but they are doing
- * weird things... */
- case i_MVPRM:
- isjump;
- failure;
- break;
- case i_MVPMR:
- isjump;
- failure;
- break;
- case i_MOVE:
- switch(curi->dmode) {
- case Dreg:
- case Areg:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
- genflags (flag_mov, curi->size, "", "src", "dst");
- genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
- break;
- default: /* It goes to memory, not a register */
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
- genflags (flag_logical, curi->size, "src", "", "");
- genastore ("src", curi->dmode, "dstreg", curi->size, "dst");
- break;
- }
- break;
- case i_MOVEA:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
-
- start_brace();
- comprintf("\tint tmps=scratchie++;\n");
- switch(curi->size) {
- case sz_word: comprintf("\tsign_extend_16_rr(dst,src);\n"); break;
- case sz_long: comprintf("\tmov_l_rr(dst,src);\n"); break;
- default: abort();
- }
- genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
- break;
-
- case i_MVSR2:
- isjump;
- failure;
- break;
- case i_MV2SR:
- isjump;
- failure;
- break;
- case i_SWAP:
- genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
- comprintf("\tdont_care_flags();\n");
- comprintf("\trol_l_ri(src,16);\n");
- genflags (flag_logical, sz_long, "src", "", "");
- genastore ("src", curi->smode, "srcreg", sz_long, "src");
- break;
- case i_EXG:
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- start_brace();
- comprintf("\tint tmp=scratchie++;\n"
- "\tmov_l_rr(tmp,src);\n");
- genastore ("dst", curi->smode, "srcreg", curi->size, "src");
- genastore ("tmp", curi->dmode, "dstreg", curi->size, "dst");
- break;
- case i_EXT:
- genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
- comprintf("\tdont_care_flags();\n");
- start_brace ();
- switch (curi->size)
- {
- case sz_byte:
- comprintf ("\tint dst = src;\n"
- "\tsign_extend_8_rr(src,src);\n");
- break;
- case sz_word:
- comprintf ("\tint dst = scratchie++;\n"
- "\tsign_extend_8_rr(dst,src);\n");
- break;
- case sz_long:
- comprintf ("\tint dst = src;\n"
- "\tsign_extend_16_rr(src,src);\n");
- break;
- default:
- abort ();
- }
- genflags (flag_logical,
- curi->size == sz_word ? sz_word : sz_long, "dst", "", "");
- genastore ("dst", curi->smode, "srcreg",
- curi->size == sz_word ? sz_word : sz_long, "src");
- break;
- case i_MVMEL:
- genmovemel (opcode);
- break;
- case i_MVMLE:
- genmovemle (opcode);
- break;
- case i_TRAP:
- isjump;
- failure;
- break;
- case i_MVR2USP:
- isjump;
- failure;
- break;
- case i_MVUSP2R:
- isjump;
- failure;
- break;
- case i_RESET:
- isjump;
- failure;
- break;
- case i_NOP:
- break;
- case i_STOP:
- isjump;
- failure;
- break;
- case i_RTE:
- isjump;
- failure;
- break;
- case i_RTD:
-/* failure; /* NEW: from "Ipswitch Town" release */
- genamode (curi->smode, "srcreg", curi->size, "offs", 1, 0);
- /* offs is constant */
- comprintf("\tadd_l_ri(offs,4);\n");
- start_brace();
- comprintf("\tint newad=scratchie++;\n"
- "\treadlong(15,newad,scratchie);\n"
- "\tmov_l_mr((uintptr)®s.pc,newad);\n"
- "\tget_n_addr_jmp(newad,PC_P,scratchie);\n"
- "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n"
- "\tm68k_pc_offset=0;\n"
- "\tadd_l(15,offs);\n");
- gen_update_next_handler();
- isjump;
- break;
- case i_LINK:
-/* failure; /* NEW: from "Ipswitch Town" release */
- genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0);
- comprintf("\tsub_l_ri(15,4);\n"
- "\twritelong_clobber(15,src,scratchie);\n"
- "\tmov_l_rr(src,15);\n");
- if (curi->size==sz_word)
- comprintf("\tsign_extend_16_rr(offs,offs);\n");
- comprintf("\tadd_l(15,offs);\n");
- genastore ("src", curi->smode, "srcreg", sz_long, "src");
- break;
- case i_UNLK:
-/* failure; /* NEW: from "Ipswitch Town" release */
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- comprintf("\tmov_l_rr(15,src);\n"
- "\treadlong(15,src,scratchie);\n"
- "\tadd_l_ri(15,4);\n");
- genastore ("src", curi->smode, "srcreg", curi->size, "src");
- break;
- case i_RTS:
- comprintf("\tint newad=scratchie++;\n"
- "\treadlong(15,newad,scratchie);\n"
- "\tmov_l_mr((uintptr)®s.pc,newad);\n"
- "\tget_n_addr_jmp(newad,PC_P,scratchie);\n"
- "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n"
- "\tm68k_pc_offset=0;\n"
- "\tlea_l_brr(15,15,4);\n");
- gen_update_next_handler();
- isjump;
- break;
- case i_TRAPV:
- isjump;
- failure;
- break;
- case i_RTR:
- isjump;
- failure;
- break;
- case i_JSR:
- isjump;
- genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
- start_brace();
- comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
- comprintf("\tint ret=scratchie++;\n"
- "\tmov_l_ri(ret,retadd);\n"
- "\tsub_l_ri(15,4);\n"
- "\twritelong_clobber(15,ret,scratchie);\n");
- comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n"
- "\tget_n_addr_jmp(srca,PC_P,scratchie);\n"
- "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n"
- "\tm68k_pc_offset=0;\n");
- gen_update_next_handler();
- break;
- case i_JMP:
- isjump;
- genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
- comprintf("\tmov_l_mr((uintptr)®s.pc,srca);\n"
- "\tget_n_addr_jmp(srca,PC_P,scratchie);\n"
- "\tmov_l_mr((uintptr)®s.pc_oldp,PC_P);\n"
- "\tm68k_pc_offset=0;\n");
- gen_update_next_handler();
- break;
- case i_BSR:
- is_const_jump;
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- start_brace();
- comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
- comprintf("\tint ret=scratchie++;\n"
- "\tmov_l_ri(ret,retadd);\n"
- "\tsub_l_ri(15,4);\n"
- "\twritelong_clobber(15,ret,scratchie);\n");
- comprintf("\tadd_l_ri(src,m68k_pc_offset_thisinst+2);\n");
- comprintf("\tm68k_pc_offset=0;\n");
- comprintf("\tadd_l(PC_P,src);\n");
-
- comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n");
- break;
- case i_Bcc:
- comprintf("\tuae_u32 v,v1,v2;\n");
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- /* That source is an immediate, so we can clobber it with abandon */
- switch(curi->size) {
- case sz_byte: comprintf("\tsign_extend_8_rr(src,src);\n"); break;
- case sz_word: comprintf("\tsign_extend_16_rr(src,src);\n"); break;
- case sz_long: break;
- }
- comprintf("\tsub_l_ri(src,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n");
- /* Leave the following as "add" --- it will allow it to be optimized
- away due to src being a constant ;-) */
- comprintf("\tadd_l_ri(src,(uintptr)comp_pc_p);\n");
- comprintf("\tmov_l_ri(PC_P,(uintptr)comp_pc_p);\n");
- /* Now they are both constant. Might as well fold in m68k_pc_offset */
- comprintf("\tadd_l_ri(src,m68k_pc_offset);\n");
- comprintf("\tadd_l_ri(PC_P,m68k_pc_offset);\n");
- comprintf("\tm68k_pc_offset=0;\n");
-
- if (curi->cc>=2) {
- comprintf("\tv1=get_const(PC_P);\n"
- "\tv2=get_const(src);\n"
- "\tregister_branch(v1,v2,%d);\n",
- cond_codes_x86[curi->cc]);
- comprintf("\tmake_flags_live();\n"); /* Load the flags */
- isjump;
- }
- else {
- is_const_jump;
- }
-
- switch(curi->cc) {
- case 0: /* Unconditional jump */
- comprintf("\tmov_l_rr(PC_P,src);\n");
- comprintf("\tcomp_pc_p=(uae_u8*)get_const(PC_P);\n");
- break;
- case 1: break; /* This is silly! */
- case 8: failure; break; /* Work out details! FIXME */
- case 9: failure; break; /* Not critical, though! */
-
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- break;
- default: abort();
- }
- break;
- case i_LEA:
- genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
- genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
- genastore ("srca", curi->dmode, "dstreg", curi->size, "dst");
- break;
- case i_PEA:
- if (table68k[opcode].smode==Areg ||
- table68k[opcode].smode==Aind ||
- table68k[opcode].smode==Aipi ||
- table68k[opcode].smode==Apdi ||
- table68k[opcode].smode==Ad16 ||
- table68k[opcode].smode==Ad8r)
- comprintf("if (srcreg==7) dodgy=1;\n");
-
- genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
- genamode (Apdi, "7", sz_long, "dst", 2, 0);
- genastore ("srca", Apdi, "7", sz_long, "dst");
- break;
- case i_DBcc:
- isjump;
- uses_cmov;
- genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0);
-
- /* That offs is an immediate, so we can clobber it with abandon */
- switch(curi->size) {
- case sz_word: comprintf("\tsign_extend_16_rr(offs,offs);\n"); break;
- default: abort(); /* Seems this only comes in word flavour */
- }
- comprintf("\tsub_l_ri(offs,m68k_pc_offset-m68k_pc_offset_thisinst-2);\n");
- comprintf("\tadd_l_ri(offs,(uintptr)comp_pc_p);\n"); /* New PC,
- once the
- offset_68k is
- * also added */
- /* Let's fold in the m68k_pc_offset at this point */
- comprintf("\tadd_l_ri(offs,m68k_pc_offset);\n");
- comprintf("\tadd_l_ri(PC_P,m68k_pc_offset);\n");
- comprintf("\tm68k_pc_offset=0;\n");
-
- start_brace();
- comprintf("\tint nsrc=scratchie++;\n");
-
- if (curi->cc>=2) {
- comprintf("\tmake_flags_live();\n"); /* Load the flags */
- }
-
- if (curi->size!=sz_word)
- abort();
-
-
- switch(curi->cc) {
- case 0: /* This is an elaborate nop? */
- break;
- case 1:
- comprintf("\tstart_needflags();\n");
- comprintf("\tsub_w_ri(src,1);\n");
- comprintf("\t end_needflags();\n");
- start_brace();
- comprintf("\tuae_u32 v2,v;\n"
- "\tuae_u32 v1=get_const(PC_P);\n");
- comprintf("\tv2=get_const(offs);\n"
- "\tregister_branch(v1,v2,3);\n");
- break;
-
- case 8: failure; break; /* Work out details! FIXME */
- case 9: failure; break; /* Not critical, though! */
-
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- comprintf("\tmov_l_rr(nsrc,src);\n");
- comprintf("\tlea_l_brr(scratchie,src,(uae_s32)-1);\n"
- "\tmov_w_rr(src,scratchie);\n");
- comprintf("\tcmov_l_rr(offs,PC_P,%d);\n",
- cond_codes_x86[curi->cc]);
- comprintf("\tcmov_l_rr(src,nsrc,%d);\n",
- cond_codes_x86[curi->cc]);
- /* OK, now for cc=true, we have src==nsrc and offs==PC_P,
- so whether we move them around doesn't matter. However,
- if cc=false, we have offs==jump_pc, and src==nsrc-1 */
-
- comprintf("\t start_needflags();\n");
- comprintf("\ttest_w_rr(nsrc,nsrc);\n");
- comprintf("\t end_needflags();\n");
- comprintf("\tcmov_l_rr(PC_P,offs,5);\n");
- break;
- default: abort();
- }
- genastore ("src", curi->smode, "srcreg", curi->size, "src");
- gen_update_next_handler();
- break;
-
- case i_Scc:
-/* failure; /* NEW: from "Ipswitch Town" release */
- genamode (curi->smode, "srcreg", curi->size, "src", 2, 0);
- start_brace ();
- comprintf ("\tint val = scratchie++;\n");
-
- /* We set val to 0 if we really should use 255, and to 1 for real 0 */
- switch(curi->cc) {
- case 0: /* Unconditional set */
- comprintf("\tmov_l_ri(val,0);\n");
- break;
- case 1:
- /* Unconditional not-set */
- comprintf("\tmov_l_ri(val,1);\n");
- break;
- case 8: failure; break; /* Work out details! FIXME */
- case 9: failure; break; /* Not critical, though! */
-
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- comprintf("\tmake_flags_live();\n"); /* Load the flags */
- /* All condition codes can be inverted by changing the LSB */
- comprintf("\tsetcc(val,%d);\n",
- cond_codes_x86[curi->cc]^1); break;
- default: abort();
- }
- comprintf("\tsub_b_ri(val,1);\n");
- genastore ("val", curi->smode, "srcreg", curi->size, "src");
- break;
- case i_DIVU:
- isjump;
- failure;
- break;
- case i_DIVS:
- isjump;
- failure;
- break;
- case i_MULU:
-/* failure; /* NEW: from "Ipswitch Town" release */
- comprintf("\tdont_care_flags();\n");
- genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
- genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0);
- /* To do 16x16 unsigned multiplication, we actually use
- 32x32 signed, and zero-extend the registers first.
- That solves the problem of MUL needing dedicated registers
- on the x86 */
- comprintf("\tzero_extend_16_rr(scratchie,src);\n"
- "\tzero_extend_16_rr(dst,dst);\n"
- "\timul_32_32(dst,scratchie);\n");
- genflags (flag_logical, sz_long, "dst", "", "");
- genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
- break;
- case i_MULS:
-/* failure; /* NEW: from "Ipswitch Town" release */
- comprintf("\tdont_care_flags();\n");
- genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
- genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0);
- comprintf("\tsign_extend_16_rr(scratchie,src);\n"
- "\tsign_extend_16_rr(dst,dst);\n"
- "\timul_32_32(dst,scratchie);\n");
- genflags (flag_logical, sz_long, "dst", "", "");
- genastore ("dst", curi->dmode, "dstreg", sz_long, "dst");
- break;
- case i_CHK:
- isjump;
- failure;
- break;
-
- case i_CHK2:
- isjump;
- failure;
- break;
-
- case i_ASR:
- mayfail;
- if (curi->smode==Dreg) {
- comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
- " FAIL(1);\n"
- " return;\n"
- "} \n");
- start_brace();
- }
- comprintf("\tdont_care_flags();\n");
-
- genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
- if (curi->smode!=immi) {
-/* failure; /* UNTESTED: NEW: from "Ipswitch Town" release */
- if (!noflags) {
- uses_cmov;
- start_brace();
- comprintf("\tint highmask;\n"
- "\tint width;\n"
- "\tint cdata=scratchie++;\n"
- "\tint sdata=scratchie++;\n"
- "\tint tmpcnt=scratchie++;\n");
- comprintf("\tmov_l_rr(sdata,data);\n"
- "\tmov_l_rr(cdata,data);\n"
- "\tmov_l_rr(tmpcnt,cnt);\n");
- switch (curi->size) {
- case sz_byte: comprintf("\tshra_b_ri(sdata,7);\n"); break;
- case sz_word: comprintf("\tshra_w_ri(sdata,15);\n"); break;
- case sz_long: comprintf("\tshra_l_ri(sdata,31);\n"); break;
- default: abort();
- }
- /* sdata is now the MSB propagated to all bits for the
- register of specified size */
- comprintf("\tand_l_ri(tmpcnt,63);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshra_b_rr(data,tmpcnt);\n"
- "\thighmask=0x38;\n");
- break;
- case sz_word: comprintf("\tshra_w_rr(data,tmpcnt);\n"
- "\thighmask=0x30;\n");
- break;
- case sz_long: comprintf("\tshra_l_rr(data,tmpcnt);\n"
- "\thighmask=0x20;\n");
- break;
- }
- comprintf("\ttest_l_ri(tmpcnt,highmask);\n");
- switch (curi->size) {
- case sz_byte: comprintf("\tcmov_b_rr(data,sdata,NATIVE_CC_NE);\n"); break;
- case sz_word: comprintf("\tcmov_w_rr(data,sdata,NATIVE_CC_NE);\n"); break;
- case sz_long: comprintf("\tcmov_l_rr(data,sdata,NATIVE_CC_NE);\n"); break;
- }
-
- /* Result of shift is now in data. Now we need to determine
- the carry by shifting cdata one less */
- /* NOTE: carry bit is cleared if shift count is zero */
- comprintf("\tmov_l_ri(scratchie,0);\n"
- "\ttest_l_rr(tmpcnt,tmpcnt);\n"
- "\tcmov_l_rr(sdata,scratchie,NATIVE_CC_EQ);\n"
- "\tforget_about(scratchie);\n");
- comprintf("\tsub_l_ri(tmpcnt,1);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshra_b_rr(cdata,tmpcnt);\n");break;
- case sz_word: comprintf("\tshra_w_rr(cdata,tmpcnt);\n");break;
- case sz_long: comprintf("\tshra_l_rr(cdata,tmpcnt);\n");break;
- default: abort();
- }
- /* If the shift count was higher than the width, we need
- to pick up the sign from original data (sdata) */
- /* NOTE: for shift count of zero, the following holds
- true and cdata contains 0 so that carry bit is cleared */
- comprintf("\ttest_l_ri(tmpcnt,highmask);\n"
- "\tforget_about(tmpcnt);\n"
- "\tcmov_l_rr(cdata,sdata,NATIVE_CC_NE);\n");
-
- /* And create the flags (preserve X flag if shift count is zero) */
- comprintf("\ttest_l_ri(cnt,63);\n"
- "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n");
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- else {
- uses_cmov;
- start_brace();
- comprintf("\tint highmask;\n"
- "\tint width;\n"
- "\tint highshift=scratchie++;\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshra_b_rr(data,cnt);\n"
- "\thighmask=0x38;\n"
- "\twidth=8;\n");
- break;
- case sz_word: comprintf("\tshra_w_rr(data,cnt);\n"
- "\thighmask=0x30;\n"
- "\twidth=16;\n");
- break;
- case sz_long: comprintf("\tshra_l_rr(data,cnt);\n"
- "\thighmask=0x20;\n"
- "\twidth=32;\n");
- break;
- default: abort();
- }
- comprintf("test_l_ri(cnt,highmask);\n"
- "mov_l_ri(highshift,0);\n"
- "mov_l_ri(scratchie,width/2);\n"
- "cmov_l_rr(highshift,scratchie,5);\n");
- /* The x86 masks out bits, so we now make sure that things
- really get shifted as much as planned */
- switch(curi->size) {
- case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break;
- case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break;
- case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break;
- default: abort();
- }
- /* And again */
- switch(curi->size) {
- case sz_byte: comprintf("\tshra_b_rr(data,highshift);\n");break;
- case sz_word: comprintf("\tshra_w_rr(data,highshift);\n");break;
- case sz_long: comprintf("\tshra_l_rr(data,highshift);\n");break;
- default: abort();
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- }
- else {
- start_brace();
- comprintf("\tint tmp=scratchie++;\n"
- "\tint bp;\n"
- "\tmov_l_rr(tmp,data);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshra_b_ri(data,srcreg);\n"
- "\tbp=srcreg-1;\n"); break;
- case sz_word: comprintf("\tshra_w_ri(data,srcreg);\n"
- "\tbp=srcreg-1;\n"); break;
- case sz_long: comprintf("\tshra_l_ri(data,srcreg);\n"
- "\tbp=srcreg-1;\n"); break;
- default: abort();
- }
-
- if (!noflags) {
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- comprintf("\t duplicate_carry();\n");
- comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- break;
-
- case i_ASL:
-/* failure; /* NEW: from "Ipswitch Town" release */
- mayfail;
- if (curi->smode==Dreg) {
- comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
- " FAIL(1);\n"
- " return;\n"
- "} \n");
- start_brace();
- }
- comprintf("\tdont_care_flags();\n");
- /* Except for the handling of the V flag, this is identical to
- LSL. The handling of V is, uhm, unpleasant, so if it's needed,
- let the normal emulation handle it. Shoulders of giants kinda
- thing ;-) */
- comprintf("if (needed_flags & FLAG_V) {\n"
- " FAIL(1);\n"
- " return;\n"
- "} \n");
-
- genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
- if (curi->smode!=immi) {
- if (!noflags) {
- uses_cmov;
- start_brace();
- comprintf("\tint highmask;\n"
- "\tint cdata=scratchie++;\n"
- "\tint tmpcnt=scratchie++;\n");
- comprintf("\tmov_l_rr(tmpcnt,cnt);\n"
- "\tand_l_ri(tmpcnt,63);\n"
- "\tmov_l_ri(cdata,0);\n"
- "\tcmov_l_rr(cdata,data,5);\n");
- /* cdata is now either data (for shift count!=0) or
- 0 (for shift count==0) */
- switch(curi->size) {
- case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n"
- "\thighmask=0x38;\n");
- break;
- case sz_word: comprintf("\tshll_w_rr(data,cnt);\n"
- "\thighmask=0x30;\n");
- break;
- case sz_long: comprintf("\tshll_l_rr(data,cnt);\n"
- "\thighmask=0x20;\n");
- break;
- default: abort();
- }
- comprintf("test_l_ri(cnt,highmask);\n"
- "mov_l_ri(scratchie,0);\n"
- "cmov_l_rr(scratchie,data,4);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
- case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
- case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break;
- default: abort();
- }
- /* Result of shift is now in data. Now we need to determine
- the carry by shifting cdata one less */
- comprintf("\tsub_l_ri(tmpcnt,1);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshll_b_rr(cdata,tmpcnt);\n");break;
- case sz_word: comprintf("\tshll_w_rr(cdata,tmpcnt);\n");break;
- case sz_long: comprintf("\tshll_l_rr(cdata,tmpcnt);\n");break;
- default: abort();
- }
- comprintf("test_l_ri(tmpcnt,highmask);\n"
- "mov_l_ri(scratchie,0);\n"
- "cmov_l_rr(cdata,scratchie,5);\n");
- /* And create the flags */
- comprintf("\tstart_needflags();\n");
-
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n");
- comprintf("\t bt_l_ri(cdata,7);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n");
- comprintf("\t bt_l_ri(cdata,15);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n");
- comprintf("\t bt_l_ri(cdata,31);\n"); break;
- }
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- comprintf("\t duplicate_carry();\n");
- comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- else {
- uses_cmov;
- start_brace();
- comprintf("\tint highmask;\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n"
- "\thighmask=0x38;\n");
- break;
- case sz_word: comprintf("\tshll_w_rr(data,cnt);\n"
- "\thighmask=0x30;\n");
- break;
- case sz_long: comprintf("\tshll_l_rr(data,cnt);\n"
- "\thighmask=0x20;\n");
- break;
- default: abort();
- }
- comprintf("test_l_ri(cnt,highmask);\n"
- "mov_l_ri(scratchie,0);\n"
- "cmov_l_rr(scratchie,data,4);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
- case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
- case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break;
- default: abort();
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- }
- else {
- start_brace();
- comprintf("\tint tmp=scratchie++;\n"
- "\tint bp;\n"
- "\tmov_l_rr(tmp,data);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n"
- "\tbp=8-srcreg;\n"); break;
- case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n"
- "\tbp=16-srcreg;\n"); break;
- case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n"
- "\tbp=32-srcreg;\n"); break;
- default: abort();
- }
-
- if (!noflags) {
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- comprintf("\t duplicate_carry();\n");
- comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- break;
-
- case i_LSR:
-/* failure; /* NEW: from "Ipswitch Town" release */
- mayfail;
- if (curi->smode==Dreg) {
- comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
- " FAIL(1);\n"
- " return;\n"
- "} \n");
- start_brace();
- }
- comprintf("\tdont_care_flags();\n");
-
- genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
- if (curi->smode!=immi) {
- if (!noflags) {
- uses_cmov;
- start_brace();
- comprintf("\tint highmask;\n"
- "\tint cdata=scratchie++;\n"
- "\tint tmpcnt=scratchie++;\n");
- comprintf("\tmov_l_rr(tmpcnt,cnt);\n"
- "\tand_l_ri(tmpcnt,63);\n"
- "\tmov_l_ri(cdata,0);\n"
- "\tcmov_l_rr(cdata,data,NATIVE_CC_NE);\n");
- /* cdata is now either data (for shift count!=0) or
- 0 (for shift count==0) */
- switch(curi->size) {
- case sz_byte: comprintf("\tshrl_b_rr(data,tmpcnt);\n"
- "\thighmask=0x38;\n");
- break;
- case sz_word: comprintf("\tshrl_w_rr(data,tmpcnt);\n"
- "\thighmask=0x30;\n");
- break;
- case sz_long: comprintf("\tshrl_l_rr(data,tmpcnt);\n"
- "\thighmask=0x20;\n");
- break;
- default: abort();
- }
- comprintf("\ttest_l_ri(tmpcnt,highmask);\n"
- "\rmov_l_ri(scratchie,0);\n");
- if (curi->size == sz_long)
- comprintf("\tcmov_l_rr(data,scratchie,NATIVE_CC_NE);\n");
- else {
- comprintf("\tcmov_l_rr(scratchie,data,NATIVE_CC_EQ);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
- case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
- default: abort();
- }
- }
- /* Result of shift is now in data. Now we need to determine
- the carry by shifting cdata one less */
- comprintf("\tsub_l_ri(tmpcnt,1);\n");
- comprintf("\tshrl_l_rr(cdata,tmpcnt);\n");
- comprintf("\ttest_l_ri(tmpcnt,highmask);\n");
- comprintf("\tforget_about(tmpcnt);\n");
- if (curi->size != sz_long) /* scratchie is still live for LSR.L */
- comprintf("\tmov_l_ri(scratchie,0);\n");
- comprintf("\tcmov_l_rr(cdata,scratchie,NATIVE_CC_NE);\n");
- comprintf("\tforget_about(scratchie);\n");
- /* And create the flags (preserve X flag if shift count is zero) */
- comprintf("\ttest_l_ri(cnt,63);\n"
- "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n");
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- comprintf("\t bt_l_ri(cdata,0);\n"); /* Set C */
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- else {
- uses_cmov;
- start_brace();
- comprintf("\tint highmask;\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshrl_b_rr(data,cnt);\n"
- "\thighmask=0x38;\n");
- break;
- case sz_word: comprintf("\tshrl_w_rr(data,cnt);\n"
- "\thighmask=0x30;\n");
- break;
- case sz_long: comprintf("\tshrl_l_rr(data,cnt);\n"
- "\thighmask=0x20;\n");
- break;
- default: abort();
- }
- comprintf("test_l_ri(cnt,highmask);\n"
- "mov_l_ri(scratchie,0);\n"
- "cmov_l_rr(scratchie,data,4);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
- case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
- case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break;
- default: abort();
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- }
- else {
- start_brace();
- comprintf("\tint tmp=scratchie++;\n"
- "\tint bp;\n"
- "\tmov_l_rr(tmp,data);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshrl_b_ri(data,srcreg);\n"
- "\tbp=srcreg-1;\n"); break;
- case sz_word: comprintf("\tshrl_w_ri(data,srcreg);\n"
- "\tbp=srcreg-1;\n"); break;
- case sz_long: comprintf("\tshrl_l_ri(data,srcreg);\n"
- "\tbp=srcreg-1;\n"); break;
- default: abort();
- }
-
- if (!noflags) {
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- comprintf("\t duplicate_carry();\n");
- comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- break;
-
- case i_LSL:
- mayfail;
- if (curi->smode==Dreg) {
- comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
- " FAIL(1);\n"
- " return;\n"
- "} \n");
- start_brace();
- }
- comprintf("\tdont_care_flags();\n");
-
- genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
- if (curi->smode!=immi) {
-/* failure; /* UNTESTED: NEW: from "Ipswitch Town" release */
- if (!noflags) {
- uses_cmov;
- start_brace();
- comprintf("\tint highmask;\n"
- "\tint cdata=scratchie++;\n"
- "\tint tmpcnt=scratchie++;\n");
- comprintf("\tmov_l_rr(tmpcnt,cnt);\n"
- "\tand_l_ri(tmpcnt,63);\n"
- "\tmov_l_ri(cdata,0);\n"
- "\tcmov_l_rr(cdata,data,NATIVE_CC_NE);\n");
- /* cdata is now either data (for shift count!=0) or
- 0 (for shift count==0) */
- switch(curi->size) {
- case sz_byte: comprintf("\tshll_b_rr(data,tmpcnt);\n"
- "\thighmask=0x38;\n");
- break;
- case sz_word: comprintf("\tshll_w_rr(data,tmpcnt);\n"
- "\thighmask=0x30;\n");
- break;
- case sz_long: comprintf("\tshll_l_rr(data,tmpcnt);\n"
- "\thighmask=0x20;\n");
- break;
- default: abort();
- }
- comprintf("\ttest_l_ri(tmpcnt,highmask);\n"
- "\tmov_l_ri(scratchie,0);\n");
- if (curi->size == sz_long)
- comprintf("\tcmov_l_rr(data,scratchie,NATIVE_CC_NE);\n");
- else {
- comprintf("\tcmov_l_rr(scratchie,data,NATIVE_CC_EQ);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
- case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
- default: abort();
- }
- }
- /* Result of shift is now in data. Now we need to determine
- the carry by shifting cdata one less */
- comprintf("\tsub_l_ri(tmpcnt,1);\n");
- comprintf("\tshll_l_rr(cdata,tmpcnt);\n");
- comprintf("\ttest_l_ri(tmpcnt,highmask);\n");
- comprintf("\tforget_about(tmpcnt);\n");
- if (curi->size != sz_long) /* scratchie is still live for LSL.L */
- comprintf("\tmov_l_ri(scratchie,0);\n");
- comprintf("\tcmov_l_rr(cdata,scratchie,NATIVE_CC_NE);\n");
- comprintf("\tforget_about(scratchie);\n");
- /* And create the flags (preserve X flag if shift count is zero) */
- switch (curi->size) {
- case sz_byte: comprintf("\tshrl_l_ri(cdata,7);\n"); break;
- case sz_word: comprintf("\tshrl_l_ri(cdata,15);\n"); break;
- case sz_long: comprintf("\tshrl_l_ri(cdata,31);\n"); break;
- }
- comprintf("\ttest_l_ri(cnt,63);\n"
- "\tcmov_l_rr(FLAGX,cdata,NATIVE_CC_NE);\n");
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- comprintf("\t bt_l_ri(cdata,0);\n");
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- else {
- uses_cmov;
- start_brace();
- comprintf("\tint highmask;\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshll_b_rr(data,cnt);\n"
- "\thighmask=0x38;\n");
- break;
- case sz_word: comprintf("\tshll_w_rr(data,cnt);\n"
- "\thighmask=0x30;\n");
- break;
- case sz_long: comprintf("\tshll_l_rr(data,cnt);\n"
- "\thighmask=0x20;\n");
- break;
- default: abort();
- }
- comprintf("test_l_ri(cnt,highmask);\n"
- "mov_l_ri(scratchie,0);\n"
- "cmov_l_rr(scratchie,data,4);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tmov_b_rr(data,scratchie);\n");break;
- case sz_word: comprintf("\tmov_w_rr(data,scratchie);\n");break;
- case sz_long: comprintf("\tmov_l_rr(data,scratchie);\n");break;
- default: abort();
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- }
- else {
- start_brace();
- comprintf("\tint tmp=scratchie++;\n"
- "\tint bp;\n"
- "\tmov_l_rr(tmp,data);\n");
- switch(curi->size) {
- case sz_byte: comprintf("\tshll_b_ri(data,srcreg);\n"
- "\tbp=8-srcreg;\n"); break;
- case sz_word: comprintf("\tshll_w_ri(data,srcreg);\n"
- "\tbp=16-srcreg;\n"); break;
- case sz_long: comprintf("\tshll_l_ri(data,srcreg);\n"
- "\tbp=32-srcreg;\n"); break;
- default: abort();
- }
-
- if (!noflags) {
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- comprintf("\t bt_l_ri(tmp,bp);\n"); /* Set C */
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- comprintf("\t duplicate_carry();\n");
- comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n");
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- }
- break;
-
- case i_ROL:
- mayfail;
- if (curi->smode==Dreg) {
- comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
- " FAIL(1);\n"
- " return;\n"
- "} \n");
- start_brace();
- }
- comprintf("\tdont_care_flags();\n");
- genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
- start_brace ();
-
- switch(curi->size) {
- case sz_long: comprintf("\t rol_l_rr(data,cnt);\n"); break;
- case sz_word: comprintf("\t rol_w_rr(data,cnt);\n"); break;
- case sz_byte: comprintf("\t rol_b_rr(data,cnt);\n"); break;
- }
-
- if (!noflags) {
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- comprintf("\t bt_l_ri(data,0x00);\n"); /* Set C */
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- break;
-
- case i_ROR:
- mayfail;
- if (curi->smode==Dreg) {
- comprintf("if ((uae_u32)srcreg==(uae_u32)dstreg) {\n"
- " FAIL(1);\n"
- " return;\n"
- "} \n");
- start_brace();
- }
- comprintf("\tdont_care_flags();\n");
- genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
- genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
- start_brace ();
-
- switch(curi->size) {
- case sz_long: comprintf("\t ror_l_rr(data,cnt);\n"); break;
- case sz_word: comprintf("\t ror_w_rr(data,cnt);\n"); break;
- case sz_byte: comprintf("\t ror_b_rr(data,cnt);\n"); break;
- }
-
- if (!noflags) {
- comprintf("\tstart_needflags();\n");
- comprintf("\tif (needed_flags & FLAG_ZNV)\n");
- switch(curi->size) {
- case sz_byte: comprintf("\t test_b_rr(data,data);\n"); break;
- case sz_word: comprintf("\t test_w_rr(data,data);\n"); break;
- case sz_long: comprintf("\t test_l_rr(data,data);\n"); break;
- }
- switch(curi->size) {
- case sz_byte: comprintf("\t bt_l_ri(data,0x07);\n"); break;
- case sz_word: comprintf("\t bt_l_ri(data,0x0f);\n"); break;
- case sz_long: comprintf("\t bt_l_ri(data,0x1f);\n"); break;
- }
- comprintf("\t live_flags();\n");
- comprintf("\t end_needflags();\n");
- }
- genastore ("data", curi->dmode, "dstreg", curi->size, "data");
- break;
-
- case i_ROXL:
- failure;
- break;
- case i_ROXR:
- failure;
- break;
- case i_ASRW:
- failure;
- break;
- case i_ASLW:
- failure;
- break;
- case i_LSRW:
- failure;
- break;
- case i_LSLW:
- failure;
- break;
- case i_ROLW:
- failure;
- break;
- case i_RORW:
- failure;
- break;
- case i_ROXLW:
- failure;
- break;
- case i_ROXRW:
- failure;
- break;
- case i_MOVEC2:
- isjump;
- failure;
- break;
- case i_MOVE2C:
- isjump;
- failure;
- break;
- case i_CAS:
- failure;
- break;
- case i_CAS2:
- failure;
- break;
- case i_MOVES: /* ignore DFC and SFC because we have no MMU */
- isjump;
- failure;
- break;
- case i_BKPT: /* only needed for hardware emulators */
- isjump;
- failure;
- break;
- case i_CALLM: /* not present in 68030 */
- isjump;
- failure;
- break;
- case i_RTM: /* not present in 68030 */
- isjump;
- failure;
- break;
- case i_TRAPcc:
- isjump;
- failure;
- break;
- case i_DIVL:
- isjump;
- failure;
- break;
- case i_MULL:
-/* failure; /* NEW: from "Ipswitch Town" release */
- if (!noflags) {
- failure;
- break;
- }
- comprintf("\tuae_u16 extra=%s;\n",gen_nextiword());
- comprintf("\tint r2=(extra>>12)&7;\n"
- "\tint tmp=scratchie++;\n");
-
- genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
- /* The two operands are in dst and r2 */
- comprintf("\tif (extra&0x0400) {\n" /* Need full 64 bit result */
- "\tint r3=(extra&7);\n"
- "\tmov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */
- comprintf("\tif (extra&0x0800) { \n" /* signed */
- "\t\timul_64_32(r2,r3);\n"
- "\t} else { \n"
- "\t\tmul_64_32(r2,r3);\n"
- "\t} \n");
- /* The result is in r2/tmp, with r2 holding the lower 32 bits */
- comprintf("\t} else {\n"); /* Only want 32 bit result */
- /* operands in dst and r2, result foes into r2 */
- /* shouldn't matter whether it's signed or unsigned?!? */
- comprintf("\timul_32_32(r2,dst);\n"
- "\t}\n");
- break;
-
- case i_BFTST:
- case i_BFEXTU:
- case i_BFCHG:
- case i_BFEXTS:
- case i_BFCLR:
- case i_BFFFO:
- case i_BFSET:
- case i_BFINS:
- failure;
- break;
- case i_PACK:
- failure;
- break;
- case i_UNPK:
- failure;
- break;
- case i_TAS:
- failure;
- break;
- case i_FPP:
- uses_fpu;
-#ifdef USE_JIT_FPU
- mayfail;
- comprintf("\tuae_u16 extra=%s;\n",gen_nextiword());
- swap_opcode();
- comprintf("\tcomp_fpp_opp(opcode,extra);\n");
-#else
- failure;
-#endif
- break;
- case i_FBcc:
- uses_fpu;
-#ifdef USE_JIT_FPU
- isjump;
- uses_cmov;
- mayfail;
- swap_opcode();
- comprintf("\tcomp_fbcc_opp(opcode);\n");
-#else
- isjump;
- failure;
-#endif
- break;
- case i_FDBcc:
- uses_fpu;
- isjump;
- failure;
- break;
- case i_FScc:
- uses_fpu;
-#ifdef USE_JIT_FPU
- mayfail;
- uses_cmov;
- comprintf("\tuae_u16 extra=%s;\n",gen_nextiword());
- swap_opcode();
- comprintf("\tcomp_fscc_opp(opcode,extra);\n");
-#else
- failure;
-#endif
- break;
- case i_FTRAPcc:
- uses_fpu;
- isjump;
- failure;
- break;
- case i_FSAVE:
- uses_fpu;
- failure;
- break;
- case i_FRESTORE:
- uses_fpu;
- failure;
- break;
-
- case i_CINVL:
- case i_CINVP:
- case i_CINVA:
- isjump; /* Not really, but it's probably a good idea to stop
- translating at this point */
- failure;
- comprintf ("\tflush_icache();\n"); /* Differentiate a bit more? */
- break;
- case i_CPUSHL:
- case i_CPUSHP:
- case i_CPUSHA:
- isjump; /* Not really, but it's probably a good idea to stop
- translating at this point */
- failure;
- break;
- case i_MOVE16:
- genmov16(opcode, curi);
- break;
-
- case i_EMULOP_RETURN:
- isjump;
- failure;
- break;
-
- case i_EMULOP:
- failure;
- break;
-
- case i_MMUOP:
- isjump;
- failure;
- break;
- default:
- abort ();
- break;
- }
- comprintf("%s",endstr);
- finish_braces ();
- sync_m68k_pc ();
- if (global_mayfail)
- comprintf("\tif (failure) m68k_pc_offset=m68k_pc_offset_thisinst;\n");
- return global_failure;
-}
-
-static void
-generate_includes (FILE * f)
-{
- fprintf (f, "#include \"sysdeps.h\"\n");
- fprintf (f, "#include \"m68k.h\"\n");
- fprintf (f, "#include \"memory.h\"\n");
- fprintf (f, "#include \"readcpu.h\"\n");
- fprintf (f, "#include \"newcpu.h\"\n");
- fprintf (f, "#include \"comptbl.h\"\n");
-}
-
-static int postfix;
-
-static void
-generate_one_opcode (int rp, int noflags)
-{
- uae_u16 smsk, dmsk;
- const long int opcode = opcode_map[rp];
- const char *opcode_str;
- int aborted=0;
- int have_srcreg=0;
- int have_dstreg=0;
-
- if (table68k[opcode].mnemo == i_ILLG
- || table68k[opcode].clev > cpu_level)
- return;
-
- if (table68k[opcode].handler != -1)
- return;
-
- switch (table68k[opcode].stype)
- {
- case 0:
- smsk = 7;
- break;
- case 1:
- smsk = 255;
- break;
- case 2:
- smsk = 15;
- break;
- case 3:
- smsk = 7;
- break;
- case 4:
- smsk = 7;
- break;
- case 5:
- smsk = 63;
- break;
- case 6:
- smsk = 255;
- break;
- case 7:
- smsk = 3;
- break;
- default:
- abort ();
- }
- dmsk = 7;
-
- next_cpu_level = -1;
- if (table68k[opcode].suse
- && table68k[opcode].smode != imm && table68k[opcode].smode != imm0
- && table68k[opcode].smode != imm1 && table68k[opcode].smode != imm2
- && table68k[opcode].smode != absw && table68k[opcode].smode != absl
- && table68k[opcode].smode != PC8r && table68k[opcode].smode != PC16)
- {
- have_srcreg=1;
- if (table68k[opcode].spos == -1)
- {
- if (((int) table68k[opcode].sreg) >= 128)
- comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].sreg);
- else
- comprintf ("\tuae_s32 srcreg = %d;\n", (int) table68k[opcode].sreg);
- }
- else
- {
- char source[100];
- int pos = table68k[opcode].spos;
-
- comprintf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n");
-
- if (pos < 8 && (smsk >> (8 - pos)) != 0)
- sprintf (source, "(((opcode >> %d) | (opcode << %d)) & %d)",
- pos ^ 8, 8 - pos, dmsk);
- else if (pos != 8)
- sprintf (source, "((opcode >> %d) & %d)", pos ^ 8, smsk);
- else
- sprintf (source, "(opcode & %d)", smsk);
-
- if (table68k[opcode].stype == 3)
- comprintf ("\tuae_u32 srcreg = imm8_table[%s];\n", source);
- else if (table68k[opcode].stype == 1)
- comprintf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source);
- else
- comprintf ("\tuae_u32 srcreg = %s;\n", source);
-
- comprintf ("#else\n");
-
- if (pos)
- sprintf (source, "((opcode >> %d) & %d)", pos, smsk);
- else
- sprintf (source, "(opcode & %d)", smsk);
-
- if (table68k[opcode].stype == 3)
- comprintf ("\tuae_s32 srcreg = imm8_table[%s];\n", source);
- else if (table68k[opcode].stype == 1)
- comprintf ("\tuae_s32 srcreg = (uae_s32)(uae_s8)%s;\n", source);
- else
- comprintf ("\tuae_s32 srcreg = %s;\n", source);
-
- comprintf ("#endif\n");
- }
- }
- if (table68k[opcode].duse
- /* Yes, the dmode can be imm, in case of LINK or DBcc */
- && table68k[opcode].dmode != imm && table68k[opcode].dmode != imm0
- && table68k[opcode].dmode != imm1 && table68k[opcode].dmode != imm2
- && table68k[opcode].dmode != absw && table68k[opcode].dmode != absl)
- {
- have_dstreg=1;
- if (table68k[opcode].dpos == -1)
- {
- if (((int) table68k[opcode].dreg) >= 128)
- comprintf ("\tuae_s32 dstreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].dreg);
- else
- comprintf ("\tuae_s32 dstreg = %d;\n", (int) table68k[opcode].dreg);
- }
- else
- {
- int pos = table68k[opcode].dpos;
-
- comprintf ("#ifdef HAVE_GET_WORD_UNSWAPPED\n");
-
- if (pos < 8 && (dmsk >> (8 - pos)) != 0)
- comprintf ("\tuae_u32 dstreg = ((opcode >> %d) | (opcode << %d)) & %d;\n",
- pos ^ 8, 8 - pos, dmsk);
- else if (pos != 8)
- comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n",
- pos ^ 8, dmsk);
- else
- comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk);
-
- comprintf ("#else\n");
-
- if (pos)
- comprintf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n",
- pos, dmsk);
- else
- comprintf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk);
-
- comprintf ("#endif\n");
- }
- }
-
- if (have_srcreg && have_dstreg &&
- (table68k[opcode].dmode==Areg ||
- table68k[opcode].dmode==Aind ||
- table68k[opcode].dmode==Aipi ||
- table68k[opcode].dmode==Apdi ||
- table68k[opcode].dmode==Ad16 ||
- table68k[opcode].dmode==Ad8r) &&
- (table68k[opcode].smode==Areg ||
- table68k[opcode].smode==Aind ||
- table68k[opcode].smode==Aipi ||
- table68k[opcode].smode==Apdi ||
- table68k[opcode].smode==Ad16 ||
- table68k[opcode].smode==Ad8r)
- ) {
- comprintf("\tuae_u32 dodgy=(srcreg==(uae_s32)dstreg);\n");
- }
- else {
- comprintf("\tuae_u32 dodgy=0;\n");
- }
- comprintf("\tuae_u32 m68k_pc_offset_thisinst=m68k_pc_offset;\n");
- comprintf("\tm68k_pc_offset+=2;\n");
-
- opcode_str = get_instruction_string (opcode);
-
- aborted=gen_opcode (opcode);
- {
- int flags=0;
- if (global_isjump) flags|=1;
- if (long_opcode) flags|=2;
- if (global_cmov) flags|=4;
- if (global_isaddx) flags|=8;
- if (global_iscjump) flags|=16;
- if (global_fpu) flags|=32;
-
- comprintf ("}\n");
-
- if (aborted) {
- fprintf (stblfile, "{ NULL, 0x%08x, %ld }, /* %s */\n", flags, opcode, opcode_str);
- com_discard();
- }
- else {
- if (noflags) {
- fprintf (stblfile, "{ op_%lx_%d_comp_nf, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, opcode_str);
- fprintf (headerfile, "extern compop_func op_%lx_%d_comp_nf;\n", opcode, postfix);
- printf ("void REGPARAM2 op_%lx_%d_comp_nf(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str);
- }
- else {
- fprintf (stblfile, "{ op_%lx_%d_comp_ff, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, opcode_str);
- fprintf (headerfile, "extern compop_func op_%lx_%d_comp_ff;\n", opcode, postfix);
- printf ("void REGPARAM2 op_%lx_%d_comp_ff(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, opcode_str);
- }
- com_flush();
- }
- }
- opcode_next_clev[rp] = next_cpu_level;
- opcode_last_postfix[rp] = postfix;
-}
-
-static void
-generate_func (int noflags)
-{
- int i, j, rp;
-
- using_prefetch = 0;
- using_exception_3 = 0;
- for (i = 0; i < 1; i++) /* We only do one level! */
- {
- cpu_level = 4 - i;
- postfix = i;
-
- if (noflags)
- fprintf (stblfile, "struct comptbl op_smalltbl_%d_comp_nf[] = {\n", postfix);
- else
- fprintf (stblfile, "struct comptbl op_smalltbl_%d_comp_ff[] = {\n", postfix);
-
-
- /* sam: this is for people with low memory (eg. me :)) */
- !printf ("\n"
- "#if !defined(PART_1) && !defined(PART_2) && "
- "!defined(PART_3) && !defined(PART_4) && "
- "!defined(PART_5) && !defined(PART_6) && "
- "!defined(PART_7) && !defined(PART_8)"
- "\n"
- "#define PART_1 1\n"
- "#define PART_2 1\n"
- "#define PART_3 1\n"
- "#define PART_4 1\n"
- "#define PART_5 1\n"
- "#define PART_6 1\n"
- "#define PART_7 1\n"
- "#define PART_8 1\n"
- "#endif\n\n");
-
- rp = 0;
- for (j = 1; j <= 8; ++j)
- {
- int k = (j * nr_cpuop_funcs) / 8;
- printf ("#ifdef PART_%d\n", j);
- for (; rp < k; rp++)
- generate_one_opcode (rp,noflags);
- printf ("#endif\n\n");
- }
-
- fprintf (stblfile, "{ 0, 0,65536 }};\n");
- }
-
-}
-
-/* Allocate the per-run work tables; returns non-zero when all four succeeded. */
-static int
-alloc_gen_tables (void)
-{
-    opcode_map = (int *) malloc (sizeof (int) * nr_cpuop_funcs);
-    opcode_last_postfix = (int *) malloc (sizeof (int) * nr_cpuop_funcs);
-    opcode_next_clev = (int *) malloc (sizeof (int) * nr_cpuop_funcs);
-    counts = (unsigned long *) malloc (65536 * sizeof (unsigned long));
-    return opcode_map != NULL && opcode_last_postfix != NULL
-        && opcode_next_clev != NULL && counts != NULL;
-}
-
-/* Release the tables obtained by alloc_gen_tables(). */
-static void
-free_gen_tables (void)
-{
-    free(opcode_map);
-    free(opcode_last_postfix);
-    free(opcode_next_clev);
-    free(counts);
-}
-
-/*
- * Generator entry point.
- *
- * Reads the 68k table, then writes three files into the current directory:
- * comptbl.h (through headerfile), compstbl.cpp (through stblfile) and
- * compemu.cpp (stdout is redirected there with freopen()).  The handlers
- * are generated twice: first with noflags = 0 (the "_ff" tables), then
- * with noflags = 1 (the "_nf" tables); the work tables are rebuilt and
- * the counts re-read before each pass.
- *
- * Returns 0 on success and 1 when an allocation fails or an output file
- * cannot be opened or closed.  argc and argv are unused.
- */
-int
-main (int argc, char **argv)
-{
-    (void) argc;
-    (void) argv;
-
-    read_table68k ();
-    do_merges ();
-
-    if (!alloc_gen_tables ()) {
-        fprintf (stderr, "out of memory\n");
-        return 1;
-    }
-    read_counts ();
-
-    /* It would be a lot nicer to put all in one file (we'd also get rid of
-     * cputbl.h that way), but cpuopti can't cope.  That could be fixed, but
-     * I don't dare to touch the 68k version.  */
-
-    headerfile = fopen ("comptbl.h", "wb");
-    if (headerfile == NULL) {
-        perror ("comptbl.h");
-        return 1;
-    }
-    stblfile = fopen ("compstbl.cpp", "wb");
-    if (stblfile == NULL) {
-        perror ("compstbl.cpp");
-        return 1;
-    }
-    /* From here on everything printed to stdout lands in compemu.cpp, so
-     * diagnostics must go to stderr. */
-    if (freopen ("compemu.cpp", "wb", stdout) == NULL) {
-        perror ("compemu.cpp");
-        return 1;
-    }
-
-    generate_includes (stdout);
-    generate_includes (stblfile);
-
-    printf("#include \"compiler/compemu.h\"\n");
-
-    /* Pass 1: flag-generating handlers. */
-    noflags=0;
-    generate_func (noflags);
-    free_gen_tables ();
-
-    /* Pass 2: start again from fresh tables for the no-flags handlers. */
-    if (!alloc_gen_tables ()) {
-        fprintf (stderr, "out of memory\n");
-        return 1;
-    }
-    read_counts ();
-    noflags=1;
-    generate_func (noflags);
-    free_gen_tables ();
-
-    free (table68k);
-
-    /* fclose()/fflush() are where buffered write errors surface. */
-    int failed = 0;
-    if (fclose (stblfile) != 0)
-        failed = 1;
-    if (fclose (headerfile) != 0)
-        failed = 1;
-    if (fflush (stdout) != 0)
-        failed = 1;
-    if (failed) {
-        fprintf (stderr, "error while writing generated files\n");
-        return 1;
-    }
-    return 0;
-}
+++ /dev/null
-/******************** -*- mode: C; tab-width: 8 -*- ********************
- *
- * Dumb and Brute Force Run-time assembler verifier for IA-32 and AMD64
- *
- ***********************************************************************/
-
-
-/***********************************************************************
- *
- * Copyright 2004-2008 Gwenole Beauchesne
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- ***********************************************************************/
-
-/*
- * STATUS: 26M variations covering unary register based operations,
- * reg/reg operations, imm/reg operations.
- *
- * TODO:
- * - Rewrite to use internal BFD/opcodes format instead of string compares
- * - Add reg/mem, imm/mem variations
- */
-
-#define _BSD_SOURCE 1
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <ctype.h>
-#include <errno.h>
-
-#include "sysdeps.h"
-
-/* Diagnostic verbosity: values above 1 enable the prefix-skip notices in
- * parse_insn() and the byte/text dump in show_instruction(). */
-static int verbose = 2;
-
-/* Compile-time test selection.  Each top-level switch enables the
- * sub-switches of its group; the sub-switches are tested with #if around
- * the individual test sections of main() (e.g. TEST_INST_ALU_REG guards
- * the "reg forms" section). */
-#define TEST_INST_ALU 1
-#define TEST_INST_FPU 1
-#define TEST_INST_MMX 1
-#define TEST_INST_SSE 1
-#if TEST_INST_ALU
-#define TEST_INST_ALU_REG 1
-#define TEST_INST_ALU_REG_REG 1
-#define TEST_INST_ALU_CNT_REG 1
-#define TEST_INST_ALU_IMM_REG 1
-#define TEST_INST_ALU_MEM_REG 1
-#endif
-#if TEST_INST_FPU
-#define TEST_INST_FPU_UNARY 1
-#define TEST_INST_FPU_REG 1
-#define TEST_INST_FPU_MEM 1
-#endif
-#if TEST_INST_MMX
-#define TEST_INST_MMX_REG_REG 1
-#define TEST_INST_MMX_IMM_REG 1
-#define TEST_INST_MMX_MEM_REG 1
-#endif
-#if TEST_INST_SSE
-#define TEST_INST_SSE_REG 1
-#define TEST_INST_SSE_REG_REG 1
-#define TEST_INST_SSE_MEM_REG 1
-#endif
-
-/* Replace abort() with a wrapper that first reports the file and line of
- * the call site on stderr.  The parenthesised (abort)() is not treated as
- * an invocation of this function-like macro, so it reaches the real
- * library function. */
-#undef abort
-#define abort() do { \
- fprintf(stderr, "ABORT: %s, line %d\n", __FILE__, __LINE__); \
- (abort)(); \
-} while (0)
-
-/* Settings defined ahead of the code generator header; presumably they
- * configure it (confirm against compiler/codegen_x86.h).
- * X86_TARGET_64BIT is also tested throughout this file. */
-#define X86_TARGET_64BIT 1
-#define X86_FLAT_REGISTERS 0
-#define X86_OPTIMIZE_ALU 1
-#define X86_OPTIMIZE_ROTSHI 1
-#define X86_RIP_RELATIVE_ADDR 0
-#include "compiler/codegen_x86.h"
-
-/* Number of registers iterated per register class by the tests. */
-#if X86_TARGET_64BIT
-#define X86_MAX_ALU_REGS 16
-#define X86_MAX_SSE_REGS 16
-#else
-#define X86_MAX_ALU_REGS 8
-#define X86_MAX_SSE_REGS 8
-#endif
-#define X86_MAX_FPU_REGS 8
-#define X86_MAX_MMX_REGS 8
-
-/* VALID_REG(r, b, n): true when r lies in [X86_b, X86_b + n).  The
- * unsigned subtraction makes values below the base wrap to a large number
- * and fail the single comparison. */
-#define VALID_REG(r, b, n) (((unsigned)((r) - X86_##b)) < (n))
-#if X86_TARGET_64BIT
-/* 64-bit: 16 ids counted from AL, or 4 ids counted from AH. */
-#define VALID_REG8(r) (VALID_REG(r, AL, 16) || VALID_REG(r, AH, 4))
-#define VALID_REG64(r) VALID_REG(r, RAX, X86_MAX_ALU_REGS)
-#else
-/* 32-bit: 4 ids counted from AL, or 4 ids counted from AH; no 64-bit
- * registers. */
-#define VALID_REG8(r) (VALID_REG(r, AL, 4) || VALID_REG(r, AH, 4))
-#define VALID_REG64(r) (0)
-#endif
-#define VALID_REG16(r) VALID_REG(r, AX, X86_MAX_ALU_REGS)
-#define VALID_REG32(r) VALID_REG(r, EAX, X86_MAX_ALU_REGS)
-
-/* Bind the x86_emit_* and related names to the local emitters and failure
- * handler defined below; presumably these are the hooks the generator
- * macros call (confirm against the header). */
-#define x86_emit_byte(B) emit_byte(B)
-#define x86_emit_word(W) emit_word(W)
-#define x86_emit_long(L) emit_long(L)
-#define x86_emit_quad(Q) emit_quad(Q)
-#define x86_get_target() get_target()
-#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__)
-
-/*
- * Report a code generation failure on stderr, naming the function, file
- * and line passed in by the x86_emit_failure() macro together with msg,
- * then terminate through abort().
- */
-static void jit_fail(const char *msg, const char *file, int line, const char *function)
-{
- fprintf(stderr, "JIT failure in function %s from file %s at line %d: %s\n",
- function, file, line, msg);
- abort();
-}
-
-/* Write cursor: the next byte of generated code is stored here. */
-static uint8 *target;
-
-/* Store one byte at the cursor and advance the cursor past it. */
-static inline void emit_byte(uint8 x)
-{
-    target[0] = x;
-    target += 1;
-}
-
-/*
- * Store a 16-bit value at the cursor in host byte order and advance the
- * cursor by two bytes.  memcpy() is used because the cursor is a byte
- * pointer with no alignment guarantee; storing through a cast uint16
- * pointer would be a misaligned, type-punned access.
- */
-static inline void emit_word(uint16 x)
-{
-    memcpy(target, &x, sizeof(x));
-    target += sizeof(x);
-}
-
-/*
- * Store a 32-bit value at the cursor in host byte order and advance the
- * cursor by four bytes.  memcpy() is used because the cursor is a byte
- * pointer with no alignment guarantee; storing through a cast uint32
- * pointer would be a misaligned, type-punned access.
- */
-static inline void emit_long(uint32 x)
-{
-    memcpy(target, &x, sizeof(x));
-    target += sizeof(x);
-}
-
-/*
- * Store a 64-bit value at the cursor in host byte order and advance the
- * cursor by eight bytes.  memcpy() is used because the cursor is a byte
- * pointer with no alignment guarantee; storing through a cast uint64
- * pointer would be a misaligned, type-punned access.
- */
-static inline void emit_quad(uint64 x)
-{
-    memcpy(target, &x, sizeof(x));
-    target += sizeof(x);
-}
-
-/* Point the emit cursor at t; subsequent emit_*() calls write from there. */
-static inline void set_target(uint8 *t)
-{
- target = t;
-}
-
-/* Return the current emit cursor (one past the last byte written). */
-static inline uint8 *get_target(void)
-{
- return target;
-}
-
-/* Read the byte stored at host address addr and return it widened to
- * 32 bits. */
-static uint32 mon_read_byte(uintptr addr)
-{
-    const uint8 value = *(uint8 *)addr;
-    return (uint32)value;
-}
-
-extern "C" {
-#include "disass/dis-asm.h"
-
-/*
- * Memory-read callback for the disassembler: copy length bytes starting at
- * address from into to, one byte at a time through mon_read_byte().
- * Always returns 0; info is unused.
- */
-int buffer_read_memory(bfd_vma from, bfd_byte *to, unsigned int length, struct disassemble_info *info)
-{
-    for (; length != 0; --length) {
-        *to = mon_read_byte(from);
-        ++to;
-        ++from;
-    }
-    return 0;
-}
-
-/* Error callback for the disassembler: print the status code through the
- * print function and stream stored in info.  memaddr is not used. */
-void perror_memory(int status, bfd_vma memaddr, struct disassemble_info *info)
-{
- info->fprintf_func(info->stream, "Unknown error %d\n", status);
-}
-
-/*
- * Address-printing callback for the disassembler: print addr as '$'
- * followed by hex digits, 8 digits when it fits in 32 bits and 16 digits
- * (high word, then low word) otherwise.
- */
-void generic_print_address(bfd_vma addr, struct disassemble_info *info)
-{
-    const uint32 low = (uint32)addr;
-
-    if (addr < UVAL64(0x100000000)) {
-        info->fprintf_func(info->stream, "$%08x", low);
-        return;
-    }
-
-    const uint32 high = (uint32)(addr >> 32);
-    info->fprintf_func(info->stream, "$%08x%08x", high, low);
-}
-
-/* Symbol lookup callback for the disassembler: always returns 0, both
- * arguments are ignored. */
-int generic_symbol_at_address(bfd_vma addr, struct disassemble_info *info)
-{
- return 0;
-}
-}
-
-/* Minimal string "stream" that stands in for a FILE when the disassembler
- * prints: mon_sprintf() appends at current and advances it. */
-struct SFILE {
- char *buffer; /* start of the output buffer */
- char *current; /* where the next formatted text is written */
-};
-
-/*
- * printf-style append into an SFILE: format the arguments at f->current,
- * advance f->current past the new text, and return the number of
- * characters appended.  No buffer size is known here, so the caller's
- * buffer must be large enough.
- */
-static int mon_sprintf(SFILE *f, const char *format, ...)
-{
-    va_list ap;
-
-    va_start(ap, format);
-    vsprintf(f->current, format, ap);
-    va_end(ap);
-
-    /* The length is measured on the result instead of taken from vsprintf(). */
-    const int appended = strlen(f->current);
-    f->current += appended;
-    return appended;
-}
-
-/*
- * Disassemble the instruction at host address adr into buf as text.
- *
- * An SFILE positioned at the start of buf is handed to the disassembler as
- * its stream, with mon_sprintf() as the print function, so the text is
- * written into buf.  No size is passed; buf must be large enough.
- * Returns the value of print_insn_i386(); main() advances its code pointer
- * by that amount.
- */
-static int disass_x86(char *buf, uintptr adr)
-{
- disassemble_info info;
- SFILE sfile;
- sfile.buffer = buf;
- sfile.current = buf;
- INIT_DISASSEMBLE_INFO(info, (FILE *)&sfile, (fprintf_ftype)mon_sprintf);
- /* Select the machine matching the code generator configuration. */
- info.mach = X86_TARGET_64BIT ? bfd_mach_x86_64 : bfd_mach_i386_i386;
- /* NOTE(review): presumably asks for size-suffixed mnemonics, which the
-  * expected names used by the tests ("notb", "notw", ...) rely on --
-  * confirm against the disassembler. */
- info.disassembler_options = "suffix";
- return print_insn_i386(adr, &info);
-}
-
-/* Selectors for operand_t::fill(): each names the operand_t field that
- * receives the value. */
-enum {
- op_disp,
- op_reg,
- op_base,
- op_index,
- op_scale,
- op_imm,
-};
-/*
- * One parsed instruction operand.  A register operand has reg != -1;
- * otherwise pretty_print() shows it as a memory reference built from
- * disp, base, index and scale.  imm holds the value parsed after '$'.
- */
-struct operand_t {
- int32 disp;
- int8 reg;
- int8 base;
- int8 index;
- int8 scale;
- int64 imm;
-
- /* Reset to "empty": zero displacement and immediate, no registers (-1),
-  * scale factor 1. */
- void clear() {
- disp = imm = 0;
- reg = base = index = -1;
- scale = 1;
- }
-
- /* Store value into the field selected by optype (one of the op_*
-  * selectors); any other selector aborts. */
- void fill(int optype, int value) {
- switch (optype) {
- case op_disp: disp = value; break;
- case op_reg: reg = value; break;
- case op_base: base = value; break;
- case op_index: index = value; break;
- case op_scale: scale = value; break;
- case op_imm: imm = value; break;
- default: abort();
- }
- }
-};
-
-/* Capacity of the per-batch tables in main() (insns[], modes[]) and,
- * multiplied by MAX_INSN_LENGTH, of the code buffer. */
-#define MAX_INSNS 1024
-/* Bytes reserved per generated instruction; also the number of bytes
- * dumped by show_instruction(). */
-#define MAX_INSN_LENGTH 16
-/* Size of insn_t::operands. */
-#define MAX_INSN_OPERANDS 3
-
-/*
- * One parsed disassembly line: the mnemonic and up to MAX_INSN_OPERANDS
- * operands, in the order they appear in the text.
- */
-struct insn_t {
- char name[16]; /* mnemonic, NUL-padded by clear() */
- int n_operands; /* number of valid entries in operands[] */
- operand_t operands[MAX_INSN_OPERANDS];
-
- /* Reset to an empty instruction: blank name, zero operands, every
-  * operand slot cleared. */
- void clear() {
- memset(name, 0, sizeof(name));
- n_operands = 0;
- for (int i = 0; i < MAX_INSN_OPERANDS; i++)
- operands[i].clear();
- }
-
- /* Debug dump to stdout: the mnemonic and operand count, then one line
-  * per operand -- "reg rN" for register operands, otherwise
-  * "mem disp(base,index,scale)" with absent parts left empty. */
- void pretty_print() {
- printf("%s, %d operands\n", name, n_operands);
- for (int i = 0; i < n_operands; i++) {
- operand_t *op = &operands[i];
- if (op->reg != -1)
- printf(" reg r%d\n", op->reg);
- else {
- printf(" mem 0x%08x(", op->disp);
- if (op->base != -1)
- printf("r%d", op->base);
- printf(",");
- if (op->index != -1)
- printf("r%d", op->index);
- printf(",");
- /* The scale is only printed when a base or index register exists. */
- if (op->base != -1 || op->index != -1)
- printf("%d", op->scale);
- printf(")\n");
- }
- }
- }
-};
-
-/*
- * Return a pointer to the first whitespace character at or after p, or to
- * the terminating NUL when the rest of the string has none.
- * The character is converted to unsigned char before isspace(): passing a
- * negative plain char to the <ctype.h> functions is undefined.
- */
-static inline char *find_blanks(char *p)
-{
-    while (*p != '\0' && !isspace((unsigned char)*p))
-        ++p;
-    return p;
-}
-
-/*
- * Return a pointer to the first non-whitespace character at or after p
- * (possibly the terminating NUL).
- * The character is converted to unsigned char before isspace(): passing a
- * negative plain char to the <ctype.h> functions is undefined.
- */
-static inline char *skip_blanks(char *p)
-{
-    while (*p != '\0' && isspace((unsigned char)*p))
-        ++p;
-    return p;
-}
-
-/*
- * Decode the register name at the start of buf (both callers have already
- * stepped over the '%' sigil) and store its X86_* id into the field of *op
- * selected by optype.
- *
- * Matching is case-insensitive and dispatches on the first character; each
- * branch sets len to the number of name characters it consumes.
- * Returns that length on success, or X86_NOREG when nothing matched; the
- * callers treat any result <= 0 as a failure.
- */
-static int parse_reg(operand_t *op, int optype, char *buf)
-{
- int reg = X86_NOREG;
- int len = 0;
- char *p = buf;
- switch (p[0]) {
- /* al / ah / ax */
- case 'a': case 'A':
- len = 2;
- switch (p[1]) {
- case 'l': case 'L': reg = X86_AL; break;
- case 'h': case 'H': reg = X86_AH; break;
- case 'x': case 'X': reg = X86_AX; break;
- }
- break;
- /* bl / bh / bx / bp, plus bpl on 64-bit */
- case 'b': case 'B':
- len = 2;
- switch (p[1]) {
- case 'l': case 'L': reg = X86_BL; break;
- case 'h': case 'H': reg = X86_BH; break;
- case 'x': case 'X': reg = X86_BX; break;
- case 'p': case 'P':
- switch (p[2]) {
-#if X86_TARGET_64BIT
- case 'l': case 'L': reg = X86_BPL, ++len; break;
-#endif
- default: reg = X86_BP; break;
- }
- break;
- }
- break;
- /* cl / ch / cx */
- case 'c': case 'C':
- len = 2;
- switch (p[1]) {
- case 'l': case 'L': reg = X86_CL; break;
- case 'h': case 'H': reg = X86_CH; break;
- case 'x': case 'X': reg = X86_CX; break;
- }
- break;
- /* dl / dh / dx / di, plus dil on 64-bit */
- case 'd': case 'D':
- len = 2;
- switch (p[1]) {
- case 'l': case 'L': reg = X86_DL; break;
- case 'h': case 'H': reg = X86_DH; break;
- case 'x': case 'X': reg = X86_DX; break;
- case 'i': case 'I':
- switch (p[2]) {
-#if X86_TARGET_64BIT
- case 'l': case 'L': reg = X86_DIL; ++len; break;
-#endif
- default: reg = X86_DI; break;
- }
- break;
- }
- break;
- /* sp / si / st / st(N), plus spl / sil on 64-bit.  The third character
-  * is examined first because it tells the three-letter and st(N) forms
-  * apart from the two-letter ones. */
- case 's': case 'S':
- len = 2;
- switch (p[2]) {
-#if X86_TARGET_64BIT
- case 'l': case 'L':
- ++len;
- switch (p[1]) {
- case 'p': case 'P': reg = X86_SPL; break;
- case 'i': case 'I': reg = X86_SIL; break;
- }
- break;
-#endif
- case '(':
- /* st(N): a single digit between the parentheses, 5 characters total */
- if ((p[1] == 't' || p[1] == 'T') && isdigit(p[3]) && p[4] == ')')
- len += 3, reg = X86_ST0 + (p[3] - '0');
- break;
- default:
- switch (p[1]) {
- case 't': case 'T': reg = X86_ST0; break;
- case 'p': case 'P': reg = X86_SP; break;
- case 'i': case 'I': reg = X86_SI; break;
- }
- break;
- }
- break;
- /* 32-bit registers: e?x, e?i, e?p -- selected by the third character,
-  * then the second */
- case 'e': case 'E':
- len = 3;
- switch (p[2]) {
- case 'x': case 'X':
- switch (p[1]) {
- case 'a': case 'A': reg = X86_EAX; break;
- case 'b': case 'B': reg = X86_EBX; break;
- case 'c': case 'C': reg = X86_ECX; break;
- case 'd': case 'D': reg = X86_EDX; break;
- }
- break;
- case 'i': case 'I':
- switch (p[1]) {
- case 's': case 'S': reg = X86_ESI; break;
- case 'd': case 'D': reg = X86_EDI; break;
- }
- break;
- case 'p': case 'P':
- switch (p[1]) {
- case 'b': case 'B': reg = X86_EBP; break;
- case 's': case 'S': reg = X86_ESP; break;
- }
- break;
- }
- break;
-#if X86_TARGET_64BIT
- /* 64-bit registers: r?x, r?i, r?p, and r8..r15 with optional b/w/d
-  * size suffix */
- case 'r': case 'R':
- len = 3;
- switch (p[2]) {
- case 'x': case 'X':
- switch (p[1]) {
- case 'a': case 'A': reg = X86_RAX; break;
- case 'b': case 'B': reg = X86_RBX; break;
- case 'c': case 'C': reg = X86_RCX; break;
- case 'd': case 'D': reg = X86_RDX; break;
- }
- break;
- case 'i': case 'I':
- switch (p[1]) {
- case 's': case 'S': reg = X86_RSI; break;
- case 'd': case 'D': reg = X86_RDI; break;
- }
- break;
- case 'p': case 'P':
- switch (p[1]) {
- case 'b': case 'B': reg = X86_RBP; break;
- case 's': case 'S': reg = X86_RSP; break;
- }
- break;
- /* r8b / r9b */
- case 'b': case 'B':
- switch (p[1]) {
- case '8': reg = X86_R8B; break;
- case '9': reg = X86_R9B; break;
- }
- break;
- /* r8w / r9w */
- case 'w': case 'W':
- switch (p[1]) {
- case '8': reg = X86_R8W; break;
- case '9': reg = X86_R9W; break;
- }
- break;
- /* r8d / r9d */
- case 'd': case 'D':
- switch (p[1]) {
- case '8': reg = X86_R8D; break;
- case '9': reg = X86_R9D; break;
- }
- break;
- /* r10..r15: second digit 0..5 after a leading '1'; a b/w/d suffix adds
-  * one more character */
- case '0': case '1': case '2': case '3': case '4': case '5':
- if (p[1] == '1') {
- const int r = p[2] - '0';
- switch (p[3]) {
- case 'b': case 'B': reg = X86_R10B + r, ++len; break;
- case 'w': case 'W': reg = X86_R10W + r, ++len; break;
- case 'd': case 'D': reg = X86_R10D + r, ++len; break;
- default: reg = X86_R10 + r; break;
- }
- }
- break;
- /* plain r8 / r9 (two characters) */
- default:
- switch (p[1]) {
- case '8': reg = X86_R8, len = 2; break;
- case '9': reg = X86_R9, len = 2; break;
- }
- break;
- }
- break;
-#endif
- /* mmN, single digit */
- case 'm': case 'M':
- if ((p[1] == 'm' || p[1] == 'M') && isdigit(p[2]))
- reg = X86_MM0 + (p[2] - '0'), len = 3;
- break;
- /* xmmN; on 64-bit the two-digit form xmm1N is tried first */
- case 'x': case 'X':
- if ((p[1] == 'm' || p[1] == 'M') && (p[2] == 'm' || p[2] == 'M')) {
-#if X86_TARGET_64BIT
- if (p[3] == '1' && isdigit(p[4]))
- reg = X86_XMM10 + (p[4] - '0'), len = 5;
- else
-#endif
- if (isdigit(p[3]))
- reg = X86_XMM0 + (p[3] - '0'), len = 4;
- }
- break;
- }
-
- /* Only commit the register when a branch actually recognised a name. */
- if (len > 0 && reg != X86_NOREG) {
- op->fill(optype, reg);
- return len;
- }
-
- return X86_NOREG;
-}
-
-/*
- * Parse an unsigned integer at nptr in the given base (0 = auto-detect
- * from a 0x / 0 prefix) and store the end of the number in *endptr.
- *
- * The base argument used to be ignored, with 0 always passed to the
- * conversion.  Existing callers pass 0, or 16 for text that starts with
- * "0x", and both conversions accept that prefix, so their results are
- * unchanged.
- *
- * Aborts when the conversion sets errno.  On a 64-bit target where
- * unsigned long is not 8 bytes wide the value is parsed as unsigned long
- * long and narrowed by the return type.
- */
-static unsigned long parse_imm(char *nptr, char **endptr, int base = 0)
-{
-    /* errno is the only failure indicator checked, so clear it first. */
-    errno = 0;
-#if X86_TARGET_64BIT
-    if (sizeof(unsigned long) != 8) {
-        unsigned long long val = strtoull(nptr, endptr, base);
-        if (errno == 0)
-            return val;
-        abort();
-    }
-#endif
-    unsigned long val = strtoul(nptr, endptr, base);
-    if (errno == 0)
-        return val;
-    abort();
-    return 0;
-}
-
-/*
- * Parse a memory operand of the form  [0xDISP][(%base[,%index,scale])]
- * at buf into op->disp, op->base, op->index and op->scale.
- *
- * Returns the number of characters consumed, or a negative code:
- *   -3 bad base register, -4 bad index register,
- *   -5 missing ',' after the index register, -6 missing ')'.
- * When buf starts with neither "0x" nor '(' nothing is consumed and 0 is
- * returned, which parse_insn() also treats as an error.
- */
-static int parse_mem(operand_t *op, char *buf)
-{
- char *p = buf;
-
- /* Optional hexadecimal displacement; p is moved past it. */
- if (strncmp(buf, "0x", 2) == 0)
- op->disp = parse_imm(buf, &p, 16);
-
- if (*p == '(') {
- p++;
-
- /* Optional base register. */
- if (*p == '%') {
- p++;
-
- int n = parse_reg(op, op_base, p);
- if (n <= 0)
- return -3;
- p += n;
- }
-
- if (*p == ',') {
- p++;
-
- /* Either "%index,scale" or a bare scale directly after the comma. */
- if (*p == '%') {
- int n = parse_reg(op, op_index, ++p);
- if (n <= 0)
- return -4;
- p += n;
-
- if (*p != ',')
- return -5;
- p++;
-
- /* Share the scale parsing with the bare-scale branch below. */
- goto do_parse_scale;
- }
- else if (isdigit(*p)) {
- do_parse_scale:
- /* NOTE(review): errno is not cleared in this function before strtol(),
-  * so the EINVAL test can observe a stale value -- confirm. */
- long val = strtol(p, &p, 10);
- if (val == 0 && errno == EINVAL)
- abort();
- op->scale = val;
- }
- }
-
- if (*p != ')')
- return -6;
- p++;
- }
-
- return p - buf;
-}
-
-/* Step over one prefix token at p (optionally logging it as a `kind`
- * prefix) and return the start of the next token. */
-static char *skip_insn_prefix(char *p, const char *buf, const char *kind)
-{
-    char *q = find_blanks(p);
-    if (verbose > 1) {
-        char prefix[16];
-        size_t len = q - p;
-        /* Clamp so an unexpectedly long token cannot overrun prefix[]. */
-        if (len > sizeof(prefix) - 1)
-            len = sizeof(prefix) - 1;
-        memset(prefix, 0, sizeof(prefix));
-        memcpy(prefix, p, len);
-        fprintf(stderr, "Instruction '%s', skip %s prefix '%s'\n", buf, kind, prefix);
-    }
-    return skip_blanks(q);
-}
-
-/*
- * Parse one line of disassembler output (AT&T syntax) into *ii.
- *
- * A leading "rex64..." and/or "rep..." prefix token is skipped, the next
- * token becomes the mnemonic, and the rest is split into operands at
- * commas: '%' starts a register, '0' or '(' a memory reference, '$' an
- * immediate; '*' and blanks are ignored.  A line with no operand text
- * leaves n_operands at 0.
- *
- * Changes from the previous version: the mnemonic copy stops at the end
- * of the string and is truncated to fit ii->name (it used to rely on a
- * trailing blank and had no bound), and more than MAX_INSN_OPERANDS
- * operands now aborts instead of writing past ii->operands.
- *
- * Any syntax error is reported on stderr and aborts.
- */
-static void parse_insn(insn_t *ii, char *buf)
-{
-    char *p = buf;
-    ii->clear();
-
-#if 0
-    printf("BUF: %s\n", buf);
-#endif
-
-    if (strncmp(p, "rex64", 5) == 0)
-        p = skip_insn_prefix(p, buf, "REX");
-
-    if (strncmp(p, "rep", 3) == 0)
-        p = skip_insn_prefix(p, buf, "REP");
-
-    /* Mnemonic: clear() zeroed name[], so leaving the last byte untouched
-     * keeps it NUL-terminated. */
-    size_t name_len = 0;
-    while (*p != '\0' && !isspace((unsigned char)*p)) {
-        if (name_len < sizeof(ii->name) - 1)
-            ii->name[name_len++] = *p;
-        p++;
-    }
-
-    p = skip_blanks(p);
-    if (*p == '\0')
-        return;
-
-    int n_operands = 0;
-    int optype = op_reg;
-    bool done = false;
-    while (!done) {
-        int n;
-        switch (*p) {
-        case '%':
-            n = parse_reg(&ii->operands[n_operands], optype, ++p);
-            if (n <= 0) {
-                fprintf(stderr, "parse_reg(%s) error %d\n", p, n);
-                abort();
-            }
-            p += n;
-            break;
-        case '0': case '(':
-            n = parse_mem(&ii->operands[n_operands], p);
-            if (n <= 0) {
-                fprintf(stderr, "parse_mem(%s) error %d\n", p, n);
-                abort();
-            }
-            p += n;
-            break;
-        case '$': {
-            /* Skip the '$'; parse_imm() then moves p past the number. */
-            p++;
-            ii->operands[n_operands].imm = parse_imm(p, &p, 0);
-            break;
-        }
-        case '*':
-            p++;
-            break;
-        case ',':
-            n_operands++;
-            if (n_operands >= MAX_INSN_OPERANDS) {
-                fprintf(stderr, "parse error> too many operands in '%s'\n", buf);
-                abort();
-            }
-            p++;
-            break;
-        case ' ': case '\t':
-            p++;
-            break;
-        case '\0':
-            done = true;
-            break;
-        default:
-            fprintf(stderr, "parse error> %s\n", p);
-            abort();
-        }
-    }
-    ii->n_operands = n_operands + 1;
-}
-
-/* Counters for the test section currently running; main() resets them at
- * the start of each section. */
-static unsigned long n_tests, n_failures;
-/* Running totals, accumulated from the per-section counters after each
- * section. */
-static unsigned long n_all_tests, n_all_failures;
-
-/*
- * Verify that ii is the operand-less instruction `name` (compared without
- * regard to case).  Reports the first mismatch on stderr and returns
- * false; returns true when both the name and the operand count agree.
- */
-static bool check_unary(insn_t *ii, const char *name)
-{
-    const bool name_ok = (strcasecmp(ii->name, name) == 0);
-    if (!name_ok) {
-        fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
-        return false;
-    }
-
-    const int count = ii->n_operands;
-    if (count == 0)
-        return true;
-
-    fprintf(stderr, "ERROR: instruction expected 0 operand, got %d\n", count);
-    return false;
-}
-
-/*
- * Verify that ii is instruction `name` (case-insensitive) with exactly one
- * operand, register r.  Reports the first mismatch on stderr and returns
- * false; returns true when everything agrees.
- */
-static bool check_reg(insn_t *ii, const char *name, int r)
-{
-    const bool name_ok = (strcasecmp(ii->name, name) == 0);
-    if (!name_ok) {
-        fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
-        return false;
-    }
-
-    const int count = ii->n_operands;
-    if (count != 1) {
-        fprintf(stderr, "ERROR: instruction expected 1 operand, got %d\n", count);
-        return false;
-    }
-
-    const int actual = ii->operands[0].reg;
-    if (actual == r)
-        return true;
-
-    /* -1 marks an operand slot that holds no register. */
-    fprintf(stderr, "ERROR: instruction expected r%d as source, got ", r);
-    if (actual != -1)
-        fprintf(stderr, "r%d\n", actual);
-    else
-        fprintf(stderr, "nothing\n");
-    return false;
-}
-
-/*
- * Verify that ii is instruction `name` (case-insensitive) with two
- * register operands: s first (source), d second (destination).  Reports
- * the first mismatch on stderr and returns false; returns true when
- * everything agrees.
- */
-static bool check_reg_reg(insn_t *ii, const char *name, int s, int d)
-{
-    const bool name_ok = (strcasecmp(ii->name, name) == 0);
-    if (!name_ok) {
-        fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
-        return false;
-    }
-
-    const int count = ii->n_operands;
-    if (count != 2) {
-        fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", count);
-        return false;
-    }
-
-    const int actual_src = ii->operands[0].reg;
-    const int actual_dst = ii->operands[1].reg;
-
-    /* The source is checked, and reported, before the destination. */
-    if (actual_src != s) {
-        fprintf(stderr, "ERROR: instruction expected r%d as source, got ", s);
-        if (actual_src != -1)
-            fprintf(stderr, "r%d\n", actual_src);
-        else
-            fprintf(stderr, "nothing\n");
-        return false;
-    }
-
-    if (actual_dst != d) {
-        fprintf(stderr, "ERROR: instruction expected r%d as destination, got ", d);
-        if (actual_dst != -1)
-            fprintf(stderr, "r%d\n", actual_dst);
-        else
-            fprintf(stderr, "nothing\n");
-        return false;
-    }
-
-    return true;
-}
-
-/*
- * Verify that ii is instruction `name` (case-insensitive) with an
- * immediate first operand equal to v and register d as second operand.
- *
- * mode is the immediate width in bytes used to truncate v before the
- * comparison (1 -> low 8 bits, 2 -> low 16 bits, anything else -> no
- * truncation).  With the default mode of -1 the width is derived from the
- * last letter of name: b, w, l or q.
- *
- * Reports the first mismatch on stderr and returns false; returns true
- * when everything agrees.
- */
-static bool check_imm_reg(insn_t *ii, const char *name, uint32 v, int d, int mode = -1)
-{
-    if (strcasecmp(ii->name, name) != 0) {
-        fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
-        return false;
-    }
-
-    if (ii->n_operands != 2) {
-        fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", ii->n_operands);
-        return false;
-    }
-
-    uint32 imm = ii->operands[0].imm;
-    int dstreg = ii->operands[1].reg;
-
-    if (mode == -1) {
-        char suffix = name[strlen(name) - 1];
-        switch (suffix) {
-        case 'b': mode = 1; break;
-        case 'w': mode = 2; break;
-        case 'l': mode = 4; break;
-        case 'q': mode = 8; break;
-        }
-    }
-    switch (mode) {
-    case 1: v &= 0xff; break;
-    case 2: v &= 0xffff; break;
-    }
-
-    if (imm != v) {
-        /* An unparsed immediate is 0 (operand_t::clear()), never -1, and
-         * 0xffffffff is a legitimate value, so the value is always printed;
-         * the old `imm == -1` test reported 0xffffffff as "nothing". */
-        fprintf(stderr, "ERROR: instruction expected 0x%08x as immediate, got 0x%08x\n", v, imm);
-        return false;
-    }
-
-    if (dstreg != d) {
-        fprintf(stderr, "ERROR: instruction expected r%d as destination, got ", d);
-        if (dstreg == -1)
-            fprintf(stderr, "nothing\n");
-        else
-            fprintf(stderr, "r%d\n", dstreg);	/* "r" prefix, as in check_reg_reg() */
-        return false;
-    }
-
-    return true;
-}
-
-/*
- * Compare the memory operand in slot Mpos of ii against the expected
- * displacement D, base register B, index register I and scale factor S,
- * in that order.  Reports the first mismatch on stderr and returns false;
- * returns true when all four agree.
- */
-static bool do_check_mem(insn_t *ii, uint32 D, int B, int I, int S, int Mpos)
-{
-    const operand_t &m = ii->operands[Mpos];
-
-    /* The parsed displacement is signed; compare it as unsigned 32 bits. */
-    const uint32 disp = m.disp;
-    if (disp != D) {
-        fprintf(stderr, "ERROR: instruction expected 0x%08x as displacement, got 0x%08x\n", D, disp);
-        return false;
-    }
-
-    const int base = m.base;
-    if (base != B) {
-        fprintf(stderr, "ERROR: instruction expected r%d as base, got r%d\n", B, base);
-        return false;
-    }
-
-    const int index = m.index;
-    if (index != I) {
-        fprintf(stderr, "ERROR: instruction expected r%d as index, got r%d\n", I, index);
-        return false;
-    }
-
-    const int scale = m.scale;
-    if (scale != S) {
-        fprintf(stderr, "ERROR: instruction expected %d as scale factor, got %d\n", S, scale);
-        return false;
-    }
-
-    return true;
-}
-
-/*
- * Verify that ii is instruction `name` (case-insensitive) with a single
- * memory operand D(B,I,S).  Reports the first mismatch on stderr and
- * returns false; returns true when everything agrees.
- */
-static bool check_mem(insn_t *ii, const char *name, uint32 D, int B, int I, int S)
-{
-    const bool name_ok = (strcasecmp(ii->name, name) == 0);
-    if (!name_ok) {
-        fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
-        return false;
-    }
-
-    const int count = ii->n_operands;
-    if (count != 1) {
-        fprintf(stderr, "ERROR: instruction expected 1 operand, got %d\n", count);
-        return false;
-    }
-
-    /* The memory operand is in slot 0. */
-    return do_check_mem(ii, D, B, I, S, 0);
-}
-
-/*
- * Verify that ii is instruction `name` (case-insensitive) with two
- * operands: register R in slot Rpos (0 or 1) and the memory reference
- * D(B,I,S) in the other slot.  The memory operand is checked first.
- * Reports the first mismatch on stderr and returns false; returns true
- * when everything agrees.
- */
-static bool check_mem_reg(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R, int Rpos = 1)
-{
-    const bool name_ok = (strcasecmp(ii->name, name) == 0);
-    if (!name_ok) {
-        fprintf(stderr, "ERROR: instruction mismatch, expected %s, got %s\n", name, ii->name);
-        return false;
-    }
-
-    const int count = ii->n_operands;
-    if (count != 2) {
-        fprintf(stderr, "ERROR: instruction expected 2 operands, got %d\n", count);
-        return false;
-    }
-
-    /* Flipping the low bit of Rpos selects the other of the two slots. */
-    const int mem_slot = Rpos ^ 1;
-    if (!do_check_mem(ii, D, B, I, S, mem_slot))
-        return false;
-
-    const int actual = ii->operands[Rpos].reg;
-    if (actual == R)
-        return true;
-
-    fprintf(stderr, "ERROR: instruction expected r%d as reg operand, got r%d\n", R, actual);
-    return false;
-}
-
-/* Same check as check_mem_reg() with the operand order swapped: register
- * R in slot 0, the memory reference in slot 1. */
-static inline bool check_reg_mem(insn_t *ii, const char *name, uint32 D, int B, int I, int S, int R)
-{
- return check_mem_reg(ii, name, D, B, I, S, R, 0);
-}
-
-/*
- * When verbose is above 1, print to stderr the first MAX_INSN_LENGTH bytes
- * at `bytes` in hex, a '|' separator, and the disassembly text in
- * `buffer`.  Does nothing otherwise.
- */
-static void show_instruction(const char *buffer, const uint8 *bytes)
-{
-    if (verbose <= 1)
-        return;
-
-    /* Always dumps a full slot, regardless of the instruction's length. */
-    int j = 0;
-    while (j < MAX_INSN_LENGTH) {
-        fprintf(stderr, "%02x ", bytes[j]);
-        j++;
-    }
-    fprintf(stderr, "| ");
-    fprintf(stderr, "%s\n", buffer);
-}
-
-/*
- * Progress indicator: every N_STEPS tests, print a spinner character and
- * the running test count to stdout.  The enabled variant rewrites one
- * line in place using '\r'; the disabled variant prints a new line per
- * step.
- *
- * The count is an unsigned long and is printed with %lu; the previous %d
- * did not match the argument type.
- */
-static void show_status(unsigned long n_tests)
-{
-#if 1
-    const unsigned long N_STEPS = 100000;
-    static const char cursors[] = { '-', '\\', '|', '/' };
-    if ((n_tests % N_STEPS) == 0) {
-        /* One spinner frame per completed step, cycling through cursors[]. */
-        printf(" %c (%lu)\r", cursors[(n_tests/N_STEPS)%sizeof(cursors)], n_tests);
-        fflush(stdout);
-    }
-#else
-    const unsigned long N_STEPS = 1000000;
-    if ((n_tests % N_STEPS) == 0)
-        printf(" ... %lu\n", n_tests);
-#endif
-}
-
-int main(void)
-{
- static char buffer[1024];
- static uint8 block[MAX_INSNS * MAX_INSN_LENGTH];
- static char *insns[MAX_INSNS];
- static int modes[MAX_INSNS];
- n_all_tests = n_all_failures = 0;
-
-#if TEST_INST_ALU_REG
- printf("Testing reg forms\n");
- n_tests = n_failures = 0;
- for (int r = 0; r < X86_MAX_ALU_REGS; r++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- GENOP##r(r); \
-} while (0)
-#define GEN64(INSN, GENOP) do { \
- if (X86_TARGET_64BIT) \
- GEN(INSN, GENOP); \
-} while (0)
-#define GENA(INSN, GENOP) do { \
- if (VALID_REG8(r)) \
- GEN(INSN "b", GENOP##B); \
- GEN(INSN "w", GENOP##W); \
- GEN(INSN "l", GENOP##L); \
- GEN64(INSN "q", GENOP##Q); \
-} while (0)
- GENA("not", NOT);
- GENA("neg", NEG);
- GENA("mul", MUL);
- GENA("imul", IMUL);
- GENA("div", DIV);
- GENA("idiv", IDIV);
- GENA("dec", DEC);
- GENA("inc", INC);
- if (X86_TARGET_64BIT) {
- GEN("callq", CALLs);
- GEN("jmpq", JMPs);
- GEN("pushq", PUSHQ);
- GEN("popq", POPQ);
- }
- else {
- GEN("calll", CALLs);
- GEN("jmpl", JMPs);
- GEN("pushl", PUSHL);
- GEN("popl", POPL);
- }
- GEN("bswap", BSWAPL); // FIXME: disass bug? no suffix
- GEN64("bswap", BSWAPQ); // FIXME: disass bug? no suffix
- if (VALID_REG8(r)) {
- GEN("seto", SETO);
- GEN("setno", SETNO);
- GEN("setb", SETB);
- GEN("setae", SETAE);
- GEN("sete", SETE);
- GEN("setne", SETNE);
- GEN("setbe", SETBE);
- GEN("seta", SETA);
- GEN("sets", SETS);
- GEN("setns", SETNS);
- GEN("setp", SETP);
- GEN("setnp", SETNP);
- GEN("setl", SETL);
- GEN("setge", SETGE);
- GEN("setle", SETLE);
- GEN("setg", SETG);
- }
-#undef GENA
-#undef GEN64
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_reg(&ii, insns[i], r)) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_ALU_REG_REG
- printf("Testing reg,reg forms\n");
- n_tests = n_failures = 0;
- for (int s = 0; s < X86_MAX_ALU_REGS; s++) {
- for (int d = 0; d < X86_MAX_ALU_REGS; d++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- GENOP##rr(s, d); \
-} while (0)
-#define GEN64(INSN, GENOP) do { \
- if (X86_TARGET_64BIT) \
- GEN(INSN, GENOP); \
-} while (0)
-#define GEN1(INSN, GENOP, OP) do { \
- insns[i++] = INSN; \
- GENOP##rr(OP, s, d); \
-} while (0)
-#define GENA(INSN, GENOP) do { \
- if (VALID_REG8(s) && VALID_REG8(d)) \
- GEN(INSN "b", GENOP##B); \
- GEN(INSN "w", GENOP##W); \
- GEN(INSN "l", GENOP##L); \
- GEN64(INSN "q", GENOP##Q); \
-} while (0)
- GENA("adc", ADC);
- GENA("add", ADD);
- GENA("and", AND);
- GENA("cmp", CMP);
- GENA("or", OR);
- GENA("sbb", SBB);
- GENA("sub", SUB);
- GENA("xor", XOR);
- GENA("mov", MOV);
- GEN("btw", BTW);
- GEN("btl", BTL);
- GEN64("btq", BTQ);
- GEN("btcw", BTCW);
- GEN("btcl", BTCL);
- GEN64("btcq", BTCQ);
- GEN("btrw", BTRW);
- GEN("btrl", BTRL);
- GEN64("btrq", BTRQ);
- GEN("btsw", BTSW);
- GEN("btsl", BTSL);
- GEN64("btsq", BTSQ);
- GEN("imulw", IMULW);
- GEN("imull", IMULL);
- GEN64("imulq", IMULQ);
- GEN1("cmove", CMOVW, X86_CC_Z);
- GEN1("cmove", CMOVL, X86_CC_Z);
- if (X86_TARGET_64BIT)
- GEN1("cmove", CMOVQ, X86_CC_Z);
- GENA("test", TEST);
- GENA("cmpxchg", CMPXCHG);
- GENA("xadd", XADD);
- GENA("xchg", XCHG);
- GEN("bsfw", BSFW);
- GEN("bsfl", BSFL);
- GEN64("bsfq", BSFQ);
- GEN("bsrw", BSRW);
- GEN("bsrl", BSRL);
- GEN64("bsrq", BSRQ);
- if (VALID_REG8(s)) {
- GEN("movsbw", MOVSBW);
- GEN("movsbl", MOVSBL);
- GEN64("movsbq", MOVSBQ);
- GEN("movzbw", MOVZBW);
- GEN("movzbl", MOVZBL);
- GEN64("movzbq", MOVZBQ);
- }
- GEN("movswl", MOVSWL);
- GEN64("movswq", MOVSWQ);
- GEN("movzwl", MOVZWL);
- GEN64("movzwq", MOVZWQ);
- GEN64("movslq", MOVSLQ);
-#undef GENA
-#undef GEN1
-#undef GEN64
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_reg_reg(&ii, insns[i], s, d)) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_ALU_CNT_REG
- printf("Testing cl,reg forms\n");
- n_tests = n_failures = 0;
- for (int d = 0; d < X86_MAX_ALU_REGS; d++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- GENOP##rr(X86_CL, d); \
-} while (0)
-#define GEN64(INSN, GENOP) do { \
- if (X86_TARGET_64BIT) \
- GEN(INSN, GENOP); \
-} while (0)
-#define GENA(INSN, GENOP) do { \
- if (VALID_REG8(d)) \
- GEN(INSN "b", GENOP##B); \
- GEN(INSN "w", GENOP##W); \
- GEN(INSN "l", GENOP##L); \
- GEN64(INSN "q", GENOP##Q); \
-} while (0)
- GENA("rol", ROL);
- GENA("ror", ROR);
- GENA("rcl", RCL);
- GENA("rcr", RCR);
- GENA("shl", SHL);
- GENA("shr", SHR);
- GENA("sar", SAR);
-#undef GENA
-#undef GEN64
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_reg_reg(&ii, insns[i], X86_CL, d)) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
- static const uint32 imm_table[] = {
- 0x00000000, 0x00000001, 0x00000002, 0x00000004,
- 0x00000008, 0x00000010, 0x00000020, 0x00000040,
- 0x00000080, 0x000000fe, 0x000000ff, 0x00000100,
- 0x00000101, 0x00000102, 0xfffffffe, 0xffffffff,
- 0x00000000, 0x10000000, 0x20000000, 0x30000000,
- 0x40000000, 0x50000000, 0x60000000, 0x70000000,
- 0x80000000, 0x90000000, 0xa0000000, 0xb0000000,
- 0xc0000000, 0xd0000000, 0xe0000000, 0xf0000000,
- 0xfffffffd, 0xfffffffe, 0xffffffff, 0x00000001,
- 0x00000002, 0x00000003, 0x11111111, 0x22222222,
- 0x33333333, 0x44444444, 0x55555555, 0x66666666,
- 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa,
- 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee,
- };
- const int n_imm_tab_count = sizeof(imm_table)/sizeof(imm_table[0]);
-
-#if TEST_INST_ALU_IMM_REG
- printf("Testing imm,reg forms\n");
- n_tests = n_failures = 0;
- for (int j = 0; j < n_imm_tab_count; j++) {
- const uint32 value = imm_table[j];
- for (int d = 0; d < X86_MAX_ALU_REGS; d++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i] = INSN; \
- modes[i] = -1; \
- i++; GENOP##ir(value, d); \
- } while (0)
-#define GEN64(INSN, GENOP) do { \
- if (X86_TARGET_64BIT) \
- GEN(INSN, GENOP); \
- } while (0)
-#define GENM(INSN, GENOP, MODE) do { \
- insns[i] = INSN; \
- modes[i] = MODE; \
- i++; GENOP##ir(value, d); \
- } while (0)
-#define GENM64(INSN, GENOP, MODE) do { \
- if (X86_TARGET_64BIT) \
- GENM(INSN, GENOP, MODE); \
- } while (0)
-#define GENA(INSN, GENOP) do { \
- if (VALID_REG8(d)) \
- GEN(INSN "b", GENOP##B); \
- GEN(INSN "w", GENOP##W); \
- GEN(INSN "l", GENOP##L); \
- GEN64(INSN "q", GENOP##Q); \
- } while (0)
-#define GENAM(INSN, GENOP, MODE) do { \
- if (VALID_REG8(d)) \
- GENM(INSN "b", GENOP##B, MODE); \
- GENM(INSN "w", GENOP##W, MODE); \
- GENM(INSN "l", GENOP##L, MODE); \
- GENM64(INSN "q", GENOP##Q, MODE); \
- } while (0)
- GENA("adc", ADC);
- GENA("add", ADD);
- GENA("and", AND);
- GENA("cmp", CMP);
- GENA("or", OR);
- GENA("sbb", SBB);
- GENA("sub", SUB);
- GENA("xor", XOR);
- GENA("mov", MOV);
- GENM("btw", BTW, 1);
- GENM("btl", BTL, 1);
- GENM64("btq", BTQ, 1);
- GENM("btcw", BTCW, 1);
- GENM("btcl", BTCL, 1);
- GENM64("btcq", BTCQ, 1);
- GENM("btrw", BTRW, 1);
- GENM("btrl", BTRL, 1);
- GENM64("btrq", BTRQ, 1);
- GENM("btsw", BTSW, 1);
- GENM("btsl", BTSL, 1);
- GENM64("btsq", BTSQ, 1);
- if (value != 1) {
- GENAM("rol", ROL, 1);
- GENAM("ror", ROR, 1);
- GENAM("rcl", RCL, 1);
- GENAM("rcr", RCR, 1);
- GENAM("shl", SHL, 1);
- GENAM("shr", SHR, 1);
- GENAM("sar", SAR, 1);
- }
- GENA("test", TEST);
-#undef GENAM
-#undef GENA
-#undef GENM64
-#undef GENM
-#undef GEN64
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
- static const uint32 off_table[] = {
- 0x00000000,
- 0x00000001,
- 0x00000040,
- 0x00000080,
- 0x000000ff,
- 0x00000100,
- 0xfffffffe,
- 0xffffffff,
- };
- const int off_table_count = sizeof(off_table) / sizeof(off_table[0]);
-
-#if TEST_INST_ALU_MEM_REG
- printf("Testing mem,reg forms\n");
- n_tests = n_failures = 0;
- for (int d = 0; d < off_table_count; d++) {
- const uint32 D = off_table[d];
- for (int B = -1; B < X86_MAX_ALU_REGS; B++) {
- for (int I = -1; I < X86_MAX_ALU_REGS; I++) {
- if (I == X86_RSP)
- continue;
- for (int S = 1; S < 16; S *= 2) {
- if (I == -1 && S > 1)
- continue;
- for (int r = 0; r < X86_MAX_ALU_REGS; r++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- GENOP##mr(D, B, I, S, r); \
- } while (0)
-#define GEN64(INSN, GENOP) do { \
- if (X86_TARGET_64BIT) \
- GEN(INSN, GENOP); \
- } while (0)
-#define GENA(INSN, GENOP) do { \
- if (VALID_REG8(r)) \
- GEN(INSN "b", GENOP##B); \
- GEN(INSN "w", GENOP##W); \
- GEN(INSN "l", GENOP##L); \
- GEN64(INSN "q", GENOP##Q); \
- } while (0)
- GENA("adc", ADC);
- GENA("add", ADD);
- GENA("and", AND);
- GENA("cmp", CMP);
- GENA("or", OR);
- GENA("sbb", SBB);
- GENA("sub", SUB);
- GENA("xor", XOR);
- GENA("mov", MOV);
- GEN("imulw", IMULW);
- GEN("imull", IMULL);
- GEN64("imulq", IMULQ);
- GEN("bsfw", BSFW);
- GEN("bsfl", BSFL);
- GEN64("bsfq", BSFQ);
- GEN("bsrw", BSRW);
- GEN("bsrl", BSRL);
- GEN64("bsrq", BSRQ);
- GEN("movsbw", MOVSBW);
- GEN("movsbl", MOVSBL);
- GEN64("movsbq", MOVSBQ);
- GEN("movzbw", MOVZBW);
- GEN("movzbl", MOVZBL);
- GEN64("movzbq", MOVZBQ);
- GEN("movswl", MOVSWL);
- GEN64("movswq", MOVSWQ);
- GEN("movzwl", MOVZWL);
- GEN64("movzwq", MOVZWQ);
- GEN64("movslq", MOVSLQ);
-#undef GENA
-#undef GEN64
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_mem_reg(&ii, insns[i], D, B, I, S, r)) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- show_status(n_tests);
- }
- if (i != last_insn)
- abort();
- }
- }
- }
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_FPU_UNARY
- printf("Testing FPU unary forms\n");
- n_tests = n_failures = 0;
- {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- GENOP(); \
-} while (0)
- GEN("f2xm1", F2XM1);
- GEN("fabs", FABS);
- GEN("fchs", FCHS);
- GEN("fcompp", FCOMPP);
- GEN("fcos", FCOS);
- GEN("fdecstp", FDECSTP);
- GEN("fincstp", FINCSTP);
- GEN("fld1", FLD1);
- GEN("fldl2t", FLDL2T);
- GEN("fldl2e", FLDL2E);
- GEN("fldpi", FLDPI);
- GEN("fldlg2", FLDLG2);
- GEN("fldln2", FLDLN2);
- GEN("fldz", FLDZ);
- GEN("fnop", FNOP);
- GEN("fpatan", FPATAN);
- GEN("fprem", FPREM);
- GEN("fprem1", FPREM1);
- GEN("fptan", FPTAN);
- GEN("frndint", FRNDINT);
- GEN("fscale", FSCALE);
- GEN("fsin", FSIN);
- GEN("fsincos", FSINCOS);
- GEN("fsqrt", FSQRT);
- GEN("ftst", FTST);
- GEN("fucompp", FUCOMPP);
- GEN("fxam", FXAM);
- GEN("fxtract", FXTRACT);
- GEN("fyl2x", FYL2X);
- GEN("fyl2xp1", FYL2XP1);
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_unary(&ii, insns[i])) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_FPU_REG
- printf("Testing FPU reg forms\n");
- n_tests = n_failures = 0;
- for (int r = 0; r < X86_MAX_FPU_REGS; r++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GENr(INSN, GENOP) do { \
- insns[i] = INSN; \
- modes[i] = 0; \
- i++, GENOP##r(r); \
-} while (0)
-#define GENr0(INSN, GENOP) do { \
- insns[i] = INSN; \
- modes[i] = 1; \
- i++, GENOP##r0(r); \
-} while (0)
-#define GEN0r(INSN, GENOP) do { \
- insns[i] = INSN; \
- modes[i] = 2; \
- i++, GENOP##0r(r); \
-} while (0)
- GENr("fcom", FCOM);
- GENr("fcomp", FCOMP);
- GENr("ffree", FFREE);
- GENr("fxch", FXCH);
- GENr("fst", FST);
- GENr("fstp", FSTP);
- GENr("fucom", FUCOM);
- GENr("fucomp", FUCOMP);
- GENr0("fadd", FADD);
- GENr0("fcmovb", FCMOVB);
- GENr0("fcmove", FCMOVE);
- GENr0("fcmovbe", FCMOVBE);
- GENr0("fcmovu", FCMOVU);
- GENr0("fcmovnb", FCMOVNB);
- GENr0("fcmovne", FCMOVNE);
- GENr0("fcmovnbe", FCMOVNBE);
- GENr0("fcmovnu", FCMOVNU);
- GENr0("fcomi", FCOMI);
- GENr0("fcomip", FCOMIP);
- GENr0("fucomi", FUCOMI);
- GENr0("fucomip", FUCOMIP);
- GENr0("fdiv", FDIV);
- GENr0("fdivr", FDIVR);
- GENr0("fmul", FMUL);
- GENr0("fsub", FSUB);
- GENr0("fsubr", FSUBR);
-#undef GEN0r
-#undef GENr0
-#undef GENr
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- switch (modes[i]) {
- case 0:
- if (!check_reg(&ii, insns[i], r)) {
- show_instruction(buffer, p);
- n_failures++;
- }
- break;
- case 1:
- if (!check_reg_reg(&ii, insns[i], r, 0)) {
- show_instruction(buffer, p);
- n_failures++;
- }
- break;
- case 2:
- if (!check_reg_reg(&ii, insns[i], 0, r)) {
- show_instruction(buffer, p);
- n_failures++;
- }
- break;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_FPU_MEM
- printf("Testing FPU mem forms\n");
- n_tests = n_failures = 0;
- for (int d = 0; d < off_table_count; d++) {
- const uint32 D = off_table[d];
- for (int B = -1; B < X86_MAX_ALU_REGS; B++) {
- for (int I = -1; I < X86_MAX_ALU_REGS; I++) {
- if (I == X86_RSP)
- continue;
- for (int S = 1; S < 16; S *= 2) {
- if (I == -1 && S > 1)
- continue;
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- GENOP##m(D, B, I, S); \
-} while (0)
- GEN("fadds", FADDS);
- GEN("faddl", FADDD);
- GEN("fiadd", FIADDW);
- GEN("fiaddl", FIADDL);
- GEN("fbld", FBLD);
- GEN("fbstp", FBSTP);
- GEN("fcoms", FCOMS);
- GEN("fcoml", FCOMD);
- GEN("fcomps", FCOMPS);
- GEN("fcompl", FCOMPD);
- GEN("fdivs", FDIVS);
- GEN("fdivl", FDIVD);
- GEN("fidiv", FIDIVW);
- GEN("fidivl", FIDIVL);
- GEN("fdivrs", FDIVRS);
- GEN("fdivrl", FDIVRD);
- GEN("fidivr", FIDIVRW);
- GEN("fidivrl", FIDIVRL);
- GEN("ficom", FICOMW);
- GEN("ficoml", FICOML);
- GEN("ficomp", FICOMPW);
- GEN("ficompl", FICOMPL);
- GEN("fild", FILDW);
- GEN("fildl", FILDL);
- GEN("fildll", FILDQ);
- GEN("fist", FISTW);
- GEN("fistl", FISTL);
- GEN("fistp", FISTPW);
- GEN("fistpl", FISTPL);
- GEN("fistpll", FISTPQ);
- GEN("fisttp", FISTTPW);
- GEN("fisttpl", FISTTPL);
- GEN("fisttpll", FISTTPQ);
- GEN("flds", FLDS);
- GEN("fldl", FLDD);
- GEN("fldt", FLDT);
- GEN("fmuls", FMULS);
- GEN("fmull", FMULD);
- GEN("fimul", FIMULW);
- GEN("fimull", FIMULL);
- GEN("fsts", FSTS);
- GEN("fstl", FSTD);
- GEN("fstps", FSTPS);
- GEN("fstpl", FSTPD);
- GEN("fstpt", FSTPT);
- GEN("fsubs", FSUBS);
- GEN("fsubl", FSUBD);
- GEN("fisub", FISUBW);
- GEN("fisubl", FISUBL);
- GEN("fsubrs", FSUBRS);
- GEN("fsubrl", FSUBRD);
- GEN("fisubr", FISUBRW);
- GEN("fisubrl", FISUBRL);
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_mem(&ii, insns[i], D, B, I, S)) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- show_status(n_tests);
- }
- if (i != last_insn)
- abort();
- }
- }
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_MMX_REG_REG
- printf("Testing MMX reg,reg forms\n");
- n_tests = n_failures = 0;
- for (int s = 0; s < X86_MAX_MMX_REGS; s++) {
- for (int d = 0; d < X86_MAX_MMX_REGS; d++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- MMX_##GENOP##rr(s, d); \
-} while (0)
-#define GEN64(INSN, GENOP) do { \
- if (X86_TARGET_64BIT) \
- GEN(INSN, GENOP); \
-} while (0)
- GEN("movq", MOVQ);
- GEN("packsswb", PACKSSWB);
- GEN("packssdw", PACKSSDW);
- GEN("packuswb", PACKUSWB);
- GEN("paddb", PADDB);
- GEN("paddw", PADDW);
- GEN("paddd", PADDD);
- GEN("paddq", PADDQ);
- GEN("paddsb", PADDSB);
- GEN("paddsw", PADDSW);
- GEN("paddusb", PADDUSB);
- GEN("paddusw", PADDUSW);
- GEN("pand", PAND);
- GEN("pandn", PANDN);
- GEN("pavgb", PAVGB);
- GEN("pavgw", PAVGW);
- GEN("pcmpeqb", PCMPEQB);
- GEN("pcmpeqw", PCMPEQW);
- GEN("pcmpeqd", PCMPEQD);
- GEN("pcmpgtb", PCMPGTB);
- GEN("pcmpgtw", PCMPGTW);
- GEN("pcmpgtd", PCMPGTD);
- GEN("pmaddwd", PMADDWD);
- GEN("pmaxsw", PMAXSW);
- GEN("pmaxub", PMAXUB);
- GEN("pminsw", PMINSW);
- GEN("pminub", PMINUB);
- GEN("pmulhuw", PMULHUW);
- GEN("pmulhw", PMULHW);
- GEN("pmullw", PMULLW);
- GEN("pmuludq", PMULUDQ);
- GEN("por", POR);
- GEN("psadbw", PSADBW);
- GEN("psllw", PSLLW);
- GEN("pslld", PSLLD);
- GEN("psllq", PSLLQ);
- GEN("psraw", PSRAW);
- GEN("psrad", PSRAD);
- GEN("psrlw", PSRLW);
- GEN("psrld", PSRLD);
- GEN("psrlq", PSRLQ);
- GEN("psubb", PSUBB);
- GEN("psubw", PSUBW);
- GEN("psubd", PSUBD);
- GEN("psubq", PSUBQ);
- GEN("psubsb", PSUBSB);
- GEN("psubsw", PSUBSW);
- GEN("psubusb", PSUBUSB);
- GEN("psubusw", PSUBUSW);
- GEN("punpckhbw", PUNPCKHBW);
- GEN("punpckhwd", PUNPCKHWD);
- GEN("punpckhdq", PUNPCKHDQ);
- GEN("punpcklbw", PUNPCKLBW);
- GEN("punpcklwd", PUNPCKLWD);
- GEN("punpckldq", PUNPCKLDQ);
- GEN("pxor", PXOR);
- GEN("pabsb", PABSB);
- GEN("pabsw", PABSW);
- GEN("pabsd", PABSD);
- GEN("phaddw", PHADDW);
- GEN("phaddd", PHADDD);
- GEN("phaddsw", PHADDSW);
- GEN("phsubw", PHSUBW);
- GEN("phsubd", PHSUBD);
- GEN("phsubsw", PHSUBSW);
- GEN("pmaddubsw", PMADDUBSW);
- GEN("pmulhrsw", PMULHRSW);
- GEN("pshufb", PSHUFB);
- GEN("psignb", PSIGNB);
- GEN("psignw", PSIGNW);
- GEN("psignd", PSIGND);
-#undef GEN64
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_reg_reg(&ii, insns[i], s, d)) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
- static const uint8 imm8_table[] = {
- 0x00, 0x01, 0x02, 0x03,
- 0x06, 0x07, 0x08, 0x09,
- 0x0e, 0x0f, 0x10, 0x11,
- 0x1e, 0x1f, 0x20, 0x21,
- 0xfc, 0xfd, 0xfe, 0xff,
- };
- const int n_imm8_tab_count = sizeof(imm8_table)/sizeof(imm8_table[0]);
-
-#if TEST_INST_MMX_IMM_REG
- printf("Testing imm,reg forms\n");
- n_tests = n_failures = 0;
- for (int j = 0; j < n_imm8_tab_count; j++) {
- const uint8 value = imm8_table[j];
- for (int d = 0; d < X86_MAX_MMX_REGS; d++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i] = INSN; \
- modes[i] = 1; \
- i++; MMX_##GENOP##ir(value, d); \
-} while (0)
- GEN("psllw", PSLLW);
- GEN("pslld", PSLLD);
- GEN("psllq", PSLLQ);
- GEN("psraw", PSRAW);
- GEN("psrad", PSRAD);
- GEN("psrlw", PSRLW);
- GEN("psrld", PSRLD);
- GEN("psrlq", PSRLQ);
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_imm_reg(&ii, insns[i], value, d, modes[i])) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_MMX_MEM_REG
- printf("Testing MMX mem,reg forms\n");
- n_tests = n_failures = 0;
- for (int d = 0; d < off_table_count; d++) {
- const uint32 D = off_table[d];
- for (int B = -1; B < X86_MAX_ALU_REGS; B++) {
- for (int I = -1; I < X86_MAX_ALU_REGS; I++) {
- if (I == X86_RSP)
- continue;
- for (int S = 1; S < 16; S *= 2) {
- if (I == -1 && S > 1)
- continue;
- for (int r = 0; r < X86_MAX_MMX_REGS; r++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define _GENrm(INSN, GENOP) do { \
- insns[i] = INSN; \
- modes[i] = 0; \
- i++; MMX_##GENOP##rm(r, D, B, I, S); \
-} while (0)
-#define _GENmr(INSN, GENOP) do { \
- insns[i] = INSN; \
- modes[i] = 1; \
- i++; MMX_##GENOP##mr(D, B, I, S, r); \
-} while (0)
-#define GEN(INSN, GENOP) do { \
- _GENmr(INSN, GENOP); \
-} while (0)
- _GENmr("movd", MOVD);
- _GENrm("movd", MOVD);
- _GENmr("movq", MOVQ);
- _GENrm("movq", MOVQ);
- GEN("packsswb", PACKSSWB);
- GEN("packssdw", PACKSSDW);
- GEN("packuswb", PACKUSWB);
- GEN("paddb", PADDB);
- GEN("paddw", PADDW);
- GEN("paddd", PADDD);
- GEN("paddq", PADDQ);
- GEN("paddsb", PADDSB);
- GEN("paddsw", PADDSW);
- GEN("paddusb", PADDUSB);
- GEN("paddusw", PADDUSW);
- GEN("pand", PAND);
- GEN("pandn", PANDN);
- GEN("pavgb", PAVGB);
- GEN("pavgw", PAVGW);
- GEN("pcmpeqb", PCMPEQB);
- GEN("pcmpeqw", PCMPEQW);
- GEN("pcmpeqd", PCMPEQD);
- GEN("pcmpgtb", PCMPGTB);
- GEN("pcmpgtw", PCMPGTW);
- GEN("pcmpgtd", PCMPGTD);
- GEN("pmaddwd", PMADDWD);
- GEN("pmaxsw", PMAXSW);
- GEN("pmaxub", PMAXUB);
- GEN("pminsw", PMINSW);
- GEN("pminub", PMINUB);
- GEN("pmulhuw", PMULHUW);
- GEN("pmulhw", PMULHW);
- GEN("pmullw", PMULLW);
- GEN("pmuludq", PMULUDQ);
- GEN("por", POR);
- GEN("psadbw", PSADBW);
- GEN("psllw", PSLLW);
- GEN("pslld", PSLLD);
- GEN("psllq", PSLLQ);
- GEN("psraw", PSRAW);
- GEN("psrad", PSRAD);
- GEN("psrlw", PSRLW);
- GEN("psrld", PSRLD);
- GEN("psrlq", PSRLQ);
- GEN("psubb", PSUBB);
- GEN("psubw", PSUBW);
- GEN("psubd", PSUBD);
- GEN("psubq", PSUBQ);
- GEN("psubsb", PSUBSB);
- GEN("psubsw", PSUBSW);
- GEN("psubusb", PSUBUSB);
- GEN("psubusw", PSUBUSW);
- GEN("punpckhbw", PUNPCKHBW);
- GEN("punpckhwd", PUNPCKHWD);
- GEN("punpckhdq", PUNPCKHDQ);
- GEN("punpcklbw", PUNPCKLBW);
- GEN("punpcklwd", PUNPCKLWD);
- GEN("punpckldq", PUNPCKLDQ);
- GEN("pxor", PXOR);
- GEN("pabsb", PABSB);
- GEN("pabsw", PABSW);
- GEN("pabsd", PABSD);
- GEN("phaddw", PHADDW);
- GEN("phaddd", PHADDD);
- GEN("phaddsw", PHADDSW);
- GEN("phsubw", PHSUBW);
- GEN("phsubd", PHSUBD);
- GEN("phsubsw", PHSUBSW);
- GEN("pmaddubsw", PMADDUBSW);
- GEN("pmulhrsw", PMULHRSW);
- GEN("pshufb", PSHUFB);
- GEN("psignb", PSIGNB);
- GEN("psignw", PSIGNW);
- GEN("psignd", PSIGND);
-#undef GEN
-#undef _GENmr
-#undef _GENrm
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_mem_reg(&ii, insns[i], D, B, I, S, r, modes[i])) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- show_status(n_tests);
- }
- if (i != last_insn)
- abort();
- }
- }
- }
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_SSE_REG_REG
- printf("Testing SSE reg,reg forms\n");
- n_tests = n_failures = 0;
- for (int s = 0; s < X86_MAX_SSE_REGS; s++) {
- for (int d = 0; d < X86_MAX_SSE_REGS; d++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- GENOP##rr(s, d); \
-} while (0)
-#define GEN64(INSN, GENOP) do { \
- if (X86_TARGET_64BIT) \
- GEN(INSN, GENOP); \
-} while (0)
-#define GEN1(INSN, GENOP) do { \
- GEN(INSN "s", GENOP##S); \
- GEN(INSN "d", GENOP##D); \
-} while (0)
-#define GENA(INSN, GENOP) do { \
- GEN1(INSN "s", GENOP##S); \
- GEN1(INSN "p", GENOP##P); \
-} while (0)
-#define GENI(INSN, GENOP, IMM) do { \
- insns[i++] = INSN; \
- GENOP##rr(IMM, s, d); \
-} while (0)
-#define GENI1(INSN, GENOP, IMM) do { \
- GENI(INSN "s", GENOP##S, IMM); \
- GENI(INSN "d", GENOP##D, IMM); \
-} while (0)
-#define GENIA(INSN, GENOP, IMM) do { \
- GENI1(INSN "s", GENOP##S, IMM); \
- GENI1(INSN "p", GENOP##P, IMM); \
-} while (0)
- GEN1("andp", ANDP);
- GEN1("andnp", ANDNP);
- GEN1("orp", ORP);
- GEN1("xorp", XORP);
- GENA("add", ADD);
- GENA("sub", SUB);
- GENA("mul", MUL);
- GENA("div", DIV);
- GEN1("comis", COMIS);
- GEN1("ucomis", UCOMIS);
- GENA("min", MIN);
- GENA("max", MAX);
- GEN("rcpss", RCPSS);
- GEN("rcpps", RCPPS);
- GEN("rsqrtss", RSQRTSS);
- GEN("rsqrtps", RSQRTPS);
- GENA("sqrt", SQRT);
- GENIA("cmpeq", CMP, X86_SSE_CC_EQ);
- GENIA("cmplt", CMP, X86_SSE_CC_LT);
- GENIA("cmple", CMP, X86_SSE_CC_LE);
- GENIA("cmpunord", CMP, X86_SSE_CC_U);
- GENIA("cmpneq", CMP, X86_SSE_CC_NEQ);
- GENIA("cmpnlt", CMP, X86_SSE_CC_NLT);
- GENIA("cmpnle", CMP, X86_SSE_CC_NLE);
- GENIA("cmpord", CMP, X86_SSE_CC_O);
- GEN1("movap", MOVAP);
- GEN("movdqa", MOVDQA);
- GEN("movdqu", MOVDQU);
- GEN("movd", MOVDXD);
- GEN64("movd", MOVQXD); // FIXME: disass bug? "movq" expected
- GEN("movd", MOVDXS);
- GEN64("movd", MOVQXS); // FIXME: disass bug? "movq" expected
- GEN("cvtdq2pd", CVTDQ2PD);
- GEN("cvtdq2ps", CVTDQ2PS);
- GEN("cvtpd2dq", CVTPD2DQ);
- GEN("cvtpd2ps", CVTPD2PS);
- GEN("cvtps2dq", CVTPS2DQ);
- GEN("cvtps2pd", CVTPS2PD);
- GEN("cvtsd2si", CVTSD2SIL);
- GEN64("cvtsd2siq", CVTSD2SIQ);
- GEN("cvtsd2ss", CVTSD2SS);
- GEN("cvtsi2sd", CVTSI2SDL);
- GEN64("cvtsi2sdq", CVTSI2SDQ);
- GEN("cvtsi2ss", CVTSI2SSL);
- GEN64("cvtsi2ssq", CVTSI2SSQ);
- GEN("cvtss2sd", CVTSS2SD);
- GEN("cvtss2si", CVTSS2SIL);
- GEN64("cvtss2siq", CVTSS2SIQ);
- GEN("cvttpd2dq", CVTTPD2DQ);
- GEN("cvttps2dq", CVTTPS2DQ);
- GEN("cvttsd2si", CVTTSD2SIL);
- GEN64("cvttsd2siq", CVTTSD2SIQ);
- GEN("cvttss2si", CVTTSS2SIL);
- GEN64("cvttss2siq", CVTTSS2SIQ);
- if (s < 8) {
- // MMX source register
- GEN("cvtpi2pd", CVTPI2PD);
- GEN("cvtpi2ps", CVTPI2PS);
- }
- if (d < 8) {
- // MMX dest register
- GEN("cvtpd2pi", CVTPD2PI);
- GEN("cvtps2pi", CVTPS2PI);
- GEN("cvttpd2pi", CVTTPD2PI);
- GEN("cvttps2pi", CVTTPS2PI);
- }
-#undef GENIA
-#undef GENI1
-#undef GENI
-#undef GENA
-#undef GEN1
-#undef GEN64
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_reg_reg(&ii, insns[i], s, d)) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- }
- if (i != last_insn)
- abort();
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
-#if TEST_INST_SSE_MEM_REG
- printf("Testing SSE mem,reg forms\n");
- n_tests = n_failures = 0;
- for (int d = 0; d < off_table_count; d++) {
- const uint32 D = off_table[d];
- for (int B = -1; B < X86_MAX_ALU_REGS; B++) {
- for (int I = -1; I < X86_MAX_ALU_REGS; I++) {
- if (I == X86_RSP)
- continue;
- for (int S = 1; S < 16; S *= 2) {
- if (I == -1 && S > 1)
- continue;
- for (int r = 0; r < X86_MAX_SSE_REGS; r++) {
- set_target(block);
- uint8 *b = get_target();
- int i = 0;
-#define GEN(INSN, GENOP) do { \
- insns[i++] = INSN; \
- GENOP##mr(D, B, I, S, r); \
-} while (0)
-#define GEN64(INSN, GENOP) do { \
- if (X86_TARGET_64BIT) \
- GEN(INSN, GENOP); \
-} while (0)
-#define GEN1(INSN, GENOP) do { \
- GEN(INSN "s", GENOP##S); \
- GEN(INSN "d", GENOP##D); \
-} while (0)
-#define GENA(INSN, GENOP) do { \
- GEN1(INSN "s", GENOP##S); \
- GEN1(INSN "p", GENOP##P); \
-} while (0)
-#define GENI(INSN, GENOP, IMM) do { \
- insns[i++] = INSN; \
- GENOP##mr(IMM, D, B, I, S, r); \
-} while (0)
-#define GENI1(INSN, GENOP, IMM) do { \
- GENI(INSN "s", GENOP##S, IMM); \
- GENI(INSN "d", GENOP##D, IMM); \
-} while (0)
-#define GENIA(INSN, GENOP, IMM) do { \
- GENI1(INSN "s", GENOP##S, IMM); \
- GENI1(INSN "p", GENOP##P, IMM); \
-} while (0)
- GEN1("andp", ANDP);
- GEN1("andnp", ANDNP);
- GEN1("orp", ORP);
- GEN1("xorp", XORP);
- GENA("add", ADD);
- GENA("sub", SUB);
- GENA("mul", MUL);
- GENA("div", DIV);
- GEN1("comis", COMIS);
- GEN1("ucomis", UCOMIS);
- GENA("min", MIN);
- GENA("max", MAX);
- GEN("rcpss", RCPSS);
- GEN("rcpps", RCPPS);
- GEN("rsqrtss", RSQRTSS);
- GEN("rsqrtps", RSQRTPS);
- GENA("sqrt", SQRT);
- GENIA("cmpeq", CMP, X86_SSE_CC_EQ);
- GENIA("cmplt", CMP, X86_SSE_CC_LT);
- GENIA("cmple", CMP, X86_SSE_CC_LE);
- GENIA("cmpunord", CMP, X86_SSE_CC_U);
- GENIA("cmpneq", CMP, X86_SSE_CC_NEQ);
- GENIA("cmpnlt", CMP, X86_SSE_CC_NLT);
- GENIA("cmpnle", CMP, X86_SSE_CC_NLE);
- GENIA("cmpord", CMP, X86_SSE_CC_O);
- GEN1("movap", MOVAP);
- GEN("movdqa", MOVDQA);
- GEN("movdqu", MOVDQU);
-#if 0
- // FIXME: extraneous REX bits generated
- GEN("movd", MOVDXD);
- GEN64("movd", MOVQXD); // FIXME: disass bug? "movq" expected
-#endif
- GEN("cvtdq2pd", CVTDQ2PD);
- GEN("cvtdq2ps", CVTDQ2PS);
- GEN("cvtpd2dq", CVTPD2DQ);
- GEN("cvtpd2ps", CVTPD2PS);
- GEN("cvtps2dq", CVTPS2DQ);
- GEN("cvtps2pd", CVTPS2PD);
- GEN("cvtsd2si", CVTSD2SIL);
- GEN64("cvtsd2siq", CVTSD2SIQ);
- GEN("cvtsd2ss", CVTSD2SS);
- GEN("cvtsi2sd", CVTSI2SDL);
- GEN64("cvtsi2sdq", CVTSI2SDQ);
- GEN("cvtsi2ss", CVTSI2SSL);
- GEN64("cvtsi2ssq", CVTSI2SSQ);
- GEN("cvtss2sd", CVTSS2SD);
- GEN("cvtss2si", CVTSS2SIL);
- GEN64("cvtss2siq", CVTSS2SIQ);
- GEN("cvttpd2dq", CVTTPD2DQ);
- GEN("cvttps2dq", CVTTPS2DQ);
- GEN("cvttsd2si", CVTTSD2SIL);
- GEN64("cvttsd2siq", CVTTSD2SIQ);
- GEN("cvttss2si", CVTTSS2SIL);
- GEN64("cvttss2siq", CVTTSS2SIQ);
- if (r < 8) {
- // MMX dest register
- GEN("cvtpd2pi", CVTPD2PI);
- GEN("cvtps2pi", CVTPS2PI);
- GEN("cvttpd2pi", CVTTPD2PI);
- GEN("cvttps2pi", CVTTPS2PI);
- }
-#undef GENIA
-#undef GENI1
-#undef GENI
-#undef GENA
-#undef GEN1
-#undef GEN64
-#undef GEN
- int last_insn = i;
- uint8 *e = get_target();
-
- uint8 *p = b;
- i = 0;
- while (p < e) {
- int n = disass_x86(buffer, (uintptr)p);
- insn_t ii;
- parse_insn(&ii, buffer);
-
- if (!check_mem_reg(&ii, insns[i], D, B, I, S, r)) {
- show_instruction(buffer, p);
- n_failures++;
- }
-
- p += n;
- i += 1;
- n_tests++;
- show_status(n_tests);
- }
- if (i != last_insn)
- abort();
- }
- }
- }
- }
- }
- printf(" done %ld/%ld\n", n_tests - n_failures, n_tests);
- n_all_tests += n_tests;
- n_all_failures += n_failures;
-#endif
-
- printf("\n");
- printf("All %ld tests run, %ld failures\n", n_all_tests, n_all_failures);
-}