From: Frode Solheim Date: Sun, 6 Sep 2015 17:31:04 +0000 (+0200) Subject: JIT: more harmless Aranym JIT code merge in codegen_x86.cpp X-Git-Tag: 3200~79^2~44 X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=1a7972f81f987f4abac08caaf70d3f727a287591;p=francis%2Fwinuae.git JIT: more harmless Aranym JIT code merge in codegen_x86.cpp --- diff --git a/jit/codegen_x86.cpp b/jit/codegen_x86.cpp index 3f7697a0..aa6301e3 100644 --- a/jit/codegen_x86.cpp +++ b/jit/codegen_x86.cpp @@ -1,3 +1,34 @@ +/* + * compiler/codegen_x86.cpp - IA-32 and AMD64 code generator + * + * Copyright (c) 2001-2004 Milan Jurik of ARAnyM dev team (see AUTHORS) + * + * Inspired by Christian Bauer's Basilisk II + * + * This file is part of the ARAnyM project which builds a new and powerful + * TOS/FreeMiNT compatible virtual machine running on almost any hardware. + * + * JIT compiler m68k -> IA-32 and AMD64 + * + * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer + * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne + * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + /* This should eventually end up in machdep/, but for now, x86 is the only target, and it's easier this way... */ @@ -74,63 +105,1114 @@ uae_u8 can_byte[]={0,1,2,3,0xff}; uae_u8 can_word[]={0,1,2,3,5,6,7,0xff}; #endif -uae_u8 call_saved[]={0,0,0,0,1,0,0,0}; +uae_u8 call_saved[]={0,0,0,0,1,0,0,0}; + +/* This *should* be the same as call_saved. 
But: +- We might not really know which registers are saved, and which aren't, +so we need to preserve some, but don't want to rely on everyone else +also saving those registers +- Special registers (such like the stack pointer) should not be "preserved" +by pushing, even though they are "saved" across function calls +*/ +uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1}; + +/* Whether classes of instructions do or don't clobber the native flags */ +#define CLOBBER_MOV +#define CLOBBER_LEA +#define CLOBBER_CMOV +#define CLOBBER_POP +#define CLOBBER_PUSH +#define CLOBBER_SUB clobber_flags() +#define CLOBBER_SBB clobber_flags() +#define CLOBBER_CMP clobber_flags() +#define CLOBBER_ADD clobber_flags() +#define CLOBBER_ADC clobber_flags() +#define CLOBBER_AND clobber_flags() +#define CLOBBER_OR clobber_flags() +#define CLOBBER_XOR clobber_flags() + +#define CLOBBER_ROL clobber_flags() +#define CLOBBER_ROR clobber_flags() +#define CLOBBER_SHLL clobber_flags() +#define CLOBBER_SHRL clobber_flags() +#define CLOBBER_SHRA clobber_flags() +#define CLOBBER_TEST clobber_flags() +#define CLOBBER_CL16 +#define CLOBBER_CL8 +#define CLOBBER_SE16 +#define CLOBBER_SE8 +#define CLOBBER_ZE16 +#define CLOBBER_ZE8 +#define CLOBBER_SW16 clobber_flags() +#define CLOBBER_SW32 +#define CLOBBER_SETCC +#define CLOBBER_MUL clobber_flags() +#define CLOBBER_BT clobber_flags() +#define CLOBBER_BSF clobber_flags() + + +#if USE_NEW_RTASM + +#if defined(CPU_x86_64) +#define X86_TARGET_64BIT 1 +/* The address override prefix causes a 5 cycles penalty on Intel Core + processors. Another solution would be to decompose the load in an LEA, + MOV (to zero-extend), MOV (from memory): is it better? */ +#define ADDR32 x86_emit_byte(0x67), +#else +#define ADDR32 +#endif +#define X86_FLAT_REGISTERS 0 +#define X86_OPTIMIZE_ALU 1 +#define X86_OPTIMIZE_ROTSHI 1 +#include "codegen_x86.h" + +#define x86_emit_byte(B) emit_byte(B) +#define x86_emit_word(W) emit_word(W) +#define x86_emit_long(L) emit_long(L) +#define x86_emit_quad(Q) emit_quad(Q) +#define x86_get_target() get_target() +#define x86_emit_failure(MSG) jit_fail(MSG, __FILE__, __LINE__, __FUNCTION__) + +static void jit_fail(const char *msg, const char *file, int line, const char *function) +{ + panicbug("JIT failure in function %s from file %s at line %d: %s", + function, file, line, msg); + abort(); +} + +LOWFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) +{ +#if defined(CPU_x86_64) + PUSHQr(r); +#else + PUSHLr(r); +#endif +} +LENDFUNC(NONE,WRITE,1,raw_push_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) +{ +#if defined(CPU_x86_64) + POPQr(r); +#else + POPLr(r); +#endif +} +LENDFUNC(NONE,READ,1,raw_pop_l_r,(R4 r)) + +LOWFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) +{ +#if defined(CPU_x86_64) + POPQm(d, X86_NOREG, X86_NOREG, 1); +#else + POPLm(d, X86_NOREG, X86_NOREG, 1); +#endif +} +LENDFUNC(NONE,READ,1,raw_pop_l_m,(MEMW d)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) +{ + BTLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) +{ + BTLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_bt_l_rr,(R4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) +{ + BTCLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) +{ + BTCLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_btc_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) +{ + BTRLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) +{ + 
BTRLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_btr_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) +{ + BTSLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) +{ + BTSLrr(b, r); +} +LENDFUNC(WRITE,NONE,2,raw_bts_l_rr,(RW4 r, R4 b)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) +{ + SUBWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) + +LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) +{ + MOVLmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) +{ + MOVLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) +{ + MOVWim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mi,(MEMW d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) +{ + MOVBim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) +{ + ROLBim(i, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) +{ + ROLBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) +{ + ROLWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) +{ + ROLLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) +{ + ROLLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) +{ + ROLWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) +{ + ROLBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_rol_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) +{ + SHLLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) +{ + SHLWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) +{ + SHLBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) +{ + RORBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) +{ + RORWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) +{ + ORLmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(WRITE,READ,2,raw_or_l_rm,(RW4 d, MEMR s)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) +{ + RORLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) +{ + RORLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) +{ + RORWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) +{ + RORBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_ror_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) +{ + SHRLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) +{ + SHRWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 
d, R1 r)) +{ + SHRBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) +{ + SARLrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_rr,(RW4 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) +{ + SARWrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_rr,(RW2 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) +{ + SARBrr(r, d); +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) + +LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) +{ + SHLLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) +{ + SHLWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) +{ + SHLBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) +{ + SHRLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) +{ + SHRWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) +{ + SHRBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) +{ + SARLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) +{ + SARWir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) +{ + SARBir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) + +LOWFUNC(WRITE,NONE,1,raw_sahf,(R2)) +{ + SAHF(); +} +LENDFUNC(WRITE,NONE,1,raw_sahf,(R2 dummy_ah)) + +LOWFUNC(NONE,NONE,1,raw_cpuid,(R4)) +{ + CPUID(); +} +LENDFUNC(NONE,NONE,1,raw_cpuid,(R4 dummy_eax)) + +LOWFUNC(READ,NONE,1,raw_lahf,(W2)) +{ + LAHF(); +} +LENDFUNC(READ,NONE,1,raw_lahf,(W2 dummy_ah)) + +LOWFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) +{ + SETCCir(cc, d); +} +LENDFUNC(READ,NONE,2,raw_setcc,(W1 d, IMM cc)) + +LOWFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) +{ + SETCCim(cc, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(READ,WRITE,2,raw_setcc_m,(MEMW d, IMM cc)) + +LOWFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) +{ + if (have_cmov) + CMOVLrr(cc, s, d); + else { /* replacement using branch and mov */ + int8 *target_p = (int8 *)x86_get_target() + 1; + JCCSii(cc^1, 0); + MOVLrr(s, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(READ,NONE,3,raw_cmov_l_rr,(RW4 d, R4 s, IMM cc)) + +LOWFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) +{ + BSFLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_bsf_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) +{ + MOVSLQrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_32_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) +{ + MOVSWLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) +{ + MOVSBLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_sign_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) +{ + MOVZWLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_16_rr,(W4 d, R2 s)) + +LOWFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) +{ + MOVZBLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_zero_extend_8_rr,(W4 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) +{ + IMULLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_imul_32_32,(RW4 d, R4 s)) + 
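The raw_cmov_l_rr wrapper above falls back, on CPUs without CMOV, to a short Jcc with the inverted condition jumping over a plain MOV, then backpatches the 8-bit displacement once the MOV has been emitted. A minimal standalone sketch of that pattern, using a hypothetical byte-buffer emitter (buf/emit8) rather than this file's emit_byte()/x86_get_target() machinery:

#include <stdint.h>

static uint8_t buf[64];
static uint8_t *tgt = buf;                 /* current emit position */

static void emit8(uint8_t b) { *tgt++ = b; }

static void emit_cmov_fallback(int cc, int src, int dst)
{
    emit8(0x70 + (cc ^ 1));                /* short Jcc, inverted condition */
    uint8_t *disp = tgt;                   /* displacement byte to patch */
    emit8(0x00);                           /* placeholder */
    emit8(0x89);                           /* MOV r/m32, r32 */
    emit8(0xc0 + 8 * src + dst);           /* ModRM: register-to-register, src -> dst */
    *disp = (uint8_t)(tgt - (disp + 1));   /* jump over the MOV when the condition fails */
}

The cc ^ 1 trick works because the x86 condition codes come in complementary even/odd pairs (e.g. 0x4 "equal" and 0x5 "not equal"), which is exactly what the have_cmov fallback in raw_cmov_l_rr relies on.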
+LOWFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + write_log("Bad register in IMUL: d=%d, s=%d\n",d,s); + abort(); + } + IMULLr(s); +} +LENDFUNC(NONE,NONE,2,raw_imul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) +{ + if (d!=MUL_NREG1 || s!=MUL_NREG2) { + write_log("Bad register in MUL: d=%d, s=%d\n",d,s); + abort(); + } + MULLr(s); +} +LENDFUNC(NONE,NONE,2,raw_mul_64_32,(RW4 d, RW4 s)) + +LOWFUNC(NONE,NONE,2,raw_mul_32_32,(RW4, R4)) +{ + abort(); /* %^$&%^$%#^ x86! */ +} +LENDFUNC(NONE,NONE,2,raw_mul_32_32,(RW4 d, R4 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) +{ + MOVBrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, R1 s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) +{ + MOVWrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, R2 s)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVLmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVWmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVBmr(0, baser, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) +{ + ADDR32 MOVLrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) +{ + ADDR32 MOVWrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) +{ + ADDR32 MOVBrm(s, 0, baser, index, factor); +} +LENDFUNC(NONE,WRITE,4,raw_mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) +{ + ADDR32 MOVLrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) +{ + ADDR32 MOVWrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s)) + +LOWFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) +{ + ADDR32 MOVBrm(s, base, baser, index, factor); +} +LENDFUNC(NONE,WRITE,5,raw_mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s)) + +LOWFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVLmr(base, baser, index, factor, d); +} +LENDFUNC(NONE,READ,5,raw_mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVWmr(base, baser, index, factor, d); +} +LENDFUNC(NONE,READ,5,raw_mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) +{ + ADDR32 MOVBmr(base, baser, index, factor, d); +} 
+LENDFUNC(NONE,READ,5,raw_mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) +{ + ADDR32 MOVLmr(base, X86_NOREG, index, factor, d); +} +LENDFUNC(NONE,READ,4,raw_mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor)) + +LOWFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) +{ + if (have_cmov) + ADDR32 CMOVLmr(cond, base, X86_NOREG, index, factor, d); + else { /* replacement using branch and mov */ + int8 *target_p = (int8 *)x86_get_target() + 1; + JCCSii(cond^1, 0); + ADDR32 MOVLmr(base, X86_NOREG, index, factor, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(NONE,READ,5,raw_cmov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) +{ + if (have_cmov) + CMOVLmr(cond, mem, X86_NOREG, X86_NOREG, 1, d); + else { /* replacement using branch and mov */ + int8 *target_p = (int8 *)x86_get_target() + 1; + JCCSii(cond^1, 0); + MOVLmr(mem, X86_NOREG, X86_NOREG, 1, d); + *target_p = (uintptr)x86_get_target() - ((uintptr)target_p + 1); + } +} +LENDFUNC(NONE,READ,3,raw_cmov_l_rm,(W4 d, IMM mem, IMM cond)) + +LOWFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_l_rR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) +{ + ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_w_rR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) +{ + ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_b_rR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_l_brR,(W4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) +{ + ADDR32 MOVWmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_w_brR,(W2 d, R4 s, IMM offset)) + +LOWFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) +{ + ADDR32 MOVBmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,3,raw_mov_b_brR,(W1 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) +{ + ADDR32 MOVLim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) +{ + ADDR32 MOVWim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) +{ + ADDR32 MOVBim(i, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Ri,(R4 d, IMM i, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_Rr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) +{ + ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_Rr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) +{ + ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) +{ + LEALmr(offset, s, X86_NOREG, 1, d); +} +LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) + 
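All of the *_indexed and *_rR helpers above encode the same x86 effective-address form, disp + base + index*factor, with the factor limited to the hardware scales 1, 2, 4 and 8; on x86-64 builds the ADDR32 macro prepends the 0x67 address-size prefix so this arithmetic wraps at 32 bits like the emulated address space does. A small illustrative sketch in plain C (not this file's API) of the address these wrappers make the CPU compute:

#include <stdint.h>

/* Address read by e.g. ADDR32 MOVLmr(disp, base, index, factor, reg):
   with the 0x67 prefix the hardware performs this truncating 32-bit sum
   even on a 64-bit host, instead of wrapping at 64 bits. */
static uint32_t effective_address_32(uint32_t disp, uint32_t base,
                                     uint32_t index, uint32_t factor)
{
    /* factor must be 1, 2, 4 or 8 in the real encoding */
    return disp + base + index * factor;
}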
+LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) +{ + LEALmr(offset, s, index, factor, d); +} +LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) +{ + LEALmr(0, s, index, factor, d); +} +LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor)) + +LOWFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor)) +{ + LEALmr(0, X86_NOREG, index, factor, d); +} +LENDFUNC(NONE,NONE,4,raw_lea_l_r_scaled,(W4 d, R4 index, IMM factor)) + +LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) +{ + ADDR32 MOVLrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) +{ + ADDR32 MOVWrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) + +LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) +{ + ADDR32 MOVBrm(s, offset, d, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) + +LOWFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) +{ + BSWAPLr(r); +} +LENDFUNC(NONE,NONE,1,raw_bswap_32,(RW4 r)) + +LOWFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) +{ + ROLWir(8, r); +} +LENDFUNC(WRITE,NONE,1,raw_bswap_16,(RW2 r)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) +{ + MOVLrr(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_rr,(W4 d, R4 s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) +{ + MOVLrm(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, R4 s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) +{ + MOVWrm(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_w_mr,(IMM d, R2 s)) + +LOWFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) +{ + MOVWmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,2,raw_mov_w_rm,(W2 d, IMM s)) + +LOWFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) +{ + MOVBrm(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(NONE,WRITE,2,raw_mov_b_mr,(IMM d, R1 s)) + +LOWFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) +{ + MOVBmr(s, X86_NOREG, X86_NOREG, 1, d); +} +LENDFUNC(NONE,READ,2,raw_mov_b_rm,(W1 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) +{ + MOVLir(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_l_ri,(W4 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) +{ + MOVWir(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) +{ + MOVBir(s, d); +} +LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s)) + +LOWFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) +{ + ADCLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) +{ + ADDLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) +{ + ADDWim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_w_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) +{ + ADDBim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) +{ + TESTLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) +{ + TESTLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_l_rr,(R4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) +{ + TESTWrr(s, d); +} 
+LENDFUNC(WRITE,NONE,2,raw_test_w_rr,(R2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) +{ + TESTBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) +{ + XORLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) +{ + ANDLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) +{ + ANDWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) +{ + ANDLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) +{ + ANDWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) +{ + ANDBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_and_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) +{ + ORLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) +{ + ORLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) +{ + ORWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) +{ + ORBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_or_b,(RW1 d, R1 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) +{ + ADCLrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) +{ + ADCWrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) +{ + ADCBrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_adc_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) +{ + ADDLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) +{ + ADDWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) +{ + ADDBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) +{ + SUBLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) +{ + SUBBir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) +{ + ADDLir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) +{ + ADDWir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) +{ + ADDBir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) + +LOWFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) +{ + SBBLrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_l,(RW4 d, R4 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) +{ + SBBWrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_w,(RW2 d, R2 s)) + +LOWFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) +{ + SBBBrr(s, d); +} +LENDFUNC(RMW,NONE,2,raw_sbb_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) +{ + SUBLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) +{ + SUBWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) +{ + SUBBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_sub_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) +{ + CMPLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l,(R4 d, R4 s)) + 
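The add/sub/cmp immediate wrappers in this block map onto the x86 ALU-immediate group; the legacy (non-RTASM) encoder further down additionally picks shorter forms when optimize_imm8 or optimize_accum allow it: opcode 0x83 with a sign-extended 8-bit immediate, or the one-byte accumulator opcodes such as 0x05 (ADD EAX, imm32). A standalone sketch of that size selection for ADD, assuming a hypothetical output cursor p and emit helpers:

#include <stdint.h>

static uint8_t *p;                        /* hypothetical output cursor */
static void emit8(uint8_t b)   { *p++ = b; }
static void emit32(uint32_t v) { emit8(v); emit8(v >> 8); emit8(v >> 16); emit8(v >> 24); }

static int fits_imm8(int32_t x) { return x >= -128 && x <= 127; }

/* ADD reg32, imm with the same size tricks as the legacy encoder */
static void emit_add_l_ri(int reg, int32_t imm)
{
    if (fits_imm8(imm)) {
        emit8(0x83); emit8(0xc0 + reg); emit8((uint8_t)imm);     /* 83 /0 ib */
    } else if (reg == 0) {                                       /* EAX */
        emit8(0x05); emit32((uint32_t)imm);                      /* 05 id */
    } else {
        emit8(0x81); emit8(0xc0 + reg); emit32((uint32_t)imm);   /* 81 /0 id */
    }
}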
+LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) +{ + CMPLir(i, r); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) +{ + CMPWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) + +LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) +{ + CMPBim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) +{ + CMPBir(i, d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) + +LOWFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) +{ + CMPBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_cmp_b,(R1 d, R1 s)) + +LOWFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) +{ + ADDR32 CMPLmr(offset, X86_NOREG, index, factor, d); +} +LENDFUNC(WRITE,READ,4,raw_cmp_l_rm_indexed,(R4 d, IMM offset, R4 index, IMM factor)) + +LOWFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) +{ + XORLrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_l,(RW4 d, R4 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) +{ + XORWrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_w,(RW2 d, R2 s)) + +LOWFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) +{ + XORBrr(s, d); +} +LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) + +LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) +{ + SUBLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) + +LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) +{ + CMPLim(s, d, X86_NOREG, X86_NOREG, 1); +} +LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) + +LOWFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) +{ + XCHGLrr(r2, r1); +} +LENDFUNC(NONE,NONE,2,raw_xchg_l_rr,(RW4 r1, RW4 r2)) + +LOWFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) +{ + XCHGBrr(r2, r1); +} +LENDFUNC(NONE,NONE,2,raw_xchg_b_rr,(RW4 r1, RW4 r2)) + +LOWFUNC(READ,WRITE,0,raw_pushfl,(void)) +{ + PUSHF(); +} +LENDFUNC(READ,WRITE,0,raw_pushfl,(void)) + +LOWFUNC(WRITE,READ,0,raw_popfl,(void)) +{ + POPF(); +} +LENDFUNC(WRITE,READ,0,raw_popfl,(void)) -/* This *should* be the same as call_saved. 
But: -- We might not really know which registers are saved, and which aren't, -so we need to preserve some, but don't want to rely on everyone else -also saving those registers -- Special registers (such like the stack pointer) should not be "preserved" -by pushing, even though they are "saved" across function calls -*/ -uae_u8 need_to_preserve[]={1,1,1,1,0,1,1,1}; +/* Generate floating-point instructions */ +static inline void x86_fadd_m(MEMR s) +{ + FADDLm(s,X86_NOREG,X86_NOREG,1); +} -/* Whether classes of instructions do or don't clobber the native flags */ -#define CLOBBER_MOV -#define CLOBBER_LEA -#define CLOBBER_CMOV -#define CLOBBER_POP -#define CLOBBER_PUSH -#define CLOBBER_SUB clobber_flags() -#define CLOBBER_SBB clobber_flags() -#define CLOBBER_CMP clobber_flags() -#define CLOBBER_ADD clobber_flags() -#define CLOBBER_ADC clobber_flags() -#define CLOBBER_AND clobber_flags() -#define CLOBBER_OR clobber_flags() -#define CLOBBER_XOR clobber_flags() +#else -#define CLOBBER_ROL clobber_flags() -#define CLOBBER_ROR clobber_flags() -#define CLOBBER_SHLL clobber_flags() -#define CLOBBER_SHRL clobber_flags() -#define CLOBBER_SHRA clobber_flags() -#define CLOBBER_TEST clobber_flags() -#define CLOBBER_CL16 -#define CLOBBER_CL8 -#define CLOBBER_SE16 -#define CLOBBER_SE8 -#define CLOBBER_ZE16 -#define CLOBBER_ZE8 -#define CLOBBER_SW16 clobber_flags() -#define CLOBBER_SW32 -#define CLOBBER_SETCC -#define CLOBBER_MUL clobber_flags() -#define CLOBBER_BT clobber_flags() -#define CLOBBER_BSF clobber_flags() +const bool optimize_accum = false; +const bool optimize_imm8 = false; +const bool optimize_shift_once = false; /************************************************************************* * Actual encoding of the instructions on the target CPU * *************************************************************************/ -static inline uae_u16 swap16(uae_u16 x) -{ - return ((x&0xff00)>>8)|((x&0x00ff)<<8); -} - -static inline uae_u32 swap32(uae_u32 x) +static inline int isaccum(int r) { - return ((x&0xff00)<<8)|((x&0x00ff)<<24)|((x&0xff0000)>>8)|((x&0xff000000)>>24); + return (r == EAX_INDEX); } static inline int isbyte(uae_s32 x) @@ -233,8 +1315,12 @@ LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) emit_byte(i); } else { - emit_byte(0x81); - emit_byte(0xe8+d); + if (optimize_accum && isaccum(d)) + emit_byte(0x2d); + else { + emit_byte(0x81); + emit_byte(0xe8+d); + } emit_word(i); } } @@ -271,18 +1357,31 @@ LENDFUNC(NONE,WRITE,2,raw_mov_b_mi,(MEMW d, IMM s)) LOWFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) { - emit_byte(0xc0); - emit_byte(0x05); - emit_long(d); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0x05); + emit_long(d); + } + else { + emit_byte(0xc0); + emit_byte(0x05); + emit_long(d); + emit_byte(i); + } } LENDFUNC(WRITE,RMW,2,raw_rol_b_mi,(MEMRW d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) { - emit_byte(0xc0); - emit_byte(0xc0+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xc0+r); + } + else { + emit_byte(0xc0); + emit_byte(0xc0+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_rol_b_ri,(RW1 r, IMM i)) @@ -297,9 +1396,15 @@ LENDFUNC(WRITE,NONE,2,raw_rol_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM i)) { - emit_byte(0xc1); - emit_byte(0xc0+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xc0+r); + } + else { + emit_byte(0xc1); + emit_byte(0xc0+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_rol_l_ri,(RW4 r, IMM 
i)) @@ -349,9 +1454,15 @@ LENDFUNC(WRITE,NONE,2,raw_shll_b_rr,(RW1 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) { - emit_byte(0xc0); - emit_byte(0xc8+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xc8+r); + } + else { + emit_byte(0xc0); + emit_byte(0xc8+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_ror_b_ri,(RW1 r, IMM i)) @@ -366,9 +1477,15 @@ LENDFUNC(WRITE,NONE,2,raw_ror_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) { - emit_byte(0xc1); - emit_byte(0xc8+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xc8+r); + } + else { + emit_byte(0xc1); + emit_byte(0xc8+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_ror_l_ri,(RW4 r, IMM i)) @@ -440,9 +1557,15 @@ LENDFUNC(WRITE,NONE,2,raw_shra_b_rr,(RW1 d, R1 r)) LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) { - emit_byte(0xc1); - emit_byte(0xe0+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xe0+r); + } + else { + emit_byte(0xc1); + emit_byte(0xe0+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i)) @@ -457,17 +1580,29 @@ LENDFUNC(WRITE,NONE,2,raw_shll_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) { - emit_byte(0xc0); - emit_byte(0xe0+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xe0+r); + } + else { + emit_byte(0xc0); + emit_byte(0xe0+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_shll_b_ri,(RW1 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) { - emit_byte(0xc1); - emit_byte(0xe8+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xe8+r); + } + else { + emit_byte(0xc1); + emit_byte(0xe8+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_shrl_l_ri,(RW4 r, IMM i)) @@ -482,17 +1617,29 @@ LENDFUNC(WRITE,NONE,2,raw_shrl_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) { - emit_byte(0xc0); - emit_byte(0xe8+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xe8+r); + } + else { + emit_byte(0xc0); + emit_byte(0xe8+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_shrl_b_ri,(RW1 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) { - emit_byte(0xc1); - emit_byte(0xf8+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd1); + emit_byte(0xf8+r); + } + else { + emit_byte(0xc1); + emit_byte(0xf8+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_shra_l_ri,(RW4 r, IMM i)) @@ -507,9 +1654,15 @@ LENDFUNC(WRITE,NONE,2,raw_shra_w_ri,(RW2 r, IMM i)) LOWFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) { - emit_byte(0xc0); - emit_byte(0xf8+r); - emit_byte(i); + if (optimize_shift_once && (i == 1)) { + emit_byte(0xd0); + emit_byte(0xf8+r); + } + else { + emit_byte(0xc0); + emit_byte(0xf8+r); + emit_byte(i); + } } LENDFUNC(WRITE,NONE,2,raw_shra_b_ri,(RW1 r, IMM i)) @@ -922,9 +2075,16 @@ LENDFUNC(NONE,WRITE,3,raw_mov_b_Rr,(R4 d, R1 s, IMM offset)) LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) { - emit_byte(0x8d); - emit_byte(0x80+8*d+s); - emit_long(offset); + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x8d); + emit_byte(0x40+8*d+s); + emit_byte(offset); + } + else { + emit_byte(0x8d); + emit_byte(0x80+8*d+s); + emit_long(offset); + } } LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, R4 s, IMM offset)) @@ -964,9 +2124,16 @@ LENDFUNC(NONE,NONE,3,raw_lea_l_rr_indexed,(W4 d, R4 s, R4 index)) 
LOWFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) { - emit_byte(0x89); - emit_byte(0x80+8*s+d); - emit_long(offset); + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x89); + emit_byte(0x40+8*s+d); + emit_byte(offset); + } + else { + emit_byte(0x89); + emit_byte(0x80+8*s+d); + emit_long(offset); + } } LENDFUNC(NONE,WRITE,3,raw_mov_l_bRr,(R4 d, R4 s, IMM offset)) @@ -981,9 +2148,16 @@ LENDFUNC(NONE,WRITE,3,raw_mov_w_bRr,(R4 d, R2 s, IMM offset)) LOWFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) { - emit_byte(0x88); - emit_byte(0x80+8*s+d); - emit_long(offset); + if (optimize_imm8 && isbyte(offset)) { + emit_byte(0x88); + emit_byte(0x40+8*s+d); + emit_byte(offset); + } + else { + emit_byte(0x88); + emit_byte(0x80+8*s+d); + emit_long(offset); + } } LENDFUNC(NONE,WRITE,3,raw_mov_b_bRr,(R4 d, R1 s, IMM offset)) @@ -1093,10 +2267,18 @@ LENDFUNC(RMW,RMW,2,raw_adc_l_mi,(MEMRW d, IMM s)) LOWFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) { - emit_byte(0x81); - emit_byte(0x05); - emit_long(d); - emit_long(s); + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x05); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + emit_byte(0x05); + emit_long(d); + emit_long(s); + } } LENDFUNC(WRITE,RMW,2,raw_add_l_mi,(IMM d, IMM s)) @@ -1121,8 +2303,12 @@ LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d, IMM s)) LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) { - emit_byte(0xf7); - emit_byte(0xc0+d); + if (optimize_accum && isaccum(d)) + emit_byte(0xa9); + else { + emit_byte(0xf7); + emit_byte(0xc0+d); + } emit_long(i); } LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i)) @@ -1270,8 +2456,12 @@ LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) emit_byte(i); } else { - emit_byte(0x81); - emit_byte(0xe8+d); + if (optimize_accum && isaccum(d)) + emit_byte(0x2d); + else { + emit_byte(0x81); + emit_byte(0xe8+d); + } emit_long(i); } } @@ -1279,8 +2469,12 @@ LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) { - emit_byte(0x80); - emit_byte(0xe8+d); + if (optimize_accum && isaccum(d)) + emit_byte(0x2c); + else { + emit_byte(0x80); + emit_byte(0xe8+d); + } emit_byte(i); } LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i)) @@ -1293,8 +2487,12 @@ LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) emit_byte(i); } else { - emit_byte(0x81); - emit_byte(0xc0+d); + if (optimize_accum && isaccum(d)) + emit_byte(0x05); + else { + emit_byte(0x81); + emit_byte(0xc0+d); + } emit_long(i); } } @@ -1302,16 +2500,19 @@ LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) { + emit_byte(0x66); if (isbyte(i)) { - emit_byte(0x66); emit_byte(0x83); emit_byte(0xc0+d); emit_byte(i); } else { - emit_byte(0x66); - emit_byte(0x81); - emit_byte(0xc0+d); + if (optimize_accum && isaccum(d)) + emit_byte(0x05); + else { + emit_byte(0x81); + emit_byte(0xc0+d); + } emit_word(i); } } @@ -1319,8 +2520,12 @@ LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i)) LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) { - emit_byte(0x80); - emit_byte(0xc0+d); + if (optimize_accum && isaccum(d)) + emit_byte(0x04); + else { + emit_byte(0x80); + emit_byte(0xc0+d); + } emit_byte(i); } LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i)) @@ -1394,8 +2599,12 @@ LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s)) LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) { - emit_byte(0x80); - emit_byte(0xf8+d); + if (optimize_accum && isaccum(d)) + emit_byte(0x3c); + else { + emit_byte(0x80); + emit_byte(0xf8+d); + } emit_byte(i); } 
LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i)) @@ -1431,19 +2640,35 @@ LENDFUNC(WRITE,NONE,2,raw_xor_b,(RW1 d, R1 s)) LOWFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) { - emit_byte(0x81); - emit_byte(0x2d); - emit_long(d); - emit_long(s); + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x2d); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + emit_byte(0x2d); + emit_long(d); + emit_long(s); + } } LENDFUNC(WRITE,RMW,2,raw_sub_l_mi,(MEMRW d, IMM s)) LOWFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) { - emit_byte(0x81); - emit_byte(0x3d); - emit_long(d); - emit_long(s); + if (optimize_imm8 && isbyte(s)) { + emit_byte(0x83); + emit_byte(0x3d); + emit_long(d); + emit_byte(s); + } + else { + emit_byte(0x81); + emit_byte(0x3d); + emit_long(d); + emit_long(s); + } } LENDFUNC(WRITE,READ,2,raw_cmp_l_mi,(MEMR d, IMM s)) @@ -1466,24 +2691,57 @@ LOWFUNC(WRITE,READ,0,raw_popfl,(void)) } LENDFUNC(WRITE,READ,0,raw_popfl,(void)) +#endif + /************************************************************************* * Unoptimizable stuff --- jump * *************************************************************************/ static inline void raw_call_r(R4 r) { +#if USE_NEW_RTASM + CALLsr(r); +#else emit_byte(0xff); emit_byte(0xd0+r); +#endif +} + +static inline void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) +{ +#if USE_NEW_RTASM + CALLsm(base, X86_NOREG, r, m); +#else + int mu; + switch(m) { + case 1: mu=0; break; + case 2: mu=1; break; + case 4: mu=2; break; + case 8: mu=3; break; + default: abort(); + } + emit_byte(0xff); + emit_byte(0x14); + emit_byte(0x05+8*r+0x40*mu); + emit_long(base); +#endif } static inline void raw_jmp_r(R4 r) { +#if USE_NEW_RTASM + JMPsr(r); +#else emit_byte(0xff); emit_byte(0xe0+r); +#endif } static inline void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) { +#if USE_NEW_RTASM + JMPsm(base, X86_NOREG, r, m); +#else int sib; switch (m) { @@ -1497,6 +2755,7 @@ static inline void raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) emit_byte(0x24); emit_byte(8*r+sib); emit_long(base); +#endif } static inline void raw_jmp_m(uae_u32 base) @@ -1506,16 +2765,25 @@ static inline void raw_jmp_m(uae_u32 base) emit_long(base); } + static inline void raw_call(uae_u32 t) { +#if USE_NEW_RTASM + CALLm(t); +#else emit_byte(0xe8); emit_long(t-(uae_u32)target-4); +#endif } static inline void raw_jmp(uae_u32 t) { +#if USE_NEW_RTASM + JMPm(t); +#else emit_byte(0xe9); emit_long(t-(uae_u32)target-4); +#endif } static inline void raw_jl(uae_u32 t) @@ -1561,6 +2829,11 @@ static inline void raw_jz_b_oponly(void) emit_byte(0x74); } +static inline void raw_jcc_b_oponly(int cc) +{ + emit_byte(0x70+cc); +} + static inline void raw_jmp_l_oponly(void) { emit_byte(0xe9); @@ -1581,6 +2854,91 @@ static inline void raw_nop(void) emit_byte(0x90); } +static inline void raw_emit_nop_filler(int nbytes) +{ + +#if defined(CPU_x86_64) + /* The recommended way to pad 64bit code is to use NOPs preceded by + maximally four 0x66 prefixes. Balance the size of nops. */ + static const uae_u8 prefixes[4] = { 0x66, 0x66, 0x66, 0x66 }; + if (nbytes == 0) + return; + + int i; + int nnops = (nbytes + 3) / 4; + int len = nbytes / nnops; + int remains = nbytes - nnops * len; + + for (i = 0; i < remains; i++) { + emit_block(prefixes, len); + raw_nop(); + } + for (; i < nnops; i++) { + emit_block(prefixes, len - 1); + raw_nop(); + } +#else + /* Source: GNU Binutils 2.12.90.0.15 */ + /* Various efficient no-op patterns for aligning code labels. 
+ Note: Don't try to assemble the instructions in the comments. + 0L and 0w are not legal. */ + static const uae_u8 f32_1[] = + {0x90}; /* nop */ + static const uae_u8 f32_2[] = + {0x89,0xf6}; /* movl %esi,%esi */ + static const uae_u8 f32_3[] = + {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */ + static const uae_u8 f32_4[] = + {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uae_u8 f32_5[] = + {0x90, /* nop */ + 0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */ + static const uae_u8 f32_6[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */ + static const uae_u8 f32_7[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uae_u8 f32_8[] = + {0x90, /* nop */ + 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */ + static const uae_u8 f32_9[] = + {0x89,0xf6, /* movl %esi,%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_10[] = + {0x8d,0x76,0x00, /* leal 0(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_11[] = + {0x8d,0x74,0x26,0x00, /* leal 0(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_12[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbf,0x00,0x00,0x00,0x00}; /* leal 0L(%edi),%edi */ + static const uae_u8 f32_13[] = + {0x8d,0xb6,0x00,0x00,0x00,0x00, /* leal 0L(%esi),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_14[] = + {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00, /* leal 0L(%esi,1),%esi */ + 0x8d,0xbc,0x27,0x00,0x00,0x00,0x00}; /* leal 0L(%edi,1),%edi */ + static const uae_u8 f32_15[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uae_u8 f32_16[] = + {0xeb,0x0d,0x90,0x90,0x90,0x90,0x90, /* jmp .+15; lotsa nops */ + 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90}; + static const uae_u8 *const f32_patt[] = { + f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8, + f32_9, f32_10, f32_11, f32_12, f32_13, f32_14, f32_15 + }; + + int nloops = nbytes / 16; + while (nloops-- > 0) + emit_block(f32_16, sizeof(f32_16)); + + nbytes %= 16; + if (nbytes) + emit_block(f32_patt[nbytes - 1], nbytes); +#endif +} + /************************************************************************* * Flag handling, to and fro UAE flag register * @@ -1715,6 +3073,17 @@ enum { X86_PROCESSOR_max }; +static const char * x86_processor_string_table[X86_PROCESSOR_max] = { + "80386", + "80486", + "Pentium", + "PentiumPro", + "K6", + "Athlon", + "Pentium4", + "x86-64" +}; + static struct ptt { const int align_loop; const int align_loop_max_skip; @@ -1805,7 +3174,6 @@ static void raw_init_cpu(void) { struct cpuinfo_x86 *c = &cpuinfo; - uae_u32 xlvl; /* Defaults */ c->x86_processor = X86_PROCESSOR_max; @@ -1845,6 +3213,7 @@ raw_init_cpu(void) } /* AMD-defined flags: level 0x80000001 */ + uae_u32 xlvl; cpuid(0x80000000, &xlvl, NULL, NULL, NULL); if ( (xlvl & 0xffff0000) == 0x80000000 ) { if ( xlvl >= 0x80000001 ) { @@ -2015,6 +3384,30 @@ static inline void make_tos(int r) live.spos[q]=p; } +static inline void make_tos2(int r, int r2) +{ + int q; + + make_tos(r2); /* Put the reg that's supposed to end up in position2 + on top */ + + if (live.spos[r]<0) { /* Register not yet on stack */ + make_tos(r); /* This will extend the stack */ + return; + } + /* Register is on stack */ + emit_byte(0xd9); + emit_byte(0xc9); /* Move r2 into position 2 */ + + 
q=live.onstack[live.tos-1]; + live.onstack[live.tos]=q; + live.spos[q]=live.tos; + live.onstack[live.tos-1]=r2; + live.spos[r2]=live.tos-1; + + make_tos(r); /* And r into 1 */ +} + static inline int stackpos(int r) { if (live.spos[r]<0) @@ -3307,6 +4700,7 @@ LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) } LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s)) + LOWFUNC(NONE,NONE,1,raw_ftst_r,(FR r)) { make_tos(r); diff --git a/jit/exception_handler.cpp b/jit/exception_handler.cpp index e93ade41..5505522c 100644 --- a/jit/exception_handler.cpp +++ b/jit/exception_handler.cpp @@ -75,6 +75,16 @@ typedef void *CONTEXT_T; #endif #endif +static inline uae_u16 swap16(uae_u16 x) +{ + return ((x&0xff00)>>8)|((x&0x00ff)<<8); +} + +static inline uae_u32 swap32(uae_u32 x) +{ + return ((x&0xff00)<<8)|((x&0x00ff)<<24)|((x&0xff0000)>>8)|((x&0xff000000)>>24); +} + #ifdef HAVE_CONTEXT_T /* * Try to handle faulted memory access in compiled code
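The swap16/swap32 helpers added to exception_handler.cpp are plain byte-reversal routines, presumably for converting between the big-endian 68k data layout and the little-endian host when the fault handler emulates an access. A self-contained check of their behaviour (restated with fixed-width types; otherwise identical to the definitions above):

#include <assert.h>
#include <stdint.h>

static inline uint16_t swap16(uint16_t x)
{
    return (uint16_t)(((x & 0xff00) >> 8) | ((x & 0x00ff) << 8));
}

static inline uint32_t swap32(uint32_t x)
{
    return ((x & 0xff00) << 8) | ((x & 0x00ff) << 24) |
           ((x & 0xff0000) >> 8) | ((x & 0xff000000) >> 24);
}

int main(void)
{
    /* bytes come out fully reversed */
    assert(swap16(0x1234) == 0x3412);
    assert(swap32(0x11223344) == 0x44332211);
    return 0;
}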