From 3c76b5dedf4c6d3eead71e0588b57d5d3e5f925f Mon Sep 17 00:00:00 2001
From: Klaus Treichel
Date: Sun, 2 Mar 2008 17:07:05 +0000
Subject: [PATCH] Fix two 64bit arch issues and add first (not yet publicly
 usable) native support for X86_64.

---
 ChangeLog                |   28 +
 jit/Makefile.am          |   11 +-
 jit/jit-apply-x86-64.c   |   25 +-
 jit/jit-apply-x86-64.h   |   13 +
 jit/jit-gen-x86-64.h     | 1402 +++++++++++++--
 jit/jit-insn.c           |    3 -
 jit/jit-rules-x86-64.c   | 3549 ++++++++++++++++++++++++++++++++++++++
 jit/jit-rules-x86-64.h   |  126 ++
 jit/jit-rules-x86-64.ins | 1941 +++++++++++++++++++++
 jit/jit-rules.h          |    5 +
 jit/jit-value.c          |    5 +
 11 files changed, 6997 insertions(+), 111 deletions(-)
 create mode 100644 jit/jit-rules-x86-64.c
 create mode 100644 jit/jit-rules-x86-64.h
 create mode 100644 jit/jit-rules-x86-64.ins

diff --git a/ChangeLog b/ChangeLog
index 733995c..a144875 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,31 @@
+2008-03-02  Klaus Treichel
+
+	* jit/jit-apply-x86-64.h: define the sizes for the indirector and
+	redirector stubs.
+
+	* jit/jit-apply-x86-64.c: emit either a memory-indirect, RIP-relative
+	or register-indirect jump in _jit_create_indirector, whichever is
+	appropriate for the location of the entry address.
+
+	* jit/jit-gen-x86-64.h: add lots of additional code generation
+	macros and fix some bugs.
+
+	* jit/jit-insn.c: don't mark the current block dead after throwing
+	an exception in jit_insn_call_intrinsic because this is handled in
+	jit_insn_call_native when the flag JIT_CALL_NORETURN is specified.
+
+	* jit/Makefile.am: add the new files jit-rules-x86-64.c,
+	jit-rules-x86-64.h and jit-rules-x86-64.ins to the sources.
+
+	* jit/jit-rules.h: add the native backend for X86_64.
+
+	* jit/jit-rules-x86-64.c, jit/jit-rules-x86-64.h,
+	jit/jit-rules-x86-64.ins: add the first native code generation for
+	X86_64.
+
+	* jit/jit-value.c: create a nint constant for long/ulong types in
+	jit_value_create_constant on 64bit archs.
+
 2008-02-29  Aleksey Demakov
 
 	* include/jit/jit-plus.h, jitplus/jit-plus-jump-table.cpp:
diff --git a/jit/Makefile.am b/jit/Makefile.am
index 2785ac5..2daf76e 100644
--- a/jit/Makefile.am
+++ b/jit/Makefile.am
@@ -62,6 +62,8 @@ libjit_la_SOURCES = \
 	jit-rules-arm.c \
 	jit-rules-x86.h \
 	jit-rules-x86.c \
+	jit-rules-x86-64.h \
+	jit-rules-x86-64.c \
 	jit-setjmp.h \
 	jit-signal.c \
 	jit-string.c \
@@ -76,7 +78,8 @@ EXTRA_DIST = \
 	mklabel.sh \
 	jit-rules-alpha.ins \
 	jit-rules-arm.sel \
-	jit-rules-x86.ins
+	jit-rules-x86.ins \
+	jit-rules-x86-64.ins
 
 AM_CFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include -I. -I$(srcdir)
 
@@ -108,6 +111,12 @@ jit-rules-alpha.inc: jit-rules-alpha.ins $(top_builddir)/tools/gen-rules$(EXEEXT
 	$(top_builddir)/tools/gen-rules$(EXEEXT) $(srcdir)/jit-rules-alpha.ins \
 		>jit-rules-alpha.inc
 
+jit-rules-x86-64.lo: jit-rules-x86-64.inc
+
+jit-rules-x86-64.inc: jit-rules-x86-64.ins $(top_builddir)/tools/gen-rules$(EXEEXT)
+	$(top_builddir)/tools/gen-rules$(EXEEXT) $(srcdir)/jit-rules-x86-64.ins \
+		>jit-rules-x86-64.inc
+
 CLEANFILES = \
 	jit-interp-labels.h \
 	jit-rules-alpha.inc \
diff --git a/jit/jit-apply-x86-64.c b/jit/jit-apply-x86-64.c
index 49ef49f..3d07e98 100644
--- a/jit/jit-apply-x86-64.c
+++ b/jit/jit-apply-x86-64.c
@@ -171,21 +171,30 @@ void *_jit_create_redirector(unsigned char *buf, void *func,
 
 void *_jit_create_indirector(unsigned char *buf, void **entry)
 {
-	jit_nint offset;
 	void *start = (void *)buf;
 
 	/* Jump to the entry point. */
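+	/* All three cases below jump through the slot at *entry, so the
+	   real target can be repatched later; they only differ in how the
+	   slot is addressed (absolute, RIP relative or through R11). */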
-	offset = (jit_nint)entry - ((jit_nint)buf + 5);
-	if((offset < jit_min_int) || (offset > jit_max_int))
+	if(((jit_nint)entry >= jit_min_int) && ((jit_nint)entry <= jit_max_int))
 	{
-		/* offset is outside the 32 bit offset range */
-		/* so we have to do an indirect jump via register. */
-		x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)entry, 8);
-		x86_64_jmp_reg(buf, X86_64_R11);
+		/* We are in the 32bit range so we can use the entry directly. */
+		x86_64_jmp_mem(buf, (jit_nint)entry);
 	}
 	else
 	{
-		x86_64_jmp_mem(buf, offset);
+		jit_nint offset = (jit_nint)entry - ((jit_nint)buf + 7);
+
+		if((offset >= jit_min_int) && (offset <= jit_max_int))
+		{
+			/* We are in the 32bit range so we can use RIP relative addressing. */
+			x86_64_jmp_membase(buf, X86_64_RIP, offset);
+		}
+		else
+		{
+			/* The offset is outside the 32 bit offset range */
+			/* so we have to do an indirect jump via register. */
+			x86_64_mov_reg_imm_size(buf, X86_64_R11, (jit_nint)entry, 8);
+			x86_64_jmp_regp(buf, X86_64_R11);
+		}
 	}
 
 	return start;
diff --git a/jit/jit-apply-x86-64.h b/jit/jit-apply-x86-64.h
index 23a841b..bc8c2c5 100644
--- a/jit/jit-apply-x86-64.h
+++ b/jit/jit-apply-x86-64.h
@@ -174,4 +174,17 @@
 #define jit_closure_size	0x90
 #define jit_closure_align	0x20
 
+/*
+ * The number of bytes that are needed for a redirector stub.
+ * This includes any extra bytes that are needed for alignment.
+ */
+#define jit_redirector_size	0x100
+
+/*
+ * The number of bytes that are needed for an indirector stub.
+ * This includes any extra bytes that are needed for alignment.
+ */
+#define jit_indirector_size	0x10
+
+
 #endif /* _JIT_APPLY_X86_64_H */
diff --git a/jit/jit-gen-x86-64.h b/jit/jit-gen-x86-64.h
index 639b856..87ea111 100644
--- a/jit/jit-gen-x86-64.h
+++ b/jit/jit-gen-x86-64.h
@@ -94,6 +94,22 @@ typedef enum
 	/* 1 = 64 bit operand size */
 } X86_64_REX_Bits;
 
+/*
+ * Third part of the opcodes for xmm instructions, which are encoded as:
+ * Opcode1: 0xF3 (single precision) or 0xF2 (double precision).
+ *          This is handled as a prefix.
+ * Opcode2: 0x0F
+ */
+typedef enum
+{
+	XMM1_MOV		= 0x10,
+	XMM1_MOV_REV	= 0x11,
+	XMM1_ADD		= 0x58,
+	XMM1_MUL		= 0x59,
+	XMM1_SUB		= 0x5C,
+	XMM1_DIV		= 0x5E
+} X86_64_XMM1_OP;
+
 /*
  * Helper union for emitting 64 bit immediate values.
  */
@@ -175,18 +191,29 @@ typedef union
 	} \
 } while(0)
 
+/*
+ * Emit the REX prefix.
+ * The natural size is a power of 2 (1, 2, 4 or 8).
+ * For accessing the low byte registers DIL, SIL, BPL and SPL we have to
+ * generate a REX prefix with the value 0x40 too.
+ * To force this, OR the natural size with 1.
+ */
 #define x86_64_rex(rex_bits)	(0x40 | (rex_bits))
 #define x86_64_rex_emit(inst, width, modrm_reg, index_reg, rm_base_opcode_reg) \
 	do { \
 		unsigned char __rex_bits = \
-			(((width) > 4) ? X86_64_REX_W : 0) | \
-			(((modrm_reg) > 7) ? X86_64_REX_R : 0) | \
-			(((index_reg) > 7) ? X86_64_REX_X : 0) | \
-			(((rm_base_opcode_reg) > 7) ? X86_64_REX_B : 0); \
+			(((width) & 8) ? X86_64_REX_W : 0) | \
+			(((modrm_reg) & 8) ? X86_64_REX_R : 0) | \
+			(((index_reg) & 8) ? X86_64_REX_X : 0) | \
+			(((rm_base_opcode_reg) & 8) ? 
X86_64_REX_B : 0); \ if((__rex_bits != 0)) \ { \ *(inst)++ = x86_64_rex(__rex_bits); \ } \ + else if(((width) & 1) && ((modrm_reg & 4) || (rm_base_opcode_reg & 4))) \ + { \ + *(inst)++ = x86_64_rex(0); \ + } \ } while(0) /* @@ -854,6 +881,9 @@ typedef union * Instructions with one opcode (plus optional r/m) */ +/* + * Unary opcodes + */ #define x86_64_alu1_reg(inst, opc1, r, reg) \ do { \ x86_64_rex_emit((inst), 0, 0, 0, (reg)); \ @@ -943,7 +973,7 @@ typedef union x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \ } while(0) -#define x86_64_alu2_reg_reg_size(inst, opc1, opc2, dreg, sreg, size) \ +#define x86_64_alu1_reg_reg_size(inst, opc1, dreg, sreg, size) \ do { \ if((size) == 2) \ { \ @@ -951,11 +981,10 @@ typedef union } \ x86_64_rex_emit((inst), (size), (dreg), 0, (sreg)); \ *(inst)++ = (unsigned char)(opc1); \ - *(inst)++ = (unsigned char)(opc2); \ x86_64_reg_emit((inst), (dreg), (sreg)); \ } while(0) -#define x86_64_alu2_reg_regp_size(inst, opc1, opc2, dreg, sregp, size) \ +#define x86_64_alu1_reg_regp_size(inst, opc1, dreg, sregp, size) \ do { \ if((size) == 2) \ { \ @@ -963,11 +992,10 @@ typedef union } \ x86_64_rex_emit((inst), (size), (dreg), 0, (sregp)); \ *(inst)++ = (unsigned char)(opc1); \ - *(inst)++ = (unsigned char)(opc2); \ x86_64_regp_emit((inst), (dreg), (sregp)); \ } while(0) -#define x86_64_alu2_reg_mem_size(inst, opc1, opc2, dreg, mem, size) \ +#define x86_64_alu1_reg_mem_size(inst, opc1, dreg, mem, size) \ do { \ if((size) == 2) \ { \ @@ -975,11 +1003,10 @@ typedef union } \ x86_64_rex_emit((inst), (size), (dreg), 0, 0); \ *(inst)++ = (unsigned char)(opc1); \ - *(inst)++ = (unsigned char)(opc2); \ x86_64_mem_emit((inst), (dreg), (mem)); \ } while(0) -#define x86_64_alu2_reg_membase_size(inst, opc1, opc2, dreg, basereg, disp, size) \ +#define x86_64_alu1_reg_membase_size(inst, opc1, dreg, basereg, disp, size) \ do { \ if((size) == 2) \ { \ @@ -987,11 +1014,10 @@ typedef union } \ x86_64_rex_emit((inst), (size), (dreg), 0, (basereg)); \ *(inst)++ = (unsigned char)(opc1); \ - *(inst)++ = (unsigned char)(opc2); \ x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \ } while(0) -#define x86_64_alu2_reg_memindex_size(inst, opc1, opc2, dreg, basereg, disp, indexreg, shift, size) \ +#define x86_64_alu1_reg_memindex_size(inst, opc1, dreg, basereg, disp, indexreg, shift, size) \ do { \ if((size) == 2) \ { \ @@ -999,82 +1025,67 @@ typedef union } \ x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \ *(inst)++ = (unsigned char)(opc1); \ - *(inst)++ = (unsigned char)(opc2); \ x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \ } while(0) -/* - * xmm instructions with two opcodes - */ -#define x86_64_xmm2_reg_reg(inst, opc1, opc2, r, reg) \ - do { \ - x86_64_rex_emit(inst, 0, (r), 0, (reg)); \ - *(inst)++ = (unsigned char)(opc1); \ - *(inst)++ = (unsigned char)(opc2); \ - x86_64_reg_emit(inst, (r), (reg)); \ - } while(0) - -#define x86_64_xmm2_reg_regp(inst, opc1, opc2, r, regp) \ - do { \ - x86_64_rex_emit(inst, 0, (r), 0, (regp)); \ - *(inst)++ = (unsigned char)(opc1); \ - *(inst)++ = (unsigned char)(opc2); \ - x86_64_regp_emit(inst, (r), (regp)); \ - } while(0) - -#define x86_64_xmm2_reg_membase(inst, opc1, opc2, r, basereg, disp) \ - do { \ - x86_64_rex_emit(inst, 0, (r), 0, (basereg)); \ - *(inst)++ = (unsigned char)(opc1); \ - *(inst)++ = (unsigned char)(opc2); \ - x86_64_membase_emit(inst, (r), (basereg), (disp)); \ - } while(0) - -#define x86_64_xmm2_reg_memindex(inst, opc1, opc2, r, basereg, disp, 
indexreg, shift) \ +#define x86_64_alu2_reg_reg_size(inst, opc1, opc2, dreg, sreg, size) \ do { \ - x86_64_rex_emit(inst, 0, (r), (indexreg), (basereg)); \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), (dreg), 0, (sreg)); \ *(inst)++ = (unsigned char)(opc1); \ *(inst)++ = (unsigned char)(opc2); \ - x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \ + x86_64_reg_emit((inst), (dreg), (sreg)); \ } while(0) -/* - * xmm instructions with a prefix and two opcodes - */ -#define x86_64_p1_xmm2_reg_reg(inst, p1, opc1, opc2, r, reg) \ +#define x86_64_alu2_reg_regp_size(inst, opc1, opc2, dreg, sregp, size) \ do { \ - *(inst)++ = (unsigned char)(p1); \ - x86_64_rex_emit(inst, 0, (r), 0, (reg)); \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), (dreg), 0, (sregp)); \ *(inst)++ = (unsigned char)(opc1); \ *(inst)++ = (unsigned char)(opc2); \ - x86_64_reg_emit(inst, (r), (reg)); \ + x86_64_regp_emit((inst), (dreg), (sregp)); \ } while(0) -#define x86_64_p1_xmm2_reg_regp(inst, p1, opc1, opc2, r, regp) \ +#define x86_64_alu2_reg_mem_size(inst, opc1, opc2, dreg, mem, size) \ do { \ - *(inst)++ = (unsigned char)(p1); \ - x86_64_rex_emit(inst, 0, (r), 0, (regp)); \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), (dreg), 0, 0); \ *(inst)++ = (unsigned char)(opc1); \ *(inst)++ = (unsigned char)(opc2); \ - x86_64_regp_emit(inst, (r), (regp)); \ + x86_64_mem_emit((inst), (dreg), (mem)); \ } while(0) -#define x86_64_p1_xmm2_reg_membase(inst, p1, opc1, opc2, r, basereg, disp) \ +#define x86_64_alu2_reg_membase_size(inst, opc1, opc2, dreg, basereg, disp, size) \ do { \ - *(inst)++ = (unsigned char)(p1); \ - x86_64_rex_emit(inst, 0, (r), 0, (basereg)); \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), (dreg), 0, (basereg)); \ *(inst)++ = (unsigned char)(opc1); \ *(inst)++ = (unsigned char)(opc2); \ - x86_64_membase_emit(inst, (r), (basereg), (disp)); \ + x86_64_membase_emit((inst), (dreg), (basereg), (disp)); \ } while(0) - -#define x86_64_p1_xmm2_reg_memindex(inst, p1, opc1, opc2, r, basereg, disp, indexreg, shift) \ + +#define x86_64_alu2_reg_memindex_size(inst, opc1, opc2, dreg, basereg, disp, indexreg, shift, size) \ do { \ - *(inst)++ = (unsigned char)(p1); \ - x86_64_rex_emit(inst, 0, (r), (indexreg), (basereg)); \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), (dreg), (indexreg), (basereg)); \ *(inst)++ = (unsigned char)(opc1); \ *(inst)++ = (unsigned char)(opc2); \ - x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \ + x86_64_memindex_emit((inst), (dreg), (basereg), (disp), (indexreg), (shift)); \ } while(0) /* @@ -1872,6 +1883,12 @@ typedef union x86_64_alu1_memindex_size((inst), 0xf6, 2, (basereg), (disp), (indexreg), (shift), (size)); \ } while(0) +/* + * Note: x86_64_clear_reg () changes the condition code! 
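+ * It is encoded as a 32 bit xor of the register with itself, which
+ * in 64 bit mode also clears the upper 32 bits of the register.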
+ */ +#define x86_64_clear_reg(inst, reg) \ + x86_64_xor_reg_reg_size((inst), (reg), (reg), 4) + /* * Lea instructions */ @@ -1986,15 +2003,43 @@ typedef union *(inst)++ = (unsigned char)0x66; \ } \ x86_64_rex_emit(inst, (size), 0, 0, (dreg)); \ - if((size) == 1) \ - { \ - *(inst)++ = (unsigned char)0xb0 + ((dreg) & 0x7); \ - } \ - else \ + switch((size)) \ { \ - *(inst)++ = (unsigned char)0xb8 + ((dreg) & 0x7); \ + case 1: \ + { \ + *(inst)++ = (unsigned char)0xb0 + ((dreg) & 0x7); \ + x86_imm_emit8(inst, (imm)); \ + } \ + break; \ + case 2: \ + { \ + *(inst)++ = (unsigned char)0xb8 + ((dreg) & 0x7); \ + x86_imm_emit16(inst, (imm)); \ + } \ + break; \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xb8 + ((dreg) & 0x7); \ + x86_imm_emit32(inst, (imm)); \ + } \ + break; \ + case 8: \ + { \ + jit_nint __x86_64_imm = (imm); \ + if(__x86_64_imm >= jit_min_int && __x86_64_imm <= jit_max_int) \ + { \ + *(inst)++ = (unsigned char)0xc7; \ + x86_64_reg_emit((inst), 0, (dreg)); \ + x86_imm_emit32(inst, (__x86_64_imm)); \ + } \ + else \ + { \ + *(inst)++ = (unsigned char)0xb8 + ((dreg) & 0x7); \ + x86_64_imm_emit64(inst, (__x86_64_imm)); \ + } \ + } \ + break; \ } \ - x86_64_imm_emit_max64(inst, (imm), (size)); \ } while(0) /* @@ -2022,6 +2067,17 @@ typedef union } \ } while (0) +#define x86_64_mov_reg_regp_size(inst, dreg, sregp, size) \ + do { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit(inst, (size), (dreg), 0, (sregp)); \ + x86_64_opcode1_emit(inst, 0x8a, (size)); \ + x86_64_regp_emit((inst), (dreg), (sregp)); \ + } while(0) + #define x86_64_mov_reg_membase_size(inst, dreg, basereg, disp, size) \ do { \ if((size) == 2) \ @@ -2062,6 +2118,18 @@ typedef union x86_64_imm_emit_max32(inst, (imm), (size)); \ } while(0) +#define x86_64_mov_regp_imm_size(inst, dregp, imm, size) \ + do { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit(inst, (size), 0, 0, (dregp)); \ + x86_64_opcode1_emit(inst, 0xc6, (size)); \ + x86_64_regp_emit((inst), 0, (dregp)); \ + x86_64_imm_emit_max32(inst, (imm), (size)); \ + } while(0) + #define x86_64_mov_membase_imm_size(inst, basereg, disp, imm, size) \ do { \ if((size) == 2) \ @@ -2087,11 +2155,11 @@ typedef union } while(0) /* - * Move with sign extension to the given size (unsigned) + * Move with sign extension to the given size (signed) */ #define x86_64_movsx8_reg_reg_size(inst, dreg, sreg, size) \ do { \ - x86_64_alu2_reg_reg_size((inst), 0x0f, 0xbe, (dreg), (sreg), (size)); \ + x86_64_alu2_reg_reg_size((inst), 0x0f, 0xbe, (dreg), (sreg), (size) | 1); \ }while(0) #define x86_64_movsx8_reg_regp_size(inst, dreg, sregp, size) \ @@ -2139,12 +2207,37 @@ typedef union x86_64_alu2_reg_memindex_size((inst), 0x0f, 0xbf, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \ }while(0) +#define x86_64_movsx32_reg_reg_size(inst, dreg, sreg, size) \ + do { \ + x86_64_alu1_reg_reg_size((inst), 0x63, (dreg), (sreg), (size)); \ + }while(0) + +#define x86_64_movsx32_reg_regp_size(inst, dreg, sregp, size) \ + do { \ + x86_64_alu1_reg_regp_size((inst), 0x63, (dreg), (sregp), (size)); \ + }while(0) + +#define x86_64_movsx32_reg_mem_size(inst, dreg, mem, size) \ + do { \ + x86_64_alu1_reg_mem_size((inst), 0x63, (dreg), (mem), (size)); \ + }while(0) + +#define x86_64_movsx32_reg_membase_size(inst, dreg, basereg, disp, size) \ + do { \ + x86_64_alu1_reg_membase_size((inst), 0x63, (dreg), (basereg), (disp), (size)); \ + }while(0) + +#define x86_64_movsx32_reg_memindex_size(inst, dreg, basereg, disp, 
indexreg, shift, size) \ + do { \ + x86_64_alu1_reg_memindex_size((inst), 0x63, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \ + }while(0) + /* * Move with zero extension to the given size (unsigned) */ #define x86_64_movzx8_reg_reg_size(inst, dreg, sreg, size) \ do { \ - x86_64_alu2_reg_reg_size((inst), 0x0f, 0xb6, (dreg), (sreg), (size)); \ + x86_64_alu2_reg_reg_size((inst), 0x0f, 0xb6, (dreg), (sreg), (size) | 1); \ }while(0) #define x86_64_movzx8_reg_regp_size(inst, dreg, sregp, size) \ @@ -2261,9 +2354,49 @@ typedef union */ #define x86_64_push_imm(inst, imm) \ do { \ - x86_push_imm((inst), (imm)); \ + int _imm = (int) (imm); \ + if(x86_is_imm8(_imm)) \ + { \ + *(inst)++ = (unsigned char)0x6A; \ + x86_imm_emit8 ((inst), (_imm)); \ + } \ + else \ + { \ + *(inst)++ = (unsigned char)0x68; \ + x86_imm_emit32((inst), (_imm)); \ + } \ } while(0) +/* + * Use this version if you need a specific width of the value + * pushed. The Value on the stack will allways be 64bit wide. + */ +#define x86_64_push_imm_size(inst, imm, size) \ + do { \ + switch(size) \ + { \ + case 1: \ + { \ + *(inst)++ = (unsigned char)0x6A; \ + x86_imm_emit8((inst), (imm)); \ + } \ + break; \ + case 2: \ + { \ + *(inst)++ = (unsigned char)0x66; \ + *(inst)++ = (unsigned char)0x68; \ + x86_imm_emit16((inst), (imm)); \ + } \ + break; \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0x68; \ + x86_imm_emit32((inst), (imm)); \ + }\ + } \ + } while (0) + + /* * Pop instructions have a default size of 64 bit in 64 bit mode. * There is no way to encode a 32 bit pop. @@ -2420,6 +2553,54 @@ typedef union x86_64_alu1_memindex((inst), 0xff, 4, (basereg), (disp), (indexreg), (shift)); \ } while(0) +/* + * Set the low byte in a register to 0x01 if a condition is met + * or 0x00 otherwise. 
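+ * The second opcode byte is the short jcc opcode from the condition
+ * map plus 0x20, which yields the setcc encoding 0x0f 0x90 ... 0x9f.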
+ */ +#define x86_64_set_reg(inst, cond, dreg, is_signed) \ + do { \ + x86_64_rex_emit((inst), 1, 0, 0, (dreg)); \ + *(inst)++ = (unsigned char)0x0f; \ + if((is_signed)) \ + { \ + *(inst)++ = x86_cc_signed_map[(cond)] + 0x20; \ + } \ + else \ + { \ + *(inst)++ = x86_cc_unsigned_map[(cond)] + 0x20; \ + } \ + x86_64_reg_emit((inst), 0, (dreg)); \ + } while(0) + +#define x86_64_set_mem(inst, cond, mem, is_signed) \ + do { \ + *(inst)++ = (unsigned char)0x0f; \ + if((is_signed)) \ + { \ + *(inst)++ = x86_cc_signed_map[(cond)] + 0x20; \ + } \ + else \ + { \ + *(inst)++ = x86_cc_unsigned_map[(cond)] + 0x20; \ + } \ + x86_64_mem_emit((inst), 0, (mem)); \ + } while(0) + +#define x86_64_set_membase(inst, cond, basereg, disp, is_signed) \ + do { \ + x86_64_rex_emit((inst), 4, 0, 0, (basereg)); \ + *(inst)++ = (unsigned char)0x0f; \ + if((is_signed)) \ + { \ + *(inst)++ = x86_cc_signed_map[(cond)] + 0x20; \ + } \ + else \ + { \ + *(inst)++ = x86_cc_unsigned_map[(cond)] + 0x20; \ + } \ + x86_64_membase_emit((inst), 0, (basereg), (disp)); \ + } while(0) + /* * ret */ @@ -2433,37 +2614,170 @@ typedef union */ /* - * movaps + * xmm instructions with two opcodes */ -#define x86_64_movaps_reg_reg(inst, dreg, sreg) \ +#define x86_64_xmm2_reg_reg(inst, opc1, opc2, r, reg) \ do { \ - x86_64_xmm2_reg_reg((inst), 0x0f, 0x28, (dreg), (sreg)); \ + x86_64_rex_emit(inst, 0, (r), 0, (reg)); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_reg_emit(inst, (r), (reg)); \ } while(0) -#define x86_64_movaps_membase_reg(inst, basereg, disp, sreg) \ +#define x86_64_xmm2_reg_regp(inst, opc1, opc2, r, regp) \ do { \ - x86_64_xmm2_reg_membase((inst), 0x0f, 0x29, (sreg), (basereg), (disp)); \ + x86_64_rex_emit(inst, 0, (r), 0, (regp)); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_regp_emit(inst, (r), (regp)); \ } while(0) -#define x86_64_movaps_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \ +#define x86_64_xmm2_reg_mem(inst, opc1, opc2, r, mem) \ do { \ - x86_64_xmm2_reg_memindex((inst), 0x0f, 0x29, (sreg), (basereg), (disp), (indexreg), (shift)); \ + x86_64_rex_emit(inst, 0, (r), 0, 0); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_mem_emit(inst, (r), (mem)); \ } while(0) -#define x86_64_movaps_regp_reg(inst, dregp, sreg) \ +#define x86_64_xmm2_reg_membase(inst, opc1, opc2, r, basereg, disp) \ do { \ - x86_64_xmm2_reg_regp((inst), 0x0f, 0x29, (sreg), (dregp)); \ + x86_64_rex_emit(inst, 0, (r), 0, (basereg)); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_membase_emit(inst, (r), (basereg), (disp)); \ } while(0) -#define x86_64_movaps_reg_regp(inst, dreg, sregp) \ +#define x86_64_xmm2_reg_memindex(inst, opc1, opc2, r, basereg, disp, indexreg, shift) \ do { \ - x86_64_xmm2_reg_regp((inst), 0x0f, 0x28, (dreg), (sregp)); \ + x86_64_rex_emit(inst, 0, (r), (indexreg), (basereg)); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \ } while(0) -#define x86_64_movaps_reg_membase(inst, dreg, basereg, disp) \ +/* + * xmm instructions with a prefix and two opcodes + */ +#define x86_64_p1_xmm2_reg_reg_size(inst, p1, opc1, opc2, r, reg, size) \ do { \ - x86_64_xmm2_reg_membase((inst), 0x0f, 0x28, (dreg), (basereg), (disp)); \ - } while(0) + *(inst)++ = (unsigned char)(p1); \ + x86_64_rex_emit(inst, (size), (r), 0, (reg)); \ + *(inst)++ = (unsigned 
char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_reg_emit(inst, (r), (reg)); \ + } while(0) + +#define x86_64_p1_xmm2_reg_regp_size(inst, p1, opc1, opc2, r, regp, size) \ + do { \ + *(inst)++ = (unsigned char)(p1); \ + x86_64_rex_emit(inst, (size), (r), 0, (regp)); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_regp_emit(inst, (r), (regp)); \ + } while(0) + +#define x86_64_p1_xmm2_reg_mem_size(inst, p1, opc1, opc2, r, mem, size) \ + do { \ + *(inst)++ = (unsigned char)(p1); \ + x86_64_rex_emit(inst, (size), (r), 0, 0); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_mem_emit(inst, (r), (mem)); \ + } while(0) + +#define x86_64_p1_xmm2_reg_membase_size(inst, p1, opc1, opc2, r, basereg, disp, size) \ + do { \ + *(inst)++ = (unsigned char)(p1); \ + x86_64_rex_emit(inst, (size), (r), 0, (basereg)); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_membase_emit(inst, (r), (basereg), (disp)); \ + } while(0) + +#define x86_64_p1_xmm2_reg_memindex_size(inst, p1, opc1, opc2, r, basereg, disp, indexreg, shift, size) \ + do { \ + *(inst)++ = (unsigned char)(p1); \ + x86_64_rex_emit(inst, (size), (r), (indexreg), (basereg)); \ + *(inst)++ = (unsigned char)(opc1); \ + *(inst)++ = (unsigned char)(opc2); \ + x86_64_memindex_emit((inst), (r), (basereg), (disp), (indexreg), (shift)); \ + } while(0) + +/* + * xmm1: Macro for use of the X86_64_XMM1 enum + */ +#define x86_64_xmm1_reg_reg(inst, opc, dreg, sreg, is_double) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_xmm1_reg_regp(inst, opc, dreg, sregp, is_double) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_xmm1_reg_mem(inst, opc, dreg, mem, is_double) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (mem), 0); \ + } while(0) + +#define x86_64_xmm1_reg_membase(inst, opc, dreg, basereg, disp, is_double) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), ((is_double) ? 0xf2 : 0xf3), 0x0f, (opc), (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_xmm1_reg_memindex(inst, opc, dreg, basereg, disp, indexreg, shift, is_double) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), ((is_double) ? 
0xf2 : 0xf3), 0x0f, (opc), (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * Move instructions + */ + +/* + * movaps: Move aligned quadword (16 bytes) + */ +#define x86_64_movaps_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_xmm2_reg_reg((inst), 0x0f, 0x28, (dreg), (sreg)); \ + } while(0) + +#define x86_64_movaps_regp_reg(inst, dregp, sreg) \ + do { \ + x86_64_xmm2_reg_regp((inst), 0x0f, 0x29, (sreg), (dregp)); \ + } while(0) + +#define x86_64_movaps_mem_reg(inst, mem, sreg) \ + do { \ + x86_64_xmm2_reg_mem((inst), 0x0f, 0x29, (sreg), (mem)); \ + } while(0) + +#define x86_64_movaps_membase_reg(inst, basereg, disp, sreg) \ + do { \ + x86_64_xmm2_reg_membase((inst), 0x0f, 0x29, (sreg), (basereg), (disp)); \ + } while(0) + +#define x86_64_movaps_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \ + do { \ + x86_64_xmm2_reg_memindex((inst), 0x0f, 0x29, (sreg), (basereg), (disp), (indexreg), (shift)); \ + } while(0) + +#define x86_64_movaps_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_xmm2_reg_regp((inst), 0x0f, 0x28, (dreg), (sregp)); \ + } while(0) + +#define x86_64_movaps_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_xmm2_reg_mem((inst), 0x0f, 0x28, (dreg), (mem)); \ + } while(0) + +#define x86_64_movaps_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_xmm2_reg_membase((inst), 0x0f, 0x28, (dreg), (basereg), (disp)); \ + } while(0) #define x86_64_movaps_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ do { \ @@ -2471,41 +2785,931 @@ typedef union } while(0) /* - * movsd + * movups: Move unaligned quadword (16 bytes) + */ +#define x86_64_movups_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_xmm2_reg_reg((inst), 0x0f, 0x10, (dreg), (sreg)); \ + } while(0) + +#define x86_64_movups_regp_reg(inst, dregp, sreg) \ + do { \ + x86_64_xmm2_reg_regp((inst), 0x0f, 0x11, (sreg), (dregp)); \ + } while(0) + +#define x86_64_movups_mem_reg(inst, mem, sreg) \ + do { \ + x86_64_xmm2_reg_mem((inst), 0x0f, 0x11, (sreg), (mem)); \ + } while(0) + +#define x86_64_movups_membase_reg(inst, basereg, disp, sreg) \ + do { \ + x86_64_xmm2_reg_membase((inst), 0x0f, 0x11, (sreg), (basereg), (disp)); \ + } while(0) + +#define x86_64_movups_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \ + do { \ + x86_64_xmm2_reg_memindex((inst), 0x0f, 0x11, (sreg), (basereg), (disp), (indexreg), (shift)); \ + } while(0) + +#define x86_64_movups_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_xmm2_reg_regp((inst), 0x0f, 0x10, (dreg), (sregp)); \ + } while(0) + +#define x86_64_movups_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_xmm2_reg_mem((inst), 0x0f, 0x10, (dreg), (mem)); \ + } while(0) + +#define x86_64_movups_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_xmm2_reg_membase((inst), 0x0f, 0x10, (dreg), (basereg), (disp)); \ + } while(0) + +#define x86_64_movups_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_xmm2_reg_memindex((inst), 0x0f, 0x10, (dreg), (basereg), (disp), (indexreg), (shift)); \ + } while(0) + +/* + * movsd: Move scalar double (64bit float) */ #define x86_64_movsd_reg_reg(inst, dreg, sreg) \ do { \ - x86_64_p1_xmm2_reg_reg((inst), 0xf2, 0x0f, 0x10, (dreg), (sreg)); \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x10, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_movsd_regp_reg(inst, dregp, sreg) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x11, (sreg), (dregp), 0); \ + } while(0) + +#define x86_64_movsd_mem_reg(inst, mem, sreg) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 
0x11, (sreg), (mem), 0); \ } while(0) #define x86_64_movsd_membase_reg(inst, basereg, disp, sreg) \ do { \ - x86_64_p1_xmm2_reg_membase((inst), 0xf2, 0x0f, 0x11, (sreg), (basereg), (disp)); \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x11, (sreg), (basereg), (disp), 0); \ } while(0) #define x86_64_movsd_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \ do { \ - x86_64_p1_xmm2_reg_memindex((inst), 0xf2, 0x0f, 0x11, (sreg), (basereg), (disp), (indexreg), (shift)); \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x11, (sreg), (basereg), (disp), (indexreg), (shift), 0); \ } while(0) -#define x86_64_movsd_regp_reg(inst, dregp, sreg) \ +#define x86_64_movsd_reg_regp(inst, dreg, sregp) \ do { \ - x86_64_p1_xmm2_reg_regp((inst), 0xf2, 0x0f, 0x11, (sreg), (dregp)); \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x10, (dreg), (sregp), 0); \ } while(0) -#define x86_64_movsd_reg_regp(inst, dreg, sregp) \ +#define x86_64_movsd_reg_mem(inst, dreg, mem) \ do { \ - x86_64_p1_xmm2_reg_regp((inst), 0xf2, 0x0f, 0x10, (dreg), (sregp)); \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x10, (dreg), (mem), 0); \ } while(0) #define x86_64_movsd_reg_membase(inst, dreg, basereg, disp) \ do { \ - x86_64_p1_xmm2_reg_membase((inst), 0xf2, 0x0f, 0x10, (dreg), (basereg), (disp)); \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x10, (dreg), (basereg), (disp), 0); \ } while(0) #define x86_64_movsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ do { \ - x86_64_p1_xmm2_reg_memindex((inst), 0xf2, 0x0f, 0x10, (dreg), (basereg), (disp), (indexreg), (shift)); \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x10, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * movss: Move scalar single (32bit float) + */ +#define x86_64_movss_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x10, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_movss_regp_reg(inst, dregp, sreg) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x11, (sreg), (dregp), 0); \ + } while(0) + +#define x86_64_movss_mem_reg(inst, mem, sreg) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x11, (sreg), (mem), 0); \ + } while(0) + +#define x86_64_movss_membase_reg(inst, basereg, disp, sreg) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x11, (sreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_movss_memindex_reg(inst, basereg, disp, indexreg, shift, sreg) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x11, (sreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +#define x86_64_movss_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x10, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_movss_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x10, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_movss_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x10, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_movss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x10, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * Conversion opcodes + */ + +/* + * cvtsi2ss: Convert signed integer to float32 + * The size is the size of the integer value (4 or 8) + */ +#define x86_64_cvtsi2ss_reg_reg_size(inst, dxreg, 
sreg, size) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x2a, (dxreg), (sreg), (size)); \ + } while(0) + +#define x86_64_cvtsi2ss_reg_regp_size(inst, dxreg, sregp, size) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x2a, (dxreg), (sregp), (size)); \ + } while(0) + +#define x86_64_cvtsi2ss_reg_mem_size(inst, dxreg, mem, size) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x2a, (dxreg), (mem), (size)); \ + } while(0) + +#define x86_64_cvtsi2ss_reg_membase_size(inst, dreg, basereg, disp, size) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x2a, (dreg), (basereg), (disp), (size)); \ + } while(0) + +#define x86_64_cvtsi2ss_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x2a, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \ + } while(0) + +/* + * cvtsi2sd: Convert signed integer to float64 + * The size is the size of the integer value (4 or 8) + */ +#define x86_64_cvtsi2sd_reg_reg_size(inst, dxreg, sreg, size) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x2a, (dxreg), (sreg), (size)); \ + } while(0) + +#define x86_64_cvtsi2sd_reg_regp_size(inst, dxreg, sregp, size) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x2a, (dxreg), (sregp), (size)); \ + } while(0) + +#define x86_64_cvtsi2sd_reg_mem_size(inst, dxreg, mem, size) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x2a, (dxreg), (mem), (size)); \ + } while(0) + +#define x86_64_cvtsi2sd_reg_membase_size(inst, dreg, basereg, disp, size) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x2a, (dreg), (basereg), (disp), (size)); \ + } while(0) + +#define x86_64_cvtsi2sd_reg_memindex_size(inst, dreg, basereg, disp, indexreg, shift, size) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x2a, (dreg), (basereg), (disp), (indexreg), (shift), (size)); \ + } while(0) + +/* + * cvtss2sd: Convert float32 to float64 + */ +#define x86_64_cvtss2sd_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5a, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_cvtss2sd_reg_regp(inst, dxreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5a, (dxreg), (sregp), 0); \ + } while(0) + +#define x86_64_cvtss2sd_reg_mem(inst, dxreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5a, (dxreg), (mem), 0); \ + } while(0) + +#define x86_64_cvtss2sd_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5a, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_cvtss2sd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5a, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * cvtsd2ss: Convert float64 to float32 + */ +#define x86_64_cvtsd2ss_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5a, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_cvtsd2ss_reg_regp(inst, dxreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5a, (dxreg), (sregp), 0); \ + } while(0) + +#define x86_64_cvtsd2ss_reg_mem(inst, dxreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5a, (dxreg), (mem), 0); \ + } while(0) + +#define x86_64_cvtsd2ss_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 
0x0f, 0x5a, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_cvtsd2ss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5a, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * Arithmetic opcodes + */ + +/* + * addss: Add scalar single precision float values + */ +#define x86_64_addss_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x58, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_addss_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x58, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_addss_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x58, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_addss_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x58, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_addss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x58, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * subss: Substract scalar single precision float values + */ +#define x86_64_subss_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_subss_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_subss_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_subss_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_subss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5c, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * mulss: Multiply scalar single precision float values + */ +#define x86_64_mulss_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x59, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_mulss_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x59, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_mulss_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x59, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_mulss_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x59, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_mulss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x59, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * divss: Divide scalar single precision float values + */ +#define x86_64_divss_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_divss_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_divss_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 
0xf3, 0x0f, 0x5e, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_divss_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_divss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5e, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * addsd: Add scalar double precision float values + */ +#define x86_64_addsd_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x58, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_addsd_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x58, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_addsd_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x58, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_addsd_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x58, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_addsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x58, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * subsd: Substract scalar double precision float values + */ +#define x86_64_subsd_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_subsd_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_subsd_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_subsd_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_subsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5c, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * mulsd: Multiply scalar double precision float values + */ +#define x86_64_mulsd_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x59, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_mulsd_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x59, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_mulsd_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x59, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_mulsd_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x59, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_mulsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x59, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * divsd: Divide scalar double precision float values + */ +#define x86_64_divsd_reg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (sreg), 0); \ + } while(0) + +#define x86_64_divsd_reg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 
0x5e, (dreg), (sregp), 0); \ + } while(0) + +#define x86_64_divsd_reg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (mem), 0); \ + } while(0) + +#define x86_64_divsd_reg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (basereg), (disp), 0); \ + } while(0) + +#define x86_64_divsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5e, (dreg), (basereg), (disp), (indexreg), (shift), 0); \ + } while(0) + +/* + * fpu instructions + */ + +/* + * fld + */ + +#define x86_64_fld_regp_size(inst, sregp, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_regp_emit((inst), 0, (sregp)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_regp_emit((inst), 0, (sregp)); \ + } \ + break; \ + case 10: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_regp_emit((inst), 5, (sregp)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fld_mem_size(inst, mem, size) \ + do { \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_mem_emit((inst), 0, (mem)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_mem_emit((inst), 0, (mem)); \ + } \ + break; \ + case 10: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_mem_emit((inst), 5, (mem)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fld_membase_size(inst, basereg, disp, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_membase_emit((inst), 0, (basereg), (disp)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_membase_emit((inst), 0, (basereg), (disp)); \ + } \ + break; \ + case 10: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_membase_emit((inst), 5, (basereg), (disp)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fld_memindex_size(inst, basereg, disp, indexreg, shift, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, (indexreg), (basereg)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_memindex_emit((inst), 0, (basereg), (disp), (indexreg), (shift)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_memindex_emit((inst), 0, (basereg), (disp), (indexreg), (shift)); \ + } \ + break; \ + case 10: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_memindex_emit((inst), 5, (basereg), (disp), (indexreg), (shift)); \ + } \ + break; \ + } \ + } while(0) + +/* + * fild: Load an integer and convert it to long double + */ +#define x86_fild_mem_size(inst, mem, size) \ + do { \ + switch(size) \ + { \ + case 2: \ + { \ + *(inst)++ = (unsigned char)0xdf; \ + x86_64_mem_emit((inst), 0, (mem)); \ + } \ + break; \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_mem_emit((inst), 0, (mem)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdf; \ + x86_64_mem_emit((inst), 5, (mem)); \ + } \ + break; \ + } \ + } while (0) + +#define x86_fild_membase_size(inst, mem, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \ + switch(size) \ + { \ + case 2: \ + { \ + *(inst)++ = (unsigned char)0xdf; \ + x86_64_membase_emit((inst), 0, (basereg), (disp)); \ + } \ + break; \ + case 4: \ + { \ + *(inst)++ = 
(unsigned char)0xdb; \ + x86_64_membase_emit((inst), 0, (basereg), (disp)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdf; \ + x86_64_membase_emit((inst), 5, (basereg), (disp)); \ + } \ + break; \ + } \ + } while (0) + +/* + * fst: Store fpu register to memory (only float32 and float64 allowed) + */ + +#define x86_64_fst_regp_size(inst, sregp, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_regp_emit((inst), 2, (sregp)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_regp_emit((inst), 2, (sregp)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fst_mem_size(inst, mem, size) \ + do { \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_mem_emit((inst), 2, (mem)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_mem_emit((inst), 2, (mem)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fst_membase_size(inst, basereg, disp, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_membase_emit((inst), 2, (basereg), (disp)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_membase_emit((inst), 2, (basereg), (disp)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fst_memindex_size(inst, basereg, disp, indexreg, shift, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, (indexreg), (basereg)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_memindex_emit((inst), 2, (basereg), (disp), (indexreg), (shift)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_memindex_emit((inst), 2, (basereg), (disp), (indexreg), (shift)); \ + } \ + break; \ + } \ + } while(0) + +/* + * fstp: store top fpu register to memory and pop it from the fpu stack + */ + +#define x86_64_fstp_regp_size(inst, sregp, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_regp_emit((inst), 3, (sregp)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_regp_emit((inst), 3, (sregp)); \ + } \ + break; \ + case 10: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_regp_emit((inst), 7, (sregp)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fstp_mem_size(inst, mem, size) \ + do { \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_mem_emit((inst), 3, (mem)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_mem_emit((inst), 3, (mem)); \ + } \ + break; \ + case 10: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_mem_emit((inst), 7, (mem)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fstp_membase_size(inst, basereg, disp, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_membase_emit((inst), 3, (basereg), (disp)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_membase_emit((inst), 3, (basereg), (disp)); \ + } \ + break; \ + case 10: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_membase_emit((inst), 7, (basereg), (disp)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fstp_memindex_size(inst, basereg, disp, 
indexreg, shift, size) \ + do { \ + x86_64_rex_emit((inst), 0, 0, (indexreg), (basereg)); \ + switch(size) \ + { \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_memindex_emit((inst), 3, (basereg), (disp), (indexreg), (shift)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdd; \ + x86_64_memindex_emit((inst), 3, (basereg), (disp), (indexreg), (shift)); \ + } \ + break; \ + case 10: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_memindex_emit((inst), 7, (basereg), (disp), (indexreg), (shift)); \ + } \ + break; \ + } \ + } while(0) + +/* + * Convert long double to integer + */ +#define x86_64_fistp_mem_size(inst, mem, size) \ + do { \ + switch((size)) \ + { \ + case 2: \ + { \ + *(inst)++ = (unsigned char)0xdf; \ + x86_64_mem_emit((inst), 3, (mem)); \ + } \ + break; \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_mem_emit((inst), 3, (mem)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdf; \ + x86_64_mem_emit((inst), 7, (mem)); \ + } \ + break; \ + } \ + } while(0) + +#define x86_64_fistp_membase_size(inst, basereg, disp, size) \ + do { \ + switch((size)) \ + { \ + case 2: \ + { \ + *(inst)++ = (unsigned char)0xdf; \ + x86_64_membase_emit((inst), 3, (basereg), (disp)); \ + } \ + break; \ + case 4: \ + { \ + *(inst)++ = (unsigned char)0xdb; \ + x86_64_membase_emit((inst), 3, (basereg), (disp)); \ + } \ + break; \ + case 8: \ + { \ + *(inst)++ = (unsigned char)0xdf; \ + x86_64_membase_emit((inst), 7, (basereg), (disp)); \ + } \ + break; \ + } \ + } while(0) + +/* + * Store fpu control word after checking for pending unmasked fpu exceptions + */ +#define x86_64_fnstcw(inst, mem) \ + do { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_mem_emit((inst), 7, (mem)); \ + } while (0) + +#define x86_64_fnstcw_membase(inst, basereg, disp) \ + do { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_membase_emit((inst), 7, (basereg), (disp)); \ + } while(0) + +/* + * Load fpu control word + */ +#define x86_64_fldcw(inst, mem) \ + do { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_mem_emit((inst), 5, (mem)); \ + } while(0) + +#define x86_64_fldcw_membase(inst, basereg, disp) \ + do { \ + *(inst)++ = (unsigned char)0xd9; \ + x86_64_membase_emit ((inst), 5, (basereg), (disp)); \ } while(0) #ifdef __cplusplus diff --git a/jit/jit-insn.c b/jit/jit-insn.c index 498e3cc..6e39363 100644 --- a/jit/jit-insn.c +++ b/jit/jit-insn.c @@ -6206,9 +6206,6 @@ jit_value_t jit_insn_call_intrinsic JIT_CALL_NORETURN); jit_type_free(signature); - /* The "jit_exception_builtin" function will never return */ - func->builder->current_block->ends_in_dead = 1; - /* Execution continues here if there was no exception */ if(!jit_insn_label(func, &label)) { diff --git a/jit/jit-rules-x86-64.c b/jit/jit-rules-x86-64.c new file mode 100644 index 0000000..0dc87da --- /dev/null +++ b/jit/jit-rules-x86-64.c @@ -0,0 +1,3549 @@ +/* + * jit-rules-x86-64.c - Rules that define the characteristics of the x86_64. + * + * Copyright (C) 2008 Southern Storm Software, Pty Ltd. + * + * This file is part of the libjit library. + * + * The libjit library is free software: you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 2.1 of + * the License, or (at your option) any later version. 
+ *
+ * The libjit library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with the libjit library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "jit-internal.h"
+#include "jit-rules.h"
+#include "jit-apply-rules.h"
+
+#if defined(JIT_BACKEND_X86_64)
+
+#include "jit-gen-x86-64.h"
+#include "jit-reg-alloc.h"
+#include "jit-setjmp.h"
+#include 
+
+/*
+ * Pseudo register numbers for the x86_64 registers. These are not the
+ * same as the CPU instruction register numbers. The order of these
+ * values must match the order in "JIT_REG_INFO".
+ */
+#define X86_64_REG_RAX		0
+#define X86_64_REG_RCX		1
+#define X86_64_REG_RDX		2
+#define X86_64_REG_RBX		3
+#define X86_64_REG_RSI		4
+#define X86_64_REG_RDI		5
+#define X86_64_REG_R8		6
+#define X86_64_REG_R9		7
+#define X86_64_REG_R10		8
+#define X86_64_REG_R11		9
+#define X86_64_REG_R12		10
+#define X86_64_REG_R13		11
+#define X86_64_REG_R14		12
+#define X86_64_REG_R15		13
+#define X86_64_REG_RBP		14
+#define X86_64_REG_RSP		15
+#define X86_64_REG_XMM0		16
+#define X86_64_REG_XMM1		17
+#define X86_64_REG_XMM2		18
+#define X86_64_REG_XMM3		19
+#define X86_64_REG_XMM4		20
+#define X86_64_REG_XMM5		21
+#define X86_64_REG_XMM6		22
+#define X86_64_REG_XMM7		23
+#define X86_64_REG_XMM8		24
+#define X86_64_REG_XMM9		25
+#define X86_64_REG_XMM10	26
+#define X86_64_REG_XMM11	27
+#define X86_64_REG_XMM12	28
+#define X86_64_REG_XMM13	29
+#define X86_64_REG_XMM14	30
+#define X86_64_REG_XMM15	31
+#define X86_64_REG_ST0		32
+#define X86_64_REG_ST1		33
+#define X86_64_REG_ST2		34
+#define X86_64_REG_ST3		35
+#define X86_64_REG_ST4		36
+#define X86_64_REG_ST5		37
+#define X86_64_REG_ST6		38
+#define X86_64_REG_ST7		39
+
+/*
+ * Determine if a pseudo register number is general, xmm or fpu.
+ */
+#define IS_GENERAL_REG(reg)	(((reg) & ~0x0f) == 0)
+#define IS_XMM_REG(reg)		(((reg) & ~0x0f) == 0x10)
+#define IS_FPU_REG(reg)		(((reg) & ~0x0f) == 0x20)
+
+/*
+ * Scratch register that is used for calls via register and
+ * for loading the exception pc into the setjmp buffer.
+ * This register *MUST NOT* be used for parameter passing and
+ * *MUST NOT* be a callee saved register.
+ * For the SysV ABI, R11 is perfect.
+ */
+#define X86_64_SCRATCH X86_64_R11
+
+/*
+ * Set this definition to 1 if the OS supports the SysV red zone.
+ * This is a 128 byte area below the stack pointer that is guaranteed
+ * not to be modified by interrupts or signal handlers.
+ * This allows us to use a temporary area on the stack without
+ * having to modify the stack pointer, saving us two instructions.
+ * TODO: Make this a configure switch.
+ */
+#define HAVE_RED_ZONE 1
+
+/*
+ * X86_64 argument types as specified in the X86_64 SysV ABI.
+ */
+#define X86_64_ARG_NO_CLASS		0x00
+#define X86_64_ARG_INTEGER		0x01
+#define X86_64_ARG_MEMORY		0x02
+#define X86_64_ARG_SSE			0x11
+#define X86_64_ARG_SSEUP		0x12
+#define X86_64_ARG_X87			0x21
+#define X86_64_ARG_X87UP		0x22
+
+#define X86_64_ARG_IS_SSE(arg)	(((arg) & 0x10) != 0)
+#define X86_64_ARG_IS_X87(arg)	(((arg) & 0x20) != 0)
+
+/*
+ * The granularity of the stack
+ */
+#define STACK_SLOT_SIZE	sizeof(void *)
+
+/*
+ * Get the number of complete stack slots used
+ */
+#define STACK_SLOTS_USED(size)	((size) >> 3)
+
+/*
+ * Round a size up to a multiple of the stack word size.
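+ * e.g. ROUND_STACK(13) yields 16 with 8 byte stack slots.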
+ */ +#define ROUND_STACK(size) \ + (((size) + (STACK_SLOT_SIZE - 1)) & ~(STACK_SLOT_SIZE - 1)) + +/* + * Setup or teardown the x86 code output process. + */ +#define jit_cache_setup_output(needed) \ + unsigned char *inst = gen->posn.ptr; \ + if(!jit_cache_check_for_n(&(gen->posn), (needed))) \ + { \ + jit_cache_mark_full(&(gen->posn)); \ + return; \ + } +#define jit_cache_end_output() \ + gen->posn.ptr = inst + +/* + * Set this to 1 for debugging fixups + */ +#define DEBUG_FIXUPS 0 + +/* + * The maximum block size copied inline + */ +#define _JIT_MAX_MEMCPY_INLINE 0x40 + +/* + * va_list type as specified in x86_64 sysv abi version 0.99 + * Figure 3.34 + */ +typedef struct +{ + unsigned int gp_offset; + unsigned int fp_offset; + void *overflow_arg_area; + void *reg_save_area; +} _jit_va_list; + +/* Registers used for INTEGER arguments */ +static int _jit_word_arg_regs[] = {X86_64_REG_RDI, X86_64_REG_RSI, + X86_64_REG_RDX, X86_64_REG_RCX, + X86_64_REG_R8, X86_64_REG_R9}; +#define _jit_num_word_regs 6 + +/* Registers used for float arguments */ +static int _jit_float_arg_regs[] = {X86_64_REG_XMM0, X86_64_REG_XMM1, + X86_64_REG_XMM2, X86_64_REG_XMM3, + X86_64_REG_XMM4, X86_64_REG_XMM5, + X86_64_REG_XMM6, X86_64_REG_XMM7}; +#define _jit_num_float_regs 8 + +/* Registers used for returning INTEGER values */ +static int _jit_word_return_regs[] = {X86_64_REG_RAX, X86_64_REG_RDX}; +#define _jit_num_word_return_regs 2 + +/* Registers used for returning sse values */ +static int _jit_sse_return_regs[] = {X86_64_REG_XMM0, X86_64_REG_XMM1}; +#define _jit_num_sse_return_regs 2 + +/* + * X86_64 register classes + */ +static _jit_regclass_t *x86_64_reg; /* X86_64 general purpose registers */ +static _jit_regclass_t *x86_64_creg; /* X86_64 call clobbered general */ + /* purpose registers */ +static _jit_regclass_t *x86_64_rreg; /* general purpose registers not used*/ + /* for returning values */ +static _jit_regclass_t *x86_64_freg; /* X86_64 fpu registers */ +static _jit_regclass_t *x86_64_xreg; /* X86_64 xmm registers */ + +void +_jit_init_backend(void) +{ + x86_64_reg = _jit_regclass_create( + "reg", JIT_REG_WORD | JIT_REG_LONG, 14, + X86_64_REG_RAX, X86_64_REG_RCX, + X86_64_REG_RDX, X86_64_REG_RBX, + X86_64_REG_RSI, X86_64_REG_RDI, + X86_64_REG_R8, X86_64_REG_R9, + X86_64_REG_R10, X86_64_REG_R11, + X86_64_REG_R12, X86_64_REG_R13, + X86_64_REG_R14, X86_64_REG_R15); + + /* register class with all call clobbered registers */ + x86_64_creg = _jit_regclass_create( + "creg", JIT_REG_WORD | JIT_REG_LONG, 9, + X86_64_REG_RAX, X86_64_REG_RCX, + X86_64_REG_RDX, X86_64_REG_RSI, + X86_64_REG_RDI, X86_64_REG_R8, + X86_64_REG_R9, X86_64_REG_R10, + X86_64_REG_R11); + + /* register class with all registers not used for returning values */ + x86_64_rreg = _jit_regclass_create( + "rreg", JIT_REG_WORD | JIT_REG_LONG, 12, + X86_64_REG_RCX, X86_64_REG_RBX, + X86_64_REG_RSI, X86_64_REG_RDI, + X86_64_REG_R8, X86_64_REG_R9, + X86_64_REG_R10, X86_64_REG_R11, + X86_64_REG_R12, X86_64_REG_R13, + X86_64_REG_R14, X86_64_REG_R15); + + x86_64_freg = _jit_regclass_create( + "freg", JIT_REG_X86_64_FLOAT | JIT_REG_IN_STACK, 8, + X86_64_REG_ST0, X86_64_REG_ST1, + X86_64_REG_ST2, X86_64_REG_ST3, + X86_64_REG_ST4, X86_64_REG_ST5, + X86_64_REG_ST6, X86_64_REG_ST7); + + x86_64_xreg = _jit_regclass_create( + "xreg", JIT_REG_FLOAT32 | JIT_REG_FLOAT64, 16, + X86_64_REG_XMM0, X86_64_REG_XMM1, + X86_64_REG_XMM2, X86_64_REG_XMM3, + X86_64_REG_XMM4, X86_64_REG_XMM5, + X86_64_REG_XMM6, X86_64_REG_XMM7, + X86_64_REG_XMM8, X86_64_REG_XMM9, + 
X86_64_REG_XMM10, X86_64_REG_XMM11, + X86_64_REG_XMM12, X86_64_REG_XMM13, + X86_64_REG_XMM14, X86_64_REG_XMM15); +} + +int +_jit_opcode_is_supported(int opcode) +{ + switch(opcode) + { + #define JIT_INCLUDE_SUPPORTED + #include "jit-rules-x86-64.inc" + #undef JIT_INCLUDE_SUPPORTED + } + return 0; +} + +int +_jit_setup_indirect_pointer(jit_function_t func, jit_value_t value) +{ + return jit_insn_outgoing_reg(func, value, X86_64_REG_R11); +} + +/* + * Do a xmm operation with a constant float32 value + */ +static int +_jit_xmm1_reg_imm_size_float32(jit_gencode_t gen, unsigned char **inst_ptr, + X86_64_XMM1_OP opc, int reg, + jit_float32 *float32_value) +{ + void *ptr; + jit_nint offset; + unsigned char *inst; + + inst = *inst_ptr; + ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_float32)); + if(!ptr) + { + return 0; + } + jit_memcpy(ptr, float32_value, sizeof(jit_float32)); + + offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8)); + if((offset >= jit_min_int) && (offset <= jit_max_int)) + { + /* We can use RIP relative addressing here */ + x86_64_xmm1_reg_membase(inst, opc, reg, + X86_64_RIP, offset, 0); + } + else if(((jit_nint)ptr >= jit_min_int) && + ((jit_nint)ptr <= jit_max_int)) + { + /* We can use absolute addressing */ + x86_64_xmm1_reg_mem(inst, opc, reg, (jit_nint)ptr, 0); + } + else + { + /* We have to use an extra general register */ + /* TODO */ + return 0; + } + *inst_ptr = inst; + return 1; +} + +/* + * Do a xmm operation with a constant float64 value + */ +static int +_jit_xmm1_reg_imm_size_float64(jit_gencode_t gen, unsigned char **inst_ptr, + X86_64_XMM1_OP opc, int reg, + jit_float64 *float64_value) +{ + void *ptr; + jit_nint offset; + unsigned char *inst; + + inst = *inst_ptr; + ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_float64)); + if(!ptr) + { + return 0; + } + jit_memcpy(ptr, float64_value, sizeof(jit_float64)); + + offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8)); + if((offset >= jit_min_int) && (offset <= jit_max_int)) + { + /* We can use RIP relative addressing here */ + x86_64_xmm1_reg_membase(inst, opc, reg, + X86_64_RIP, offset, 1); + } + else if(((jit_nint)ptr >= jit_min_int) && + ((jit_nint)ptr <= jit_max_int)) + { + /* We can use absolute addressing */ + x86_64_xmm1_reg_mem(inst, opc, reg, (jit_nint)ptr, 1); + } + else + { + /* We have to use an extra general register */ + /* TODO */ + return 0; + } + *inst_ptr = inst; + return 1; +} + +/* + * Call a function + */ +static unsigned char * +x86_64_call_code(unsigned char *inst, jit_nint func) +{ + jit_nint offset; + + offset = func - ((jit_nint)inst + 5); + if(offset >= jit_min_int && offset <= jit_max_int) + { + /* We can use the immediate call */ + x86_64_call_imm(inst, offset); + } + else + { + /* We have to do a call via register */ + x86_64_mov_reg_imm_size(inst, X86_64_SCRATCH, func, 8); + x86_64_call_reg(inst, X86_64_SCRATCH); + } + return inst; +} + +/* + * Jump to a function + */ +static unsigned char * +x86_64_jump_to_code(unsigned char *inst, jit_nint func) +{ + jit_nint offset; + + offset = func - ((jit_nint)inst + 5); + if(offset >= jit_min_int && offset <= jit_max_int) + { + /* We can use the immediate call */ + x86_64_jmp_imm(inst, offset); + } + else + { + /* We have to do a call via register */ + x86_64_mov_reg_imm_size(inst, X86_64_SCRATCH, func, 8); + x86_64_jmp_reg(inst, X86_64_SCRATCH); + } + return inst; +} + +/* + * Throw a builtin exception. 
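+ * If the function contains a "try" block the current instruction
+ * pointer is stored to the setjmp buffer first (loaded with a RIP
+ * relative lea) so that the handler can locate the throwing position.
+ * The exception type is then passed in RDI, the first SysV integer
+ * argument register, and jit_exception_builtin never returns.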
+ */ +static unsigned char * +throw_builtin(unsigned char *inst, jit_function_t func, int type) +{ + /* We need to update "catch_pc" if we have a "try" block */ + if(func->builder->setjmp_value != 0) + { + _jit_gen_fix_value(func->builder->setjmp_value); + + x86_64_lea_membase_size(inst, X86_64_RDI, X86_64_RIP, 0, 8); + x86_64_mov_membase_reg_size(inst, X86_64_RBP, + func->builder->setjmp_value->frame_offset + + jit_jmp_catch_pc_offset, X86_64_RDI, 8); + } + + /* Push the exception type onto the stack */ + x86_64_mov_reg_imm_size(inst, X86_64_RDI, type, 4); + + /* Call the "jit_exception_builtin" function, which will never return */ + return x86_64_call_code(inst, (jit_nint)jit_exception_builtin); +} + +/* + * spill a register to it's place in the current stack frame. + * The argument type must be in it's normalized form. + */ +static void +_spill_reg(unsigned char **inst_ptr, jit_type_t type, + jit_int reg, jit_int offset) +{ + unsigned char *inst = *inst_ptr; + + if(IS_GENERAL_REG(reg)) + { + switch(type->kind) + { + case JIT_TYPE_SBYTE: + case JIT_TYPE_UBYTE: + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg, 1); + } + break; + + case JIT_TYPE_SHORT: + case JIT_TYPE_USHORT: + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg, 2); + } + break; + + case JIT_TYPE_INT: + case JIT_TYPE_UINT: + case JIT_TYPE_FLOAT32: + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg, 4); + } + break; + + case JIT_TYPE_LONG: + case JIT_TYPE_ULONG: + case JIT_TYPE_FLOAT64: + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg, 8); + } + break; + + case JIT_TYPE_STRUCT: + case JIT_TYPE_UNION: + { + jit_nuint size = jit_type_get_size(type); + + if(size == 1) + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg, 1); + } + else if(size == 2) + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg, 2); + } + else if(size <= 4) + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg, 4); + } + else + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg, 8); + } + } + } + } + else if(IS_XMM_REG(reg)) + { + switch(type->kind) + { + case JIT_TYPE_FLOAT32: + { + x86_64_movss_membase_reg(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg); + } + break; + + case JIT_TYPE_FLOAT64: + { + x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg); + } + break; + + case JIT_TYPE_STRUCT: + case JIT_TYPE_UNION: + { + jit_nuint size = jit_type_get_size(type); + + if(size <= 4) + { + x86_64_movss_membase_reg(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg); + } + else if(size <= 8) + { + x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg); + } + else + { + jit_nint alignment = jit_type_get_alignment(type); + + if((alignment & 0xf) == 0) + { + x86_64_movaps_membase_reg(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg); + } + else + { + x86_64_movups_membase_reg(inst, X86_64_RBP, offset, + _jit_reg_info[reg].cpu_reg); + } + } + } + break; + } + } + else if(IS_FPU_REG(reg)) + { + switch(type->kind) + { + case JIT_TYPE_FLOAT32: + { + x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 4); + } + break; + + case JIT_TYPE_FLOAT64: + { + x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8); + } + break; + + case JIT_TYPE_NFLOAT: + { + if(sizeof(jit_nfloat) == sizeof(jit_float64)) + { + 
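+				/* jit_nfloat is a plain double here, so an 8 byte
+				   store is enough; the 80 bit case is handled below. */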
x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8); + } + else + { + x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 10); + } + } + break; + } + } + + /* Write the current instruction pointer back */ + *inst_ptr = inst; +} + +void +_jit_gen_fix_value(jit_value_t value) +{ + if(!(value->has_frame_offset) && !(value->is_constant)) + { + jit_nuint alignment = jit_type_get_alignment(value->type); + jit_nint size =jit_type_get_size(value->type); + jit_nint frame_size = value->block->func->builder->frame_size; + + /* Round the size to a multiple of the stack item size */ + size = (jit_nint)(ROUND_STACK(size)); + + /* Add the size to the existing local items */ + frame_size += size; + + /* Align the new frame_size for the value */ + frame_size = (frame_size + (alignment - 1)) & ~(alignment - 1); + + value->block->func->builder->frame_size = frame_size; + value->frame_offset = -frame_size; + value->has_frame_offset = 1; + } +} + +void +_jit_gen_spill_global(jit_gencode_t gen, int reg, jit_value_t value) +{ + jit_cache_setup_output(16); + if(value) + { + jit_type_t type = jit_type_normalize(value->type); + + _jit_gen_fix_value(value); + + _spill_reg(&inst, type, value->global_reg, value->frame_offset); + } + else + { + x86_64_push_reg_size(inst, _jit_reg_info[reg].cpu_reg, 8); + } + jit_cache_end_output(); +} + +void +_jit_gen_load_global(jit_gencode_t gen, int reg, jit_value_t value) +{ + jit_cache_setup_output(16); + if(value) + { + x86_64_mov_reg_membase_size(inst, + _jit_reg_info[value->global_reg].cpu_reg, + X86_64_RBP, value->frame_offset, 8); + } + else + { + x86_64_pop_reg_size(inst, _jit_reg_info[reg].cpu_reg, 8); + } + jit_cache_end_output(); +} + +void +_jit_gen_spill_reg(jit_gencode_t gen, int reg, + int other_reg, jit_value_t value) +{ + jit_type_t type; + + /* Make sure that we have sufficient space */ + jit_cache_setup_output(16); + + /* If the value is associated with a global register, then copy to that */ + if(value->has_global_register) + { + reg = _jit_reg_info[reg].cpu_reg; + other_reg = _jit_reg_info[value->global_reg].cpu_reg; + x86_64_mov_reg_reg_size(inst, other_reg, reg, sizeof(void *)); + jit_cache_end_output(); + return; + } + + /* Fix the value in place within the local variable frame */ + _jit_gen_fix_value(value); + + /* Get the normalized type */ + type = jit_type_normalize(value->type); + + /* and spill the register */ + _spill_reg(&inst, type, reg, value->frame_offset); + + /* End the code output process */ + jit_cache_end_output(); +} + +void +_jit_gen_free_reg(jit_gencode_t gen, int reg, + int other_reg, int value_used) +{ + /* We only need to take explicit action if we are freeing a + floating-point register whose value hasn't been used yet */ + if(!value_used && IS_FPU_REG(reg)) + { + if(jit_cache_check_for_n(&(gen->posn), 2)) + { + x86_fstp(gen->posn.ptr, reg - X86_64_REG_ST0); + } + else + { + jit_cache_mark_full(&(gen->posn)); + } + } +} + +/* + * Set a register value based on a condition code. + */ +static unsigned char * +setcc_reg(unsigned char *inst, int reg, int cond, int is_signed) +{ + /* Use a SETcc instruction if we have a basic register */ + x86_64_set_reg(inst, cond, reg, is_signed); + x86_64_movzx8_reg_reg_size(inst, reg, reg, 4); + return inst; +} + +/* + * Helper macros for fixup handling. + * + * We have only 4 bytes for the jump offsets. + * Therefore we have do something tricky here. + * We need some fixed value that is known to be fix throughout the + * building of the function and that will be near the emitted code. 
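+ * Each placeholder stores the 32 bit distance to the previously
+ * recorded fixup, so the pending branches of a block are chained into
+ * a singly linked list that is threaded through the emitted code.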
+ * The posn limit looks like the perfect value to use. + */ +#define _JIT_GET_FIXVALUE(gen) ((gen)->posn.limit) + +/* + * Calculate the fixup value + * This is the value stored as placeholder in the instruction. + */ +#define _JIT_CALC_FIXUP(fixup_list, inst) \ + ((jit_int)((jit_nint)(inst) - (jit_nint)(fixup_list))) + +/* + * Calculate the pointer to the fixup value. + */ +#define _JIT_CALC_NEXT_FIXUP(fixup_list, fixup) \ + ((fixup) ? ((jit_nint)(fixup_list) - (jit_nint)(fixup)) : (jit_nint)0) + +/* + * Get the long form of a branch opcode. + */ +static int +long_form_branch(int opcode) +{ + if(opcode == 0xEB) + { + return 0xE9; + } + else + { + return opcode + 0x0F10; + } +} + +/* + * Output a branch instruction. + */ +static unsigned char * +output_branch(jit_function_t func, unsigned char *inst, int opcode, + jit_insn_t insn) +{ + jit_block_t block; + + if((insn->flags & JIT_INSN_VALUE1_IS_LABEL) != 0) + { + /* "address_of_label" instruction */ + block = jit_block_from_label(func, (jit_label_t)(insn->value1)); + } + else + { + block = jit_block_from_label(func, (jit_label_t)(insn->dest)); + } + if(!block) + { + return inst; + } + if(block->address) + { + jit_nint offset; + + /* We already know the address of the block */ + offset = ((unsigned char *)(block->address)) - (inst + 2); + if(x86_is_imm8(offset)) + { + /* We can output a short-form backwards branch */ + *inst++ = (unsigned char)opcode; + *inst++ = (unsigned char)offset; + } + else + { + /* We need to output a long-form backwards branch */ + offset -= 3; + opcode = long_form_branch(opcode); + if(opcode < 256) + { + *inst++ = (unsigned char)opcode; + } + else + { + *inst++ = (unsigned char)(opcode >> 8); + *inst++ = (unsigned char)opcode; + --offset; + } + x86_imm_emit32(inst, offset); + } + } + else + { + jit_int fixup; + + /* Output a placeholder and record on the block's fixup list */ + opcode = long_form_branch(opcode); + if(opcode < 256) + { + *inst++ = (unsigned char)opcode; + } + else + { + *inst++ = (unsigned char)(opcode >> 8); + *inst++ = (unsigned char)opcode; + } + if(block->fixup_list) + { + fixup = _JIT_CALC_FIXUP(block->fixup_list, inst); + } + else + { + fixup = 0; + } + block->fixup_list = (void *)inst; + x86_imm_emit32(inst, fixup); + + if(DEBUG_FIXUPS) + { + fprintf(stderr, + "Block: %lx, Current Fixup: %lx, Next fixup: %lx\n", + (jit_nint)block, (jit_nint)(block->fixup_list), + (jit_nint)fixup); + } + } + return inst; +} + +/* + * Jump to the current function's epilog. 
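+ * Because the epilog is emitted last its address is not known yet, so
+ * a placeholder jump is emitted and chained on the epilog fixup list,
+ * which _jit_gen_epilog patches once the final address is known.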
+ */ +static unsigned char * +jump_to_epilog(jit_gencode_t gen, unsigned char *inst, jit_block_t block) +{ + jit_int fixup; + + /* If the epilog is the next thing that we will output, + then fall through to the epilog directly */ + block = block->next; + while(block != 0 && block->first_insn > block->last_insn) + { + block = block->next; + } + if(!block) + { + return inst; + } + + /* Output a placeholder for the jump and add it to the fixup list */ + *inst++ = (unsigned char)0xE9; + if(gen->epilog_fixup) + { + fixup = _JIT_CALC_FIXUP(gen->epilog_fixup, inst); + } + else + { + fixup = 0; + } + gen->epilog_fixup = (void *)inst; + x86_imm_emit32(inst, fixup); + return inst; +} + +/* + * Support functiond for the FPU stack + */ + +static int +fp_stack_index(jit_gencode_t gen, int reg) +{ + return gen->reg_stack_top - reg - 1; +} + +void +_jit_gen_exch_top(jit_gencode_t gen, int reg) +{ + if(IS_FPU_REG(reg)) + { + jit_cache_setup_output(2); + x86_fxch(inst, fp_stack_index(gen, reg)); + jit_cache_end_output(); + } +} + +void + _jit_gen_move_top(jit_gencode_t gen, int reg) +{ + if(IS_FPU_REG(reg)) + { + jit_cache_setup_output(2); + x86_fstp(inst, fp_stack_index(gen, reg)); + jit_cache_end_output(); + } +} + +void +_jit_gen_spill_top(jit_gencode_t gen, int reg, jit_value_t value, int pop) +{ + if(IS_FPU_REG(reg)) + { + int offset; + + /* Make sure that we have sufficient space */ + jit_cache_setup_output(16); + + /* Fix the value in place within the local variable frame */ + _jit_gen_fix_value(value); + + /* Output an appropriate instruction to spill the value */ + offset = (int)(value->frame_offset); + + /* Spill the top of the floating-point register stack */ + switch(jit_type_normalize(value->type)->kind) + { + case JIT_TYPE_FLOAT32: + { + if(pop) + { + x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 4); + } + else + { + x86_64_fst_membase_size(inst, X86_64_RBP, offset, 4); + } + } + break; + + case JIT_TYPE_FLOAT64: + { + if(pop) + { + x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8); + } + else + { + x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8); + } + } + break; + + case JIT_TYPE_NFLOAT: + { + if(sizeof(jit_nfloat) == sizeof(jit_float64)) + { + if(pop) + { + x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8); + } + else + { + x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8); + } + } + else + { + x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 10); + if(!pop) + { + x86_64_fld_membase_size(inst, X86_64_RBP, offset, 10); + } + } + } + break; + } + + /* End the code output process */ + jit_cache_end_output(); + } +} + +void +_jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value) +{ + jit_type_t type; + int src_reg, other_src_reg; + void *ptr; + int offset; + + /* Make sure that we have sufficient space */ + jit_cache_setup_output(16); + + type = jit_type_normalize(value->type); + + /* Load zero */ + if(value->is_constant) + { + switch(type->kind) + { + case JIT_TYPE_SBYTE: + case JIT_TYPE_UBYTE: + case JIT_TYPE_SHORT: + case JIT_TYPE_USHORT: + case JIT_TYPE_INT: + case JIT_TYPE_UINT: + { + if((jit_nint)(value->address) == 0) + { + x86_64_clear_reg(inst, _jit_reg_info[reg].cpu_reg); + } + else + { + x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg, + (jit_nint)(value->address), 4); + } + } + break; + + case JIT_TYPE_LONG: + case JIT_TYPE_ULONG: + { + if((jit_nint)(value->address) == 0) + { + x86_64_clear_reg(inst, _jit_reg_info[reg].cpu_reg); + } + else + { + x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg, + 
(jit_nint)(value->address), 8); + } + } + break; + + case JIT_TYPE_FLOAT32: + { + jit_float32 float32_value; + + float32_value = jit_value_get_float32_constant(value); + + if(IS_GENERAL_REG(reg)) + { + union + { + jit_float32 float32_value; + jit_int int_value; + } un; + + un.float32_value = float32_value; + x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg, + un.int_value, 4); + } + else if(IS_XMM_REG(reg)) + { + int xmm_reg = _jit_reg_info[reg].cpu_reg; + + _jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_MOV, + xmm_reg, &float32_value); + } + else + { + if(float32_value == (jit_float32) 0.0) + { + x86_fldz(inst); + } + else if(float32_value == (jit_float32) 1.0) + { + x86_fld1(inst); + } + else + { + jit_nint offset; + + ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_float32)); + jit_memcpy(ptr, &float32_value, sizeof(float32_value)); + + offset = (jit_nint)ptr - ((jit_nint)inst + 7); + if((offset >= jit_min_int) && (offset <= jit_max_int)) + { + /* We can use RIP relative addressing here */ + x86_64_fld_membase_size(inst, X86_64_RIP, offset, 4); + } + else if(((jit_nint)ptr >= jit_min_int) && + ((jit_nint)ptr <= jit_max_int)) + { + /* We can use absolute addressing */ + x86_64_fld_mem_size(inst, (jit_nint)ptr, 4); + } + else + { + /* We have to use an extra general register */ + /* TODO */ + } + } + } + } + break; + + case JIT_TYPE_FLOAT64: + { + jit_float64 float64_value; + float64_value = jit_value_get_float64_constant(value); + if(IS_GENERAL_REG(reg)) + { + union + { + jit_float64 float64_value; + jit_long long_value; + } un; + + un.float64_value = float64_value; + x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg, + un.long_value, 8); + } + else if(IS_XMM_REG(reg)) + { + int xmm_reg = _jit_reg_info[reg].cpu_reg; + + _jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_MOV, + xmm_reg, &float64_value); + } + else + { + if(float64_value == (jit_float64) 0.0) + { + x86_fldz(inst); + } + else if(float64_value == (jit_float64) 1.0) + { + x86_fld1(inst); + } + else + { + jit_nint offset; + + ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_float64)); + jit_memcpy(ptr, &float64_value, sizeof(float64_value)); + + offset = (jit_nint)ptr - ((jit_nint)inst + 7); + if((offset >= jit_min_int) && (offset <= jit_max_int)) + { + /* We can use RIP relative addressing here */ + x86_64_fld_membase_size(inst, X86_64_RIP, offset, 8); + } + else if(((jit_nint)ptr >= jit_min_int) && + ((jit_nint)ptr <= jit_max_int)) + { + /* We can use absolute addressing */ + x86_64_fld_mem_size(inst, (jit_nint)ptr, 8); + } + else + { + /* We have to use an extra general register */ + /* TODO */ + } + } + } + } + break; + + case JIT_TYPE_NFLOAT: + { + jit_nfloat nfloat_value; + nfloat_value = jit_value_get_nfloat_constant(value); + if(IS_GENERAL_REG(reg) && sizeof(jit_nfloat) == sizeof(jit_float64)) + { + union + { + jit_nfloat nfloat_value; + jit_long long_value; + } un; + + un.nfloat_value = nfloat_value; + x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg, + un.long_value, 8); + } + else if(IS_XMM_REG(reg) && sizeof(jit_nfloat) == sizeof(jit_float64)) + { + jit_nint offset; + int xmm_reg = _jit_reg_info[reg].cpu_reg; + + ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_nfloat)); + jit_memcpy(ptr, &nfloat_value, sizeof(nfloat_value)); + offset = (jit_nint)ptr - + ((jit_nint)inst + (xmm_reg > 7 ? 
9 : 8)); + if((offset >= jit_min_int) && (offset <= jit_max_int)) + { + /* We can use RIP relative addressing here */ + x86_64_movsd_reg_membase(inst, xmm_reg, X86_64_RIP, offset); + } + else if(((jit_nint)ptr >= jit_min_int) && + ((jit_nint)ptr <= jit_max_int)) + { + /* We can use absolute addressing */ + x86_64_movsd_reg_mem(inst, xmm_reg, (jit_nint)ptr); + } + else + { + /* We have to use an extra general register */ + /* TODO */ + } + } + else + { + if(nfloat_value == (jit_nfloat) 0.0) + { + x86_fldz(inst); + } + else if(nfloat_value == (jit_nfloat) 1.0) + { + x86_fld1(inst); + } + else + { + jit_nint offset; + + ptr = _jit_cache_alloc(&(gen->posn), sizeof(jit_nfloat)); + jit_memcpy(ptr, &nfloat_value, sizeof(nfloat_value)); + + offset = (jit_nint)ptr - ((jit_nint)inst + 7); + if((offset >= jit_min_int) && (offset <= jit_max_int)) + { + /* We can use RIP relative addressing here */ + if(sizeof(jit_nfloat) == sizeof(jit_float64)) + { + x86_64_fld_membase_size(inst, X86_64_RIP, offset, 8); + } + else + { + x86_64_fld_membase_size(inst, X86_64_RIP, offset, 10); + } + } + else if(((jit_nint)ptr >= jit_min_int) && + ((jit_nint)ptr <= jit_max_int)) + { + /* We can use absolute addressing */ + if(sizeof(jit_nfloat) == sizeof(jit_float64)) + { + x86_64_fld_mem_size(inst, (jit_nint)ptr, 8); + } + else + { + x86_64_fld_mem_size(inst, (jit_nint)ptr, 10); + } + } + else + { + /* We have to use an extra general register */ + /* TODO */ + } + } + } + } + break; + } + } + else if(value->in_register || value->in_global_register) + { + if(value->in_register) + { + src_reg = value->reg; + other_src_reg = -1; + } + else + { + src_reg = value->global_reg; + other_src_reg = -1; + } + + switch(type->kind) + { +#if 0 + case JIT_TYPE_SBYTE: + { + x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg, 1, 0); + } + break; + + case JIT_TYPE_UBYTE: + { + x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg, 0, 0); + } + break; + + case JIT_TYPE_SHORT: + { + x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg, 1, 1); + } + break; + + case JIT_TYPE_USHORT: + { + x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg, 0, 1); + } + break; +#else + case JIT_TYPE_SBYTE: + case JIT_TYPE_UBYTE: + case JIT_TYPE_SHORT: + case JIT_TYPE_USHORT: +#endif + case JIT_TYPE_INT: + case JIT_TYPE_UINT: + { + x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg, 4); + } + break; + + case JIT_TYPE_LONG: + case JIT_TYPE_ULONG: + { + x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg, 8); + } + break; + + case JIT_TYPE_FLOAT32: + { + if(IS_FPU_REG(reg)) + { + if(IS_FPU_REG(src_reg)) + { + x86_fld_reg(inst, fp_stack_index(gen, src_reg)); + } + else if(IS_XMM_REG(src_reg)) + { + /* Fix the position of the value in the stack frame */ + _jit_gen_fix_value(value); + offset = (int)(value->frame_offset); + + x86_64_movss_membase_reg(inst, X86_64_RBP, offset, + _jit_reg_info[src_reg].cpu_reg); + x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4); + } + } + else if(IS_XMM_REG(reg)) + { + if(IS_FPU_REG(src_reg)) + { + /* Fix the position of the value in the stack frame */ + _jit_gen_fix_value(value); + offset = (int)(value->frame_offset); + + x86_64_fst_membase_size(inst, X86_64_RBP, offset, 4); + x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + else if(IS_XMM_REG(src_reg)) + { + x86_64_movss_reg_reg(inst, 
_jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg); + } + } + } + break; + + case JIT_TYPE_FLOAT64: + { + if(IS_FPU_REG(reg)) + { + if(IS_FPU_REG(src_reg)) + { + x86_fld_reg(inst, fp_stack_index(gen, src_reg)); + } + else if(IS_XMM_REG(src_reg)) + { + /* Fix the position of the value in the stack frame */ + _jit_gen_fix_value(value); + offset = (int)(value->frame_offset); + + x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, + _jit_reg_info[src_reg].cpu_reg); + x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8); + } + } + else if(IS_XMM_REG(reg)) + { + if(IS_FPU_REG(src_reg)) + { + /* Fix the position of the value in the stack frame */ + _jit_gen_fix_value(value); + offset = (int)(value->frame_offset); + + x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8); + x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + else if(IS_XMM_REG(src_reg)) + { + x86_64_movsd_reg_reg(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg); + } + } + } + break; + + case JIT_TYPE_NFLOAT: + { + if(IS_FPU_REG(reg)) + { + if(IS_FPU_REG(src_reg)) + { + x86_fld_reg(inst, fp_stack_index(gen, src_reg)); + } + else + { + fputs("Unsupported native float reg - reg move\n", stderr); + } + } + } + break; + } + } + else + { + /* Fix the position of the value in the stack frame */ + _jit_gen_fix_value(value); + offset = (int)(value->frame_offset); + + /* Load the value into the specified register */ + switch(type->kind) + { + case JIT_TYPE_SBYTE: + { + x86_64_movsx8_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 4); + } + break; + + case JIT_TYPE_UBYTE: + { + x86_64_movzx8_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 4); + } + break; + + case JIT_TYPE_SHORT: + { + x86_64_movsx16_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 4); + } + break; + + case JIT_TYPE_USHORT: + { + x86_64_movzx16_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 4); + } + break; + + case JIT_TYPE_INT: + case JIT_TYPE_UINT: + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 4); + } + break; + + case JIT_TYPE_LONG: + case JIT_TYPE_ULONG: + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 8); + } + break; + + case JIT_TYPE_FLOAT32: + { + if(IS_GENERAL_REG(reg)) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 4); + } + if(IS_XMM_REG(reg)) + { + x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + else + { + x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4); + } + } + break; + + case JIT_TYPE_FLOAT64: + { + if(IS_GENERAL_REG(reg)) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 8); + } + else if(IS_XMM_REG(reg)) + { + x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + else + { + x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8); + } + } + break; + + case JIT_TYPE_NFLOAT: + { + if(sizeof(jit_nfloat) == sizeof(jit_float64)) + { + if(IS_GENERAL_REG(reg)) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 8); + } + else if(IS_XMM_REG(reg)) + { + x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + else + { + x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8); + } + } + else + { + x86_64_fld_membase_size(inst, X86_64_RBP, offset, 10); + } + } + break; + + case JIT_TYPE_STRUCT: + case 
JIT_TYPE_UNION: + { + jit_nuint size = jit_type_get_size(type); + + if(IS_GENERAL_REG(reg)) + { + if(size == 1) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 1); + } + else if(size == 2) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 2); + } + else if(size <= 4) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 4); + } + else if(size <= 8) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset, 8); + } + } + else if(IS_XMM_REG(reg)) + { + if(size <= 4) + { + x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + else if(size <= 8) + { + x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + else + { + int alignment = jit_type_get_alignment(type); + + if((alignment & 0xf) == 0) + { + x86_64_movaps_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + else + { + x86_64_movups_reg_membase(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, offset); + } + } + } + } + } + } + + /* End the code output process */ + jit_cache_end_output(); +} + +void +_jit_gen_get_elf_info(jit_elf_info_t *info) +{ + info->machine = 62; /* EM_X86_64 */ + info->abi = 0; /* ELFOSABI_SYSV */ + info->abi_version = 0; +} + +void * +_jit_gen_prolog(jit_gencode_t gen, jit_function_t func, void *buf) +{ + unsigned char prolog[JIT_PROLOG_SIZE]; + unsigned char *inst = prolog; + int reg; + int frame_size = 0; + int regs_to_save = 0; + + /* Push ebp onto the stack */ + x86_64_push_reg_size(inst, X86_64_RBP, 8); + + /* Initialize EBP for the current frame */ + x86_64_mov_reg_reg_size(inst, X86_64_RBP, X86_64_RSP, 8); + + /* Allocate space for the local variable frame */ + if(func->builder->frame_size > 0) + { + /* Make sure that the framesize is a multiple of 8 bytes */ + frame_size = (func->builder->frame_size + 0x7) & ~0x7; + } + + /* Get the number of registers we need to preserve */ + for(reg = 0; reg < 14; ++reg) + { + if(jit_reg_is_used(gen->touched, reg) && + (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0) + { + ++regs_to_save; + } + } + + /* add the register save area to the initial frame size */ + frame_size += (regs_to_save << 3); + + /* Make sure that the framesize is a multiple of 16 bytes */ + /* so that the final RSP will be alligned on a 16byte boundary. 
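+	   The SysV ABI requires RSP to be aligned on a 16byte boundary at
+	   every call site; the return address and the saved RBP already
+	   occupy 16 bytes, so a frame size that is a multiple of 16
+	   preserves the alignment for calls made from this frame.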
*/ + frame_size = (frame_size + 0xf) & ~0xf; + + if(frame_size > 0) + { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, frame_size, 8); + } + + if(regs_to_save > 0) + { + int current_offset = 0; + + /* Save registers that we need to preserve */ + for(reg = 0; reg <= 14; ++reg) + { + if(jit_reg_is_used(gen->touched, reg) && + (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0) + { + x86_64_mov_membase_reg_size(inst, X86_64_RSP, current_offset, + _jit_reg_info[reg].cpu_reg, 8); + current_offset += 8; + } + } + } + + /* Copy the prolog into place and return the adjusted entry position */ + reg = (int)(inst - prolog); + jit_memcpy(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg, prolog, reg); + return (void *)(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg); +} + +void +_jit_gen_epilog(jit_gencode_t gen, jit_function_t func) +{ + unsigned char *inst; + int reg; + int current_offset; + jit_int *fixup; + jit_int *next; + + /* Bail out if there is insufficient space for the epilog */ + if(!jit_cache_check_for_n(&(gen->posn), 48)) + { + jit_cache_mark_full(&(gen->posn)); + return; + } + + inst = gen->posn.ptr; + + /* Perform fixups on any blocks that jump to the epilog */ + fixup = (jit_int *)(gen->epilog_fixup); + while(fixup != 0) + { + if(DEBUG_FIXUPS) + { + fprintf(stderr, "Fixup Address: %lx, Value: %x\n", + (jit_nint)fixup, fixup[0]); + } + next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]); + fixup[0] = (jit_int)(((jit_nint)inst) - ((jit_nint)fixup) - 4); + fixup = next; + } + gen->epilog_fixup = 0; + + /* Restore the used callee saved registers */ + if(gen->stack_changed) + { + int frame_size = func->builder->frame_size; + int regs_saved = 0; + + /* Get the number of registers we preserves */ + for(reg = 0; reg < 14; ++reg) + { + if(jit_reg_is_used(gen->touched, reg) && + (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0) + { + ++regs_saved; + } + } + + /* add the register save area to the initial frame size */ + frame_size += (regs_saved << 3); + + /* Make sure that the framesize is a multiple of 16 bytes */ + /* so that the final RSP will be alligned on a 16byte boundary. */ + frame_size = (frame_size + 0xf) & ~0xf; + + current_offset = -frame_size; + + for(reg = 0; reg <= 14; ++reg) + { + if(jit_reg_is_used(gen->touched, reg) && + (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RBP, current_offset, 8); + current_offset += 8; + } + } + } + else + { + current_offset = 0; + for(reg = 0; reg <= 14; ++reg) + { + if(jit_reg_is_used(gen->touched, reg) && + (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0) + { + x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg, + X86_64_RSP, current_offset, 8); + current_offset += 8; + } + } + } + + /* Restore stackpointer and frame register */ + x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8); + x86_64_pop_reg_size(inst, X86_64_RBP, 8); + + /* and return */ + x86_64_ret(inst); + + gen->posn.ptr = inst; +} + +/* + * Copy a small block. This code will be inlined. + * Set is_aligned to 0 if you don't know if the source and target locations + * are aligned on a 16byte boundary and != 0 if you know that both blocks are + * aligned. + * We assume that offset + size is in the range -2GB ... +2GB. 
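+ * The bulk is copied with 16 byte SSE moves and the tail with 8/4/2/1
+ * byte integer moves, e.g. a 25 byte block takes one movups (or
+ * movaps), one 8 byte mov and one single byte mov.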
+ */ +static unsigned char * +small_block_copy(jit_gencode_t gen, unsigned char *inst, + int dreg, jit_nint doffset, + int sreg, jit_nint soffset, jit_int size, + int scratch_reg, int scratch_xreg, int is_aligned) +{ + int offset = 0; + + while(size >= 16) + { + if(is_aligned) + { + x86_64_movaps_reg_membase(inst, scratch_xreg, + sreg, soffset + offset); + x86_64_movaps_membase_reg(inst, dreg, doffset + offset, + scratch_xreg); + } + else + { + x86_64_movups_reg_membase(inst, scratch_xreg, + sreg, soffset + offset); + x86_64_movups_membase_reg(inst, dreg, doffset + offset, + scratch_xreg); + } + size -= 16; + offset += 16; + } + /* Now copy the rest of the struct */ + if(size >= 8) + { + x86_64_mov_reg_membase_size(inst, scratch_reg, + sreg, soffset + offset, 8); + x86_64_mov_membase_reg_size(inst, dreg, doffset + offset, + scratch_reg, 8); + size -= 8; + offset += 8; + } + if(size >= 4) + { + x86_64_mov_reg_membase_size(inst, scratch_reg, + sreg, soffset + offset, 4); + x86_64_mov_membase_reg_size(inst, dreg, doffset + offset, + scratch_reg, 4); + size -= 4; + offset += 4; + } + if(size >= 2) + { + x86_64_mov_reg_membase_size(inst, scratch_reg, + sreg, soffset + offset, 2); + x86_64_mov_membase_reg_size(inst, dreg, doffset + offset, + scratch_reg, 2); + size -= 2; + offset += 2; + } + if(size >= 1) + { + x86_64_mov_reg_membase_size(inst, scratch_reg, + sreg, soffset + offset, 1); + x86_64_mov_membase_reg_size(inst, dreg, doffset + offset, + scratch_reg, 1); + size -= 1; + offset += 1; + } + return inst; +} + +/* + * Copy a struct. + * The size of the type must be <= 4 * 16bytes + */ +static unsigned char * +small_struct_copy(jit_gencode_t gen, unsigned char *inst, + int dreg, jit_nint doffset, + int sreg, jit_nint soffset, jit_type_t type, + int scratch_reg, int scratch_xreg) +{ + int size = jit_type_get_size(type); + int alignment = jit_type_get_alignment(type); + + return small_block_copy(gen, inst, dreg, doffset, + sreg, soffset, size, scratch_reg, + scratch_xreg, ((alignment & 0xf) == 0)); +} + +/* + * Copy a block of memory that has a specific size. All call clobbered + * registers must be unused at this point. 
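+ * The destination, source and size are moved to RDI, RSI and RDX, the
+ * first three SysV integer argument registers, and jit_memcpy is
+ * called to do the actual work.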
+ */ +static unsigned char * +memory_copy(jit_gencode_t gen, unsigned char *inst, + int dreg, jit_nint doffset, + int sreg, jit_nint soffset, jit_nint size) +{ + if(dreg == X86_64_RDI) + { + if(sreg != X86_64_RSI) + { + x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8); + } + } + else if(dreg == X86_64_RSI) + { + if(sreg == X86_64_RDI) + { + /* The registers are swapped so we need a temporary register */ + x86_64_mov_reg_reg_size(inst, X86_64_RCX, X86_64_RSI, 8); + x86_64_mov_reg_reg_size(inst, X86_64_RSI, X86_64_RDI, 8); + x86_64_mov_reg_reg_size(inst, X86_64_RDI, X86_64_RCX, 8); + } + else + { + x86_64_mov_reg_reg_size(inst, X86_64_RDI, X86_64_RSI, 8); + if(sreg != X86_64_RSI) + { + x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8); + } + } + } + else + { + x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8); + x86_64_mov_reg_reg_size(inst, X86_64_RDI, dreg, 8); + } + /* Move the size to argument register 3 now */ + if((size > 0) && (size <= jit_max_uint)) + { + x86_64_mov_reg_imm_size(inst, X86_64_RDX, size, 4); + } + else + { + x86_64_mov_reg_imm_size(inst, X86_64_RDX, size, 8); + } + if(soffset != 0) + { + x86_64_add_reg_imm_size(inst, X86_64_RSI, soffset, 8); + } + if(doffset != 0) + { + x86_64_add_reg_imm_size(inst, X86_64_RDI, doffset, 8); + } + inst = x86_64_call_code(inst, (jit_nint)jit_memcpy); + return inst; +} + +void +_jit_gen_start_block(jit_gencode_t gen, jit_block_t block) +{ + jit_int *fixup; + jit_int *next; + void **absolute_fixup; + void **absolute_next; + + /* Set the address of this block */ + block->address = (void *)(gen->posn.ptr); + + /* If this block has pending fixups, then apply them now */ + fixup = (jit_int *)(block->fixup_list); + if(DEBUG_FIXUPS && fixup) + { + fprintf(stderr, "Block: %lx\n", (jit_nint)block); + fprintf(stderr, "Limit: %lx\n", (jit_nint)_JIT_GET_FIXVALUE(gen)); + } + while(fixup != 0) + { + if(DEBUG_FIXUPS) + { + fprintf(stderr, "Fixup Address: %lx, Value: %x\n", + (jit_nint)fixup, fixup[0]); + } + next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]); + fixup[0] = (jit_int) + (((jit_nint)(block->address)) - ((jit_nint)fixup) - 4); + fixup = next; + } + block->fixup_list = 0; + + /* Absolute fixups contain complete pointers */ + absolute_fixup = (void**)(block->fixup_absolute_list); + while(absolute_fixup != 0) + { + absolute_next = (void **)(absolute_fixup[0]); + absolute_fixup[0] = (void *)((jit_nint)(block->address)); + absolute_fixup = absolute_next; + } + block->fixup_absolute_list = 0; +} + +void +_jit_gen_end_block(jit_gencode_t gen, jit_block_t block) +{ + /* Nothing to do here for x86 */ +} + +int +_jit_gen_is_global_candidate(jit_type_t type) +{ + switch(jit_type_remove_tags(type)->kind) + { + case JIT_TYPE_INT: + case JIT_TYPE_UINT: + case JIT_TYPE_LONG: + case JIT_TYPE_ULONG: + case JIT_TYPE_NINT: + case JIT_TYPE_NUINT: + case JIT_TYPE_PTR: + case JIT_TYPE_SIGNATURE: + { + return 1; + } + } + return 0; +} + +/* + * Do the stuff usually handled in jit-rules.c for native implementations + * here too because the common implementation is not enough for x86_64. + */ + +/* + * Flag that a parameter is passed on the stack. + */ +#define JIT_ARG_CLASS_STACK 0xFFFF + +/* + * Define the way the parameter is passed to a specific function + */ +typedef struct +{ + jit_value_t value; + jit_ushort arg_class; + jit_ushort stack_pad; /* Number of stack words needed for padding */ + union + { + unsigned char reg[4]; + jit_int offset; + } un; +} _jit_param_t; + +/* + * Structure that is used to help with parameter passing. 
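+ * It tracks how many word and float registers have been allocated so
+ * far, which registers are available for parameter passing and how
+ * much stack space the parameters passed in memory occupy.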
+ */ +typedef struct +{ + int stack_size; /* Number of bytes needed on the */ + /* stack for parameter passing */ + int stack_pad; /* Number of stack words we have */ + /* to push before pushing the */ + /* parameters for keeping the stack */ + /* aligned */ + unsigned int word_index; /* Number of word registers */ + /* allocated */ + unsigned int max_word_regs; /* Number of word registers */ + /* available for parameter passing */ + const int *word_regs; + unsigned int float_index; + unsigned int max_float_regs; + const int *float_regs; + _jit_param_t *params; + +} jit_param_passing_t; + +/* + * Allcate the slot for a parameter passed on the stack. + */ +static void +_jit_alloc_param_slot(jit_param_passing_t *passing, _jit_param_t *param, + jit_type_t type) +{ + jit_int size = jit_type_get_size(type); + jit_int alignment = jit_type_get_alignment(type); + + /* Expand the size to a multiple of the stack slot size */ + size = ROUND_STACK(size); + + /* Expand the alignment to a multiple of the stack slot size */ + /* We expect the alignment to be a power of two after this step */ + alignment = ROUND_STACK(alignment); + + /* Make sure the current offset is aligned propperly for the type */ + if((passing->stack_size & (alignment -1)) != 0) + { + /* We need padding on the stack to fix the alignment constraint */ + jit_int padding = passing->stack_size & (alignment -1); + + /* Add the padding to the stack region */ + passing->stack_size += padding; + + /* record the number of pad words needed after pushing this arg */ + param->stack_pad = STACK_SLOTS_USED(padding); + } + /* Record the offset of the parameter in the arg region. */ + param->un.offset = passing->stack_size; + + /* And increase the argument region used. */ + passing->stack_size += size; +} + +/* + * Determine if a type corresponds to a structure or union. + */ +static int +is_struct_or_union(jit_type_t type) +{ + type = jit_type_normalize(type); + if(type) + { + if(type->kind == JIT_TYPE_STRUCT || type->kind == JIT_TYPE_UNION) + { + return 1; + } + } + return 0; +} + +/* + * Classify the argument type. + * The type has to be in it's normalized form. + */ +static int +_jit_classify_arg(jit_type_t arg_type, int is_return) +{ + switch(arg_type->kind) + { + case JIT_TYPE_SBYTE: + case JIT_TYPE_UBYTE: + case JIT_TYPE_SHORT: + case JIT_TYPE_USHORT: + case JIT_TYPE_INT: + case JIT_TYPE_UINT: + case JIT_TYPE_NINT: + case JIT_TYPE_NUINT: + case JIT_TYPE_LONG: + case JIT_TYPE_ULONG: + case JIT_TYPE_SIGNATURE: + case JIT_TYPE_PTR: + { + return X86_64_ARG_INTEGER; + } + break; + + case JIT_TYPE_FLOAT32: + case JIT_TYPE_FLOAT64: + { + return X86_64_ARG_SSE; + } + break; + + case JIT_TYPE_NFLOAT: + { + /* we assume the nfloat type to be long double (80bit) */ + if(is_return) + { + return X86_64_ARG_X87; + } + else + { + return X86_64_ARG_MEMORY; + } + } + break; + + case JIT_TYPE_STRUCT: + case JIT_TYPE_UNION: + { + int size = jit_type_get_size(arg_type); + + if(size > 16) + { + return X86_64_ARG_MEMORY; + } + else if(size <= 8) + { + return X86_64_ARG_INTEGER; + } + /* For structs and unions with sizes between 8 ant 16 bytes */ + /* we have to look at the elements. */ + /* TODO */ + } + } + return X86_64_ARG_NO_CLASS; +} + +/* + * On X86_64 the alignment of native types matches their size. + * This leads to the result that all types except nfloats and aggregates + * (structs and unions) must start and end in an eightbyte (or the part + * we are looking at). 
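+ * For example a struct { double d; int i; } is classified SSE for its
+ * first eightbyte and INTEGER for its second, while a misaligned field
+ * forces the whole argument into the MEMORY class.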
+ */ +static int +_jit_classify_structpart(jit_type_t struct_type, unsigned int start, + unsigned int start_offset, unsigned int end_offset) +{ + int arg_class = X86_64_ARG_NO_CLASS; + unsigned int num_fields = jit_type_num_fields(struct_type); + unsigned int current_field; + + for(current_field = 0; current_field < num_fields; ++current_field) + { + jit_nuint field_offset = jit_type_get_offset(struct_type, + current_field); + + if(field_offset <= end_offset) + { + /* The field starts at a place that's inerresting for us */ + jit_type_t field_type = jit_type_get_field(struct_type, + current_field); + jit_nuint field_size = jit_type_get_size(field_type); + + if(field_offset + field_size > start_offset) + { + /* The field is at least partially in the part we are */ + /* looking at */ + int arg_class2 = X86_64_ARG_NO_CLASS; + + if(is_struct_or_union(field_type)) + { + /* We have to check this struct recursively */ + unsigned int current_start; + unsigned int nested_struct_start; + unsigned int nested_struct_end; + + current_start = start + start_offset; + if(field_offset < current_start) + { + nested_struct_start = current_start - field_offset; + } + else + { + nested_struct_start = 0; + } + if(field_offset + field_size - 1 > end_offset) + { + /* The struct ends beyond the part we are looking at */ + nested_struct_end = field_offset + field_size - + (nested_struct_start + 1); + } + else + { + nested_struct_end = field_size - 1; + } + arg_class2 = _jit_classify_structpart(field_type, + start + field_offset, + nested_struct_start, + nested_struct_end); + } + else + { + if((start + start_offset) & (field_size - 1)) + { + /* The field is misaligned */ + return X86_64_ARG_MEMORY; + } + arg_class2 = _jit_classify_arg(field_type, 0); + } + if(arg_class == X86_64_ARG_NO_CLASS) + { + arg_class = arg_class2; + } + else if(arg_class != arg_class2) + { + if(arg_class == X86_64_ARG_MEMORY || + arg_class2 == X86_64_ARG_MEMORY) + { + arg_class = X86_64_ARG_MEMORY; + } + else if(arg_class == X86_64_ARG_INTEGER || + arg_class2 == X86_64_ARG_INTEGER) + { + arg_class = X86_64_ARG_INTEGER; + } + else if(arg_class == X86_64_ARG_X87 || + arg_class2 == X86_64_ARG_X87) + { + arg_class = X86_64_ARG_MEMORY; + } + else + { + arg_class = X86_64_ARG_SSE; + } + } + } + } + } + return arg_class; +} + +static int +_jit_classify_struct(jit_param_passing_t *passing, + _jit_param_t *param, jit_type_t param_type) +{ + jit_nuint size = (jit_nuint)jit_type_get_size(param_type); + + if(size <= 8) + { + int arg_class; + + arg_class = _jit_classify_structpart(param_type, 0, 0, size - 1); + if(arg_class == X86_64_ARG_NO_CLASS) + { + arg_class = X86_64_ARG_SSE; + } + if(arg_class == X86_64_ARG_INTEGER) + { + if(passing->word_index < passing->max_word_regs) + { + /* Set the arg class to the number of registers used */ + param->arg_class = 1; + + /* Set the first register to the register used */ + param->un.reg[0] = passing->word_regs[passing->word_index]; + ++(passing->word_index); + } + else + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + } + else if(arg_class == X86_64_ARG_SSE) + { + if(passing->float_index < passing->max_float_regs) + { + /* Set the arg class to the number of registers used */ + param->arg_class = 1; + + /* Set the first register to the register used */ + param->un.reg[0] = passing->float_regs[passing->float_index]; + ++(passing->float_index); + } + else + { + /* Set the 
arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + } + else + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + } + else if(size <= 16) + { + int arg_class1; + int arg_class2; + + arg_class1 = _jit_classify_structpart(param_type, 0, 0, 7); + arg_class2 = _jit_classify_structpart(param_type, 0, 8, size - 1); + if(arg_class1 == X86_64_ARG_NO_CLASS) + { + arg_class1 = X86_64_ARG_SSE; + } + if(arg_class2 == X86_64_ARG_NO_CLASS) + { + arg_class2 = X86_64_ARG_SSE; + } + if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE) + { + /* We use only one sse register in this case */ + if(passing->float_index < passing->max_float_regs) + { + /* Set the arg class to the number of registers used */ + param->arg_class = 1; + + /* Set the first register to the register used */ + param->un.reg[0] = passing->float_regs[passing->float_index]; + ++(passing->float_index); + } + else + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + } + else if(arg_class1 == X86_64_ARG_MEMORY || + arg_class2 == X86_64_ARG_MEMORY) + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + else if(arg_class1 == X86_64_ARG_INTEGER && + arg_class2 == X86_64_ARG_INTEGER) + { + /* We need two general purpose registers in this case */ + if((passing->word_index + 1) < passing->max_word_regs) + { + /* Set the arg class to the number of registers used */ + param->arg_class = 2; + + /* Assign the registers */ + param->un.reg[0] = passing->word_regs[passing->word_index]; + ++(passing->word_index); + param->un.reg[1] = passing->word_regs[passing->word_index]; + ++(passing->word_index); + } + else + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + } + else + { + /* We need one xmm and one general purpose register */ + if((passing->word_index < passing->max_word_regs) && + (passing->float_index < passing->max_float_regs)) + { + /* Set the arg class to the number of registers used */ + param->arg_class = 2; + + if(arg_class1 == X86_64_ARG_INTEGER) + { + param->un.reg[0] = passing->word_regs[passing->word_index]; + ++(passing->word_index); + param->un.reg[1] = passing->float_regs[passing->float_index]; + ++(passing->float_index); + } + else + { + param->un.reg[0] = passing->float_regs[passing->float_index]; + ++(passing->float_index); + param->un.reg[1] = passing->word_regs[passing->word_index]; + ++(passing->word_index); + } + } + else + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + } + } + else + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + return 1; +} + +int +_jit_classify_param(jit_param_passing_t *passing, + _jit_param_t *param, jit_type_t param_type) +{ + 
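+	/* Aggregates go through the recursive struct classifier; scalar
+	   types are classified directly below. */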
if(is_struct_or_union(param_type)) + { + return _jit_classify_struct(passing, param, param_type); + } + else + { + int arg_class; + + arg_class = _jit_classify_arg(param_type, 0); + + switch(arg_class) + { + case X86_64_ARG_INTEGER: + { + if(passing->word_index < passing->max_word_regs) + { + /* Set the arg class to the number of registers used */ + param->arg_class = 1; + + /* Set the first register to the register used */ + param->un.reg[0] = passing->word_regs[passing->word_index]; + ++(passing->word_index); + } + else + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + } + break; + + case X86_64_ARG_SSE: + { + if(passing->float_index < passing->max_float_regs) + { + /* Set the arg class to the number of registers used */ + param->arg_class = 1; + + /* Set the first register to the register used */ + param->un.reg[0] = passing->float_regs[passing->float_index]; + ++(passing->float_index); + } + else + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + } + break; + + case X86_64_ARG_MEMORY: + { + /* Set the arg class to stack */ + param->arg_class = JIT_ARG_CLASS_STACK; + + /* Allocate the slot in the arg passing frame */ + _jit_alloc_param_slot(passing, param, param_type); + } + break; + } + } + return 1; +} + +static int +_jit_classify_struct_return(jit_param_passing_t *passing, + _jit_param_t *param, jit_type_t return_type) +{ + /* Initialize the param passing structure */ + jit_memset(passing, 0, sizeof(jit_param_passing_t)); + jit_memset(param, 0, sizeof(_jit_param_t)); + + passing->word_regs = _jit_word_return_regs; + passing->max_word_regs = _jit_num_word_return_regs; + passing->float_regs = _jit_sse_return_regs; + passing->max_float_regs = _jit_num_sse_return_regs; + + if(!(_jit_classify_struct(passing, param, return_type))) + { + return 0; + } + + return 1; +} + +/* + * Load a struct to the register(s) in which it will be returned. 
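+ * Following the SysV ABI a small aggregate comes back in up to two
+ * registers taken from RAX/RDX and XMM0/XMM1 depending on how its
+ * eightbytes were classified; larger aggregates are returned in memory.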
+ */ +static unsigned char * +return_struct(unsigned char *inst, jit_function_t func, int ptr_reg) +{ + jit_type_t return_type; + jit_type_t signature = jit_function_get_signature(func); + + return_type = jit_type_get_return(signature); + if(is_struct_or_union(return_type)) + { + jit_nuint size; + jit_param_passing_t passing; + _jit_param_t return_param; + + if(!_jit_classify_struct_return(&passing, &return_param, + return_type)) + { + /* It's an error so simply return insn */ + return inst; + } + + size = jit_type_get_size(return_type); + if(size <= 8) + { + /* one register is used for returning the value */ + if(IS_GENERAL_REG(return_param.un.reg[0])) + { + int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + + if(size <= 4) + { + x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 4); + } + else + { + x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 8); + } + } + else + { + int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + + if(size <= 4) + { + x86_64_movss_reg_regp(inst, reg, ptr_reg); + } + else + { + x86_64_movsd_reg_regp(inst, reg, ptr_reg); + } + } + } + else + { + /* In this case we might need up to two registers */ + if(return_param.arg_class == 1) + { + /* This must be one xmm register */ + int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + int alignment = jit_type_get_alignment(return_type); + + if((alignment & 0xf) == 0) + { + /* The type is aligned on a 16 byte boundary */ + x86_64_movaps_reg_regp(inst, reg, ptr_reg); + } + else + { + x86_64_movups_reg_regp(inst, reg, ptr_reg); + } + } + else + { + int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + + if(IS_GENERAL_REG(return_param.un.reg[0])) + { + x86_64_mov_reg_regp_size(inst, reg, + ptr_reg, 8); + } + else + { + x86_64_movsd_reg_regp(inst, reg, ptr_reg); + } + size -= 8; + reg = _jit_reg_info[return_param.un.reg[1]].cpu_reg; + if(IS_GENERAL_REG(return_param.un.reg[1])) + { + if(size <= 4) + { + x86_64_mov_reg_membase_size(inst, reg, ptr_reg, + 8, 4); + } + else + { + x86_64_mov_reg_membase_size(inst, reg, ptr_reg, + 8, 8); + } + } + else + { + if(size <= 4) + { + x86_64_movss_reg_membase(inst, reg, + ptr_reg, 8); + } + else + { + x86_64_movsd_reg_membase(inst, reg, + ptr_reg, 8); + } + } + } + } + } + return inst; +} + +/* + * Flush a struct return value from the registers to the value + * on the stack. 
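+ * This is the inverse of return_struct: the same classification
+ * decides which register(s) hold the value and their contents are
+ * stored back into the value's frame slot relative to RBP.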
+ */ +static unsigned char * +flush_return_struct(unsigned char *inst, jit_value_t value) +{ + jit_type_t return_type; + + return_type = jit_value_get_type(value); + if(is_struct_or_union(return_type)) + { + jit_nuint size; + jit_nint offset; + jit_param_passing_t passing; + _jit_param_t return_param; + + if(!_jit_classify_struct_return(&passing, &return_param, return_type)) + { + /* It's an error so simply return insn */ + return inst; + } + + return_param.value = value; + + _jit_gen_fix_value(value); + size = jit_type_get_size(return_type); + offset = value->frame_offset; + if(size <= 8) + { + /* one register is used for returning the value */ + if(IS_GENERAL_REG(return_param.un.reg[0])) + { + int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + + if(size <= 4) + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, reg, 4); + } + else + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, reg, 8); + } + } + else + { + int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + + if(size <= 4) + { + x86_64_movss_membase_reg(inst, X86_64_RBP, offset, reg); + } + else + { + x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, reg); + } + } + } + else + { + /* In this case we might need up to two registers */ + if(return_param.arg_class == 1) + { + /* This must be one xmm register */ + int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + int alignment = jit_type_get_alignment(return_type); + + if((alignment & 0xf) == 0) + { + /* The type is aligned on a 16 byte boundary */ + x86_64_movaps_membase_reg(inst, X86_64_RBP, offset, reg); + } + else + { + x86_64_movups_membase_reg(inst, X86_64_RBP, offset, reg); + } + } + else + { + int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + + if(IS_GENERAL_REG(return_param.un.reg[0])) + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, + reg, 8); + } + else + { + x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, reg); + } + size -= 8; + reg = _jit_reg_info[return_param.un.reg[1]].cpu_reg; + if(IS_GENERAL_REG(return_param.un.reg[1])) + { + if(size <= 4) + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, + offset + 8, reg, 4); + } + else + { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, + offset + 8, reg, 8); + } + } + else + { + if(size <= 4) + { + x86_64_movss_membase_reg(inst, X86_64_RBP, + offset + 8, reg); + } + else + { + x86_64_movsd_membase_reg(inst, X86_64_RBP, + offset + 8, reg); + } + } + } + } + } + return inst; +} + +#define TODO() \ + do { \ + fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \ + } while (0) + +void +_jit_gen_insn(jit_gencode_t gen, jit_function_t func, + jit_block_t block, jit_insn_t insn) +{ + switch(insn->opcode) + { + #define JIT_INCLUDE_RULES + #include "jit-rules-x86-64.inc" + #undef JIT_INCLUDE_RULES + + default: + { + fprintf(stderr, "TODO(%x) at %s, %d\n", + (int)(insn->opcode), __FILE__, (int)__LINE__); + } + break; + } +} + +/* + * Fixup the passing area after all parameters have been allocated either + * in registers or on the stack. + * This is typically used for adding pad words for keeping the stack aligned. + */ +void +_jit_fix_call_stack(jit_param_passing_t *passing) +{ + if((passing->stack_size & 0x0f) != 0) + { + passing->stack_size = (passing->stack_size + 0x0f) & ~((jit_nint)0x0f); + passing->stack_pad = 1; + } +} + +/* + * Setup the call stack before pushing any parameters. + * This is used usually for pushing pad words for alignment. + * The function is needed only if the backend doesn't work with the + * parameter area. 
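+ * E.g. with 24 bytes of stack arguments _jit_fix_call_stack rounds the
+ * region up to 32 bytes and a single pad word gets pushed here before
+ * the actual arguments.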
+ */ +int +_jit_setup_call_stack(jit_function_t func, jit_param_passing_t *passing) +{ + if(passing->stack_pad) + { + int current; + jit_value_t pad_value; + + pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0); + if(!pad_value) + { + return 0; + } + for(current = 0; current < passing->stack_pad; ++current) + { + if(!jit_insn_push(func, pad_value)) + { + return 0; + } + } + } + return 1; +} + +/* + * Push a parameter onto the stack. + */ +static int +push_param(jit_function_t func, _jit_param_t *param, jit_type_t type) +{ + if(is_struct_or_union(type) && !is_struct_or_union(param->value->type)) + { + jit_value_t value; + + if(!(value = jit_insn_address_of(func, param->value))) + { + return 0; + } + #ifdef JIT_USE_PARAM_AREA + /* Copy the value into the outgoing parameter area, by pointer */ + if(!jit_insn_set_param_ptr(func, value, type, param->un.offset)) + { + return 0; + } + #else + /* Push the parameter value onto the stack, by pointer */ + if(!jit_insn_push_ptr(func, value, type)) + { + return 0; + } + if(param->stack_pad) + { + int current; + jit_value_t pad_value; + + pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0); + if(!pad_value) + { + return 0; + } + for(current = 0; current < param->stack_pad; ++current) + { + if(!jit_insn_push(func, pad_value)) + { + return 0; + } + } + } + #endif + } + else + { + #ifdef JIT_USE_PARAM_AREA + /* Copy the value into the outgoing parameter area */ + if(!jit_insn_set_param(func, param->value, param->un.offset)) + { + return 0; + } + #else + /* Push the parameter value onto the stack */ + if(!jit_insn_push(func, param->value)) + { + return 0; + } + if(param->stack_pad) + { + int current; + jit_value_t pad_value; + + pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0); + if(!pad_value) + { + return 0; + } + for(current = 0; current < param->stack_pad; ++current) + { + if(!jit_insn_push(func, pad_value)) + { + return 0; + } + } + } + #endif + } + return 1; +} + +int +_jit_setup_incoming_param(jit_function_t func, _jit_param_t *param, + jit_type_t param_type) +{ + if(param->arg_class == JIT_ARG_CLASS_STACK) + { + /* The parameter is passed on the stack */ + if(!jit_insn_incoming_frame_posn + (func, param->value, param->un.offset)) + { + return 0; + } + } + else + { + param_type = jit_type_remove_tags(param_type); + + switch(param_type->kind) + { + case JIT_TYPE_STRUCT: + case JIT_TYPE_UNION: + { + if(param->arg_class == 1) + { + if(!jit_insn_incoming_reg(func, param->value, param->un.reg[0])) + { + return 0; + } + } + else + { + /* These cases have to be handled specially */ + } + } + break; + + default: + { + if(!jit_insn_incoming_reg(func, param->value, param->un.reg[0])) + { + return 0; + } + } + break; + } + } + return 1; +} + +int +_jit_setup_outgoing_param(jit_function_t func, _jit_param_t *param, + jit_type_t param_type) +{ + if(param->arg_class == JIT_ARG_CLASS_STACK) + { + /* The parameter is passed on the stack */ + if(!push_param(func, param, param_type)) + { + return 0; + } + } + else + { + param_type = jit_type_remove_tags(param_type); + + switch(param_type->kind) + { + case JIT_TYPE_STRUCT: + case JIT_TYPE_UNION: + { + /* These cases have to be handled specially */ + if(param->arg_class == 1) + { + /* Only one xmm register is used for passing this argument */ + if(!jit_insn_outgoing_reg(func, param->value, param->un.reg[0])) + { + return 0; + } + } + else + { + /* We need two registers for passing the value */ + jit_nuint size = (jit_nuint)jit_type_get_size(param_type); + + jit_value_t 
struct_ptr;
+
+                    if(!(struct_ptr = jit_insn_address_of(func, param->value)))
+                    {
+                        return 0;
+                    }
+                    if(IS_GENERAL_REG(param->un.reg[0]))
+                    {
+                        jit_value_t param_value;
+
+                        param_value = jit_insn_load_relative(func, struct_ptr,
+                                                             0, jit_type_ulong);
+                        if(!param_value)
+                        {
+                            return 0;
+                        }
+                        if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0]))
+                        {
+                            return 0;
+                        }
+                    }
+                    else
+                    {
+                        jit_value_t param_value;
+
+                        param_value = jit_insn_load_relative(func, struct_ptr,
+                                                             0, jit_type_float64);
+                        if(!param_value)
+                        {
+                            return 0;
+                        }
+                        if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0]))
+                        {
+                            return 0;
+                        }
+                    }
+                    /* The second eightbyte is passed in the second register */
+                    size -= 8;
+                    if(IS_GENERAL_REG(param->un.reg[1]))
+                    {
+                        if(size == 1)
+                        {
+                            jit_value_t param_value;
+
+                            param_value = jit_insn_load_relative(func, struct_ptr,
+                                                                 8, jit_type_ubyte);
+                            if(!param_value)
+                            {
+                                return 0;
+                            }
+                            if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[1]))
+                            {
+                                return 0;
+                            }
+                        }
+                        else if(size == 2)
+                        {
+                            jit_value_t param_value;
+
+                            param_value = jit_insn_load_relative(func, struct_ptr,
+                                                                 8, jit_type_ushort);
+                            if(!param_value)
+                            {
+                                return 0;
+                            }
+                            if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[1]))
+                            {
+                                return 0;
+                            }
+                        }
+                        else if(size <= 4)
+                        {
+                            jit_value_t param_value;
+
+                            param_value = jit_insn_load_relative(func, struct_ptr,
+                                                                 8, jit_type_uint);
+                            if(!param_value)
+                            {
+                                return 0;
+                            }
+                            if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[1]))
+                            {
+                                return 0;
+                            }
+                        }
+                        else
+                        {
+                            jit_value_t param_value;
+
+                            param_value = jit_insn_load_relative(func, struct_ptr,
+                                                                 8, jit_type_ulong);
+                            if(!param_value)
+                            {
+                                return 0;
+                            }
+                            if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[1]))
+                            {
+                                return 0;
+                            }
+                        }
+                    }
+                    else
+                    {
+                        if(size <= 4)
+                        {
+                            jit_value_t param_value;
+
+                            param_value = jit_insn_load_relative(func, struct_ptr,
+                                                                 8, jit_type_float32);
+                            if(!param_value)
+                            {
+                                return 0;
+                            }
+                            if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[1]))
+                            {
+                                return 0;
+                            }
+                        }
+                        else
+                        {
+                            jit_value_t param_value;
+
+                            param_value = jit_insn_load_relative(func, struct_ptr,
+                                                                 8, jit_type_float64);
+                            if(!param_value)
+                            {
+                                return 0;
+                            }
+                            if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[1]))
+                            {
+                                return 0;
+                            }
+                        }
+                    }
+                }
+            }
+            break;
+
+            default:
+            {
+                if(!jit_insn_outgoing_reg(func, param->value, param->un.reg[0]))
+                {
+                    return 0;
+                }
+            }
+            break;
+        }
+    }
+    return 1;
+}
+
+int
+_jit_setup_return_value(jit_function_t func, jit_value_t return_value,
+                        jit_type_t return_type)
+{
+    /* Structure values must be flushed into the frame, and
+       everything else ends up in a register */
+    if(is_struct_or_union(return_type))
+    {
+        jit_param_passing_t passing;
+        _jit_param_t return_param;
+
+        if(!_jit_classify_struct_return(&passing, &return_param, return_type))
+        {
+            /* Classification failed, so report an error */
+            return 0;
+        }
+
+        if(return_param.arg_class == 1)
+        {
+            if(!jit_insn_return_reg(func, return_value,
+                                    return_param.un.reg[0]))
+            {
+                return 0;
+            }
+        }
+        else
+        {
+            if(!jit_insn_flush_struct(func, return_value))
+            {
+                return 0;
+            }
+        }
+    }
+    else if(return_type == jit_type_float32 ||
+            return_type == jit_type_float64)
+    {
+        if(!jit_insn_return_reg(func, return_value, X86_64_REG_XMM0))
+        {
+            return 0;
+        }
+    }
+    else if(return_type == jit_type_nfloat)
+    {
+        if(!jit_insn_return_reg(func, return_value, X86_64_REG_ST0))
+        {
+            return 0;
+        }
+    }
+    else if(return_type->kind != JIT_TYPE_VOID)
+    {
+        if(!jit_insn_return_reg(func, return_value, X86_64_REG_RAX))
+        {
+            return 0;
+        }
+    }
+    return 1;
+}
+
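+/*
+ * Background note: on x86_64 the System V ABI passes the first integer
+ * or pointer arguments in RDI, RSI, RDX, RCX, R8 and R9 and the first
+ * floating point arguments in XMM0 - XMM7; everything that does not fit
+ * goes onto the stack.  _jit_init_args below merely publishes the
+ * backend's register tables, while the per parameter decision is made
+ * by _jit_classify_param.  A rough sketch of what classification does
+ * (hypothetical example, the real logic lives in _jit_classify_param):
+ *
+ *    struct { jit_long l; jit_float64 d; } arg;
+ *    eightbyte 0 (l) -> class INTEGER -> next free word register
+ *    eightbyte 1 (d) -> class SSE     -> next free xmm register
+ */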
+void
+_jit_init_args(int abi, jit_param_passing_t *passing)
+{
+    passing->max_word_regs = _jit_num_word_regs;
+    passing->word_regs = _jit_word_arg_regs;
+    passing->max_float_regs = _jit_num_float_regs;
+    passing->float_regs = _jit_float_arg_regs;
+}
+
+int
+_jit_create_entry_insns(jit_function_t func)
+{
+    jit_type_t signature = func->signature;
+    int abi = jit_type_get_abi(signature);
+    unsigned int num_args = jit_type_num_params(signature);
+    jit_param_passing_t passing;
+    _jit_param_t param[num_args];
+    _jit_param_t nested_param;
+    _jit_param_t struct_return_param;
+    int current_param;
+
+    /* Reset the local variable frame size for this function */
+    func->builder->frame_size = JIT_INITIAL_FRAME_SIZE;
+
+    /* Initialize the param passing structure */
+    jit_memset(&passing, 0, sizeof(jit_param_passing_t));
+    jit_memset(param, 0, sizeof(_jit_param_t) * num_args);
+
+    passing.params = param;
+    passing.stack_size = JIT_INITIAL_STACK_OFFSET;
+
+    /* Let the specific backend initialize its part of the params */
+    _jit_init_args(abi, &passing);
+
+    /* If the function is nested, then we need an extra parameter
+       to pass the pointer to the parent's local variable frame */
+    if(func->nested_parent)
+    {
+        jit_memset(&nested_param, 0, sizeof(_jit_param_t));
+        if(!(_jit_classify_param(&passing, &nested_param,
+                                 jit_type_void_ptr)))
+        {
+            return 0;
+        }
+    }
+
+    /* Allocate the structure return pointer */
+    if(jit_value_get_struct_pointer(func))
+    {
+        jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
+        if(!(_jit_classify_param(&passing, &struct_return_param,
+                                 jit_type_void_ptr)))
+        {
+            return 0;
+        }
+    }
+
+    /* Let the backend classify the parameters */
+    for(current_param = 0; current_param < num_args; current_param++)
+    {
+        jit_type_t param_type;
+
+        param_type = jit_type_get_param(signature, current_param);
+        param_type = jit_type_normalize(param_type);
+
+        if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
+                                 param_type)))
+        {
+            return 0;
+        }
+    }
+
+    /* Now we can setup the incoming parameters */
+    for(current_param = 0; current_param < num_args; current_param++)
+    {
+        jit_type_t param_type;
+
+        param_type = jit_type_get_param(signature, current_param);
+        if(!(param[current_param].value))
+        {
+            if(!(param[current_param].value = jit_value_get_param(func, current_param)))
+            {
+                return 0;
+            }
+        }
+        if(!_jit_setup_incoming_param(func, &(param[current_param]), param_type))
+        {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+int _jit_create_call_setup_insns
+    (jit_function_t func, jit_type_t signature,
+     jit_value_t *args, unsigned int num_args,
+     int is_nested, int nesting_level, jit_value_t *struct_return, int flags)
+{
+    int abi = jit_type_get_abi(signature);
+    jit_type_t return_type;
+    jit_value_t value;
+    jit_value_t return_ptr;
+    int current_param;
+    jit_param_passing_t passing;
+    _jit_param_t param[num_args];
+    _jit_param_t nested_param;
+    _jit_param_t struct_return_param;
+
+    /* Initialize the param passing structure */
+    jit_memset(&passing, 0, sizeof(jit_param_passing_t));
+    jit_memset(param, 0, sizeof(_jit_param_t) * num_args);
+
+    passing.params = param;
+    passing.stack_size = 0;
+
+    /* Let the specific backend initialize its part of the params */
+    _jit_init_args(abi, &passing);
+
+    /* Determine how many parameters are going to end up in word registers,
+       and compute the largest stack size needed to pass stack parameters */
+    if(is_nested)
+    {
+        jit_memset(&nested_param, 0, sizeof(_jit_param_t));
+        if(!(_jit_classify_param(&passing, &nested_param,
+                                 jit_type_void_ptr)))
+        {
+            return 0;
+        }
+    }
+
+    /* Determine if we need an extra hidden parameter for returning a
+       structure */
+    return_type = jit_type_get_return(signature);
+    if(jit_type_return_via_pointer(return_type))
+    {
+        value = jit_value_create(func, return_type);
+        if(!value)
+        {
+            return 0;
+        }
+        *struct_return = value;
+        return_ptr = jit_insn_address_of(func, value);
+        if(!return_ptr)
+        {
+            return 0;
+        }
+        jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
+        if(!(_jit_classify_param(&passing, &struct_return_param,
+                                 jit_type_void_ptr)))
+        {
+            return 0;
+        }
+        struct_return_param.value = return_ptr;
+    }
+    else
+    {
+        *struct_return = 0;
+        return_ptr = 0;
+    }
+
+    /* Let the backend classify the parameters */
+    for(current_param = 0; current_param < num_args; current_param++)
+    {
+        jit_type_t param_type;
+
+        param_type = jit_type_get_param(signature, current_param);
+        param_type = jit_type_normalize(param_type);
+
+        if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
+                                 param_type)))
+        {
+            return 0;
+        }
+        /* Set the argument value */
+        passing.params[current_param].value = args[current_param];
+    }
+
+#ifdef JIT_USE_PARAM_AREA
+    if(passing.stack_size > func->builder->param_area_size)
+    {
+        func->builder->param_area_size = passing.stack_size;
+    }
+#else
+    /* Let the backend do final adjustments to the passing area */
+    _jit_fix_call_stack(&passing);
+
+    /* Flush deferred stack pops from previous calls if too many
+       parameters have collected up on the stack since last time */
+    if(!jit_insn_flush_defer_pop(func, 32 - passing.stack_size))
+    {
+        return 0;
+    }
+
+    if(!_jit_setup_call_stack(func, &passing))
+    {
+        return 0;
+    }
+#endif
+
+    /* Now setup the arguments on the stack or in the registers in reverse order */
+    current_param = num_args;
+    while(current_param > 0)
+    {
+        jit_type_t param_type;
+
+        --current_param;
+        param_type = jit_type_get_param(signature, current_param);
+        if(!_jit_setup_outgoing_param(func, &(param[current_param]), param_type))
+        {
+            return 0;
+        }
+    }
+
+    /* Add the structure return pointer if required */
+    if(return_ptr)
+    {
+        if(!_jit_setup_outgoing_param(func, &struct_return_param, return_type))
+        {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+int
+_jit_create_call_return_insns(jit_function_t func, jit_type_t signature,
+                              jit_value_t *args, unsigned int num_args,
+                              jit_value_t return_value, int is_nested)
+{
+    int abi = jit_type_get_abi(signature);
+    jit_type_t return_type;
+    int ptr_return;
+    int current_param;
+#ifndef JIT_USE_PARAM_AREA
+    jit_param_passing_t passing;
+    _jit_param_t param[num_args];
+    _jit_param_t nested_param;
+    _jit_param_t struct_return_param;
+#endif /* !JIT_USE_PARAM_AREA */
+
+    return_type = jit_type_normalize(jit_type_get_return(signature));
+    ptr_return = jit_type_return_via_pointer(return_type);
+#ifndef JIT_USE_PARAM_AREA
+    /* Initialize the param passing structure */
+    jit_memset(&passing, 0, sizeof(jit_param_passing_t));
+    jit_memset(param, 0, sizeof(_jit_param_t) * num_args);
+
+    passing.params = param;
+    passing.stack_size = 0;
+
+    /* Let the specific backend initialize its part of the params */
+    _jit_init_args(abi, &passing);
+
+    /* Determine how many parameters are going to end up in word registers,
+       and compute the largest stack size needed to pass stack parameters */
+    if(is_nested)
+    {
+        jit_memset(&nested_param, 0, sizeof(_jit_param_t));
+        if(!(_jit_classify_param(&passing, &nested_param,
+                                 jit_type_void_ptr)))
+        {
+            return 0;
+        }
+    }
+
+    /* Determine if we need an extra hidden parameter for returning a
+       structure */
+    if(ptr_return)
+    {
+        jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
+        if(!(_jit_classify_param(&passing, &struct_return_param,
+                                 jit_type_void_ptr)))
+        {
+            return 0;
+        }
+    }
+
+    /* Let the backend classify the parameters */
+    for(current_param = 0; current_param < num_args; current_param++)
+    {
+        jit_type_t param_type;
+
+        param_type = jit_type_get_param(signature, current_param);
+        param_type = jit_type_normalize(param_type);
+
+        if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
+                                 param_type)))
+        {
+            return 0;
+        }
+    }
+
+    /* Let the backend do final adjustments to the passing area */
+    _jit_fix_call_stack(&passing);
+
+    /* Pop the bytes from the system stack */
+    if(passing.stack_size > 0)
+    {
+        if(!jit_insn_defer_pop_stack(func, passing.stack_size))
+        {
+            return 0;
+        }
+    }
+#endif /* !JIT_USE_PARAM_AREA */
+
+    /* Bail out now if we don't need to worry about return values */
+    if(!return_value || ptr_return)
+    {
+        return 1;
+    }
+
+    if(!_jit_setup_return_value(func, return_value, return_type))
+    {
+        return 0;
+    }
+
+    /* Everything is back where it needs to be */
+    return 1;
+}
+
+#endif /* JIT_BACKEND_X86_64 */
diff --git a/jit/jit-rules-x86-64.h b/jit/jit-rules-x86-64.h
new file mode 100644
index 0000000..6256c2c
--- /dev/null
+++ b/jit/jit-rules-x86-64.h
@@ -0,0 +1,126 @@
+/*
+ * jit-rules-x86-64.h - Rules that define the characteristics of the x86_64.
+ *
+ * Copyright (C) 2008 Southern Storm Software, Pty Ltd.
+ *
+ * This file is part of the libjit library.
+ *
+ * The libjit library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation, either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * The libjit library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with the libjit library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _JIT_RULES_X86_64_H
+#define _JIT_RULES_X86_64_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Information about all of the registers, in allocation order.
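+ *
+ * Each entry has the form { name, cpu_reg, other_reg, flags }; for
+ * example {"rax", 0, -1, ...} maps the allocator's "rax" to CPU
+ * register number 0 with no paired register (-1).  (Field layout as
+ * defined by the register info structure in jit-rules.h.)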
+ */ +#define JIT_REG_X86_64_FLOAT \ + (JIT_REG_FLOAT32 | JIT_REG_FLOAT64 | JIT_REG_NFLOAT) +#define JIT_REG_X86_64_XMM \ + (JIT_REG_FLOAT32 | JIT_REG_FLOAT64) +#define JIT_REG_X86_64_GENERAL \ + (JIT_REG_WORD | JIT_REG_LONG) +#define JIT_REG_INFO \ + {"rax", 0, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"rcx", 1, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"rdx", 2, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"rbx", 3, -1, JIT_REG_X86_64_GENERAL | JIT_REG_GLOBAL}, \ + {"rsi", 6, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"rdi", 7, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"r8", 8, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"r9", 9, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"r10", 10, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"r11", 11, -1, JIT_REG_X86_64_GENERAL | JIT_REG_CALL_USED}, \ + {"r12", 12, -1, JIT_REG_X86_64_GENERAL | JIT_REG_GLOBAL}, \ + {"r13", 13, -1, JIT_REG_X86_64_GENERAL | JIT_REG_GLOBAL}, \ + {"r14", 14, -1, JIT_REG_X86_64_GENERAL | JIT_REG_GLOBAL}, \ + {"r15", 15, -1, JIT_REG_X86_64_GENERAL | JIT_REG_GLOBAL}, \ + {"rbp", 5, -1, JIT_REG_FRAME | JIT_REG_FIXED | JIT_REG_CALL_USED}, \ + {"rsp", 4, -1, JIT_REG_STACK_PTR | JIT_REG_FIXED | JIT_REG_CALL_USED}, \ + {"xmm0", 0, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm1", 1, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm2", 2, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm3", 3, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm4", 4, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm5", 5, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm6", 6, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm7", 7, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm8", 8, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm9", 9, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm10", 10, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm11", 11, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm12", 12, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm13", 13, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm14", 14, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"xmm15", 15, -1, JIT_REG_X86_64_XMM | JIT_REG_CALL_USED}, \ + {"st0", 0, -1, JIT_REG_X86_64_FLOAT | JIT_REG_CALL_USED | JIT_REG_IN_STACK}, \ + {"st1", 1, -1, JIT_REG_X86_64_FLOAT | JIT_REG_CALL_USED | JIT_REG_IN_STACK}, \ + {"st2", 2, -1, JIT_REG_X86_64_FLOAT | JIT_REG_CALL_USED | JIT_REG_IN_STACK}, \ + {"st3", 3, -1, JIT_REG_X86_64_FLOAT | JIT_REG_CALL_USED | JIT_REG_IN_STACK}, \ + {"st4", 4, -1, JIT_REG_X86_64_FLOAT | JIT_REG_CALL_USED | JIT_REG_IN_STACK}, \ + {"st5", 5, -1, JIT_REG_X86_64_FLOAT | JIT_REG_CALL_USED | JIT_REG_IN_STACK}, \ + {"st6", 6, -1, JIT_REG_X86_64_FLOAT | JIT_REG_CALL_USED | JIT_REG_IN_STACK}, \ + {"st7", 7, -1, JIT_REG_X86_64_FLOAT | JIT_REG_CALL_USED | JIT_REG_IN_STACK}, +#define JIT_NUM_REGS 40 +#define JIT_NUM_GLOBAL_REGS 5 + +#define JIT_REG_STACK 1 +#define JIT_REG_STACK_START 32 +#define JIT_REG_STACK_END 39 + +/* + * Define to 1 if we should always load values into registers + * before operating on them. i.e. the CPU does not have reg-mem + * and mem-reg addressing modes. + */ +#define JIT_ALWAYS_REG_REG 0 + +/* + * The maximum number of bytes to allocate for the prolog. + * This may be shortened once we know the true prolog size. + */ +#define JIT_PROLOG_SIZE 64 + +/* + * Preferred alignment for the start of functions. 
+ */
+#define JIT_FUNCTION_ALIGNMENT 32
+
+/*
+ * Define this to 1 if the platform allows reads and writes on
+ * any byte boundary. Define to 0 if only properly-aligned
+ * memory accesses are allowed.
+ */
+#define JIT_ALIGN_OVERRIDES 1
+
+/*
+ * Parameter passing rules.
+ */
+/*
+#define JIT_CDECL_WORD_REG_PARAMS {5, 4, 2, 1, 6, 7, -1}
+#define JIT_MAX_WORD_REG_PARAMS 6
+*/
+#define JIT_INITIAL_STACK_OFFSET (2 * sizeof(void *))
+#define JIT_INITIAL_FRAME_SIZE 0
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif /* _JIT_RULES_X86_64_H */
diff --git a/jit/jit-rules-x86-64.ins b/jit/jit-rules-x86-64.ins
new file mode 100644
index 0000000..cc04227
--- /dev/null
+++ b/jit/jit-rules-x86-64.ins
@@ -0,0 +1,1941 @@
+/*
+ * jit-rules-x86-64.ins - Instruction selector for x86_64.
+ *
+ * Copyright (C) 2008 Southern Storm Software, Pty Ltd.
+ *
+ * This file is part of the libjit library.
+ *
+ * The libjit library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation, either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * The libjit library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with the libjit library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+%regclass reg x86_64_reg
+%regclass creg x86_64_creg
+%regclass rreg x86_64_rreg
+%regclass freg x86_64_freg
+%regclass xreg x86_64_xreg
+
+/*
+ * Conversion opcodes.
+ */
+
+JIT_OP_TRUNC_SBYTE:
+    [=reg, reg] -> {
+        x86_64_movsx8_reg_reg_size(inst, $1, $2, 4);
+    }
+
+JIT_OP_TRUNC_UBYTE:
+    [=reg, reg] -> {
+        x86_64_movzx8_reg_reg_size(inst, $1, $2, 4);
+    }
+
+JIT_OP_TRUNC_SHORT:
+    [=reg, reg] -> {
+        x86_64_movsx16_reg_reg_size(inst, $1, $2, 4);
+    }
+
+JIT_OP_TRUNC_USHORT:
+    [=reg, reg] -> {
+        x86_64_movzx16_reg_reg_size(inst, $1, $2, 4);
+    }
+
+JIT_OP_TRUNC_INT:
+    [=reg, reg] -> {
+        if($1 != $2)
+        {
+            x86_64_mov_reg_reg_size(inst, $1, $2, 4);
+        }
+    }
+
+JIT_OP_TRUNC_UINT:
+    [=reg, reg] -> {
+        if($1 != $2)
+        {
+            x86_64_mov_reg_reg_size(inst, $1, $2, 4);
+        }
+    }
+
+JIT_OP_LOW_WORD:
+    [=reg, imm] -> {
+        x86_64_mov_reg_imm_size(inst, $1, $2, 4);
+    }
+    [=reg, local] -> {
+        x86_64_mov_reg_membase_size(inst, $1, X86_64_RBP, $2, 4);
+    }
+    [=reg, reg] -> {
+        if($1 != $2)
+        {
+            x86_64_mov_reg_reg_size(inst, $1, $2, 4);
+        }
+    }
+
+JIT_OP_EXPAND_INT:
+    [=reg, reg] -> {
+        x86_64_movsx32_reg_reg_size(inst, $1, $2, 8);
+    }
+
+JIT_OP_EXPAND_UINT:
+    [=reg, reg] -> {
+        x86_64_mov_reg_reg_size(inst, $1, $2, 4);
+    }
+
+JIT_OP_NFLOAT_TO_INT: stack
+    [=reg, freg] -> {
+        /* allocate space on the stack for 2 shorts and 1 int */
+        x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
+        /* store FPU control word */
+        x86_64_fnstcw_membase(inst, X86_64_RSP, 0);
+        /* set "round toward zero" mode */
+        x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 0, 2);
+        x86_64_or_reg_imm_size(inst, $1, 0xc00, 2);
+        x86_64_mov_membase_reg_size(inst, X86_64_RSP, 2, $1, 2);
+        x86_64_fldcw_membase(inst, X86_64_RSP, 2);
+        /* convert float to int */
+        x86_64_fistp_membase_size(inst, X86_64_RSP, 4, 4);
+        /* restore FPU control word */
+        x86_64_fldcw_membase(inst, X86_64_RSP, 0);
+        /* move result to the destination */
+        x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 4, 4);
+        /* restore the stack */
+        
x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8); + } + +JIT_OP_NFLOAT_TO_LONG: stack + [=reg, freg] -> { + /* allocate space on the stack for 2 shorts and 1 long */ + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 12, 8); + /* store FPU control word */ + x86_64_fnstcw_membase(inst, X86_64_RSP, 0); + /* set "round toward zero" mode */ + x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 0, 2); + x86_64_or_reg_imm_size(inst, $1, 0xc00, 2); + x86_64_mov_membase_reg_size(inst, X86_64_RSP, 2, $1, 2); + x86_64_fldcw_membase(inst, X86_64_RSP, 2); + /* convert float to long */ + x86_64_fistp_membase_size(inst, X86_64_RSP, 4, 8); + /* restore FPU control word */ + x86_64_fldcw_membase(inst, X86_64_RSP, 0); + /* move result to the destination */ + x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 4, 8); + /* restore the stack */ + x86_64_add_reg_imm_size(inst, X86_64_RSP, 12, 8); + } + +JIT_OP_NFLOAT_TO_FLOAT32: stack + [=xreg, freg] -> { + /* Avoid modifying the stack pointer by simply using negative */ + /* offsets here. */ + x86_64_fstp_membase_size(inst, X86_64_RSP, -8, 4); + x86_64_movss_reg_membase(inst, $1, X86_64_RSP, -8); + } + +JIT_OP_NFLOAT_TO_FLOAT64: stack + [=xreg, freg] -> { + /* Avoid modifying the stack pointer by simply using negative */ + /* offsets here. */ + x86_64_fstp_membase_size(inst, X86_64_RSP, -8, 8); + x86_64_movsd_reg_membase(inst, $1, X86_64_RSP, -8); + } + +/* + * Data manipulation. + */ + +JIT_OP_COPY_LOAD_SBYTE, JIT_OP_COPY_LOAD_UBYTE, JIT_OP_COPY_STORE_BYTE: copy + [=local, imm] -> { + x86_64_mov_membase_imm_size(inst, X86_64_RBP, $1, $2, 1); + } + [=local, reg] -> { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, $1, $2, 1); + } + [reg] -> {} + +JIT_OP_COPY_LOAD_SHORT, JIT_OP_COPY_LOAD_USHORT, JIT_OP_COPY_STORE_SHORT: copy + [=local, imm] -> { + x86_64_mov_membase_imm_size(inst, X86_64_RBP, $1, $2, 2); + } + [=local, reg] -> { + x86_64_mov_membase_reg_size(inst, X86_64_RBP, $1, $2, 2); + } + [reg] -> {} + +JIT_OP_COPY_INT: copy + [=local, imm] -> { + x86_64_mov_membase_imm_size(inst, X86_64_RBP, $1, $2, 4); + } + [reg] -> {} + +JIT_OP_COPY_LONG: copy + [reg] -> {} + +JIT_OP_COPY_FLOAT32: copy + [=local, xreg] -> { + x86_64_movss_membase_reg(inst, X86_64_RBP, $1, $2); + } + [xreg] -> {} + +JIT_OP_COPY_FLOAT64: copy + [=local, xreg] -> { + x86_64_movsd_membase_reg(inst, X86_64_RBP, $1, $2); + } + [xreg] -> {} + +JIT_OP_COPY_NFLOAT: copy, stack + [freg] -> {} + +JIT_OP_COPY_STRUCT: + [=frame, frame, scratch reg, scratch xreg, + if("jit_type_get_size(jit_value_get_type(insn->dest)) <= _JIT_MAX_MEMCPY_INLINE")] -> { + inst = small_struct_copy(gen, inst, X86_64_RBP, $1, X86_64_RBP, $2, + jit_value_get_type(insn->dest), $3, $4); + } + [=frame, frame, clobber(creg), clobber(xreg)] -> { + inst = memory_copy(gen, inst, X86_64_RBP, $1, X86_64_RBP, $2, + jit_type_get_size(jit_value_get_type(insn->dest))); + } + +JIT_OP_ADDRESS_OF: + [=reg, frame] -> { + x86_64_lea_membase_size(inst, $1, X86_64_RBP, $2, 8); + } + +/* + * Stack pushes and pops. 
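+ *
+ * A 64 bit immediate outside the signed 32 bit range cannot be pushed
+ * directly, so JIT_OP_PUSH_LONG below splits it into two 32 bit stores
+ * (little-endian, so ptr[1] is the high word):
+ *
+ *    x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
+ *    x86_64_mov_membase_imm_size(inst, X86_64_RSP, 4, ptr[1], 4);
+ *    x86_64_mov_membase_imm_size(inst, X86_64_RSP, 0, ptr[0], 4);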
+ */ + +JIT_OP_PUSH_INT: note + [imm] -> { + x86_64_push_imm(inst, $1); + gen->stack_changed = 1; + } + [local] -> { + x86_64_push_membase_size(inst, X86_64_RBP, $1, 4); + gen->stack_changed = 1; + } + [reg] -> { + x86_64_push_reg_size(inst, $1, 4); + gen->stack_changed = 1; + } + +JIT_OP_PUSH_LONG: note + [imm] -> { + if(($1 >= jit_min_int) && ($1 <= jit_max_int)) + { + x86_64_push_imm(inst, $1); + } + else + { + jit_int *ptr = (jit_int *)&($1); + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); + x86_64_mov_membase_imm_size(inst, X86_64_RSP, 4, ptr[1], 4); + x86_64_mov_membase_imm_size(inst, X86_64_RSP, 0, ptr[0], 4); + } + gen->stack_changed = 1; + } + [local] -> { + x86_64_push_membase_size(inst, X86_64_RBP, $1, 8); + gen->stack_changed = 1; + } + [reg] -> { + x86_64_push_reg_size(inst, $1, 8); + gen->stack_changed = 1; + } + +JIT_OP_PUSH_FLOAT32: note, stack + [imm] -> { + jit_int *ptr = (jit_int *)($1); + x86_64_push_imm_size(inst, ptr[0], 4); + gen->stack_changed = 1; + } + [local] -> { + x86_64_push_membase_size(inst, X86_64_RBP, $1, 4); + gen->stack_changed = 1; + } + [xreg] -> { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); + x86_64_movss_membase_reg(inst, X86_64_RSP, 0, $1); + gen->stack_changed = 1; + } + [freg] -> { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); + x86_64_fstp_membase_size(inst, X86_64_RSP, 0, 4); + gen->stack_changed = 1; + } + +JIT_OP_PUSH_FLOAT64: note, stack + [imm] -> { + jit_int *ptr = (jit_int *)($1); + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); + x86_64_mov_membase_imm_size(inst, X86_64_RSP, 4, ptr[1], 4); + x86_64_mov_membase_imm_size(inst, X86_64_RSP, 0, ptr[0], 4); + gen->stack_changed = 1; + } + [local] -> { + x86_64_push_membase_size(inst, X86_64_RBP, $1, 8); + gen->stack_changed = 1; + } + [xreg] -> { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); + x86_64_movsd_membase_reg(inst, X86_64_RSP, 0, $1); + gen->stack_changed = 1; + } + [freg] -> { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); + x86_64_fstp_membase_size(inst, X86_64_RSP, 0, 8); + gen->stack_changed = 1; + } + +JIT_OP_PUSH_NFLOAT: note, stack + [imm] -> { + jit_int *ptr = (jit_int *)($1); + if(sizeof(jit_nfloat) != sizeof(jit_float64)) + { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8); + x86_64_mov_membase_imm_size(inst, X86_64_RSP, 8, ptr[2], 4); + } + else + { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, sizeof(jit_float64), 8); + } + x86_64_mov_membase_imm_size(inst, X86_64_RSP, 4, ptr[1], 4); + x86_64_mov_membase_imm_size(inst, X86_64_RSP, 0, ptr[0], 4); + gen->stack_changed = 1; + } + [local, scratch reg] -> { + if(sizeof(jit_nfloat) != sizeof(jit_float64)) + { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8); + x86_64_mov_reg_membase_size(inst, $2, X86_64_RBP, $1 + 8, 4); + x86_64_mov_membase_reg_size(inst, X86_64_RSP, 8, $2, 4); + } + else + { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); + } + x86_64_mov_reg_membase_size(inst, $2, X86_64_RBP, $1, 8); + x86_64_mov_membase_reg_size(inst, X86_64_RSP, 0, $2, 8); + gen->stack_changed = 1; + } + [freg] -> { + if(sizeof(jit_nfloat) != sizeof(jit_float64)) + { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8); + x86_64_fstp_membase_size(inst, X86_64_RSP, 0, 10); + } + else + { + x86_64_sub_reg_imm_size(inst, X86_64_RSP, sizeof(jit_float64), 8); + x86_64_fstp_membase_size(inst, X86_64_RSP, 0, 8); + } + gen->stack_changed = 1; + } + +JIT_OP_PUSH_STRUCT: note, more_space + [reg, if("((jit_nuint)jit_value_get_nint_constant(insn->value2)) <= 32")] -> { + jit_nuint size; + jit_nuint last_part; + size = 
(jit_nuint)jit_value_get_nint_constant(insn->value2);
+        last_part = size & 0x7;
+        if(last_part)
+        {
+            /* Handle the possible last part smaller than 8 bytes */
+            size -= last_part;
+
+            /* The extra bytes pushed beyond the end of the struct
+               are ignored */
+            x86_64_push_membase_size(inst, $1, size, 8);
+        }
+        /* Push the remaining full pointer-sized parts */
+        while(size > 0)
+        {
+            size -= sizeof(void *);
+            x86_64_push_membase_size(inst, $1, size, 8);
+        }
+        gen->stack_changed = 1;
+    }
+    [reg, clobber(creg), clobber(xreg)] -> {
+        /* Handle arbitrary-sized structures */
+        jit_nuint size;
+        size = (jit_nuint)jit_value_get_nint_constant(insn->value2);
+        /* TODO: Maybe we should check for sizes > 2GB? */
+        x86_64_sub_reg_imm_size(inst, X86_64_RSP, ROUND_STACK(size), 8);
+        inst = memory_copy(gen, inst, X86_64_RSP, 0, $1, 0, size);
+        gen->stack_changed = 1;
+    }
+
+JIT_OP_POP_STACK:
+    [] -> {
+        x86_64_add_reg_imm_size(inst, X86_64_RSP, insn->value1->address, 8);
+        gen->stack_changed = 1;
+    }
+
+JIT_OP_FLUSH_SMALL_STRUCT:
+    [] -> {
+        inst = flush_return_struct(inst, insn->value1);
+    }
+
+JIT_OP_RETURN:
+    [] -> {
+        inst = jump_to_epilog(gen, inst, block);
+    }
+
+JIT_OP_RETURN_REG: manual
+    [] -> {
+        /* Nothing to do here */;
+    }
+
+JIT_OP_RETURN_INT: note
+    [reg("rax")] -> {
+        inst = jump_to_epilog(gen, inst, block);
+    }
+
+JIT_OP_RETURN_LONG: note
+    [reg("rax")] -> {
+        inst = jump_to_epilog(gen, inst, block);
+    }
+
+JIT_OP_RETURN_FLOAT32: note
+    [xreg("xmm0")] -> {
+        inst = jump_to_epilog(gen, inst, block);
+    }
+
+JIT_OP_RETURN_FLOAT64: note
+    [xreg("xmm0")] -> {
+        inst = jump_to_epilog(gen, inst, block);
+    }
+
+JIT_OP_RETURN_NFLOAT: note, stack
+    [freg, clobber(freg)] -> {
+        /* clobber(freg) frees all registers on the fp stack */
+        inst = jump_to_epilog(gen, inst, block);
+    }
+
+JIT_OP_RETURN_SMALL_STRUCT: note
+    [rreg, imm] -> {
+        inst = return_struct(inst, func, $1);
+        inst = jump_to_epilog(gen, inst, block);
+    }
+
+/*
+ * Pointer-relative loads and stores.
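+ *
+ * The rules below special-case a zero offset so that the shorter
+ * register-indirect encoding is used instead of base + displacement.
+ * Sketch of the common pattern (dreg, sreg and offset are placeholders
+ * for $1, $2 and $3):
+ *
+ *    if(offset == 0)
+ *        x86_64_mov_reg_regp_size(inst, dreg, sreg, 8);
+ *    else
+ *        x86_64_mov_reg_membase_size(inst, dreg, sreg, offset, 8);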
+ */
+
+JIT_OP_LOAD_RELATIVE_SBYTE:
+    [=reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_movsx8_reg_regp_size(inst, $1, $2, 8);
+        }
+        else
+        {
+            x86_64_movsx8_reg_membase_size(inst, $1, $2, $3, 8);
+        }
+    }
+
+JIT_OP_LOAD_RELATIVE_UBYTE:
+    [=reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_movzx8_reg_regp_size(inst, $1, $2, 8);
+        }
+        else
+        {
+            x86_64_movzx8_reg_membase_size(inst, $1, $2, $3, 8);
+        }
+    }
+
+JIT_OP_LOAD_RELATIVE_SHORT:
+    [=reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_movsx16_reg_regp_size(inst, $1, $2, 8);
+        }
+        else
+        {
+            x86_64_movsx16_reg_membase_size(inst, $1, $2, $3, 8);
+        }
+    }
+
+JIT_OP_LOAD_RELATIVE_USHORT:
+    [=reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_movzx16_reg_regp_size(inst, $1, $2, 8);
+        }
+        else
+        {
+            x86_64_movzx16_reg_membase_size(inst, $1, $2, $3, 8);
+        }
+    }
+
+JIT_OP_LOAD_RELATIVE_INT:
+    [=reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_reg_regp_size(inst, $1, $2, 4);
+        }
+        else
+        {
+            x86_64_mov_reg_membase_size(inst, $1, $2, $3, 4);
+        }
+    }
+
+JIT_OP_LOAD_RELATIVE_LONG:
+    [=reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_reg_regp_size(inst, $1, $2, 8);
+        }
+        else
+        {
+            x86_64_mov_reg_membase_size(inst, $1, $2, $3, 8);
+        }
+    }
+
+JIT_OP_LOAD_RELATIVE_FLOAT32:
+    [=xreg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_movss_reg_regp(inst, $1, $2);
+        }
+        else
+        {
+            x86_64_movss_reg_membase(inst, $1, $2, $3);
+        }
+    }
+
+JIT_OP_LOAD_RELATIVE_FLOAT64:
+    [=xreg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_movsd_reg_regp(inst, $1, $2);
+        }
+        else
+        {
+            x86_64_movsd_reg_membase(inst, $1, $2, $3);
+        }
+    }
+
+JIT_OP_LOAD_RELATIVE_NFLOAT:
+    [=freg, reg, imm, if("sizeof(jit_nfloat) != sizeof(jit_float64)")] -> {
+        x86_64_fld_membase_size(inst, $2, $3, 10);
+    }
+    [=freg, reg, imm, if("sizeof(jit_nfloat) == sizeof(jit_float64)")] -> {
+        x86_64_fld_membase_size(inst, $2, $3, 8);
+    }
+
+JIT_OP_LOAD_RELATIVE_STRUCT: more_space
+    [=frame, reg, imm, scratch reg, scratch xreg,
+        if("jit_type_get_size(jit_value_get_type(insn->dest)) <= _JIT_MAX_MEMCPY_INLINE")] -> {
+        inst = small_struct_copy(gen, inst, X86_64_RBP, $1, $2, $3,
+                                 jit_value_get_type(insn->dest), $4, $5);
+    }
+    [=frame, reg, imm, clobber(creg), clobber(xreg)] -> {
+        inst = memory_copy(gen, inst, X86_64_RBP, $1, $2, $3,
+                           jit_type_get_size(jit_value_get_type(insn->dest)));
+    }
+
+JIT_OP_STORE_RELATIVE_BYTE: ternary
+    [reg, imm, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_regp_imm_size(inst, $1, $2, 1);
+        }
+        else
+        {
+            x86_64_mov_membase_imm_size(inst, $1, $3, $2, 1);
+        }
+    }
+    [reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_regp_reg_size(inst, $1, $2, 1);
+        }
+        else
+        {
+            x86_64_mov_membase_reg_size(inst, $1, $3, $2, 1);
+        }
+    }
+
+JIT_OP_STORE_RELATIVE_SHORT: ternary
+    [reg, imm, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_regp_imm_size(inst, $1, $2, 2);
+        }
+        else
+        {
+            x86_64_mov_membase_imm_size(inst, $1, $3, $2, 2);
+        }
+    }
+    [reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_regp_reg_size(inst, $1, $2, 2);
+        }
+        else
+        {
+            x86_64_mov_membase_reg_size(inst, $1, $3, $2, 2);
+        }
+    }
+
+JIT_OP_STORE_RELATIVE_INT: ternary
+    [reg, imm, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_regp_imm_size(inst, $1, $2, 4);
+        }
+        else
+        {
+            x86_64_mov_membase_imm_size(inst, $1, $3, $2, 4);
+        }
+    }
+    [reg, reg, imm] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_regp_reg_size(inst, $1, $2, 4);
+        }
+        else
+        {
+            x86_64_mov_membase_reg_size(inst, $1, $3, $2, 4);
+        }
+    }
+
+JIT_OP_STORE_RELATIVE_LONG: ternary
+    [reg, imm, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> {
+        if($3 == 0)
+        {
+            x86_64_mov_regp_imm_size(inst, $1, $2, 8);
+        }
+        
else + { + x86_64_mov_membase_imm_size(inst, $1, $3, $2, 8); + } + } + [reg, reg, imm] -> { + if($3 == 0) + { + x86_64_mov_regp_reg_size(inst, $1, $2, 8); + } + else + { + x86_64_mov_membase_reg_size(inst, $1, $3, $2, 8); + } + } + +JIT_OP_STORE_RELATIVE_FLOAT32: ternary + [reg, imm, imm] -> { + if($3 == 0) + { + x86_64_mov_regp_imm_size(inst, $1, ((jit_int *)($2))[0], 4); + } + else + { + x86_64_mov_membase_imm_size(inst, $1, $3, ((jit_int *)($2))[0], 4); + } + } + [reg, xreg, imm] -> { + if($3 == 0) + { + x86_64_movss_regp_reg(inst, $1, $2); + } + else + { + x86_64_movss_membase_reg(inst, $1, $3, $2); + } + } + +JIT_OP_STORE_RELATIVE_FLOAT64: ternary + [reg, imm, imm] -> { + x86_64_mov_membase_imm_size(inst, $1, $3, ((int *)($2))[0], 4); + x86_64_mov_membase_imm_size(inst, $1, $3 + 4, ((int *)($2))[1], 4); + } + [reg, xreg, imm] -> { + if($3 == 0) + { + x86_64_movsd_regp_reg(inst, $1, $2); + } + else + { + x86_64_movsd_membase_reg(inst, $1, $3, $2); + } + } + +JIT_OP_STORE_RELATIVE_STRUCT: ternary + [reg, frame, imm, scratch reg, scratch xreg, + if("jit_type_get_size(jit_value_get_type(insn->value1)) <= _JIT_MAX_MEMCPY_INLINE")] -> { + inst = small_struct_copy(gen, inst, $1, $3, X86_64_RBP, $2, + jit_value_get_type(insn->value1), $4, $5); + } + [reg, frame, imm, clobber(creg), clobber(xreg)] -> { + inst = memory_copy(gen, inst, $1, $3, X86_64_RBP, $2, + jit_type_get_size(jit_value_get_type(insn->value1))); + } + +JIT_OP_ADD_RELATIVE: + [reg, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> { + if(insn->value2->address != 0) + { + x86_64_add_reg_imm_size(inst, $1, $2, 8); + } + } + +/* + * Array element loads and stores. + */ + +JIT_OP_LOAD_ELEMENT_SBYTE: + [=reg, reg, reg] -> { + x86_64_movsx8_reg_memindex_size(inst, $1, $2, 0, $3, 0, 4); + } + +JIT_OP_LOAD_ELEMENT_UBYTE: + [=reg, reg, reg] -> { + x86_64_movzx8_reg_memindex_size(inst, $1, $2, 0, $3, 0, 4); + } + +JIT_OP_LOAD_ELEMENT_SHORT: + [=reg, reg, reg] -> { + x86_64_movsx16_reg_memindex_size(inst, $1, $2, 0, $3, 1, 4); + } + +JIT_OP_LOAD_ELEMENT_USHORT: + [=reg, reg, reg] -> { + x86_64_movzx16_reg_memindex_size(inst, $1, $2, 0, $3, 1, 4); + } + +JIT_OP_LOAD_ELEMENT_INT: + [=reg, reg, reg] -> { + x86_64_mov_reg_memindex_size(inst, $1, $2, 0, $3, 2, 4); + } + +JIT_OP_LOAD_ELEMENT_LONG: + [=reg, reg, reg] -> { + x86_64_mov_reg_memindex_size(inst, $1, $2, 0, $3, 3, 8); + } + +JIT_OP_LOAD_ELEMENT_FLOAT32: + [=xreg, reg, reg] -> { + x86_64_movss_reg_memindex(inst, $1, $2, 0, $3, 2); + } + +JIT_OP_LOAD_ELEMENT_FLOAT64: + [=xreg, reg, reg] -> { + x86_64_movsd_reg_memindex(inst, $1, $2, 0, $3, 3); + } + +JIT_OP_STORE_ELEMENT_BYTE: ternary + [reg, reg, reg] -> { + x86_64_mov_memindex_reg_size(inst, $1, 0, $2, 0, $3, 1); + } + +JIT_OP_STORE_ELEMENT_SHORT: ternary + [reg, reg, reg] -> { + x86_64_mov_memindex_reg_size(inst, $1, 0, $2, 1, $3, 2); + } + +JIT_OP_STORE_ELEMENT_INT: ternary + [reg, reg, reg] -> { + x86_64_mov_memindex_reg_size(inst, $1, 0, $2, 2, $3, 4); + } + +JIT_OP_STORE_ELEMENT_LONG: ternary + [reg, reg, imm] -> { + if($3 >= jit_min_int && $3 <= jit_max_int) + { + x86_64_mov_memindex_imm_size(inst, $1, 0, $2, 3, $3, 8); + } + else + { + jit_int *long_ptr = (jit_int *)(&($3)); + + x86_64_mov_memindex_imm_size(inst, $1, 0, $2, 3, long_ptr[0], 4); + x86_64_mov_memindex_imm_size(inst, $1, 4, $2, 3, long_ptr[1], 4); + } + } + [reg, reg, reg] -> { + x86_64_mov_memindex_reg_size(inst, $1, 0, $2, 3, $3, 8); + } + +JIT_OP_STORE_ELEMENT_FLOAT32: ternary + [reg, reg, xreg] -> { + x86_64_movss_memindex_reg(inst, $1, 0, $2, 2, $3); + } 
+ +JIT_OP_STORE_ELEMENT_FLOAT64: ternary + [reg, reg, xreg] -> { + x86_64_movsd_memindex_reg(inst, $1, 0, $2, 3, $3); + } + +/* + * Arithmetic opcodes. + */ + +/* + * 4 byte integer versions + */ + +JIT_OP_IADD: commutative + [reg, imm] -> { + if($2 == 1) + { + x86_64_inc_reg_size(inst, $1, 4); + } + else + { + x86_64_add_reg_imm_size(inst, $1, $2, 4); + } + } + [reg, local] -> { + x86_64_add_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + } + [reg, reg] -> { + x86_64_add_reg_reg_size(inst, $1, $2, 4); + } + +JIT_OP_ISUB: + [reg, imm] -> { + if($2 == 1) + { + x86_64_dec_reg_size(inst, $1, 4); + } + else + { + x86_64_sub_reg_imm_size(inst, $1, $2, 4); + } + } + [reg, local] -> { + x86_64_sub_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + } + [reg, reg] -> { + x86_64_sub_reg_reg_size(inst, $1, $2, 4); + } + +JIT_OP_INEG: + [reg] -> { + x86_64_neg_reg_size(inst, $1, 4); + } + +/* + * 8 byte integer versions + */ + +JIT_OP_LADD: commutative + [reg, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> { + if($2 == 1) + { + x86_64_inc_reg_size(inst, $1, 8); + } + else + { + x86_64_add_reg_imm_size(inst, $1, $2, 8); + } + } + [reg, local] -> { + x86_64_add_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + } + [reg, reg] -> { + x86_64_add_reg_reg_size(inst, $1, $2, 8); + } + +JIT_OP_LSUB: + [reg, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> { + if($2 == 1) + { + x86_64_dec_reg_size(inst, $1, 8); + } + else + { + x86_64_sub_reg_imm_size(inst, $1, $2, 8); + } + } + [reg, local] -> { + x86_64_sub_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + } + [reg, reg] -> { + x86_64_sub_reg_reg_size(inst, $1, $2, 8); + } + +JIT_OP_LNEG: + [reg] -> { + x86_64_neg_reg_size(inst, $1, 8); + } +/* + * single precision float versions + */ + +JIT_OP_FADD: + [xreg, imm] -> { + _jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_ADD, $1, (jit_float32 *)$2); + } + [xreg, local] -> { + x86_64_addss_reg_membase(inst, $1, X86_64_RBP, $2); + } + [xreg, xreg] -> { + x86_64_addss_reg_reg(inst, $1, $2); + } + +JIT_OP_FSUB: + [xreg, imm] -> { + _jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_SUB, $1, (jit_float32 *)$2); + } + [xreg, xreg] -> { + x86_64_subss_reg_reg(inst, $1, $2); + } + [xreg, local] -> { + x86_64_subss_reg_membase(inst, $1, X86_64_RBP, $2); + } + +JIT_OP_FMUL: + [xreg, imm] -> { + _jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_MUL, $1, (jit_float32 *)$2); + } + [xreg, xreg] -> { + x86_64_mulss_reg_reg(inst, $1, $2); + } + [xreg, local] -> { + x86_64_mulss_reg_membase(inst, $1, X86_64_RBP, $2); + } + +JIT_OP_FDIV: + [xreg, imm] -> { + _jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_DIV, $1, (jit_float32 *)$2); + } + [xreg, xreg] -> { + x86_64_divss_reg_reg(inst, $1, $2); + } + [xreg, local] -> { + x86_64_divss_reg_membase(inst, $1, X86_64_RBP, $2); + } + +/* + * double precision float versions + */ + +JIT_OP_DADD: + [xreg, imm] -> { + _jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_ADD, $1, (jit_float64 *)$2); + } + [xreg, local] -> { + x86_64_addsd_reg_membase(inst, $1, X86_64_RBP, $2); + } + [xreg, xreg] -> { + x86_64_addsd_reg_reg(inst, $1, $2); + } + +JIT_OP_DSUB: + [xreg, imm] -> { + _jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_SUB, $1, (jit_float64 *)$2); + } + [xreg, local] -> { + x86_64_subsd_reg_membase(inst, $1, X86_64_RBP, $2); + } + [xreg, xreg] -> { + x86_64_subsd_reg_reg(inst, $1, $2); + } + +JIT_OP_DMUL: + [xreg, imm] -> { + _jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_MUL, $1, (jit_float64 *)$2); + } + [xreg, local] -> { + x86_64_mulsd_reg_membase(inst, $1, X86_64_RBP, $2); + } + 
[xreg, xreg] -> { + x86_64_mulsd_reg_reg(inst, $1, $2); + } + +JIT_OP_DDIV: + [xreg, imm] -> { + _jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_DIV, $1, (jit_float64 *)$2); + } + [xreg, local] -> { + x86_64_divsd_reg_membase(inst, $1, X86_64_RBP, $2); + } + [xreg, xreg] -> { + x86_64_divsd_reg_reg(inst, $1, $2); + } + +/* + * Bitwise opcodes. + */ + +JIT_OP_IAND: commutative + [reg, imm] -> { + x86_64_and_reg_imm_size(inst, $1, $2, 4); + } + [reg, local] -> { + x86_64_and_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + } + [reg, reg] -> { + x86_64_and_reg_reg_size(inst, $1, $2, 4); + } + +JIT_OP_IOR: commutative + [reg, imm] -> { + x86_64_or_reg_imm_size(inst, $1, $2, 4); + } + [reg, local] -> { + x86_64_or_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + } + [reg, reg] -> { + x86_64_or_reg_reg_size(inst, $1, $2, 4); + } + +JIT_OP_IXOR: commutative + [reg, imm] -> { + x86_64_xor_reg_imm_size(inst, $1, $2, 4); + } + [reg, local] -> { + x86_64_xor_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + } + [reg, reg] -> { + x86_64_xor_reg_reg_size(inst, $1, $2, 4); + } + +JIT_OP_INOT: + [reg] -> { + x86_64_not_reg_size(inst, $1, 4); + } + +JIT_OP_LAND: commutative + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_and_reg_imm_size(inst, $1, $2, 8); + } + [reg, local] -> { + x86_64_and_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + } + [reg, reg] -> { + x86_64_and_reg_reg_size(inst, $1, $2, 8); + } + +JIT_OP_LOR: commutative + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_or_reg_imm_size(inst, $1, $2, 8); + } + [reg, local] -> { + x86_64_or_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + } + [reg, reg] -> { + x86_64_or_reg_reg_size(inst, $1, $2, 8); + } + +JIT_OP_LXOR: commutative + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_xor_reg_imm_size(inst, $1, $2, 8); + } + [reg, local] -> { + x86_64_xor_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + } + [reg, reg] -> { + x86_64_xor_reg_reg_size(inst, $1, $2, 8); + } + +JIT_OP_LNOT: + [reg] -> { + x86_64_not_reg_size(inst, $1, 8); + } + + + +/* + * Branch opcodes. 
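+ *
+ * output_branch is given the short form conditional jump opcode; the
+ * codes used below are the standard x86 Jcc encodings:
+ *
+ *    0x74 je    0x75 jne
+ *    0x7C jl    0x7D jge   0x7E jle   0x7F jg    (signed)
+ *    0x72 jb    0x73 jae   0x76 jbe   0x77 ja    (unsigned)
+ *
+ * Tests against zero are emitted as "or reg, reg" rather than
+ * "cmp reg, 0" because the or form has a shorter encoding and sets
+ * the same flags.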
+ */ + +JIT_OP_BR: branch + [] -> { + inst = output_branch(func, inst, 0xEB /* jmp */, insn); + } + +JIT_OP_BR_IFALSE: branch + [reg] -> { + x86_64_or_reg_reg_size(inst, $1, $1, 4); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + +JIT_OP_BR_ITRUE: branch + [reg] -> { + x86_64_or_reg_reg_size(inst, $1, $1, 4); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + +JIT_OP_BR_IEQ: branch + [reg, immzero] -> { + x86_64_or_reg_reg_size(inst, $1, $1, 4); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + +JIT_OP_BR_INE: branch + [reg, immzero] -> { + x86_64_or_reg_reg_size(inst, $1, $1, 4); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + +JIT_OP_BR_ILT: branch + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x7C /* lt */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x7C /* lt */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x7C /* lt */, insn); + } + +JIT_OP_BR_ILT_UN: branch + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x72 /* lt_un */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x72 /* lt_un */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x72 /* lt_un */, insn); + } + +JIT_OP_BR_ILE: branch + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x7E /* le */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x7E /* le */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x7E /* le */, insn); + } + +JIT_OP_BR_ILE_UN: branch + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x76 /* le_un */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x76 /* le_un */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x76 /* le_un */, insn); + } + +JIT_OP_BR_IGT: branch + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x7F /* gt */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x7F /* gt */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x7F /* gt */, insn); + } + 
+JIT_OP_BR_IGT_UN: branch + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x77 /* gt_un */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x77 /* gt_un */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x77 /* gt_un */, insn); + } + +JIT_OP_BR_IGE: branch + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x7D /* ge */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x7D /* ge */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x7D /* ge */, insn); + } + +JIT_OP_BR_IGE_UN: branch + [reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x73 /* ge_un */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 4); + inst = output_branch(func, inst, 0x73 /* ge_un */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 4); + inst = output_branch(func, inst, 0x73 /* ge_un */, insn); + } + +JIT_OP_BR_LFALSE: branch + [reg] -> { + x86_64_or_reg_reg_size(inst, $1, $1, 8); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + +JIT_OP_BR_LTRUE: branch + [reg] -> { + x86_64_or_reg_reg_size(inst, $1, $1, 8); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + +JIT_OP_BR_LEQ: branch + [reg, immzero] -> { + x86_64_or_reg_reg_size(inst, $1, $1, 8); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x74 /* eq */, insn); + } + +JIT_OP_BR_LNE: branch + [reg, immzero] -> { + x86_64_or_reg_reg_size(inst, $1, $1, 8); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x75 /* ne */, insn); + } + +JIT_OP_BR_LLT: branch + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x7C /* lt */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x7C /* lt */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x7C /* lt */, insn); + } + +JIT_OP_BR_LLT_UN: branch + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x72 /* lt_un */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x72 /* lt_un */, insn); + } + [reg, reg] -> { + 
x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x72 /* lt_un */, insn); + } + +JIT_OP_BR_LLE: branch + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x7E /* le */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x7E /* le */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x7E /* le */, insn); + } + +JIT_OP_BR_LLE_UN: branch + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x76 /* le_un */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x76 /* le_un */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x76 /* le_un */, insn); + } + +JIT_OP_BR_LGT: branch + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x7F /* gt */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x7F /* gt */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x7F /* gt */, insn); + } + +JIT_OP_BR_LGT_UN: branch + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x77 /* gt_un */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x77 /* gt_un */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x77 /* gt_un */, insn); + } + +JIT_OP_BR_LGE: branch + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x7D /* ge */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x7D /* ge */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x7D /* ge */, insn); + } + +JIT_OP_BR_LGE_UN: branch + [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + x86_64_cmp_reg_imm_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x73 /* ge_un */, insn); + } + [reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $1, X86_64_RBP, $2, 8); + inst = output_branch(func, inst, 0x73 /* ge_un */, insn); + } + [reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $1, $2, 8); + inst = output_branch(func, inst, 0x73 /* ge_un */, insn); + } + +/* + * Comparison opcodes. 
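+ *
+ * setcc_reg(inst, reg, cc, is_signed) materializes the result of the
+ * preceding compare as 0 or 1 in reg (presumably a setCC followed by a
+ * zero extension); the last argument selects the signed form of the
+ * condition.  Sketch with placeholder registers:
+ *
+ *    x86_64_cmp_reg_reg_size(inst, lreg, rreg, 8);
+ *    inst = setcc_reg(inst, dreg, X86_CC_LT, 1);   -- dreg = lreg < rreg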
+ */ + +JIT_OP_IEQ: + [=reg, reg, immzero] -> { + x86_64_or_reg_reg_size(inst, $2, $2, 4); + inst = setcc_reg(inst, $1, X86_CC_EQ, 0); + } + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_EQ, 0); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_EQ, 0); + } + [=reg, reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_EQ, 0); + } + +JIT_OP_INE: + [=reg, reg, immzero] -> { + x86_64_or_reg_reg_size(inst, $2, $2, 4); + inst = setcc_reg(inst, $1, X86_CC_NE, 0); + } + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_NE, 0); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_NE, 0); + } + [=reg, reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_NE, 0); + } + +JIT_OP_ILT: + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LT, 1); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LT, 1); + } + [=reg, reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LT, 1); + } + +JIT_OP_ILT_UN: + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LT, 0); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LT, 0); + } + [=reg, reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LT, 0); + } + +JIT_OP_ILE: + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LE, 1); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LE, 1); + } + [=reg, reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LE, 1); + } + +JIT_OP_ILE_UN: + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LE, 0); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LE, 0); + } + [=reg, reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_LE, 0); + } + +JIT_OP_IGT: + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_GT, 1); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_GT, 1); + } + [=reg, reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_GT, 1); + } + +JIT_OP_IGT_UN: + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_GT, 0); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_GT, 0); + } + [=reg, reg, reg] -> { + x86_64_cmp_reg_reg_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_GT, 0); + } + +JIT_OP_IGE: + [=reg, reg, imm] -> { + x86_64_cmp_reg_imm_size(inst, $2, $3, 4); + inst = setcc_reg(inst, $1, X86_CC_GE, 1); + } + [=reg, reg, local] -> { + x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4); + inst = setcc_reg(inst, $1, 
+
+JIT_OP_IGE:
+	[=reg, reg, imm] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
+	}
+
+JIT_OP_IGE_UN:
+	[=reg, reg, imm] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 4);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 4);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 4);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
+	}
+
+JIT_OP_LEQ:
+	[=reg, reg, immzero] -> {
+		x86_64_or_reg_reg_size(inst, $2, $2, 8);
+		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
+	}
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
+	}
+
+JIT_OP_LNE:
+	[=reg, reg, immzero] -> {
+		x86_64_or_reg_reg_size(inst, $2, $2, 8);
+		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
+	}
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_NE, 0);
+	}
+
+JIT_OP_LLT:
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LT, 1);
+	}
+
+JIT_OP_LLT_UN:
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LT, 0);
+	}
+
+JIT_OP_LLE:
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LE, 1);
+	}
+
+JIT_OP_LLE_UN:
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_LE, 0);
+	}
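The if("$3 >= jit_min_int && $3 <= jit_max_int") guards on the 64-bit rules exist because x86-64 has no cmp r64, imm64 encoding: the immediate form takes at most a sign-extended 32-bit value. Constants outside that range fail the guard and fall through to the register pattern, where the constant is first materialised in a register. The guard as a plain C predicate (illustration only):

	/* A 64-bit constant can be used as a cmp immediate only if it survives
	   the sign-extending round trip through 32 bits. */
	static int fits_in_sext_imm32(jit_long value)
	{
		return value >= jit_min_int && value <= jit_max_int;
	}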
+
+JIT_OP_LGT:
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GT, 1);
+	}
+
+JIT_OP_LGT_UN:
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GT, 0);
+	}
+
+JIT_OP_LGE:
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 1);
+	}
+
+JIT_OP_LGE_UN:
+	[=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> {
+		x86_64_cmp_reg_imm_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
+	}
+	[=reg, reg, local] -> {
+		x86_64_cmp_reg_membase_size(inst, $2, X86_64_RBP, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
+	}
+	[=reg, reg, reg] -> {
+		x86_64_cmp_reg_reg_size(inst, $2, $3, 8);
+		inst = setcc_reg(inst, $1, X86_CC_GE, 0);
+	}
+
+/*
+ * Pointer check opcodes.
+ */
+
+JIT_OP_CHECK_NULL: note
+	[reg] -> {
+#if 0 && defined(JIT_USE_SIGNALS)
+		/* if $1 contains NULL this generates SEGV and the signal
+		   handler will throw the exception */
+		x86_64_cmp_reg_membase_size(inst, $1, $1, 0, 8);
+#else
+		unsigned char *patch;
+		x86_64_or_reg_reg_size(inst, $1, $1, 8);
+		patch = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		inst = throw_builtin(inst, func, JIT_RESULT_NULL_REFERENCE);
+		x86_patch(patch, inst);
+#endif
+	}
+
+/*
+ * Function calls.
+ */
+
+JIT_OP_CALL:
+	[] -> {
+		jit_function_t func = (jit_function_t)(insn->dest);
+		inst = x86_64_call_code(inst, (jit_nint)jit_function_to_closure(func));
+	}
+
+JIT_OP_CALL_TAIL:
+	[] -> {
+		jit_function_t func = (jit_function_t)(insn->dest);
+		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
+		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
+		x86_64_jump_to_code(inst, (jit_nint)jit_function_to_closure(func));
+	}
+
+JIT_OP_CALL_INDIRECT:
+	[] -> {
+		x86_64_call_reg(inst, X86_64_SCRATCH);
+	}
+
+JIT_OP_CALL_INDIRECT_TAIL:
+	[] -> {
+		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
+		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
+		x86_64_jmp_reg(inst, X86_64_SCRATCH);
+	}
+
+JIT_OP_CALL_VTABLE_PTR:
+	[] -> {
+		x86_64_call_reg(inst, X86_64_SCRATCH);
+	}
+
+JIT_OP_CALL_VTABLE_PTR_TAIL:
+	[] -> {
+		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
+		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
+		x86_64_jmp_reg(inst, X86_64_SCRATCH);
+	}
+
+JIT_OP_CALL_EXTERNAL:
+	[] -> {
+		inst = x86_64_call_code(inst, (jit_nint)(insn->dest));
+	}
+
+JIT_OP_CALL_EXTERNAL_TAIL:
+	[] -> {
+		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
+		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
+		x86_64_jump_to_code(inst, (jit_nint)(insn->dest));
+	}
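Every *_TAIL rule above emits the same three-step hand-off: restore the stack pointer from the frame pointer, pop the caller's saved rbp, and jump to the target instead of calling it, so the callee returns directly to this function's caller. A sketch of that shared shape as a hypothetical helper over the patch's macros:

	/* Hypothetical helper showing the shared tail-call epilogue:
	 *     mov rsp, rbp    ; discard the local frame
	 *     pop rbp         ; restore the caller's frame pointer
	 *     jmp target      ; transfer without pushing a return address
	 */
	static unsigned char *emit_tail_transfer(unsigned char *inst, jit_nint target)
	{
		x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
		x86_64_pop_reg_size(inst, X86_64_RBP, 8);
		x86_64_jump_to_code(inst, target);
		return inst;
	}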
+
+
+/*
+ * Exception handling.
+ */
+
+JIT_OP_THROW: branch
+	[reg] -> {
+		x86_64_mov_reg_reg_size(inst, X86_64_RDI, $1, 8);
+		if(func->builder->setjmp_value != 0)
+		{
+			jit_nint pc_offset;
+
+			/* We have a "setjmp" block in the current function,
+			   so we must record the location of the throw first */
+			_jit_gen_fix_value(func->builder->setjmp_value);
+			pc_offset = func->builder->setjmp_value->frame_offset +
+						jit_jmp_catch_pc_offset;
+
+			x86_64_lea_membase_size(inst, X86_64_SCRATCH, X86_64_RIP, 0, 8);
+			x86_64_mov_membase_reg_size(inst, X86_64_RBP, pc_offset,
+										X86_64_SCRATCH, 8);
+		}
+		inst = x86_64_call_code(inst, (jit_nint)jit_exception_throw);
+	}
+
+JIT_OP_RETHROW: manual
+	[] -> { /* Not used in native code back ends */ }
+
+JIT_OP_LOAD_PC:
+	[=reg] -> {
+		x86_64_lea_membase_size(inst, $1, X86_64_RIP, 0, 8);
+	}
+
+JIT_OP_LOAD_EXCEPTION_PC: manual
+	[] -> { /* Not used in native code back ends */ }
+
+JIT_OP_ENTER_FINALLY:
+	[] -> {
+		/* The return address is on the stack */
+		x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
+	}
+
+JIT_OP_LEAVE_FINALLY: branch
+	[] -> {
+		/* The "finally" return address is on the stack */
+		x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
+		x86_64_ret(inst);
+	}
+
+JIT_OP_CALL_FINALLY: branch
+	[] -> {
+		jit_block_t block;
+
+		block = jit_block_from_label(func, (jit_label_t)(insn->dest));
+		if(!block)
+		{
+			return;
+		}
+
+		if(block->address)
+		{
+			inst = x86_64_call_code(inst, (jit_nint)block->address);
+		}
+		else
+		{
+			jit_int fixup;
+
+			if(block->fixup_list)
+			{
+				fixup = _JIT_CALC_FIXUP(block->fixup_list, inst + 1);
+			}
+			else
+			{
+				fixup = 0;
+			}
+			block->fixup_list = (void *)(inst + 1);
+			x86_64_call_imm(inst, fixup);
+		}
+	}
+
+JIT_OP_ADDRESS_OF_LABEL:
+	[=reg] -> {
+		jit_int *fixup;
+
+		block = jit_block_from_label(func, (jit_label_t)(insn->value1));
+		if(block->address)
+		{
+			/* The label is in the current function so we assume that the */
+			/* displacement to the current instruction is in the +-2GB range */
+			x86_64_lea_membase_size(inst, $1, X86_64_RIP, 0, 8);
+			fixup = (jit_int *)(inst - 4);
+			fixup[0] = (jit_int)((jit_nint)block->address - (jit_nint)inst);
+		}
+		else
+		{
+			/* Output a placeholder and record on the block's fixup list */
+			/* The label is in the current function so we assume that the */
+			/* displacement to the current instruction will be in the +-2GB range */
+			x86_64_lea_membase_size(inst, $1, X86_64_RIP, 0, 8);
+			fixup = (jit_int *)(inst - 4);
+			if(block->fixup_list)
+			{
+				fixup[0] = _JIT_CALC_FIXUP(block->fixup_list, fixup);
+			}
+			block->fixup_list = (void *)fixup;
+		}
+	}
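JIT_OP_ADDRESS_OF_LABEL relies on the layout of the lea it just emitted: with a RIP base and a zero displacement, the final four bytes of the instruction are the disp32 hole, and RIP-relative displacements are measured from the end of the instruction. That is why the rule steps back four bytes and stores block->address minus the post-instruction value of inst. A sketch of the fixup arithmetic, with hypothetical names:

	/* Patch the disp32 of an already-emitted "lea reg, [rip+0]" so that it
	   resolves to target (illustration only; names are hypothetical). */
	static void patch_rip_disp32(unsigned char *end_of_insn, void *target)
	{
		jit_int *hole = (jit_int *)(end_of_insn - 4);
		/* displacement is relative to the first byte after the instruction */
		hole[0] = (jit_int)((jit_nint)target - (jit_nint)end_of_insn);
	}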
+
+/*
+ * Block operations.
+ */
+
+JIT_OP_MEMCPY: ternary
+	[any, any, imm, if("$3 <= 0")] -> { }
+	[reg, reg, imm, scratch reg, scratch xreg,
+		if("$3 <= _JIT_MAX_MEMCPY_INLINE")] -> {
+		inst = small_block_copy(gen, inst, $1, 0, $2, 0, $3, $4, $5, 0);
+	}
+	[reg, reg, imm, clobber(creg), clobber(xreg)] -> {
+		inst = memory_copy(gen, inst, $1, 0, $2, 0, $3);
+	}
+
+JIT_OP_JUMP_TABLE: ternary, branch
+	[reg, imm, imm, scratch reg, space("64")] -> {
+		unsigned char *patch_jump_table;
+		unsigned char *patch_fall_through;
+		int index;
+		jit_label_t *labels;
+		jit_nint num_labels;
+		jit_block_t block;
+
+		labels = (jit_label_t *) $2;
+		num_labels = $3;
+
+		patch_jump_table = (unsigned char *)_jit_cache_alloc(&(gen->posn),
+			sizeof(void *) * $3);
+		if(!patch_jump_table)
+		{
+			/* The cache is full */
+			return;
+		}
+
+		x86_64_mov_reg_imm_size(inst, $4, (jit_nint)patch_jump_table, 8);
+		x86_64_cmp_reg_imm_size(inst, $1, num_labels, 8);
+		patch_fall_through = inst;
+		x86_branch32(inst, X86_CC_AE, 0, 0);
+
+		if(func->builder->position_independent)
+		{
+			/* TODO */
+			TODO();
+		}
+		else
+		{
+			x86_64_jmp_memindex(inst, $4, 0, $1, 3);
+		}
+
+		for(index = 0; index < num_labels; index++)
+		{
+			block = jit_block_from_label(func, labels[index]);
+			if(!block)
+			{
+				return;
+			}
+
+			if(func->builder->position_independent)
+			{
+				/* TODO */
+				TODO();
+			}
+			else
+			{
+				if(block->address)
+				{
+					x86_64_imm_emit64(patch_jump_table, (jit_nint)(block->address));
+				}
+				else
+				{
+					/* Output a placeholder and record on the block's absolute fixup list */
+					x86_64_imm_emit64(patch_jump_table, (jit_nint)(block->fixup_absolute_list));
+					block->fixup_absolute_list = (void *)(patch_jump_table - 8);
+				}
+			}
+		}
+
+		x86_patch(patch_fall_through, inst);
+	}
diff --git a/jit/jit-rules.h b/jit/jit-rules.h
index 624316f..c3eefe0 100644
--- a/jit/jit-rules.h
+++ b/jit/jit-rules.h
@@ -42,6 +42,9 @@ extern "C" {
 #elif defined(__alpha) || defined(__alpha__)
 	#define JIT_BACKEND_ALPHA	1
 	#define JIT_HAVE_BACKEND	1
+#elif defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__)
+	#define JIT_BACKEND_X86_64	1
+	#define JIT_HAVE_BACKEND	1
 #endif
 /*#define JIT_BACKEND_ARM 1*/
 #if !defined(JIT_HAVE_BACKEND)
@@ -88,6 +91,8 @@ typedef struct
 	#include "jit-rules-arm.h"
 #elif defined(JIT_BACKEND_ALPHA)
 	#include "jit-rules-alpha.h"
+#elif defined(JIT_BACKEND_X86_64)
+	#include "jit-rules-x86-64.h"
 #else
 	#error "unknown jit backend type"
 #endif
diff --git a/jit/jit-value.c b/jit/jit-value.c
index 40fcafc..86867b1 100644
--- a/jit/jit-value.c
+++ b/jit/jit-value.c
@@ -447,8 +447,13 @@ jit_value_t jit_value_create_constant
 
 	case JIT_TYPE_LONG:
 	case JIT_TYPE_ULONG:
+#ifdef JIT_NATIVE_INT64
+		return jit_value_create_nint_constant
+			(func, const_value->type, const_value->un.long_value);
+#else
 		return jit_value_create_long_constant
 			(func, const_value->type, const_value->un.long_value);
+#endif
 
 	case JIT_TYPE_FLOAT32:
 		return jit_value_create_float32_constant
-- 
2.47.3
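One note on the jit-value.c hunk above: JIT_NATIVE_INT64 is defined on targets where jit_nint is 64 bits wide, so a long/ulong constant fits losslessly into a native-int constant and avoids the two-word long representation needed on 32-bit targets. A sketch of the resulting behaviour from the API side, assuming a standard libjit build:

	#include <jit/jit.h>

	/* On an LP64 target this returns a native-int constant; on a 32-bit
	   target it falls back to the long constant representation. */
	jit_value_t make_long_const(jit_function_t func, jit_long value)
	{
	#ifdef JIT_NATIVE_INT64
		return jit_value_create_nint_constant(func, jit_type_long,
						      (jit_nint)value);
	#else
		return jit_value_create_long_constant(func, jit_type_long, value);
	#endif
	}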