From: Klaus Treichel Date: Mon, 24 Mar 2008 12:42:50 +0000 (+0000) Subject: Add more X86_64 support. X-Git-Tag: before.move.to.git~98 X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=8931ed85dbeda048ffc06dcd670fda10b8a44421;p=francis%2Flibjit.git Add more X86_64 support. --- diff --git a/ChangeLog b/ChangeLog index 80c766f..04241bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,22 @@ * jit/jit-function.c (cleanup_on_restart): Reset the epilog_fixup. + * jit/jit-apply-x86-64.c: Move parameter passing handling from + jit-rules-x86-64.c to this file because they are needed by apply too. + + * jit/jit-apply-x86-64.h: Add declarations needed for parameter passing. + + * jit/jit-gen-x86-64.h: Add macros for shift opcodes, register exchanges + and moves from general purpose register to xmm register and vice versa. + + * jit/jit-rules-x86-64.c: Move parameter handling to jit-apply.x86-64.c. + Add handling of structs in _jit_gen_load_value and _spill_reg. Fix more + parameter passing issues. + + * jit/jit-rules-x86-64.ins: Do some cleanup. Add casts to jit_nint when + checking for valid ranges. Add integer and long shift opcodes. Add + memset opcode. + 2008-03-04 Klaus Treichel * jit/Makefile.am: Add jit-rules-x86-64.inc to CLEANFILES to fix diff --git a/jit/jit-apply-x86-64.c b/jit/jit-apply-x86-64.c index 3d07e98..895fccc 100644 --- a/jit/jit-apply-x86-64.c +++ b/jit/jit-apply-x86-64.c @@ -28,6 +28,21 @@ #include "jit-gen-x86-64.h" +/* + * X86_64 argument types as specified in the X86_64 SysV ABI. 
+ */ +#define X86_64_ARG_NO_CLASS 0x00 +#define X86_64_ARG_INTEGER 0x01 +#define X86_64_ARG_MEMORY 0x02 +#define X86_64_ARG_SSE 0x11 +#define X86_64_ARG_SSEUP 0x12 +#define X86_64_ARG_X87 0x21 +#define X86_64_ARG_X87UP 0x22 + +#define X86_64_ARG_IS_SSE(arg) (((arg) & 0x10) != 0) +#define X86_64_ARG_IS_X87(arg) (((arg) & 0x20) != 0) + + void _jit_create_closure(unsigned char *buf, void *func, void *closure, void *_type) { @@ -97,7 +112,7 @@ void *_jit_create_redirector(unsigned char *buf, void *func, /* Save all registers used for argument passing */ /* At this point RSP is not aligned on a 16 byte boundary because */ /* the return address is pushed on the stack. */ - /* We need (7 * 8) + (8 * 8) bytes for the registers */ + /* We need (7 * 8) + (8 * 16) bytes for the registers */ x86_64_sub_reg_imm_size(buf, X86_64_RSP, 0xB8, 8); x86_64_mov_membase_reg_size(buf, X86_64_RSP, 0xB0, X86_64_RAX, 8); @@ -181,7 +196,7 @@ void *_jit_create_indirector(unsigned char *buf, void **entry) } else { - jit_nint offset = (jit_nint)entry - ((jit_nint)buf + 7); + jit_nint offset = (jit_nint)entry - ((jit_nint)buf + 6); if((offset >= jit_min_int) && (offset <= jit_max_int)) { @@ -230,4 +245,827 @@ void _jit_pad_buffer(unsigned char *buf, int len) } } +/* + * Allcate the slot for a parameter passed on the stack. 
+ */
+static void
+_jit_alloc_param_slot(jit_param_passing_t *passing, _jit_param_t *param,
+					  jit_type_t type)
+{
+	jit_int size = jit_type_get_size(type);
+	jit_int alignment = jit_type_get_alignment(type);
+
+	/* Expand the size to a multiple of the stack slot size */
+	size = ROUND_STACK(size);
+
+	/* Expand the alignment to a multiple of the stack slot size */
+	/* We expect the alignment to be a power of two after this step */
+	alignment = ROUND_STACK(alignment);
+
+	/* Make sure the current offset is aligned properly for the type */
+	if((passing->stack_size & (alignment - 1)) != 0)
+	{
+		/* Pad up to the next multiple of alignment: alignment minus the current remainder */
+		jit_int padding = alignment - (passing->stack_size & (alignment - 1));
+
+		/* Add the padding to the stack region */
+		passing->stack_size += padding;
+
+		/* record the number of pad words needed after pushing this arg */
+		param->stack_pad = STACK_SLOTS_USED(padding);
+	}
+	/* Record the (now aligned) offset of the parameter in the arg region. */
+	param->un.offset = passing->stack_size;
+
+	/* And increase the argument region used by the rounded size. */
+	passing->stack_size += size;
+}
+
+/*
+ * Return 1 if the normalized type is a structure or union, 0 otherwise (also 0 when normalization yields a null type).
+ */
+static int
+is_struct_or_union(jit_type_t type)
+{
+	type = jit_type_normalize(type);
+	if(type)
+	{
+		if(type->kind == JIT_TYPE_STRUCT || type->kind == JIT_TYPE_UNION)
+		{
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Classify the argument type.
+ * The type has to be in its normalized form.
+ */
+static int
+_jit_classify_arg(jit_type_t arg_type, int is_return)
+{
+	switch(arg_type->kind)
+	{
+		case JIT_TYPE_SBYTE:
+		case JIT_TYPE_UBYTE:
+		case JIT_TYPE_SHORT:
+		case JIT_TYPE_USHORT:
+		case JIT_TYPE_INT:
+		case JIT_TYPE_UINT:
+		case JIT_TYPE_NINT:
+		case JIT_TYPE_NUINT:
+		case JIT_TYPE_LONG:
+		case JIT_TYPE_ULONG:
+		case JIT_TYPE_SIGNATURE:
+		case JIT_TYPE_PTR:
+		{
+			return X86_64_ARG_INTEGER;
+		}
+		break;
+
+		case JIT_TYPE_FLOAT32:
+		case JIT_TYPE_FLOAT64:
+		{
+			return X86_64_ARG_SSE;
+		}
+		break;
+
+		case JIT_TYPE_NFLOAT:
+		{
+			/* we assume the nfloat type to be long double (80bit) */
+			if(is_return)
+			{
+				return X86_64_ARG_X87;	/* only a return value gets the X87 class */
+			}
+			else
+			{
+				return X86_64_ARG_MEMORY;	/* as an argument it is classified MEMORY */
+			}
+		}
+		break;
+
+		case JIT_TYPE_STRUCT:
+		case JIT_TYPE_UNION:
+		{
+			int size = jit_type_get_size(arg_type);
+
+			if(size > 16)
+			{
+				return X86_64_ARG_MEMORY;
+			}
+			else if(size <= 8)
+			{
+				return X86_64_ARG_INTEGER;	/* the field types are not inspected here */
+			}
+			/* For structs and unions with sizes between 8 and 16 bytes */
+			/* we have to look at the elements. */
+			/* TODO: not implemented; control leaves the switch and NO_CLASS is returned */
+		}
+	}
+	return X86_64_ARG_NO_CLASS;	/* kinds not listed above and 9..16 byte aggregates */
+}
+
+/*
+ * On X86_64 the alignment of native types matches their size.
+ * This leads to the result that all types except nfloats and aggregates
+ * (structs and unions) must start and end in an eightbyte (or the part
+ * we are looking at).
+ */
+static int
+_jit_classify_structpart(jit_type_t struct_type, unsigned int start,
+						 unsigned int start_offset, unsigned int end_offset)
+{
+	int arg_class = X86_64_ARG_NO_CLASS;	/* stays NO_CLASS if no field overlaps the part */
+	unsigned int num_fields = jit_type_num_fields(struct_type);
+	unsigned int current_field;
+
+	for(current_field = 0; current_field < num_fields; ++current_field)
+	{
+		jit_nuint field_offset = jit_type_get_offset(struct_type,
+													 current_field);
+
+		if(field_offset <= end_offset)
+		{
+			/* The field starts at a place that's interesting for us */
+			jit_type_t field_type = jit_type_get_field(struct_type,
+													   current_field);
+			jit_nuint field_size = jit_type_get_size(field_type);
+
+			if(field_offset + field_size > start_offset)
+			{
+				/* The field is at least partially in the part we are */
+				/* looking at */
+				int arg_class2 = X86_64_ARG_NO_CLASS;
+
+				if(is_struct_or_union(field_type))
+				{
+					/* We have to check this struct recursively */
+					unsigned int current_start;
+					unsigned int nested_struct_start;
+					unsigned int nested_struct_end;
+
+					current_start = start + start_offset;
+					if(field_offset < current_start)
+					{
+						nested_struct_start = current_start - field_offset;
+					}
+					else
+					{
+						nested_struct_start = 0;
+					}
+					if(field_offset + field_size - 1 > end_offset)
+					{
+						/* The struct ends beyond the part we are looking at */
+						nested_struct_end = field_offset + field_size -
+												(nested_struct_start + 1);
+					}
+					else
+					{
+						nested_struct_end = field_size - 1;
+					}
+					arg_class2 = _jit_classify_structpart(field_type,
+														  start + field_offset,
+														  nested_struct_start,
+														  nested_struct_end);
+				}
+				else
+				{
+					if((start + start_offset) & (field_size - 1))
+					{
+						/* The field is misaligned. NOTE(review): this tests the start of the part, not the field's own offset -- confirm */
+						return X86_64_ARG_MEMORY;
+					}
+					arg_class2 = _jit_classify_arg(field_type, 0);
+				}
+				if(arg_class == X86_64_ARG_NO_CLASS)
+				{
+					arg_class = arg_class2;	/* first overlapping field decides the initial class */
+				}
+				else if(arg_class != arg_class2)
+				{
+					if(arg_class == X86_64_ARG_MEMORY ||
+					   arg_class2 == X86_64_ARG_MEMORY)
+					{
+						arg_class = X86_64_ARG_MEMORY;	/* MEMORY wins over every other class */
+					}
+					else if(arg_class == X86_64_ARG_INTEGER ||
+							arg_class2 == X86_64_ARG_INTEGER)
+					{
+						arg_class = X86_64_ARG_INTEGER;	/* INTEGER wins over the remaining classes */
+					}
+					else if(arg_class == X86_64_ARG_X87 ||
+							arg_class2 == X86_64_ARG_X87)
+					{
+						arg_class = X86_64_ARG_MEMORY;	/* X87 mixed with a different class becomes MEMORY */
+					}
+					else
+					{
+						arg_class = X86_64_ARG_SSE;
+					}
+				}
+			}
+		}
+	}
+	return arg_class;
+}
+
+int
+_jit_classify_struct(jit_param_passing_t *passing,
+					_jit_param_t *param, jit_type_t param_type)
+{
+	jit_nuint size = (jit_nuint)jit_type_get_size(param_type);
+
+	if(size <= 8)
+	{
+		int arg_class;
+
+		arg_class = _jit_classify_structpart(param_type, 0, 0, size - 1);
+		if(arg_class == X86_64_ARG_NO_CLASS)
+		{
+			arg_class = X86_64_ARG_SSE;	/* a part without a class is treated as SSE */
+		}
+		if(arg_class == X86_64_ARG_INTEGER)
+		{
+			if(passing->word_index < passing->max_word_regs)
+			{
+				/* Set the arg class to the number of registers used */
+				param->arg_class = 1;
+
+				/* Set the first register to the register used */
+				param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
+				param->un.reg_info[0].value = param->value;
+				++(passing->word_index);
+			}
+			else
+			{
+				/* Set the arg class to stack */
+				param->arg_class = JIT_ARG_CLASS_STACK;
+
+				/* Allocate the slot in the arg passing frame */
+				_jit_alloc_param_slot(passing, param, param_type);
+			}
+		}
+		else if(arg_class == X86_64_ARG_SSE)
+		{
+			if(passing->float_index < passing->max_float_regs)
+			{
+				/* Set the arg class to the number of registers used */
+				param->arg_class = 1;
+
+				/* Set the first register to the register used */
+				param->un.reg_info[0].reg = passing->float_regs[passing->float_index];
+				param->un.reg_info[0].value = param->value;
+				++(passing->float_index);
+			}
+			else
+			{
+				/* Set the arg class to stack */
+				param->arg_class = JIT_ARG_CLASS_STACK;
+
+				/* Allocate the slot in the arg passing frame */
+				_jit_alloc_param_slot(passing, param, param_type);
+			}
+		}
+		else
+		{
+			/* Set the arg class to stack */
+			param->arg_class = JIT_ARG_CLASS_STACK;
+
+			/* Allocate the slot in the arg passing frame */
+			_jit_alloc_param_slot(passing, param, param_type);
+		}
+	}
+	else if(size <= 16)
+	{
+		int arg_class1;	/* class of bytes 0..7 */
+		int arg_class2;	/* class of bytes 8..size-1 */
+
+		arg_class1 = _jit_classify_structpart(param_type, 0, 0, 7);
+		arg_class2 = _jit_classify_structpart(param_type, 0, 8, size - 1);
+		if(arg_class1 == X86_64_ARG_NO_CLASS)
+		{
+			arg_class1 = X86_64_ARG_SSE;
+		}
+		if(arg_class2 == X86_64_ARG_NO_CLASS)
+		{
+			arg_class2 = X86_64_ARG_SSE;
+		}
+		if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE)
+		{
+			/* We use only one sse register in this case. NOTE(review): the SysV ABI gives each SSE eightbyte its own register unless the second is SSEUP -- confirm */
+			if(passing->float_index < passing->max_float_regs)
+			{
+				/* Set the arg class to the number of registers used */
+				param->arg_class = 1;
+
+				/* Set the first register to the register used */
+				param->un.reg_info[0].reg = passing->float_regs[passing->float_index];
+				param->un.reg_info[0].value = param->value;
+				++(passing->float_index);
+			}
+			else
+			{
+				/* Set the arg class to stack */
+				param->arg_class = JIT_ARG_CLASS_STACK;
+
+				/* Allocate the slot in the arg passing frame */
+				_jit_alloc_param_slot(passing, param, param_type);
+			}
+		}
+		else if(arg_class1 == X86_64_ARG_MEMORY ||
+				arg_class2 == X86_64_ARG_MEMORY)
+		{
+			/* Set the arg class to stack */
+			param->arg_class = JIT_ARG_CLASS_STACK;
+
+			/* Allocate the slot in the arg passing frame */
+			_jit_alloc_param_slot(passing, param, param_type);
+		}
+		else if(arg_class1 == X86_64_ARG_INTEGER &&
+				arg_class2 == X86_64_ARG_INTEGER)
+		{
+			/* We need two general purpose registers in this case */
+			if((passing->word_index + 1) < passing->max_word_regs)
+			{
+				/* Set the arg class to the number of registers used */
+				param->arg_class = 2;
+
+				/* Assign the registers (reg_info[].value is not set on this path) */
+				param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
+				++(passing->word_index);
+				param->un.reg_info[1].reg = passing->word_regs[passing->word_index];
+				++(passing->word_index);
+			}
+			else
+			{
+				/* Set the arg class to stack */
+				param->arg_class = JIT_ARG_CLASS_STACK;
+
+				/* Allocate the slot in the arg passing frame */
+				_jit_alloc_param_slot(passing, param, param_type);
+			}
+		}
+		else
+		{
+			/* We need one xmm and one general purpose register */
+			if((passing->word_index < passing->max_word_regs) &&
+			   (passing->float_index < passing->max_float_regs))
+			{
+				/* Set the arg class to the number of registers used */
+				param->arg_class = 2;
+
+				if(arg_class1 == X86_64_ARG_INTEGER)
+				{
+					param->un.reg_info[0].reg = passing->word_regs[passing->word_index];	/* first eightbyte: word register */
+					++(passing->word_index);
+					param->un.reg_info[1].reg = passing->float_regs[passing->float_index];	/* second eightbyte: float register */
+					++(passing->float_index);
+				}
+				else
+				{
+					param->un.reg_info[0].reg = passing->float_regs[passing->float_index];	/* first eightbyte: float register */
+					++(passing->float_index);
+					param->un.reg_info[1].reg = passing->word_regs[passing->word_index];	/* second eightbyte: word register */
+					++(passing->word_index);
+				}
+			}
+			else
+			{
+				/* Set the arg class to stack */
+				param->arg_class = JIT_ARG_CLASS_STACK;
+
+				/* Allocate the slot in the arg passing frame */
+				_jit_alloc_param_slot(passing, param, param_type);
+			}
+		}
+	}
+	else
+	{
+		/* Set the arg class to stack */
+		param->arg_class = JIT_ARG_CLASS_STACK;
+
+		/* Allocate the slot in the arg passing frame */
+		_jit_alloc_param_slot(passing, param, param_type);
+	}
+	return 1;	/* every path of this function returns 1 */
+}
+
+int
+_jit_classify_param(jit_param_passing_t *passing,
+					_jit_param_t *param, jit_type_t param_type)
+{
+	if(is_struct_or_union(param_type))
+	{
+		return _jit_classify_struct(passing, param, param_type);
+	}
+	else
+	{
+		int arg_class;
+
+		arg_class = _jit_classify_arg(param_type, 0);
+
+		switch(arg_class)
+		{
+			case X86_64_ARG_INTEGER:
+			{
+				if(passing->word_index < passing->max_word_regs)
+				{
+					/* Set the arg class to the number of registers used */
+					param->arg_class = 1;
+
+					/* Set the first register to the register used */
+					param->un.reg_info[0].reg = passing->word_regs[passing->word_index];
+					param->un.reg_info[0].value = param->value;
+					++(passing->word_index);
+				}
+				else
+				{
+					/* Set the arg class to stack */
+					param->arg_class = JIT_ARG_CLASS_STACK;
+
+					/* Allocate the slot in the arg passing frame */
+					_jit_alloc_param_slot(passing, param, param_type);
+				}
+			}
+			break;
+
+			case X86_64_ARG_SSE:
+			{
+				if(passing->float_index < passing->max_float_regs)
+				{
+					/* Set the arg class to the number of registers used */
+					param->arg_class = 1;
+
+					/* Set the first register to the register used */
+					param->un.reg_info[0].reg = passing->float_regs[passing->float_index];
+					param->un.reg_info[0].value = param->value;
+					++(passing->float_index);
+				}
+				else
+				{
+					/* Set the arg class to stack */
+					param->arg_class = JIT_ARG_CLASS_STACK;
+
+					/* Allocate the slot in the arg passing frame */
+					_jit_alloc_param_slot(passing, param, param_type);
+				}
+			}
+			break;
+
+			case X86_64_ARG_MEMORY:
+			{
+				/* Set the arg class to stack */
+				param->arg_class = JIT_ARG_CLASS_STACK;
+
+				/* Allocate the slot in the arg passing frame */
+				_jit_alloc_param_slot(passing, param, param_type);
+			}
+			break;
+		}
+	}
+	return 1;
+}
+
+void
+_jit_builtin_apply_add_struct(jit_apply_builder *builder,
+							  void *value,
+							  jit_type_t struct_type)
+{	/* Copy the struct at value into the builder's register save area, or hand it to jit_apply_builder_add_struct otherwise */
+	unsigned int size = jit_type_get_size(struct_type);
+
+	if(size <= 16)
+	{
+		if(size <= 8)
+		{
+			int arg_class;
+
+			arg_class = _jit_classify_structpart(struct_type, 0, 0, size - 1);
+			if(arg_class == X86_64_ARG_NO_CLASS)
+			{
+				arg_class = X86_64_ARG_SSE;	/* a part without a class is treated as SSE */
+			}
+			if((arg_class == X86_64_ARG_INTEGER) &&
+			   (builder->word_used < JIT_APPLY_NUM_WORD_REGS))
+			{
+				/* The struct is passed in a general purpose register */
+				jit_memcpy(&(builder->apply_args->word_regs[builder->word_used]),
+						   value, size);
+				++(builder->word_used);
+			}
+			else if((arg_class == X86_64_ARG_SSE) &&
+					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
+			{
+				/* The struct is passed in one sse register */
+				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
+						   value, size);
+				++(builder->float_used);
+			}
+			else
+			{
+				unsigned int align = jit_type_get_alignment(struct_type);
+
+				jit_apply_builder_add_struct(builder, value, size, align);
+			}
+		}
+		else
+		{
+			int arg_class1;	/* class of bytes 0..7 */
+			int arg_class2;	/* class of bytes 8..size-1 */
+
+			arg_class1 = _jit_classify_structpart(struct_type, 0, 0, 7);
+			arg_class2 = _jit_classify_structpart(struct_type, 0, 8, size - 1);
+			if(arg_class1 == X86_64_ARG_NO_CLASS)
+			{
+				arg_class1 = X86_64_ARG_SSE;
+			}
+			if(arg_class2 == X86_64_ARG_NO_CLASS)
+			{
+				arg_class2 = X86_64_ARG_SSE;
+			}
+			if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE &&
+			   (builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
+			{
+				/* The struct is passed in one sse register */
+				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
+						   value, size);
+				++(builder->float_used);
+			}
+			else if(arg_class1 == X86_64_ARG_INTEGER &&
+					arg_class2 == X86_64_ARG_INTEGER &&
+					((builder->word_used + 1) < JIT_APPLY_NUM_WORD_REGS))
+			{
+				/* Two general purpose registers: word_used and word_used + 1 must both be valid slots */
+				jit_memcpy(&(builder->apply_args->word_regs[builder->word_used]),
+						   value, size);
+				(builder->word_used) += 2;
+			}
+			else if(arg_class1 == X86_64_ARG_INTEGER &&
+					arg_class2 == X86_64_ARG_SSE &&
+					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS) &&
+					(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
+			{
+				/* The first eightbyte is passed in a general purpose */
+				/* register and the second eightbyte in a sse register */
+				builder->apply_args->word_regs[builder->word_used] =
+					((jit_nint *)value)[0];
+				++(builder->word_used);
+				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
+						   ((char *)value) + 8, size - 8);
+				++(builder->float_used);
+			}
+			else if(arg_class1 == X86_64_ARG_SSE &&
+					arg_class2 == X86_64_ARG_INTEGER &&
+					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS) &&
+					(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
+			{
+				/* The first eightbyte is passed in a sse register and */
+				/* the second eightbyte in a general purpose register */
+				jit_memcpy(&(builder->apply_args->float_regs[builder->float_used]),
+						   value, 8);
+				++(builder->float_used);
+				jit_memcpy(&(builder->apply_args->word_regs[builder->word_used]),
+						   ((char *)value) + 8, size - 8);
+				++(builder->word_used);
+			}
+			else
+			{
+				unsigned int align = jit_type_get_alignment(struct_type);
+
+				jit_apply_builder_add_struct(builder, value, size, align);
+			}
+		}
+	}
+	else
+	{
+		unsigned int align = jit_type_get_alignment(struct_type);
+
+		jit_apply_builder_add_struct(builder, value, size, align);
+	}
+}
+
+void
+_jit_builtin_apply_get_struct(jit_apply_builder *builder,
+							  void *value,
+							  jit_type_t struct_type)
+{	/* Mirror of _jit_builtin_apply_add_struct: copy a struct argument out of the register save area into value */
+	unsigned int size = jit_type_get_size(struct_type);
+
+	if(size <= 16)
+	{
+		if(size <= 8)
+		{
+			int arg_class;
+
+			arg_class = _jit_classify_structpart(struct_type, 0, 0, size - 1);
+			if(arg_class == X86_64_ARG_NO_CLASS)
+			{
+				arg_class = X86_64_ARG_SSE;	/* a part without a class is treated as SSE */
+			}
+			if((arg_class == X86_64_ARG_INTEGER) &&
+			   (builder->word_used < JIT_APPLY_NUM_WORD_REGS))
+			{
+				/* The struct is passed in a general purpose register */
+				jit_memcpy(value,
+						   &(builder->apply_args->word_regs[builder->word_used]),
+						   size);
+				++(builder->word_used);
+			}
+			else if((arg_class == X86_64_ARG_SSE) &&
+					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
+			{
+				/* The struct is passed in one sse register */
+				jit_memcpy(value,
+						   &(builder->apply_args->float_regs[builder->float_used]),
+						   size);
+				++(builder->float_used);
+			}
+			else
+			{
+				/* TODO: always load the value from stack */
+				unsigned int align = jit_type_get_alignment(struct_type);
+
+				jit_apply_parser_get_struct(builder, size, align, value);
+			}
+		}
+		else
+		{
+			int arg_class1;	/* class of bytes 0..7 */
+			int arg_class2;	/* class of bytes 8..size-1 */
+
+			arg_class1 = _jit_classify_structpart(struct_type, 0, 0, 7);
+			arg_class2 = _jit_classify_structpart(struct_type, 0, 8, size - 1);
+			if(arg_class1 == X86_64_ARG_NO_CLASS)
+			{
+				arg_class1 = X86_64_ARG_SSE;
+			}
+			if(arg_class2 == X86_64_ARG_NO_CLASS)
+			{
+				arg_class2 = X86_64_ARG_SSE;
+			}
+			if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE &&
+			   (builder->float_used < JIT_APPLY_NUM_FLOAT_REGS))
+			{
+				/* The struct is passed in one sse register */
+				jit_memcpy(value,
+						   &(builder->apply_args->float_regs[builder->float_used]),
+						   size);
+				++(builder->float_used);
+			}
+			else if(arg_class1 == X86_64_ARG_INTEGER &&
+					arg_class2 == X86_64_ARG_INTEGER &&
+					((builder->word_used + 1) < JIT_APPLY_NUM_WORD_REGS))
+			{
+				/* Two general purpose registers: word_used and word_used + 1 must both be valid slots */
+				jit_memcpy(value,
+						   &(builder->apply_args->word_regs[builder->word_used]),
+						   size);
+				(builder->word_used) += 2;
+			}
+			else if(arg_class1 == X86_64_ARG_INTEGER &&
+					arg_class2 == X86_64_ARG_SSE &&
+					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS) &&
+					(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
+			{
+				/* The first eightbyte is passed in a general purpose */
+				/* register and the second eightbyte in a sse register */
+				((jit_nint *)value)[0] =
+					builder->apply_args->word_regs[builder->word_used];
+				++(builder->word_used);
+
+				jit_memcpy(((char *)value) + 8,
+						   &(builder->apply_args->float_regs[builder->float_used]),
+						   size - 8);
+				++(builder->float_used);
+			}
+			else if(arg_class1 == X86_64_ARG_SSE &&
+					arg_class2 == X86_64_ARG_INTEGER &&
+					(builder->float_used < JIT_APPLY_NUM_FLOAT_REGS) &&
+					(builder->word_used < JIT_APPLY_NUM_WORD_REGS))
+			{
+				/* The first eightbyte is passed in a sse register and */
+				/* the second eightbyte in a general purpose register */
+				jit_memcpy(value,
+						   &(builder->apply_args->float_regs[builder->float_used]),
+						   8);
+				++(builder->float_used);
+
+				jit_memcpy(((char *)value) + 8,
+						   &(builder->apply_args->word_regs[builder->word_used]),
+						   size - 8);
+				++(builder->word_used);
+			}
+			else
+			{
+				/* TODO: always load the value from stack */
+				unsigned int align = jit_type_get_alignment(struct_type);
+
+				jit_apply_parser_get_struct(builder, size, align, value);
+			}
+		}
+	}
+	else
+	{
+		/* TODO: always load the value from stack */
+		unsigned int align = jit_type_get_alignment(struct_type);
+
+		jit_apply_parser_get_struct(builder, size, align, value);
+	}
+}
+
+void
+_jit_builtin_apply_get_struct_return(jit_apply_builder *builder,
+									 void
*return_value,
+									 jit_apply_return *apply_return,
+									 jit_type_t struct_type)
+{
+	unsigned int size = jit_type_get_size(struct_type);
+
+	if(size <= 16)
+	{
+		if(size <= 8)
+		{
+			int arg_class;
+
+			arg_class = _jit_classify_structpart(struct_type, 0, 0, size - 1);
+			if(arg_class == X86_64_ARG_NO_CLASS)
+			{
+				arg_class = X86_64_ARG_SSE;
+			}
+			if(arg_class == X86_64_ARG_INTEGER)
+			{
+				/* The struct is returned in %rax (read from the start of the apply_return buffer) */
+				jit_memcpy(return_value, (void *)apply_return, size);
+				return;
+			}
+			else if(arg_class == X86_64_ARG_SSE)
+			{
+				/* The struct is returned in %xmm0 (read from byte offset 16 of the apply_return buffer) */
+				jit_memcpy(return_value,
+						   &(((jit_ubyte *)apply_return)[16]), size);
+				return;
+			}
+		}
+		else
+		{
+			int arg_class1;	/* class of bytes 0..7 */
+			int arg_class2;	/* class of bytes 8..size-1 */
+
+			arg_class1 = _jit_classify_structpart(struct_type, 0, 0, 7);
+			arg_class2 = _jit_classify_structpart(struct_type, 0, 8, size - 1);
+			if(arg_class1 == X86_64_ARG_NO_CLASS)
+			{
+				arg_class1 = X86_64_ARG_SSE;
+			}
+			if(arg_class2 == X86_64_ARG_NO_CLASS)
+			{
+				arg_class2 = X86_64_ARG_SSE;
+			}
+			if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE)
+			{
+				/* The struct is returned in %xmm0 */
+				jit_memcpy(return_value,
+						   &(((jit_ubyte *)apply_return)[16]), size);
+				return;
+			}
+			else if(arg_class1 == X86_64_ARG_INTEGER &&
+					arg_class2 == X86_64_ARG_INTEGER)
+			{
+				/* The struct is returned in %rax and %rdx */
+				jit_memcpy(return_value, (void *)apply_return, size);
+				return;
+			}
+			else if(arg_class1 == X86_64_ARG_INTEGER &&
+					arg_class2 == X86_64_ARG_SSE)
+			{
+				/* The first eightbyte is returned in %rax and the second */
+				/* eightbyte in %xmm0 */
+				((jit_nint *)return_value)[0] =
+					*(jit_nint *)apply_return;
+
+				jit_memcpy(((char *)return_value) + 8,
+						   &(((jit_ubyte *)apply_return)[16]), size - 8);
+				return;
+			}
+			else if(arg_class1 == X86_64_ARG_SSE &&
+					arg_class2 == X86_64_ARG_INTEGER)
+			{
+				/* The first eightbyte is returned in %xmm0 and the second */
+				/* eightbyte in %rax */
+				jit_memcpy(return_value,
+						   &(((jit_ubyte *)apply_return)[16]), 8);
+
+				jit_memcpy(((char *)return_value) + 8,
+						   (void *)apply_return, size - 8);
+				return;
+			}
+		}
+	}
+	/* All other cases are returned via return_ptr; copy only when that buffer is not return_value itself */
+	if(builder->struct_return != return_value)
+	{
+		jit_memcpy(return_value, (builder)->struct_return, size);
+	}
+}
+
 
 #endif /* x86-64 */
diff --git a/jit/jit-apply-x86-64.h b/jit/jit-apply-x86-64.h
index bc8c2c5..bc12879 100644
--- a/jit/jit-apply-x86-64.h
+++ b/jit/jit-apply-x86-64.h
@@ -23,6 +23,97 @@
 #ifndef _JIT_APPLY_X86_64_H
 #define _JIT_APPLY_X86_64_H
 
+#include "jit-internal.h"
+
+/*
+ * Flag that a parameter is passed on the stack.
+ */
+#define JIT_ARG_CLASS_STACK	0xFFFF
+
+/*
+ * Define the way the parameter is passed to a specific function
+ */
+typedef struct
+{
+	int				reg;
+	jit_value_t		value;
+} _jit_structpassing_t;
+
+typedef struct
+{
+	jit_value_t				value;
+	jit_ushort				arg_class;		/* Number of registers used, or JIT_ARG_CLASS_STACK */
+	jit_ushort				stack_pad;		/* Number of stack words needed for padding */
+	union
+	{
+		_jit_structpassing_t	reg_info[4];	/* filled in when the parameter is passed in registers */
+		jit_int					offset;			/* offset in the arg region when passed on the stack */
+	} un;
+} _jit_param_t;
+
+/*
+ * Structure that is used to help with parameter passing.
+ */
+typedef struct
+{
+	int				stack_size;			/* Number of bytes needed on the */
+										/* stack for parameter passing */
+	int				stack_pad;			/* Number of stack words we have */
+										/* to push before pushing the */
+										/* parameters for keeping the stack */
+										/* aligned */
+	unsigned int	word_index;			/* Number of word registers */
+										/* allocated */
+	unsigned int	max_word_regs;		/* Number of word registers */
+										/* available for parameter passing */
+	const int	   *word_regs;			/* indexed by word_index to pick the next word register */
+	unsigned int	float_index;		/* presumably the float counterpart of word_index */
+	unsigned int	max_float_regs;		/* presumably the float counterpart of max_word_regs */
+	const int	   *float_regs;			/* indexed by float_index to pick the next float register */
+	_jit_param_t   *params;
+
+} jit_param_passing_t;
+
+/*
+ * Determine how a parameter is passed.
+ */
+int
+_jit_classify_param(jit_param_passing_t *passing,
+					_jit_param_t *param, jit_type_t param_type);
+
+/*
+ * Determine how a struct type is passed.
+ */ +int +_jit_classify_struct(jit_param_passing_t *passing, + _jit_param_t *param, jit_type_t param_type); + +/* + * We handle struct passing ourself + */ +#define HAVE_JIT_BUILTIN_APPLY_STRUCT 1 + +/* + * We handle struct returning ourself + */ +#define HAVE_JIT_BUILTIN_APPLY_STRUCT_RETURN 1 + +/* + * The granularity of the stack + */ +#define STACK_SLOT_SIZE sizeof(void *) + +/* + * Get he number of complete stack slots used + */ +#define STACK_SLOTS_USED(size) ((size) >> 3) + +/* + * Round a size up to a multiple of the stack word size. + */ +#define ROUND_STACK(size) \ + (((size) + (STACK_SLOT_SIZE - 1)) & ~(STACK_SLOT_SIZE - 1)) + /* * The "__builtin_apply" functionality in gcc orders the registers * in a strange way, which makes it difficult to use. Our replacement @@ -59,7 +150,7 @@ do { \ void *__func = (void *)(func); \ void *__args = (void *)(args); \ - long __size = (long)(size); \ + long __size = (((long)(size) + (long)0xf) & ~(long)0xf); \ void *__return_buf = alloca(64); \ (return_buf) = __return_buf; \ __asm__ ( \ @@ -165,6 +256,10 @@ return; \ } while (0) +#define jit_builtin_return_struct(return_buf, type) \ + do { \ + } while (0) + #endif /* GNUC */ /* diff --git a/jit/jit-gen-x86-64.h b/jit/jit-gen-x86-64.h index 87ea111..4a63820 100644 --- a/jit/jit-gen-x86-64.h +++ b/jit/jit-gen-x86-64.h @@ -1889,6 +1889,351 @@ typedef union #define x86_64_clear_reg(inst, reg) \ x86_64_xor_reg_reg_size((inst), (reg), (reg), 4) +/* + * shift instructions + */ +#define x86_64_shift_reg_imm_size(inst, opc, dreg, imm, size) \ + do { \ + if((imm) == 1) \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \ + x86_64_opcode1_emit((inst), 0xd0, (size)); \ + x86_64_reg_emit((inst), (opc), (dreg)); \ + } \ + else \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \ + x86_64_opcode1_emit((inst), 0xc0, (size)); \ + 
x86_64_reg_emit((inst), (opc), (dreg)); \ + x86_imm_emit8((inst), (imm)); \ + } \ + } while(0) + +#define x86_64_shift_mem_imm_size(inst, opc, mem, imm, size) \ + do { \ + if((imm) == 1) \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, 0); \ + x86_64_opcode1_emit((inst), 0xd0, (size)); \ + x86_64_mem_emit((inst), (opc), (mem)); \ + } \ + else \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, 0); \ + x86_64_opcode1_emit((inst), 0xc0, (size)); \ + x86_64_mem_emit((inst), (opc), (mem)); \ + x86_imm_emit8((inst), (imm)); \ + } \ + } while(0) + +#define x86_64_shift_regp_imm_size(inst, opc, dregp, imm, size) \ + do { \ + if((imm) == 1) \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (dregp)); \ + x86_64_opcode1_emit((inst), 0xd0, (size)); \ + x86_64_regp_emit((inst), (opc), (dregp)); \ + } \ + else \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (dregp)); \ + x86_64_opcode1_emit((inst), 0xc0, (size)); \ + x86_64_regp_emit((inst), (opc), (dregp)); \ + x86_imm_emit8((inst), (imm)); \ + } \ + } while(0) + +#define x86_64_shift_membase_imm_size(inst, opc, basereg, disp, imm, size) \ + do { \ + if((imm) == 1) \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \ + x86_64_opcode1_emit((inst), 0xd0, (size)); \ + x86_64_membase_emit((inst), (opc), (basereg), (disp)); \ + } \ + else \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \ + x86_64_opcode1_emit((inst), 0xc0, (size)); \ + x86_64_membase_emit((inst), (opc), (basereg), (disp)); \ + x86_imm_emit8((inst), (imm)); \ + } \ + } while(0) + +#define x86_64_shift_memindex_imm_size(inst, opc, basereg, disp, 
indexreg, shift, imm, size) \ + do { \ + if((imm) == 1) \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \ + x86_64_opcode1_emit((inst), 0xd0, (size)); \ + x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \ + } \ + else \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \ + x86_64_opcode1_emit((inst), 0xc0, (size)); \ + x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \ + x86_imm_emit8((inst), (imm)); \ + } \ + } while(0) + +/* + * shift by the number of bits in %cl + */ +#define x86_64_shift_reg_size(inst, opc, dreg, size) \ + do { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \ + x86_64_opcode1_emit((inst), 0xd2, (size)); \ + x86_64_reg_emit((inst), (opc), (dreg)); \ + } while(0) + +#define x86_64_shift_mem_size(inst, opc, mem, size) \ + do { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, 0); \ + x86_64_opcode1_emit((inst), 0xd2, (size)); \ + x86_64_mem_emit((inst), (opc), (mem)); \ + } while(0) + +#define x86_64_shift_regp_size(inst, opc, dregp, size) \ + do { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (dregp)); \ + x86_64_opcode1_emit((inst), 0xd2, (size)); \ + x86_64_regp_emit((inst), (opc), (dregp)); \ + } while(0) + +#define x86_64_shift_membase_size(inst, opc, basereg, disp, size) \ + do { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, 0, (basereg)); \ + x86_64_opcode1_emit((inst), 0xd2, (size)); \ + x86_64_membase_emit((inst), (opc), (basereg), (disp)); \ + } while(0) + +#define x86_64_shift_memindex_size(inst, opc, basereg, disp, indexreg, shift, size) \ + do { \ + 
if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + x86_64_rex_emit((inst), (size), 0, (indexreg), (basereg)); \ + x86_64_opcode1_emit((inst), 0xd2, (size)); \ + x86_64_memindex_emit((inst), (opc), (basereg), (disp), (indexreg), (shift)); \ + } while(0) + +/* + * shl: Shit left (clear the least significant bit) + */ +#define x86_64_shl_reg_imm_size(inst, dreg, imm, size) \ + do { \ + x86_64_shift_reg_imm_size((inst), 4, (dreg), (imm), (size)); \ + } while(0) + +#define x86_64_shl_mem_imm_size(inst, mem, imm, size) \ + do { \ + x86_64_shift_mem_imm_size((inst), 4, (mem), (imm), (size)); \ + } while(0) + +#define x86_64_shl_regp_imm_size(inst, dregp, imm, size) \ + do { \ + x86_64_shift_regp_imm_size((inst), 4, (dregp), (imm), (size)); \ + } while(0) + +#define x86_64_shl_membase_imm_size(inst, basereg, disp, imm, size) \ + do { \ + x86_64_shift_membase_imm_size((inst), 4, (basereg), (disp), (imm), (size)); \ + } while(0) + +#define x86_64_shl_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \ + do { \ + x86_64_shift_memindex_imm_size((inst), 4, (basereg), (disp), (indexreg), (shift), (imm), (size)); \ + } while(0) + +#define x86_64_shl_reg_size(inst, dreg, size) \ + do { \ + x86_64_shift_reg_size((inst), 4, (dreg), (size)); \ + } while(0) + +#define x86_64_shl_mem_size(inst, mem, size) \ + do { \ + x86_64_shift_mem_size((inst), 4, (mem), (size)); \ + } while(0) + +#define x86_64_shl_regp_size(inst, dregp, size) \ + do { \ + x86_64_shift_regp_size((inst), 4, (dregp), (size)); \ + } while(0) + +#define x86_64_shl_membase_size(inst, basereg, disp, size) \ + do { \ + x86_64_shift_membase_size((inst), 4, (basereg), (disp), (size)); \ + } while(0) + +#define x86_64_shl_memindex_size(inst, basereg, disp, indexreg, shift, size) \ + do { \ + x86_64_shift_memindex_size((inst), 4, (basereg), (disp), (indexreg), (shift), (size)); \ + } while(0) + +/* + * shr: Unsigned shit right (clear the most significant bit) + */ +#define 
x86_64_shr_reg_imm_size(inst, dreg, imm, size) \ + do { \ + x86_64_shift_reg_imm_size((inst), 5, (dreg), (imm), (size)); \ + } while(0) + +#define x86_64_shr_mem_imm_size(inst, mem, imm, size) \ + do { \ + x86_64_shift_mem_imm_size((inst), 5, (mem), (imm), (size)); \ + } while(0) + +#define x86_64_shr_regp_imm_size(inst, dregp, imm, size) \ + do { \ + x86_64_shift_regp_imm_size((inst), 5, (dregp), (imm), (size)); \ + } while(0) + +#define x86_64_shr_membase_imm_size(inst, basereg, disp, imm, size) \ + do { \ + x86_64_shift_membase_imm_size((inst), 5, (basereg), (disp), (imm), (size)); \ + } while(0) + +#define x86_64_shr_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \ + do { \ + x86_64_shift_memindex_imm_size((inst), 5, (basereg), (disp), (indexreg), (shift), (imm), (size)); \ + } while(0) + +#define x86_64_shr_reg_size(inst, dreg, size) \ + do { \ + x86_64_shift_reg_size((inst), 5, (dreg), (size)); \ + } while(0) + +#define x86_64_shr_mem_size(inst, mem, size) \ + do { \ + x86_64_shift_mem_size((inst), 5, (mem), (size)); \ + } while(0) + +#define x86_64_shr_regp_size(inst, dregp, size) \ + do { \ + x86_64_shift_regp_size((inst), 5, (dregp), (size)); \ + } while(0) + +#define x86_64_shr_membase_size(inst, basereg, disp, size) \ + do { \ + x86_64_shift_membase_size((inst), 5, (basereg), (disp), (size)); \ + } while(0) + +#define x86_64_shr_memindex_size(inst, basereg, disp, indexreg, shift, size) \ + do { \ + x86_64_shift_memindex_size((inst), 5, (basereg), (disp), (indexreg), (shift), (size)); \ + } while(0) + +/* + * sar: Signed shift right (keep the most significant bit) + */ +#define x86_64_sar_reg_imm_size(inst, dreg, imm, size) \ + do { \ + x86_64_shift_reg_imm_size((inst), 7, (dreg), (imm), (size)); \ + } while(0) + +#define x86_64_sar_mem_imm_size(inst, mem, imm, size) \ + do { \ + x86_64_shift_mem_imm_size((inst), 7, (mem), (imm), (size)); \ + } while(0) + +#define x86_64_sar_regp_imm_size(inst, dregp, imm, size) \ + do { \ +
x86_64_shift_regp_imm_size((inst), 7, (dregp), (imm), (size)); \ + } while(0) + +#define x86_64_sar_membase_imm_size(inst, basereg, disp, imm, size) \ + do { \ + x86_64_shift_membase_imm_size((inst), 7, (basereg), (disp), (imm), (size)); \ + } while(0) + +#define x86_64_sar_memindex_imm_size(inst, basereg, disp, indexreg, shift, imm, size) \ + do { \ + x86_64_shift_memindex_imm_size((inst), 7, (basereg), (disp), (indexreg), (shift), (imm), (size)); \ + } while(0) + +#define x86_64_sar_reg_size(inst, dreg, size) \ + do { \ + x86_64_shift_reg_size((inst), 7, (dreg), (size)); \ + } while(0) + +#define x86_64_sar_mem_size(inst, mem, size) \ + do { \ + x86_64_shift_mem_size((inst), 7, (mem), (size)); \ + } while(0) + +#define x86_64_sar_regp_size(inst, dregp, size) \ + do { \ + x86_64_shift_regp_size((inst), 7, (dregp), (size)); \ + } while(0) + +#define x86_64_sar_membase_size(inst, basereg, disp, size) \ + do { \ + x86_64_shift_membase_size((inst), 7, (basereg), (disp), (size)); \ + } while(0) + +#define x86_64_sar_memindex_size(inst, basereg, disp, indexreg, shift, size) \ + do { \ + x86_64_shift_memindex_size((inst), 7, (basereg), (disp), (indexreg), (shift), (size)); \ + } while(0) + /* * Lea instructions */ @@ -2026,7 +2371,7 @@ typedef union case 8: \ { \ jit_nint __x86_64_imm = (imm); \ - if(__x86_64_imm >= jit_min_int && __x86_64_imm <= jit_max_int) \ + if(__x86_64_imm >= (jit_nint)jit_min_int && __x86_64_imm <= (jit_nint)jit_max_int) \ { \ *(inst)++ = (unsigned char)0xc7; \ x86_64_reg_emit((inst), 0, (dreg)); \ @@ -2609,6 +2954,41 @@ typedef union x86_ret((inst)); \ } while(0) +/* + * xchg: Exchange values + */ +#define x86_64_xchg_reg_reg_size(inst, dreg, sreg, size) \ + do { \ + if(((size) > 1) && ((dreg) == X86_64_RAX || (sreg) == X86_64_RAX)) \ + { \ + if((size) == 2) \ + { \ + *(inst)++ = (unsigned char)0x66; \ + } \ + if((dreg) == X86_64_RAX) \ + { \ + x86_64_rex_emit((inst), (size), 0, 0, (sreg)); \ + *(inst)++ = (unsigned char)(0x90 + (unsigned 
char)(sreg & 0x7)); \ + } \ + else \ + { \ + x86_64_rex_emit((inst), (size), 0, 0, (dreg)); \ + *(inst)++ = (unsigned char)(0x90 + (unsigned char)(dreg & 0x7)); \ + } \ + } \ + else \ + { \ + if((size) == 1) \ + { \ + x86_64_alu1_reg_reg_size((inst), 0x86, (dreg), (sreg), (size)); \ + } \ + else \ + { \ + x86_64_alu1_reg_reg_size((inst), 0x87, (dreg), (sreg), (size)); \ + } \ + } \ + } while(0) + /* * XMM instructions */ @@ -2736,6 +3116,112 @@ typedef union * Move instructions */ +/* + * movd: Move doubleword from/to xmm register + */ +#define x86_64_movd_xreg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x6e, (dreg), (sreg), 4); \ + } while(0) + +#define x86_64_movd_xreg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x6e, (dreg), (mem), 4); \ + } while(0) + +#define x86_64_movd_xreg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x6e, (dreg), (sregp), 4); \ + } while(0) + +#define x86_64_movd_xreg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x6e, (dreg), (basereg), (disp), 4); \ + } while(0) + +#define x86_64_movd_xreg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x6e, (dreg), (basereg), (disp), (indexreg), (shift), 4); \ + } while(0) + +#define x86_64_movd_reg_xreg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x7e, (sreg), (dreg), 4); \ + } while(0) + +#define x86_64_movd_mem_xreg(inst, mem, sreg) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x7e, (sreg), (mem), 4); \ + } while(0) + +#define x86_64_movd_regp_xreg(inst, dregp, sreg) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x7e, (sreg), (dregp), 4); \ + } while(0) + +#define x86_64_movd_membase_xreg(inst, basereg, disp, sreg) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x7e, (sreg), 
(basereg), (disp), 4); \ + } while(0) + +#define x86_64_movd_memindex_xreg(inst, basereg, disp, indexreg, shift, sreg) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x7e, (sreg), (basereg), (disp), (indexreg), (shift), 4); \ + } while(0) + +/* + * movq: Move quadword from/to xmm register + */ +#define x86_64_movq_xreg_reg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x6e, (dreg), (sreg), 8); \ + } while(0) + +#define x86_64_movq_xreg_mem(inst, dreg, mem) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x6e, (dreg), (mem), 8); \ + } while(0) + +#define x86_64_movq_xreg_regp(inst, dreg, sregp) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x6e, (dreg), (sregp), 8); \ + } while(0) + +#define x86_64_movq_xreg_membase(inst, dreg, basereg, disp) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x6e, (dreg), (basereg), (disp), 8); \ + } while(0) + +#define x86_64_movq_xreg_memindex(inst, dreg, basereg, disp, indexreg, shift) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x6e, (dreg), (basereg), (disp), (indexreg), (shift), 8); \ + } while(0) + +#define x86_64_movq_reg_xreg(inst, dreg, sreg) \ + do { \ + x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x7e, (sreg), (dreg), 8); \ + } while(0) + +#define x86_64_movq_mem_xreg(inst, mem, sreg) \ + do { \ + x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x7e, (sreg), (mem), 8); \ + } while(0) + +#define x86_64_movq_regp_xreg(inst, dregp, sreg) \ + do { \ + x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x7e, (sreg), (dregp), 8); \ + } while(0) + +#define x86_64_movq_membase_xreg(inst, basereg, disp, sreg) \ + do { \ + x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x7e, (sreg), (basereg), (disp), 8); \ + } while(0) + +#define x86_64_movq_memindex_xreg(inst, basereg, disp, indexreg, shift, sreg) \ + do { \ + x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x7e, (sreg), (basereg), (disp), (indexreg), 
(shift), 8); \ + } while(0) + /* * movaps: Move aligned quadword (16 bytes) */ diff --git a/jit/jit-rules-x86-64.c b/jit/jit-rules-x86-64.c index 0dc87da..bcb15bc 100644 --- a/jit/jit-rules-x86-64.c +++ b/jit/jit-rules-x86-64.c @@ -103,36 +103,6 @@ */ #define HAVE_RED_ZONE 1 -/* - * X86_64 argument types as specified in the X86_64 SysV ABI. - */ -#define X86_64_ARG_NO_CLASS 0x00 -#define X86_64_ARG_INTEGER 0x01 -#define X86_64_ARG_MEMORY 0x02 -#define X86_64_ARG_SSE 0x11 -#define X86_64_ARG_SSEUP 0x12 -#define X86_64_ARG_X87 0x21 -#define X86_64_ARG_X87UP 0x22 - -#define X86_64_ARG_IS_SSE(arg) (((arg) & 0x10) != 0) -#define X86_64_ARG_IS_X87(arg) (((arg) & 0x20) != 0) - -/* - * The granularity of the stack - */ -#define STACK_SLOT_SIZE sizeof(void *) - -/* - * Get he number of complete stack slots used - */ -#define STACK_SLOTS_USED(size) ((size) >> 3) - -/* - * Round a size up to a multiple of the stack word size. - */ -#define ROUND_STACK(size) \ - (((size) + (STACK_SLOT_SIZE - 1)) & ~(STACK_SLOT_SIZE - 1)) - /* * Setup or teardown the x86 code output process. 
*/ @@ -197,6 +167,10 @@ static _jit_regclass_t *x86_64_creg; /* X86_64 call clobbered general */ /* purpose registers */ static _jit_regclass_t *x86_64_rreg; /* general purpose registers not used*/ /* for returning values */ +static _jit_regclass_t *x86_64_sreg; /* general purpose registers that can*/ + /* be used for the value to be */ + /* shifted (all but %rcx)*/ + /* for returning values */ static _jit_regclass_t *x86_64_freg; /* X86_64 fpu registers */ static _jit_regclass_t *x86_64_xreg; /* X86_64 xmm registers */ @@ -232,6 +206,17 @@ _jit_init_backend(void) X86_64_REG_R12, X86_64_REG_R13, X86_64_REG_R14, X86_64_REG_R15); + /* register class with all registers that can be used for shifted values */ + x86_64_sreg = _jit_regclass_create( + "sreg", JIT_REG_WORD | JIT_REG_LONG, 13, + X86_64_REG_RAX, X86_64_REG_RDX, + X86_64_REG_RBX, X86_64_REG_RSI, + X86_64_REG_RDI, X86_64_REG_R8, + X86_64_REG_R9, X86_64_REG_R10, + X86_64_REG_R11, X86_64_REG_R12, + X86_64_REG_R13, X86_64_REG_R14, + X86_64_REG_R15); + x86_64_freg = _jit_regclass_create( "freg", JIT_REG_X86_64_FLOAT | JIT_REG_IN_STACK, 8, X86_64_REG_ST0, X86_64_REG_ST1, @@ -716,11 +701,10 @@ setcc_reg(unsigned char *inst, int reg, int cond, int is_signed) * * We have only 4 bytes for the jump offsets. * Therefore we have do something tricky here. - * We need some fixed value that is known to be fix throughout the - * building of the function and that will be near the emitted code. - * The posn limit looks like the perfect value to use. + * The fixup pointer in the block/gen points to the last fixup. + * The fixup itself contains the offset to the previous fixup or + * null if it's the last fixup in the list. 
*/ -#define _JIT_GET_FIXVALUE(gen) ((gen)->posn.limit) /* * Calculate the fixup value @@ -1028,8 +1012,17 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value } else { - x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg, - (jit_nint)(value->address), 8); + if((jit_nint)(value->address) > 0 && (jit_nint)(value->address) <= (jit_nint)jit_max_uint) + { + x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg, + (jit_nint)(value->address), 4); + + } + else + { + x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg, + (jit_nint)(value->address), 8); + } } } break; @@ -1416,6 +1409,49 @@ _jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value } } break; + + case JIT_TYPE_STRUCT: + case JIT_TYPE_UNION: + { + if(IS_GENERAL_REG(reg)) + { + if(IS_GENERAL_REG(src_reg)) + { + x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg, 8); + } + else if(IS_XMM_REG(src_reg)) + { + x86_64_movq_reg_xreg(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg); + } + else + { + fputs("Unsupported struct/union reg - reg move\n", stderr); + } + } + else if(IS_XMM_REG(reg)) + { + if(IS_GENERAL_REG(src_reg)) + { + x86_64_movq_xreg_reg(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg); + } + else if(IS_XMM_REG(src_reg)) + { + x86_64_movaps_reg_reg(inst, _jit_reg_info[reg].cpu_reg, + _jit_reg_info[src_reg].cpu_reg); + } + else + { + fputs("Unsupported struct/union reg - reg move\n", stderr); + } + } + else + { + fputs("Unsupported struct/union reg - reg move\n", stderr); + } + } } } else @@ -1936,7 +1972,6 @@ _jit_gen_start_block(jit_gencode_t gen, jit_block_t block) if(DEBUG_FIXUPS && fixup) { fprintf(stderr, "Block: %lx\n", (jit_nint)block); - fprintf(stderr, "Limit: %lx\n", (jit_nint)_JIT_GET_FIXVALUE(gen)); } while(fixup != 0) { @@ -1974,6 +2009,10 @@ _jit_gen_is_global_candidate(jit_type_t type) { switch(jit_type_remove_tags(type)->kind) { + case 
JIT_TYPE_SBYTE: + case JIT_TYPE_UBYTE: + case JIT_TYPE_SHORT: + case JIT_TYPE_USHORT: case JIT_TYPE_INT: case JIT_TYPE_UINT: case JIT_TYPE_LONG: @@ -1994,85 +2033,6 @@ _jit_gen_is_global_candidate(jit_type_t type) * here too because the common implementation is not enough for x86_64. */ -/* - * Flag that a parameter is passed on the stack. - */ -#define JIT_ARG_CLASS_STACK 0xFFFF - -/* - * Define the way the parameter is passed to a specific function - */ -typedef struct -{ - jit_value_t value; - jit_ushort arg_class; - jit_ushort stack_pad; /* Number of stack words needed for padding */ - union - { - unsigned char reg[4]; - jit_int offset; - } un; -} _jit_param_t; - -/* - * Structure that is used to help with parameter passing. - */ -typedef struct -{ - int stack_size; /* Number of bytes needed on the */ - /* stack for parameter passing */ - int stack_pad; /* Number of stack words we have */ - /* to push before pushing the */ - /* parameters for keeping the stack */ - /* aligned */ - unsigned int word_index; /* Number of word registers */ - /* allocated */ - unsigned int max_word_regs; /* Number of word registers */ - /* available for parameter passing */ - const int *word_regs; - unsigned int float_index; - unsigned int max_float_regs; - const int *float_regs; - _jit_param_t *params; - -} jit_param_passing_t; - -/* - * Allcate the slot for a parameter passed on the stack. 
- */ -static void -_jit_alloc_param_slot(jit_param_passing_t *passing, _jit_param_t *param, - jit_type_t type) -{ - jit_int size = jit_type_get_size(type); - jit_int alignment = jit_type_get_alignment(type); - - /* Expand the size to a multiple of the stack slot size */ - size = ROUND_STACK(size); - - /* Expand the alignment to a multiple of the stack slot size */ - /* We expect the alignment to be a power of two after this step */ - alignment = ROUND_STACK(alignment); - - /* Make sure the current offset is aligned propperly for the type */ - if((passing->stack_size & (alignment -1)) != 0) - { - /* We need padding on the stack to fix the alignment constraint */ - jit_int padding = passing->stack_size & (alignment -1); - - /* Add the padding to the stack region */ - passing->stack_size += padding; - - /* record the number of pad words needed after pushing this arg */ - param->stack_pad = STACK_SLOTS_USED(padding); - } - /* Record the offset of the parameter in the arg region. */ - param->un.offset = passing->stack_size; - - /* And increase the argument region used. */ - passing->stack_size += size; -} - /* * Determine if a type corresponds to a structure or union. */ @@ -2090,605 +2050,181 @@ is_struct_or_union(jit_type_t type) return 0; } +static int +_jit_classify_struct_return(jit_param_passing_t *passing, + _jit_param_t *param, jit_type_t return_type) +{ + /* Initialize the param passing structure */ + jit_memset(passing, 0, sizeof(jit_param_passing_t)); + jit_memset(param, 0, sizeof(_jit_param_t)); + + passing->word_regs = _jit_word_return_regs; + passing->max_word_regs = _jit_num_word_return_regs; + passing->float_regs = _jit_sse_return_regs; + passing->max_float_regs = _jit_num_sse_return_regs; + + if(!(_jit_classify_struct(passing, param, return_type))) + { + return 0; + } + + return 1; +} + /* - * Classify the argument type. - * The type has to be in it's normalized form. + * Load a struct to the register(s) in which it will be returned. 
*/ -static int -_jit_classify_arg(jit_type_t arg_type, int is_return) +static unsigned char * +return_struct(unsigned char *inst, jit_function_t func, int ptr_reg) { - switch(arg_type->kind) + jit_type_t return_type; + jit_type_t signature = jit_function_get_signature(func); + + return_type = jit_type_get_return(signature); + if(is_struct_or_union(return_type)) { - case JIT_TYPE_SBYTE: - case JIT_TYPE_UBYTE: - case JIT_TYPE_SHORT: - case JIT_TYPE_USHORT: - case JIT_TYPE_INT: - case JIT_TYPE_UINT: - case JIT_TYPE_NINT: - case JIT_TYPE_NUINT: - case JIT_TYPE_LONG: - case JIT_TYPE_ULONG: - case JIT_TYPE_SIGNATURE: - case JIT_TYPE_PTR: - { - return X86_64_ARG_INTEGER; - } - break; + jit_nuint size; + jit_param_passing_t passing; + _jit_param_t return_param; - case JIT_TYPE_FLOAT32: - case JIT_TYPE_FLOAT64: + if(!_jit_classify_struct_return(&passing, &return_param, + return_type)) { - return X86_64_ARG_SSE; + /* It's an error so simply return insn */ + return inst; } - break; - - case JIT_TYPE_NFLOAT: + + size = jit_type_get_size(return_type); + if(size <= 8) { - /* we assume the nfloat type to be long double (80bit) */ - if(is_return) + /* one register is used for returning the value */ + if(IS_GENERAL_REG(return_param.un.reg_info[0].reg)) { - return X86_64_ARG_X87; + int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg; + + if(size <= 4) + { + x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 4); + } + else + { + x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 8); + } } else { - return X86_64_ARG_MEMORY; - } - } - break; - - case JIT_TYPE_STRUCT: - case JIT_TYPE_UNION: - { - int size = jit_type_get_size(arg_type); + int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg; - if(size > 16) - { - return X86_64_ARG_MEMORY; - } - else if(size <= 8) - { - return X86_64_ARG_INTEGER; + if(size <= 4) + { + x86_64_movss_reg_regp(inst, reg, ptr_reg); + } + else + { + x86_64_movsd_reg_regp(inst, reg, ptr_reg); + } } - /* For structs and unions with sizes between 8 
ant 16 bytes */ - /* we have to look at the elements. */ - /* TODO */ } - } - return X86_64_ARG_NO_CLASS; -} - -/* - * On X86_64 the alignment of native types matches their size. - * This leads to the result that all types except nfloats and aggregates - * (structs and unions) must start and end in an eightbyte (or the part - * we are looking at). - */ -static int -_jit_classify_structpart(jit_type_t struct_type, unsigned int start, - unsigned int start_offset, unsigned int end_offset) -{ - int arg_class = X86_64_ARG_NO_CLASS; - unsigned int num_fields = jit_type_num_fields(struct_type); - unsigned int current_field; - - for(current_field = 0; current_field < num_fields; ++current_field) - { - jit_nuint field_offset = jit_type_get_offset(struct_type, - current_field); - - if(field_offset <= end_offset) + else { - /* The field starts at a place that's inerresting for us */ - jit_type_t field_type = jit_type_get_field(struct_type, - current_field); - jit_nuint field_size = jit_type_get_size(field_type); - - if(field_offset + field_size > start_offset) + /* In this case we might need up to two registers */ + if(return_param.arg_class == 1) { - /* The field is at least partially in the part we are */ - /* looking at */ - int arg_class2 = X86_64_ARG_NO_CLASS; + /* This must be one xmm register */ + int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg; + int alignment = jit_type_get_alignment(return_type); - if(is_struct_or_union(field_type)) + if((alignment & 0xf) == 0) { - /* We have to check this struct recursively */ - unsigned int current_start; - unsigned int nested_struct_start; - unsigned int nested_struct_end; - - current_start = start + start_offset; - if(field_offset < current_start) - { - nested_struct_start = current_start - field_offset; - } - else - { - nested_struct_start = 0; - } - if(field_offset + field_size - 1 > end_offset) - { - /* The struct ends beyond the part we are looking at */ - nested_struct_end = field_offset + field_size - - 
(nested_struct_start + 1); - } - else - { - nested_struct_end = field_size - 1; - } - arg_class2 = _jit_classify_structpart(field_type, - start + field_offset, - nested_struct_start, - nested_struct_end); + /* The type is aligned on a 16 byte boundary */ + x86_64_movaps_reg_regp(inst, reg, ptr_reg); } else { - if((start + start_offset) & (field_size - 1)) - { - /* The field is misaligned */ - return X86_64_ARG_MEMORY; - } - arg_class2 = _jit_classify_arg(field_type, 0); + x86_64_movups_reg_regp(inst, reg, ptr_reg); + } + } + else + { + int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg; + + if(IS_GENERAL_REG(return_param.un.reg_info[0].reg)) + { + x86_64_mov_reg_regp_size(inst, reg, + ptr_reg, 8); } - if(arg_class == X86_64_ARG_NO_CLASS) + else { - arg_class = arg_class2; + x86_64_movsd_reg_regp(inst, reg, ptr_reg); } - else if(arg_class != arg_class2) + size -= 8; + reg = _jit_reg_info[return_param.un.reg_info[1].reg].cpu_reg; + if(IS_GENERAL_REG(return_param.un.reg_info[1].reg)) { - if(arg_class == X86_64_ARG_MEMORY || - arg_class2 == X86_64_ARG_MEMORY) + if(size <= 4) { - arg_class = X86_64_ARG_MEMORY; + x86_64_mov_reg_membase_size(inst, reg, ptr_reg, + 8, 4); } - else if(arg_class == X86_64_ARG_INTEGER || - arg_class2 == X86_64_ARG_INTEGER) + else { - arg_class = X86_64_ARG_INTEGER; + x86_64_mov_reg_membase_size(inst, reg, ptr_reg, + 8, 8); } - else if(arg_class == X86_64_ARG_X87 || - arg_class2 == X86_64_ARG_X87) + } + else + { + if(size <= 4) { - arg_class = X86_64_ARG_MEMORY; + x86_64_movss_reg_membase(inst, reg, + ptr_reg, 8); } else { - arg_class = X86_64_ARG_SSE; + x86_64_movsd_reg_membase(inst, reg, + ptr_reg, 8); } } } } } - return arg_class; + return inst; } -static int -_jit_classify_struct(jit_param_passing_t *passing, - _jit_param_t *param, jit_type_t param_type) +/* + * Flush a struct return value from the registers to the value + * on the stack. 
+ */ +static unsigned char * +flush_return_struct(unsigned char *inst, jit_value_t value) { - jit_nuint size = (jit_nuint)jit_type_get_size(param_type); + jit_type_t return_type; - if(size <= 8) + return_type = jit_value_get_type(value); + if(is_struct_or_union(return_type)) { - int arg_class; - - arg_class = _jit_classify_structpart(param_type, 0, 0, size - 1); - if(arg_class == X86_64_ARG_NO_CLASS) + jit_nuint size; + jit_nint offset; + jit_param_passing_t passing; + _jit_param_t return_param; + + if(!_jit_classify_struct_return(&passing, &return_param, return_type)) { - arg_class = X86_64_ARG_SSE; + /* It's an error so simply return insn */ + return inst; } - if(arg_class == X86_64_ARG_INTEGER) + + return_param.value = value; + + _jit_gen_fix_value(value); + size = jit_type_get_size(return_type); + offset = value->frame_offset; + if(size <= 8) { - if(passing->word_index < passing->max_word_regs) + /* one register is used for returning the value */ + if(IS_GENERAL_REG(return_param.un.reg_info[0].reg)) { - /* Set the arg class to the number of registers used */ - param->arg_class = 1; - - /* Set the first register to the register used */ - param->un.reg[0] = passing->word_regs[passing->word_index]; - ++(passing->word_index); - } - else - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - } - else if(arg_class == X86_64_ARG_SSE) - { - if(passing->float_index < passing->max_float_regs) - { - /* Set the arg class to the number of registers used */ - param->arg_class = 1; - - /* Set the first register to the register used */ - param->un.reg[0] = passing->float_regs[passing->float_index]; - ++(passing->float_index); - } - else - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - } - else - { - 
/* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - } - else if(size <= 16) - { - int arg_class1; - int arg_class2; - - arg_class1 = _jit_classify_structpart(param_type, 0, 0, 7); - arg_class2 = _jit_classify_structpart(param_type, 0, 8, size - 1); - if(arg_class1 == X86_64_ARG_NO_CLASS) - { - arg_class1 = X86_64_ARG_SSE; - } - if(arg_class2 == X86_64_ARG_NO_CLASS) - { - arg_class2 = X86_64_ARG_SSE; - } - if(arg_class1 == X86_64_ARG_SSE && arg_class2 == X86_64_ARG_SSE) - { - /* We use only one sse register in this case */ - if(passing->float_index < passing->max_float_regs) - { - /* Set the arg class to the number of registers used */ - param->arg_class = 1; - - /* Set the first register to the register used */ - param->un.reg[0] = passing->float_regs[passing->float_index]; - ++(passing->float_index); - } - else - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - } - else if(arg_class1 == X86_64_ARG_MEMORY || - arg_class2 == X86_64_ARG_MEMORY) - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - else if(arg_class1 == X86_64_ARG_INTEGER && - arg_class2 == X86_64_ARG_INTEGER) - { - /* We need two general purpose registers in this case */ - if((passing->word_index + 1) < passing->max_word_regs) - { - /* Set the arg class to the number of registers used */ - param->arg_class = 2; - - /* Assign the registers */ - param->un.reg[0] = passing->word_regs[passing->word_index]; - ++(passing->word_index); - param->un.reg[1] = passing->word_regs[passing->word_index]; - ++(passing->word_index); - } - else - { - /* Set the arg class to stack */ - param->arg_class = 
JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - } - else - { - /* We need one xmm and one general purpose register */ - if((passing->word_index < passing->max_word_regs) && - (passing->float_index < passing->max_float_regs)) - { - /* Set the arg class to the number of registers used */ - param->arg_class = 2; - - if(arg_class1 == X86_64_ARG_INTEGER) - { - param->un.reg[0] = passing->word_regs[passing->word_index]; - ++(passing->word_index); - param->un.reg[1] = passing->float_regs[passing->float_index]; - ++(passing->float_index); - } - else - { - param->un.reg[0] = passing->float_regs[passing->float_index]; - ++(passing->float_index); - param->un.reg[1] = passing->word_regs[passing->word_index]; - ++(passing->word_index); - } - } - else - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - } - } - else - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - return 1; -} - -int -_jit_classify_param(jit_param_passing_t *passing, - _jit_param_t *param, jit_type_t param_type) -{ - if(is_struct_or_union(param_type)) - { - return _jit_classify_struct(passing, param, param_type); - } - else - { - int arg_class; - - arg_class = _jit_classify_arg(param_type, 0); - - switch(arg_class) - { - case X86_64_ARG_INTEGER: - { - if(passing->word_index < passing->max_word_regs) - { - /* Set the arg class to the number of registers used */ - param->arg_class = 1; - - /* Set the first register to the register used */ - param->un.reg[0] = passing->word_regs[passing->word_index]; - ++(passing->word_index); - } - else - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg 
passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - } - break; - - case X86_64_ARG_SSE: - { - if(passing->float_index < passing->max_float_regs) - { - /* Set the arg class to the number of registers used */ - param->arg_class = 1; - - /* Set the first register to the register used */ - param->un.reg[0] = passing->float_regs[passing->float_index]; - ++(passing->float_index); - } - else - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - } - break; - - case X86_64_ARG_MEMORY: - { - /* Set the arg class to stack */ - param->arg_class = JIT_ARG_CLASS_STACK; - - /* Allocate the slot in the arg passing frame */ - _jit_alloc_param_slot(passing, param, param_type); - } - break; - } - } - return 1; -} - -static int -_jit_classify_struct_return(jit_param_passing_t *passing, - _jit_param_t *param, jit_type_t return_type) -{ - /* Initialize the param passing structure */ - jit_memset(passing, 0, sizeof(jit_param_passing_t)); - jit_memset(param, 0, sizeof(_jit_param_t)); - - passing->word_regs = _jit_word_return_regs; - passing->max_word_regs = _jit_num_word_return_regs; - passing->float_regs = _jit_sse_return_regs; - passing->max_float_regs = _jit_num_sse_return_regs; - - if(!(_jit_classify_struct(passing, param, return_type))) - { - return 0; - } - - return 1; -} - -/* - * Load a struct to the register(s) in which it will be returned. 
- */ -static unsigned char * -return_struct(unsigned char *inst, jit_function_t func, int ptr_reg) -{ - jit_type_t return_type; - jit_type_t signature = jit_function_get_signature(func); - - return_type = jit_type_get_return(signature); - if(is_struct_or_union(return_type)) - { - jit_nuint size; - jit_param_passing_t passing; - _jit_param_t return_param; - - if(!_jit_classify_struct_return(&passing, &return_param, - return_type)) - { - /* It's an error so simply return insn */ - return inst; - } - - size = jit_type_get_size(return_type); - if(size <= 8) - { - /* one register is used for returning the value */ - if(IS_GENERAL_REG(return_param.un.reg[0])) - { - int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; - - if(size <= 4) - { - x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 4); - } - else - { - x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 8); - } - } - else - { - int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; - - if(size <= 4) - { - x86_64_movss_reg_regp(inst, reg, ptr_reg); - } - else - { - x86_64_movsd_reg_regp(inst, reg, ptr_reg); - } - } - } - else - { - /* In this case we might need up to two registers */ - if(return_param.arg_class == 1) - { - /* This must be one xmm register */ - int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; - int alignment = jit_type_get_alignment(return_type); - - if((alignment & 0xf) == 0) - { - /* The type is aligned on a 16 byte boundary */ - x86_64_movaps_reg_regp(inst, reg, ptr_reg); - } - else - { - x86_64_movups_reg_regp(inst, reg, ptr_reg); - } - } - else - { - int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; - - if(IS_GENERAL_REG(return_param.un.reg[0])) - { - x86_64_mov_reg_regp_size(inst, reg, - ptr_reg, 8); - } - else - { - x86_64_movsd_reg_regp(inst, reg, ptr_reg); - } - size -= 8; - reg = _jit_reg_info[return_param.un.reg[1]].cpu_reg; - if(IS_GENERAL_REG(return_param.un.reg[1])) - { - if(size <= 4) - { - x86_64_mov_reg_membase_size(inst, reg, ptr_reg, - 8, 4); - } - else - { - 
x86_64_mov_reg_membase_size(inst, reg, ptr_reg, - 8, 8); - } - } - else - { - if(size <= 4) - { - x86_64_movss_reg_membase(inst, reg, - ptr_reg, 8); - } - else - { - x86_64_movsd_reg_membase(inst, reg, - ptr_reg, 8); - } - } - } - } - } - return inst; -} - -/* - * Flush a struct return value from the registers to the value - * on the stack. - */ -static unsigned char * -flush_return_struct(unsigned char *inst, jit_value_t value) -{ - jit_type_t return_type; - - return_type = jit_value_get_type(value); - if(is_struct_or_union(return_type)) - { - jit_nuint size; - jit_nint offset; - jit_param_passing_t passing; - _jit_param_t return_param; - - if(!_jit_classify_struct_return(&passing, &return_param, return_type)) - { - /* It's an error so simply return insn */ - return inst; - } - - return_param.value = value; - - _jit_gen_fix_value(value); - size = jit_type_get_size(return_type); - offset = value->frame_offset; - if(size <= 8) - { - /* one register is used for returning the value */ - if(IS_GENERAL_REG(return_param.un.reg[0])) - { - int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg; if(size <= 4) { @@ -2701,7 +2237,7 @@ flush_return_struct(unsigned char *inst, jit_value_t value) } else { - int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg; if(size <= 4) { @@ -2719,7 +2255,7 @@ flush_return_struct(unsigned char *inst, jit_value_t value) if(return_param.arg_class == 1) { /* This must be one xmm register */ - int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg; int alignment = jit_type_get_alignment(return_type); if((alignment & 0xf) == 0) @@ -2734,9 +2270,9 @@ flush_return_struct(unsigned char *inst, jit_value_t value) } else { - int reg = _jit_reg_info[return_param.un.reg[0]].cpu_reg; + int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg; 
- if(IS_GENERAL_REG(return_param.un.reg[0])) + if(IS_GENERAL_REG(return_param.un.reg_info[0].reg)) { x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, reg, 8); @@ -2746,8 +2282,8 @@ flush_return_struct(unsigned char *inst, jit_value_t value) x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, reg); } size -= 8; - reg = _jit_reg_info[return_param.un.reg[1]].cpu_reg; - if(IS_GENERAL_REG(return_param.un.reg[1])) + reg = _jit_reg_info[return_param.un.reg_info[1].reg].cpu_reg; + if(IS_GENERAL_REG(return_param.un.reg_info[1].reg)) { if(size <= 4) { @@ -2932,62 +2468,121 @@ push_param(jit_function_t func, _jit_param_t *param, jit_type_t type) } int -_jit_setup_incoming_param(jit_function_t func, _jit_param_t *param, - jit_type_t param_type) +_jit_setup_reg_param(jit_function_t func, _jit_param_t *param, + jit_type_t param_type) { - if(param->arg_class == JIT_ARG_CLASS_STACK) + if(param->arg_class == 1) { - /* The parameter is passed on the stack */ - if(!jit_insn_incoming_frame_posn - (func, param->value, param->un.offset)) - { - return 0; - } + param->un.reg_info[0].value = param->value; } - else + else if(param->arg_class == 2) { - param_type = jit_type_remove_tags(param_type); + jit_nint size = jit_type_get_size(param_type); + jit_value_t value_ptr; - switch(param_type->kind) + if(!(value_ptr = jit_insn_address_of(func, param->value))) { - case JIT_TYPE_STRUCT: - case JIT_TYPE_UNION: + return 0; + } + if(IS_GENERAL_REG(param->un.reg_info[0].reg)) + { + param->un.reg_info[0].value = + jit_insn_load_relative(func, value_ptr, 0, jit_type_long); + if(!(param->un.reg_info[0].value)) { - if(param->arg_class == 1) + return 0; + } + } + else + { + param->un.reg_info[0].value = + jit_insn_load_relative(func, value_ptr, 0, jit_type_float64); + if(!(param->un.reg_info[0].value)) + { + return 0; + } + } + size -= 8; + if(IS_GENERAL_REG(param->un.reg_info[1].reg)) + { + if(size <= 4) + { + param->un.reg_info[1].value = + jit_insn_load_relative(func, value_ptr, 8, jit_type_int); 
+ if(!(param->un.reg_info[1].value)) { - if(!jit_insn_incoming_reg(func, param->value, param->un.reg[0])) - { - return 0; - } + return 0; } - else + } + else + { + param->un.reg_info[1].value = + jit_insn_load_relative(func, value_ptr, 8, jit_type_long); + if(!(param->un.reg_info[1].value)) { - /* These cases have to be handled specially */ + return 0; } } - break; - - default: + } + else + { + if(size <= 4) { - if(!jit_insn_incoming_reg(func, param->value, param->un.reg[0])) + param->un.reg_info[1].value = + jit_insn_load_relative(func, value_ptr, 8, jit_type_float32); + if(!(param->un.reg_info[1].value)) + { + return 0; + } + } + else + { + param->un.reg_info[1].value = + jit_insn_load_relative(func, value_ptr, 8, jit_type_float64); + if(!(param->un.reg_info[1].value)) { return 0; } } - break; } } return 1; } int -_jit_setup_outgoing_param(jit_function_t func, _jit_param_t *param, +_jit_flush_incoming_struct(jit_function_t func, _jit_param_t *param, + jit_type_t param_type) +{ + if(param->arg_class == 2) + { + jit_value_t address; + + /* Now store the two values in place */ + if(!(address = jit_insn_address_of(func, param->value))) + { + return 0; + } + if(!jit_insn_store_relative(func, address, 0, param->un.reg_info[0].value)) + { + return 0; + } + if(!jit_insn_store_relative(func, address, 8, param->un.reg_info[1].value)) + { + return 0; + } + } + return 1; +} + +int +_jit_setup_incoming_param(jit_function_t func, _jit_param_t *param, jit_type_t param_type) { if(param->arg_class == JIT_ARG_CLASS_STACK) { /* The parameter is passed on the stack */ - if(!push_param(func, param, param_type)) + if(!jit_insn_incoming_frame_posn + (func, param->value, param->un.offset)) { return 0; } @@ -3001,115 +2596,50 @@ _jit_setup_outgoing_param(jit_function_t func, _jit_param_t *param, case JIT_TYPE_STRUCT: case JIT_TYPE_UNION: { - /* These cases have to be handled specially */ if(param->arg_class == 1) { - /* Only one xmm register is used for passing this argument */ - 
if(!jit_insn_outgoing_reg(func, param->value, param->un.reg[0])) + if(!jit_insn_incoming_reg(func, param->value, param->un.reg_info[0].reg)) { return 0; } } else { - /* We need two registers for passing the value */ - jit_nuint size = (jit_nuint)jit_type_get_size(param_type); - - jit_value_t struct_ptr; + /* These cases have to be handled specially */ + /* The struct is passed in two registers */ + jit_nuint size = jit_type_get_size(param_type); - if(!(struct_ptr = jit_insn_address_of(func, param->value))) - { - return 0; - } - if(IS_GENERAL_REG(param->un.reg[0])) + /* The first part is always a full eightbyte */ + if(IS_GENERAL_REG(param->un.reg_info[0].reg)) { - jit_value_t param_value; - - param_value = jit_insn_load_relative(func, struct_ptr, - 0, jit_type_ulong); - if(!param_value) - { - return 0; - } - if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0])) + if(!(param->un.reg_info[0].value = jit_value_create(func, jit_type_long))) { return 0; } } else { - jit_value_t param_value; - - param_value = jit_insn_load_relative(func, struct_ptr, - 0, jit_type_float64); - if(!param_value) - { - return 0; - } - if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0])) + if(!(param->un.reg_info[0].value = jit_value_create(func, jit_type_float64))) { return 0; } } size -= 8; - if(IS_GENERAL_REG(param->un.reg[1])) + /* The second part might be of any size <= 8 */ + if(IS_GENERAL_REG(param->un.reg_info[1].reg)) { - if(size == 1) - { - jit_value_t param_value; - - param_value = jit_insn_load_relative(func, struct_ptr, - 8, jit_type_ubyte); - if(!param_value) - { - return 0; - } - if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[1])) - { - return 0; - } - } - else if(size == 2) - { - jit_value_t param_value; - - param_value = jit_insn_load_relative(func, struct_ptr, - 8, jit_type_ushort); - if(!param_value) - { - return 0; - } - if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0])) - { - return 0; - } - } - else if(size <= 
4) { - jit_value_t param_value; - - param_value = jit_insn_load_relative(func, struct_ptr, - 8, jit_type_uint); - if(!param_value) - { - return 0; - } - if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0])) + if(!(param->un.reg_info[1].value = + jit_value_create(func, jit_type_int))) { return 0; } } else { - jit_value_t param_value; - - param_value = jit_insn_load_relative(func, struct_ptr, - 8, jit_type_ulong); - if(!param_value) - { - return 0; - } - if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0])) + if(!(param->un.reg_info[1].value = + jit_value_create(func, jit_type_long))) { return 0; } @@ -3119,42 +2649,40 @@ _jit_setup_outgoing_param(jit_function_t func, _jit_param_t *param, { if(size <= 4) { - jit_value_t param_value; - - param_value = jit_insn_load_relative(func, struct_ptr, - 8, jit_type_float32); - if(!param_value) - { - return 0; - } - if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0])) + if(!(param->un.reg_info[1].value = + jit_value_create(func, jit_type_float32))) { return 0; } } else { - jit_value_t param_value; - - param_value = jit_insn_load_relative(func, struct_ptr, - 8, jit_type_float64); - if(!param_value) - { - return 0; - } - if(!jit_insn_outgoing_reg(func, param_value, param->un.reg[0])) + if(!(param->un.reg_info[1].value = + jit_value_create(func, jit_type_float64))) { return 0; } } } + if(!jit_insn_incoming_reg(func, + param->un.reg_info[0].value, + param->un.reg_info[0].reg)) + { + return 0; + } + if(!jit_insn_incoming_reg(func, + param->un.reg_info[1].value, + param->un.reg_info[1].reg)) + { + return 0; + } } } break; default: { - if(!jit_insn_outgoing_reg(func, param->value, param->un.reg[0])) + if(!jit_insn_incoming_reg(func, param->value, param->un.reg_info[0].reg)) { return 0; } @@ -3165,6 +2693,37 @@ _jit_setup_outgoing_param(jit_function_t func, _jit_param_t *param, return 1; } +int +_jit_setup_outgoing_param(jit_function_t func, _jit_param_t *param, + jit_type_t param_type) +{ + 
if(param->arg_class == JIT_ARG_CLASS_STACK) + { + /* The parameter is passed on the stack */ + if(!push_param(func, param, param_type)) + { + return 0; + } + } + else + { + if(!jit_insn_outgoing_reg(func, param->un.reg_info[0].value, + param->un.reg_info[0].reg)) + { + return 0; + } + if(param->arg_class == 2) + { + if(!jit_insn_outgoing_reg(func, param->un.reg_info[1].value, + param->un.reg_info[1].reg)) + { + return 0; + } + } + } + return 1; +} + int _jit_setup_return_value(jit_function_t func, jit_value_t return_value, jit_type_t return_type) @@ -3186,7 +2745,7 @@ _jit_setup_return_value(jit_function_t func, jit_value_t return_value, if(return_param.arg_class == 1) { if(!jit_insn_return_reg(func, return_value, - return_param.un.reg[0])) + return_param.un.reg_info[0].reg)) { return 0; } @@ -3236,6 +2795,8 @@ _jit_init_args(int abi, jit_param_passing_t *passing) int _jit_create_entry_insns(jit_function_t func) { + jit_value_t value; + int has_struct_return = 0; jit_type_t signature = func->signature; int abi = jit_type_get_abi(signature); unsigned int num_args = jit_type_num_params(signature); @@ -3271,7 +2832,7 @@ _jit_create_entry_insns(jit_function_t func) } /* Allocate the structure return pointer */ - if(jit_value_get_struct_pointer(func)) + if((value = jit_value_get_struct_pointer(func))) { jit_memset(&struct_return_param, 0, sizeof(_jit_param_t)); if(!(_jit_classify_param(&passing, &struct_return_param, @@ -3279,6 +2840,8 @@ _jit_create_entry_insns(jit_function_t func) { return 0; } + struct_return_param.value = value; + has_struct_return = 1; } /* Let the backend classify the parameters */ @@ -3315,6 +2878,30 @@ _jit_create_entry_insns(jit_function_t func) } } + if(has_struct_return) + { + if(!_jit_setup_incoming_param(func, &struct_return_param, jit_type_void_ptr)) + { + return 0; + } + } + + /* Now we flush the incoming structs passed in registers */ + for(current_param = 0; current_param < num_args; current_param++) + { + 
if(param[current_param].arg_class != JIT_ARG_CLASS_STACK) + { + jit_type_t param_type; + + param_type = jit_type_get_param(signature, current_param); + if(!_jit_flush_incoming_struct(func, &(param[current_param]), + param_type)) + { + return 0; + } + } + } + return 1; } @@ -3372,12 +2959,12 @@ int _jit_create_call_setup_insns return 0; } jit_memset(&struct_return_param, 0, sizeof(_jit_param_t)); + struct_return_param.value = return_ptr; if(!(_jit_classify_param(&passing, &struct_return_param, jit_type_void_ptr))) { return 0; } - struct_return_param.value = return_ptr; } else { @@ -3425,25 +3012,95 @@ int _jit_create_call_setup_insns #endif /* Now setup the arguments on the stack or in the registers in reverse order */ + /* First process the params passed on the stack */ current_param = num_args; while(current_param > 0) { - jit_type_t param_type; + --current_param; + if(param[current_param].arg_class == JIT_ARG_CLASS_STACK) + { + jit_type_t param_type; + param_type = jit_type_get_param(signature, current_param); + if(!_jit_setup_outgoing_param(func, &(param[current_param]), param_type)) + { + return 0; + } + } + } + + /* Handle the structure return pointer if it's passed on the stack */ + if(return_ptr) + { + if(struct_return_param.arg_class == JIT_ARG_CLASS_STACK) + { + if(!_jit_setup_outgoing_param(func, &struct_return_param, + jit_type_void_ptr)) + { + return 0; + } + } + } + + /* Now setup the values passed in registers */ + current_param = num_args; + while(current_param > 0) + { --current_param; - param_type = jit_type_get_param(signature, current_param); - if(!_jit_setup_outgoing_param(func, &(param[current_param]), param_type)) + + if(param[current_param].arg_class != JIT_ARG_CLASS_STACK) { - return 0; + jit_type_t param_type; + + param_type = jit_type_get_param(signature, current_param); + if(!_jit_setup_reg_param(func, &(param[current_param]), param_type)) + { + return 0; + } + } + } + + /* Handle the structure return pointer if required */ + 
if(return_ptr) + { + if(struct_return_param.arg_class != JIT_ARG_CLASS_STACK) + { + if(!_jit_setup_reg_param(func, &struct_return_param, + jit_type_void_ptr)) + { + return 0; + } + } + } + + /* And finally assign the registers */ + current_param = num_args; + while(current_param > 0) + { + --current_param; + if(param[current_param].arg_class != JIT_ARG_CLASS_STACK) + { + jit_type_t param_type; + + param_type = jit_type_get_param(signature, current_param); + if(!_jit_setup_outgoing_param(func, &(param[current_param]), + param_type)) + { + return 0; + } } } /* Add the structure return pointer if required */ if(return_ptr) { - if(!_jit_setup_outgoing_param(func, &struct_return_param, return_type)) + if(struct_return_param.arg_class != JIT_ARG_CLASS_STACK) { - return 0; + if(!_jit_setup_outgoing_param(func, &struct_return_param, + jit_type_void_ptr)) + { + return 0; + } } } diff --git a/jit/jit-rules-x86-64.ins b/jit/jit-rules-x86-64.ins index cc04227..b0ae621 100644 --- a/jit/jit-rules-x86-64.ins +++ b/jit/jit-rules-x86-64.ins @@ -23,6 +23,7 @@ %regclass reg x86_64_reg %regclass creg x86_64_creg %regclass rreg x86_64_rreg +%regclass sreg x86_64_sreg %regclass freg x86_64_freg %regclass xreg x86_64_xreg @@ -93,7 +94,7 @@ JIT_OP_EXPAND_UINT: JIT_OP_NFLOAT_TO_INT: stack [=reg, freg] -> { /* allocate space on the stack for 2 shorts and 1 int */ - x86_64_sub_reg_imm_size(inst, X86_ESP, 8, 8); + x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); /* store FPU control word */ x86_64_fnstcw_membase(inst, X86_64_RSP, 0); /* set "round toward zero" mode */ @@ -177,6 +178,9 @@ JIT_OP_COPY_INT: copy [reg] -> {} JIT_OP_COPY_LONG: copy + [=local, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> { + x86_64_mov_membase_imm_size(inst, X86_64_RBP, $1, $2, 8); + } [reg] -> {} JIT_OP_COPY_FLOAT32: copy @@ -230,7 +234,7 @@ JIT_OP_PUSH_INT: note JIT_OP_PUSH_LONG: note [imm] -> { - if(($1 >= jit_min_int) && ($1 <= jit_max_int)) + if(($1 >= (jit_nint)jit_min_int) && ($1 <= 
(jit_nint)jit_max_int)) { x86_64_push_imm(inst, $1); } @@ -252,7 +256,7 @@ JIT_OP_PUSH_LONG: note gen->stack_changed = 1; } -JIT_OP_PUSH_FLOAT32: note, stack +JIT_OP_PUSH_FLOAT32: note [imm] -> { jit_int *ptr = (jit_int *)($1); x86_64_push_imm_size(inst, ptr[0], 4); @@ -267,13 +271,8 @@ JIT_OP_PUSH_FLOAT32: note, stack x86_64_movss_membase_reg(inst, X86_64_RSP, 0, $1); gen->stack_changed = 1; } - [freg] -> { - x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); - x86_64_fstp_membase_size(inst, X86_64_RSP, 0, 4); - gen->stack_changed = 1; - } -JIT_OP_PUSH_FLOAT64: note, stack +JIT_OP_PUSH_FLOAT64: note [imm] -> { jit_int *ptr = (jit_int *)($1); x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); @@ -290,11 +289,6 @@ JIT_OP_PUSH_FLOAT64: note, stack x86_64_movsd_membase_reg(inst, X86_64_RSP, 0, $1); gen->stack_changed = 1; } - [freg] -> { - x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8); - x86_64_fstp_membase_size(inst, X86_64_RSP, 0, 8); - gen->stack_changed = 1; - } JIT_OP_PUSH_NFLOAT: note, stack [imm] -> { @@ -541,7 +535,7 @@ JIT_OP_LOAD_RELATIVE_STRUCT: more_space jit_value_get_type(insn->dest), $4, $5); } [=frame, reg, imm, clobber(creg), clobber(xreg)] -> { - inst = memory_copy(gen, inst, X86_EBP, $1, $2, $3, + inst = memory_copy(gen, inst, X86_64_RBP, $1, $2, $3, jit_type_get_size(jit_value_get_type(insn->dest))); } @@ -612,7 +606,7 @@ JIT_OP_STORE_RELATIVE_INT: ternary } JIT_OP_STORE_RELATIVE_LONG: ternary - [reg, imm, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> { + [reg, imm, imm, if("$2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int")] -> { if($3 == 0) { x86_64_mov_regp_imm_size(inst, $1, $2, 8); @@ -683,8 +677,8 @@ JIT_OP_STORE_RELATIVE_STRUCT: ternary } JIT_OP_ADD_RELATIVE: - [reg, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> { - if(insn->value2->address != 0) + [reg, imm, if("$2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int")] -> { + if($2 != 0) { x86_64_add_reg_imm_size(inst, $1, $2, 8); } @@ -751,7 +745,7 @@ 
JIT_OP_STORE_ELEMENT_INT: ternary JIT_OP_STORE_ELEMENT_LONG: ternary [reg, reg, imm] -> { - if($3 >= jit_min_int && $3 <= jit_max_int) + if($3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int) { x86_64_mov_memindex_imm_size(inst, $1, 0, $2, 3, $3, 8); } @@ -831,7 +825,7 @@ JIT_OP_INEG: */ JIT_OP_LADD: commutative - [reg, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> { + [reg, imm, if("$2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int")] -> { if($2 == 1) { x86_64_inc_reg_size(inst, $1, 8); @@ -849,7 +843,7 @@ JIT_OP_LADD: commutative } JIT_OP_LSUB: - [reg, imm, if("$2 >= jit_min_int && $2 <= jit_max_int")] -> { + [reg, imm, if("$2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int")] -> { if($2 == 1) { x86_64_dec_reg_size(inst, $1, 8); @@ -1008,8 +1002,32 @@ JIT_OP_INOT: x86_64_not_reg_size(inst, $1, 4); } +JIT_OP_ISHL: + [reg, imm] -> { + x86_64_shl_reg_imm_size(inst, $1, ($2 & 0x1F), 4); + } + [sreg, reg("rcx")] -> { + x86_64_shl_reg_size(inst, $1, 4); + } + +JIT_OP_ISHR: + [reg, imm] -> { + x86_64_sar_reg_imm_size(inst, $1, ($2 & 0x1F), 4); + } + [sreg, reg("rcx")] -> { + x86_64_sar_reg_size(inst, $1, 4); + } + +JIT_OP_ISHR_UN: + [reg, imm] -> { + x86_64_shr_reg_imm_size(inst, $1, ($2 & 0x1F), 4); + } + [sreg, reg("rcx")] -> { + x86_64_shr_reg_size(inst, $1, 4); + } + JIT_OP_LAND: commutative - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_and_reg_imm_size(inst, $1, $2, 8); } [reg, local] -> { @@ -1020,7 +1038,7 @@ JIT_OP_LAND: commutative } JIT_OP_LOR: commutative - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_or_reg_imm_size(inst, $1, $2, 8); } [reg, local] -> { @@ -1031,7 +1049,7 @@ JIT_OP_LOR: commutative } JIT_OP_LXOR: commutative - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, 
if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_xor_reg_imm_size(inst, $1, $2, 8); } [reg, local] -> { @@ -1046,7 +1064,29 @@ JIT_OP_LNOT: x86_64_not_reg_size(inst, $1, 8); } +JIT_OP_LSHL: + [reg, imm] -> { + x86_64_shl_reg_imm_size(inst, $1, ($2 & 0x3F), 8); + } + [sreg, reg("rcx")] -> { + x86_64_shl_reg_size(inst, $1, 8); + } +JIT_OP_LSHR: + [reg, imm] -> { + x86_64_sar_reg_imm_size(inst, $1, ($2 & 0x3F), 8); + } + [sreg, reg("rcx")] -> { + x86_64_sar_reg_size(inst, $1, 8); + } + +JIT_OP_LSHR_UN: + [reg, imm] -> { + x86_64_shr_reg_imm_size(inst, $1, ($2 & 0x3F), 8); + } + [sreg, reg("rcx")] -> { + x86_64_shr_reg_size(inst, $1, 8); + } /* * Branch opcodes. @@ -1234,7 +1274,7 @@ JIT_OP_BR_LEQ: branch x86_64_or_reg_reg_size(inst, $1, $1, 8); inst = output_branch(func, inst, 0x74 /* eq */, insn); } - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x74 /* eq */, insn); } @@ -1252,7 +1292,7 @@ JIT_OP_BR_LNE: branch x86_64_or_reg_reg_size(inst, $1, $1, 8); inst = output_branch(func, inst, 0x75 /* ne */, insn); } - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x75 /* ne */, insn); } @@ -1266,7 +1306,7 @@ JIT_OP_BR_LNE: branch } JIT_OP_BR_LLT: branch - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x7C /* lt */, insn); } @@ -1280,7 +1320,7 @@ JIT_OP_BR_LLT: branch } JIT_OP_BR_LLT_UN: branch - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= 
(jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x72 /* lt_un */, insn); } @@ -1294,7 +1334,7 @@ JIT_OP_BR_LLT_UN: branch } JIT_OP_BR_LLE: branch - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x7E /* le */, insn); } @@ -1308,7 +1348,7 @@ JIT_OP_BR_LLE: branch } JIT_OP_BR_LLE_UN: branch - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x76 /* le_un */, insn); } @@ -1322,7 +1362,7 @@ JIT_OP_BR_LLE_UN: branch } JIT_OP_BR_LGT: branch - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x7F /* gt */, insn); } @@ -1336,7 +1376,7 @@ JIT_OP_BR_LGT: branch } JIT_OP_BR_LGT_UN: branch - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x77 /* gt_un */, insn); } @@ -1350,7 +1390,7 @@ JIT_OP_BR_LGT_UN: branch } JIT_OP_BR_LGE: branch - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x7D /* ge */, insn); } @@ -1364,7 +1404,7 @@ JIT_OP_BR_LGE: branch } JIT_OP_BR_LGE_UN: branch - [reg, imm, if("($2 >= jit_min_int && $2 <= jit_max_int)")] -> { + [reg, imm, if("($2 >= (jit_nint)jit_min_int && $2 <= (jit_nint)jit_max_int)")] -> { 
x86_64_cmp_reg_imm_size(inst, $1, $2, 8); inst = output_branch(func, inst, 0x73 /* ge_un */, insn); } @@ -1534,7 +1574,7 @@ JIT_OP_LEQ: x86_64_or_reg_reg_size(inst, $2, $2, 8); inst = setcc_reg(inst, $1, X86_CC_EQ, 0); } - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_EQ, 0); } @@ -1552,7 +1592,7 @@ JIT_OP_LNE: x86_64_or_reg_reg_size(inst, $2, $2, 8); inst = setcc_reg(inst, $1, X86_CC_NE, 0); } - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_NE, 0); } @@ -1566,7 +1606,7 @@ JIT_OP_LNE: } JIT_OP_LLT: - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_LT, 1); } @@ -1580,7 +1620,7 @@ JIT_OP_LLT: } JIT_OP_LLT_UN: - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_LT, 0); } @@ -1594,7 +1634,7 @@ JIT_OP_LLT_UN: } JIT_OP_LLE: - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_LE, 1); } @@ -1608,7 +1648,7 @@ JIT_OP_LLE: } JIT_OP_LLE_UN: - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, 
X86_CC_LE, 0); } @@ -1622,7 +1662,7 @@ JIT_OP_LLE_UN: } JIT_OP_LGT: - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_GT, 1); } @@ -1636,7 +1676,7 @@ JIT_OP_LGT: } JIT_OP_LGT_UN: - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_GT, 0); } @@ -1650,7 +1690,7 @@ JIT_OP_LGT_UN: } JIT_OP_LGE: - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_GE, 1); } @@ -1664,7 +1704,7 @@ JIT_OP_LGE: } JIT_OP_LGE_UN: - [=reg, reg, imm, if("$3 >= jit_min_int && $3 <= jit_max_int")] -> { + [=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> { x86_64_cmp_reg_imm_size(inst, $2, $3, 8); inst = setcc_reg(inst, $1, X86_CC_GE, 0); } @@ -1873,6 +1913,14 @@ JIT_OP_MEMCPY: ternary [reg, reg, imm, clobber(creg), clobber(xreg)] -> { inst = memory_copy(gen, inst, $1, 0, $2, 0, $3); } + [reg("rdi"), reg("rsi"), reg("rdx"), clobber(creg), clobber(xreg)] -> { + inst = x86_64_call_code(inst, (jit_nint)jit_memcpy); + } + +JIT_OP_MEMSET: ternary + [reg("rdi"), reg("rsi"), reg("rdx"), clobber(creg), clobber(xreg)] -> { + inst = x86_64_call_code(inst, (jit_nint)jit_memset); + } JIT_OP_JUMP_TABLE: ternary, branch [reg, imm, imm, scratch reg, space("64")] -> {