From a064ef75497dd6f8dc9aecbf61177d5b9565cd84 Mon Sep 17 00:00:00 2001
From: Rhys Weatherley
Date: Mon, 31 May 2004 10:50:09 +0000
Subject: [PATCH] Inline and optimize divisions for x86.

---
 ChangeLog               |   3 +
 jit/jit-rules-x86.c     |  29 +++++
 jit/jit-rules-x86.sel   | 240 ++++++++++++++++++++++++++++++++++++++++
 tools/gen-sel-parser.y  |  11 +-
 tools/gen-sel-scanner.l |   1 +
 5 files changed, 281 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 8133f9d..55cf553 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -3,6 +3,9 @@
 
 	* jit/jit-rules-x86.sel: optimize multiplications for x86.
 
+	* jit/jit-rules-x86.c, jit/jit-rules-x86.sel, tools/gen-sel-parser.y,
+	tools/gen-sel-scanner.l: inline and optimize divisions for x86.
+
 2004-05-30  Rhys Weatherley
 
 	* doc/libjit.texi: clarify the text that describes LLVM, at the
diff --git a/jit/jit-rules-x86.c b/jit/jit-rules-x86.c
index 745c7c7..5ef3de9 100644
--- a/jit/jit-rules-x86.c
+++ b/jit/jit-rules-x86.c
@@ -26,6 +26,7 @@
 
 #include "jit-gen-x86.h"
 #include "jit-reg-alloc.h"
+#include "jit-setjmp.h"
 #include
 
 /*
@@ -1409,6 +1410,34 @@ static unsigned char *mov_membase_reg_byte
 	return inst;
 }
 
+/*
+ * Throw a builtin exception.  Outputs code at "inst" that calls
+ * "jit_exception_builtin" with "type" as its argument, and returns
+ * the updated output position.
+ */
+static unsigned char *throw_builtin
+		(unsigned char *inst, jit_function_t func, int type)
+{
+	/* We need to update "catch_pc" if we have a "try" block */
+	if(func->builder->setjmp_value != 0)
+	{
+		_jit_gen_fix_value(func->builder->setjmp_value);
+		/* A call with a zero displacement pushes the address of the
+		   next instruction, which is then popped into "catch_pc" */
+		x86_call_imm(inst, 0);
+		x86_pop_membase(inst, X86_EBP,
+						func->builder->setjmp_value->frame_offset +
+						jit_jmp_catch_pc_offset);
+	}
+
+	/* Push the exception type onto the stack */
+	x86_push_imm(inst, type);
+
+	/* Call the "jit_exception_builtin" function, which will never return */
+	x86_call_code(inst, jit_exception_builtin);
+	return inst;
+}
+
 #define	TODO()		\
 	do { \
 		fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \
diff --git a/jit/jit-rules-x86.sel b/jit/jit-rules-x86.sel
index cdc8e06..f40003d 100644
--- a/jit/jit-rules-x86.sel
+++ b/jit/jit-rules-x86.sel
@@ -350,6 +350,246 @@ JIT_OP_IMUL: binary
 		x86_imul_reg_reg(inst, $1, $2);
 	}
 
+/* Spill before division to ensure that the arguments end up in
+   EAX and ECX, and that EDX is free */
+JIT_OP_IDIV: binary, spill_before, more_space
+	[reg, imm] -> {
+		switch($2)
+		{
+			case 0:
+			{
+				inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+			}
+			break;
+
+			case 1: break;
+
+			case -1:
+			{
+				/* Dividing by -1 gives an exception if the argument
+				   is minint, or simply negates for other values */
+				unsigned char *patch;
+				x86_alu_reg_imm(inst, X86_CMP, $1, jit_min_int);
+				/* Mark the branch, not the compare, for patching */
+				patch = inst;
+				x86_branch8(inst, X86_CC_NE, 0, 0);
+				inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
+				x86_patch(patch, inst);
+				x86_neg_reg(inst, $1);
+			}
+			break;
+
+			default:
+			{
+				jit_nint divisor = $2;
+				if(divisor > 0 && (divisor & (divisor - 1)) == 0)
+				{
+					/* Positive power of two: a plain arithmetic shift
+					   rounds towards minus infinity, so bias negative
+					   dividends by (divisor - 1) to round towards zero */
+					int shift = 0;
+					while((divisor >> shift) != 1)
+					{
+						++shift;
+					}
+					x86_mov_reg_reg(inst, X86_EDX, $1, 4);
+					x86_shift_reg_imm(inst, X86_SAR, X86_EDX, 31);
+					x86_shift_reg_imm(inst, X86_SHR, X86_EDX, 32 - shift);
+					x86_alu_reg_reg(inst, X86_ADD, $1, X86_EDX);
+					x86_shift_reg_imm(inst, X86_SAR, $1, shift);
+				}
+				else
+				{
+					x86_mov_reg_imm(inst, X86_ECX, $2);
+					x86_cdq(inst);
+					x86_div_reg(inst, X86_ECX, 1);
+				}
+			}
+			break;
+		}
+	}
+	[reg, reg] -> {
+		unsigned char *patch, *patch2;
+		/* Throw if the divisor is zero */
+		x86_alu_reg_reg(inst, X86_OR, $2, $2);
+		patch = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+		x86_patch(patch, inst);
+		/* Throw if dividing minint by -1, both branches skip the throw */
+		x86_alu_reg_imm(inst, X86_CMP, $2, -1);
+		patch = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		x86_alu_reg_imm(inst, X86_CMP, $1, jit_min_int);
+		patch2 = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
+		x86_patch(patch, inst);
+		x86_patch(patch2, inst);
+		x86_cdq(inst);
+		x86_div_reg(inst, $2, 1);
+	}
+
+JIT_OP_IDIV_UN: binary, spill_before, more_space
+	[reg, imm] -> {
+		switch($2)
+		{
+			case 0:
+			{
+				inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+			}
+			break;
+
+			case 1: break;
+
+			default:
+			{
+				unsigned int divisor = (unsigned int)($2);
+				if((divisor & (divisor - 1)) == 0)
+				{
+					/* Power of two: a logical right shift is exact */
+					int shift = 0;
+					while((divisor >> shift) != 1)
+					{
+						++shift;
+					}
+					x86_shift_reg_imm(inst, X86_SHR, $1, shift);
+				}
+				else
+				{
+					x86_mov_reg_imm(inst, X86_ECX, $2);
+					x86_clear_reg(inst, X86_EDX);
+					x86_div_reg(inst, X86_ECX, 0);
+				}
+			}
+			break;
+		}
+	}
+	[reg, reg] -> {
+		unsigned char *patch;
+		/* Throw if the divisor is zero */
+		x86_alu_reg_reg(inst, X86_OR, $2, $2);
+		patch = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+		x86_patch(patch, inst);
+		x86_clear_reg(inst, X86_EDX);
+		x86_div_reg(inst, $2, 0);
+	}
+
+JIT_OP_IREM: binary, spill_before, more_space
+	[reg, imm] -> {
+		switch($2)
+		{
+			case 0:
+			{
+				inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+			}
+			break;
+
+			case 1:
+			{
+				x86_clear_reg(inst, $1);
+			}
+			break;
+
+			case -1:
+			{
+				/* Dividing by -1 gives an exception if the argument
+				   is minint, or simply gives a remainder of zero */
+				unsigned char *patch;
+				x86_alu_reg_imm(inst, X86_CMP, $1, jit_min_int);
+				/* Mark the branch, not the compare, for patching */
+				patch = inst;
+				x86_branch8(inst, X86_CC_NE, 0, 0);
+				inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
+				x86_patch(patch, inst);
+				x86_clear_reg(inst, $1);
+			}
+			break;
+
+			default:
+			{
+				x86_mov_reg_imm(inst, X86_ECX, $2);
+				x86_cdq(inst);
+				x86_div_reg(inst, X86_ECX, 1);
+				/* TODO: rearrange register assignments to avoid the move */
+				x86_mov_reg_reg(inst, X86_EAX, X86_EDX, 4);
+			}
+			break;
+		}
+	}
+	[reg, reg] -> {
+		unsigned char *patch, *patch2;
+		/* Throw if the divisor is zero */
+		x86_alu_reg_reg(inst, X86_OR, $2, $2);
+		patch = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+		x86_patch(patch, inst);
+		/* Throw for minint with a divisor of -1 */
+		x86_alu_reg_imm(inst, X86_CMP, $2, -1);
+		patch = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		x86_alu_reg_imm(inst, X86_CMP, $1, jit_min_int);
+		patch2 = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
+		x86_patch(patch, inst);
+		x86_patch(patch2, inst);
+		x86_cdq(inst);
+		x86_div_reg(inst, $2, 1);
+		x86_mov_reg_reg(inst, X86_EAX, X86_EDX, 4);
+	}
+
+JIT_OP_IREM_UN: binary, spill_before, more_space
+	[reg, imm] -> {
+		switch($2)
+		{
+			case 0:
+			{
+				inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+			}
+			break;
+
+			case 1:
+			{
+				x86_clear_reg(inst, $1);
+			}
+			break;
+
+			default:
+			{
+				unsigned int divisor = (unsigned int)($2);
+				if((divisor & (divisor - 1)) == 0)
+				{
+					/* Power of two: keep only the low bits */
+					x86_alu_reg_imm(inst, X86_AND, $1, (int)(divisor - 1));
+				}
+				else
+				{
+					x86_mov_reg_imm(inst, X86_ECX, $2);
+					x86_clear_reg(inst, X86_EDX);
+					x86_div_reg(inst, X86_ECX, 0);
+					x86_mov_reg_reg(inst, X86_EAX, X86_EDX, 4);
+				}
+			}
+			break;
+		}
+	}
+	[reg, reg] -> {
+		unsigned char *patch;
+		/* Throw if the divisor is zero */
+		x86_alu_reg_reg(inst, X86_OR, $2, $2);
+		patch = inst;
+		x86_branch8(inst, X86_CC_NE, 0, 0);
+		inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+		x86_patch(patch, inst);
+		x86_clear_reg(inst, X86_EDX);
+		x86_div_reg(inst, $2, 0);
+		x86_mov_reg_reg(inst, X86_EAX, X86_EDX, 4);
+	}
+
 JIT_OP_INEG: unary
 	[reg] -> {
 		x86_neg_reg(inst, $1);
diff --git a/tools/gen-sel-parser.y b/tools/gen-sel-parser.y
index 5b44506..7c70215 100644
--- a/tools/gen-sel-parser.y
+++ b/tools/gen-sel-parser.y
@@ -59,6 +59,7 @@ static char *gensel_inst_type = "unsigned char *";
  * Amount of space to reserve for the primary instruction output.
  */
 static int gensel_reserve_space = 32;
+static int gensel_reserve_more_space = 128;
 
 /*
  * First register in a stack arrangement.
@@ -79,6 +80,7 @@ static int gensel_first_stack_reg = 8; /* st0 under x86 */
 #define	GENSEL_OPT_MANUAL			0x0100
 #define	GENSEL_OPT_UNARY_NOTE		0x0200
 #define	GENSEL_OPT_BINARY_NOTE		0x0400
+#define	GENSEL_OPT_MORE_SPACE		0x0800
 
 /*
  * Pattern values.
@@ -267,12 +269,13 @@ static void gensel_output_clause_code(gensel_clause_t clause)
 /*
  * Output a single clause for a rule.
  */
-static void gensel_output_clause(gensel_clause_t clause)
+static void gensel_output_clause(gensel_clause_t clause, int options)
 {
 	/* Cache the instruction pointer into "inst" */
 	printf("\t\tinst = (%s)(gen->posn.ptr);\n", gensel_inst_type);
 	printf("\t\tif(!jit_cache_check_for_n(&(gen->posn), %d))\n",
-		   gensel_reserve_space);
+		   (((options & GENSEL_OPT_MORE_SPACE) == 0)
+		   		? gensel_reserve_space : gensel_reserve_more_space));
 	printf("\t\t{\n");
 	printf("\t\t\tjit_cache_mark_full(&(gen->posn));\n");
 	printf("\t\t\treturn;\n");
@@ -587,7 +590,7 @@ static void gensel_output_clauses(gensel_clause_t clauses, int options)
 			/* Spill all other registers back to their original positions */
 			printf("\t\t_jit_regs_spill_all(gen);\n");
 		}
-		gensel_output_clause(clause);
+		gensel_output_clause(clause, options);
 		printf("\t}\n");
 		first = 0;
 		clause = clause->next;
@@ -681,6 +684,7 @@ static void gensel_output_supported(void)
 %token K_TERNARY			"`ternary'"
 %token K_STACK				"`stack'"
 %token K_ONLY				"`only'"
+%token K_MORE_SPACE			"`more_space'"
 %token K_MANUAL				"`manual'"
 %token K_INST_TYPE			"`%inst_type'"
 
@@ -764,6 +768,7 @@ Option
 	| K_TERNARY					{ $$ = GENSEL_OPT_TERNARY; }
 	| K_STACK					{ $$ = GENSEL_OPT_STACK; }
 	| K_ONLY					{ $$ = GENSEL_OPT_ONLY; }
+	| K_MORE_SPACE				{ $$ = GENSEL_OPT_MORE_SPACE; }
 	| K_MANUAL					{ $$ = GENSEL_OPT_MANUAL; }
 	;
 
diff --git a/tools/gen-sel-scanner.l b/tools/gen-sel-scanner.l
index 5a9320f..374a533 100644
--- a/tools/gen-sel-scanner.l
+++ b/tools/gen-sel-scanner.l
@@ -103,6 +103,7 @@ WHITE					[ \t\v\r\f]
 "manual"				{ RETURNTOK(K_MANUAL); }
 "stack"					{ RETURNTOK(K_STACK); }
 "only"					{ RETURNTOK(K_ONLY); }
+"more_space"			{ RETURNTOK(K_MORE_SPACE); }
 "%inst_type"			{ RETURNTOK(K_INST_TYPE); }
 
 {IDALPHA}({DIGIT}|{IDALPHA})*	{
-- 
2.47.3