git.unchartedbackwaters.co.uk Git - francis/libjit.git/commitdiff
Inline and optimize divisions for x86.
author Rhys Weatherley <rweather@southern-storm.com.au>
Mon, 31 May 2004 10:50:09 +0000 (10:50 +0000)
committer Rhys Weatherley <rweather@southern-storm.com.au>
Mon, 31 May 2004 10:50:09 +0000 (10:50 +0000)
ChangeLog
jit/jit-rules-x86.c
jit/jit-rules-x86.sel
tools/gen-sel-parser.y
tools/gen-sel-scanner.l

index 8133f9d9e54f584eb1ba9494ee317065014bf443..55cf553935b919b99999de664b674d8e7c57fcec 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -3,6 +3,9 @@
 
        * jit/jit-rules-x86.sel: optimize multiplications for x86.
 
+       * jit/jit-rules-x86.c, jit/jit-rules-x86.sel, tools/gen-sel-parser.y,
+       tools/gen-sel-scanner.l: inline and optimize divisions for x86.
+
 2004-05-30  Rhys Weatherley  <rweather@southern-storm.com.au>
 
        * doc/libjit.texi: clarify the text that describes LLVM, at the
index 745c7c7ed7ffedec4f904351d87f7661745f6f0d..5ef3de97558b05a6560823fbd1090e3534d71d46 100644 (file)
@@ -26,6 +26,7 @@
 
 #include "jit-gen-x86.h"
 #include "jit-reg-alloc.h"
+#include "jit-setjmp.h"
 #include <stdio.h>
 
 /*
@@ -1409,6 +1410,30 @@ static unsigned char *mov_membase_reg_byte
        return inst;
 }
 
+/*
+ * Throw a builtin exception.
+ *
+ * Emits code at "inst" that pushes the builtin exception code "type"
+ * and then calls "jit_exception_builtin".  When the function being
+ * built has a non-zero "setjmp_value", extra code is emitted first so
+ * that a program counter is stored into the "catch_pc" slot located
+ * relative to that value's frame offset.
+ *
+ * Returns the output pointer after the emitted instructions.
+ */
+static unsigned char *throw_builtin
+               (unsigned char *inst, jit_function_t func, int type)
+{
+       /* We need to update "catch_pc" if we have a "try" block.
+          NOTE(review): the zero-displacement call presumably pushes the
+          address of the instruction that follows it, and the pop then
+          stores that address at EBP + frame_offset + catch_pc offset;
+          confirm against the x86 code generation macros */
+       if(func->builder->setjmp_value != 0)
+       {
+               _jit_gen_fix_value(func->builder->setjmp_value);
+               x86_call_imm(inst, 0);
+               x86_pop_membase(inst, X86_EBP,
+                                               func->builder->setjmp_value->frame_offset +
+                                               jit_jmp_catch_pc_offset);
+       }
+
+       /* Push the exception type onto the stack */
+       x86_push_imm(inst, type);
+
+       /* Call the "jit_exception_builtin" function, which will never return */
+       x86_call_code(inst, jit_exception_builtin);
+       return inst; /* hand the advanced output pointer back to the caller */
+}
+
 #define        TODO()          \
        do { \
                fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \
index cdc8e069c02f0059c51f5f7d3b274c141c3dbaab..f40003d171ed4e32ed0c84c4e80d798114d6535d 100644 (file)
@@ -350,6 +350,601 @@ JIT_OP_IMUL: binary
                x86_imul_reg_reg(inst, $1, $2);
        }
 
+/* Spill before division to ensure that the arguments end up in
+   EAX and ECX, and that EDX is free.
+
+   Signed 32-bit division.  A constant divisor is specialised when the
+   rule is expanded: 0 always throws, 1 emits nothing, -1 negates after
+   a minint check, and a positive power of two becomes a biased
+   arithmetic shift.  Every other constant (including minint) and every
+   register divisor goes through "cdq" followed by the divide that
+   "x86_div_reg" emits when its last argument is 1.  The result is left
+   in the first operand register. */
+JIT_OP_IDIV: binary, spill_before, more_space
+       [reg, imm] -> {
+               switch($2)
+               {
+                       case 0:
+                       {
+                               /* A constant zero divisor always throws */
+                               inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+                       }
+                       break;
+
+                       case 1: break;
+
+                       case -1:
+                       {
+                               /* Dividing by -1 gives an exception if the argument
+                                  is minint, or simply negates for other values */
+                               unsigned char *patch;
+                               x86_alu_reg_imm(inst, X86_CMP, $1, jit_min_int);
+                               /* Record the branch itself, not the compare before it,
+                                  so that the patch below rewrites the right bytes */
+                               patch = inst;
+                               x86_branch8(inst, X86_CC_NE, 0, 0);
+                               inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
+                               x86_patch(patch, inst);
+                               x86_neg_reg(inst, $1);
+                       }
+                       break;
+
+                       default:
+                       {
+                               if($2 > 1 && ($2 & ($2 - 1)) == 0)
+                               {
+                                       /* Positive power of two.  A bare arithmetic shift
+                                          rounds towards negative infinity (-1 / 2 would
+                                          give -1), whereas the divide instruction used for
+                                          other divisors truncates towards zero.  Add
+                                          (divisor - 1) to negative dividends first so that
+                                          both paths agree.  EDX is the scratch register:
+                                          the rule spills to keep it free, and the generic
+                                          path below already overwrites it with "cdq" */
+                                       int shift = 0;
+                                       jit_nint temp = $2 >> 1;
+                                       while(temp != 0)
+                                       {
+                                               ++shift;
+                                               temp >>= 1;
+                                       }
+                                       /* EDX = all ones if the dividend is negative, else 0 */
+                                       x86_mov_reg_reg(inst, X86_EDX, $1, 4);
+                                       x86_shift_reg_imm(inst, X86_SAR, X86_EDX, 31);
+                                       /* EDX = divisor - 1 if the dividend is negative, else 0 */
+                                       x86_shift_reg_imm(inst, X86_SHR, X86_EDX, 32 - shift);
+                                       x86_alu_reg_reg(inst, X86_ADD, $1, X86_EDX);
+                                       x86_shift_reg_imm(inst, X86_SAR, $1, shift);
+                               }
+                               else
+                               {
+                                       /* General constant, including minint: dividing by
+                                          minint must give 1 or 0, which a shift by 31
+                                          cannot produce, so use the real divide */
+                                       x86_mov_reg_imm(inst, X86_ECX, $2);
+                                       x86_cdq(inst);
+                                       x86_div_reg(inst, X86_ECX, 1);
+                               }
+                       }
+                       break;
+               }
+       }
+       [reg, reg] -> {
+               unsigned char *patch, *patch2;
+               /* Test the divisor against zero and throw if it is */
+               x86_alu_reg_reg(inst, X86_OR, $2, $2);
+               patch = inst;
+               x86_branch8(inst, X86_CC_NE, 0, 0);
+               inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+               x86_patch(patch, inst);
+               /* Throw an arithmetic exception only for minint / -1; either
+                  failed comparison branches straight to the divide */
+               x86_alu_reg_imm(inst, X86_CMP, $2, -1);
+               patch = inst;
+               x86_branch8(inst, X86_CC_NE, 0, 0);
+               x86_alu_reg_imm(inst, X86_CMP, $1, jit_min_int);
+               patch2 = inst;
+               x86_branch8(inst, X86_CC_NE, 0, 0);
+               inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
+               x86_patch(patch, inst);
+               x86_patch(patch2, inst);
+               x86_cdq(inst);
+               x86_div_reg(inst, $2, 1);
+       }
+
+/* Unsigned 32-bit division.  A constant divisor of 0 always throws, 1
+   emits nothing, and a power of two from 2 up to 0x80000000 becomes a
+   single logical right shift.  Everything else clears EDX and uses the
+   divide that "x86_div_reg" emits when its last argument is 0. */
+JIT_OP_IDIV_UN: binary, spill_before, more_space
+       [reg, imm] -> {
+               if($2 == 0)
+               {
+                       inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+               }
+               else if($2 != 1)
+               {
+                       /* Look for the shift count whose power of two equals the
+                          divisor; the count reaches 32 when there is none */
+                       int shift = 1;
+                       while(shift < 32 && $2 != (jit_nint)(1u << shift))
+                       {
+                               ++shift;
+                       }
+                       if(shift < 32)
+                       {
+                               x86_shift_reg_imm(inst, X86_SHR, $1, shift);
+                       }
+                       else
+                       {
+                               x86_mov_reg_imm(inst, X86_ECX, $2);
+                               x86_clear_reg(inst, X86_EDX);
+                               x86_div_reg(inst, X86_ECX, 0);
+                       }
+               }
+       }
+       [reg, reg] -> {
+               unsigned char *skip_throw;
+               /* Test the divisor against zero; a non-zero divisor branches
+                  over the throw sequence */
+               x86_alu_reg_reg(inst, X86_OR, $2, $2);
+               skip_throw = inst;
+               x86_branch8(inst, X86_CC_NE, 0, 0);
+               inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+               x86_patch(skip_throw, inst);
+               x86_clear_reg(inst, X86_EDX);
+               x86_div_reg(inst, $2, 0);
+       }
+
+/* Signed 32-bit remainder.  A constant divisor of 0 always throws; 1
+   and -1 give a remainder of zero (with a minint check for -1).  All
+   other divisors use "cdq" and the divide, then copy EDX into EAX. */
+JIT_OP_IREM: binary, spill_before, more_space
+       [reg, imm] -> {
+               switch($2)
+               {
+                       case 0:
+                       {
+                               inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+                       }
+                       break;
+
+                       case 1:
+                       {
+                               x86_clear_reg(inst, $1);
+                       }
+                       break;
+
+                       case -1:
+                       {
+                               /* Dividing by -1 gives an exception if the argument
+                                  is minint, or simply gives a remainder of zero */
+                               unsigned char *patch;
+                               x86_alu_reg_imm(inst, X86_CMP, $1, jit_min_int);
+                               /* Record the branch itself, not the compare before it,
+                                  so that the patch below rewrites the right bytes */
+                               patch = inst;
+                               x86_branch8(inst, X86_CC_NE, 0, 0);
+                               inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
+                               x86_patch(patch, inst);
+                               x86_clear_reg(inst, $1);
+                       }
+                       break;
+
+                       default:
+                       {
+                               x86_mov_reg_imm(inst, X86_ECX, $2);
+                               x86_cdq(inst);
+                               x86_div_reg(inst, X86_ECX, 1);
+                               /* TODO: rearrange register assignments to avoid the move */
+                               x86_mov_reg_reg(inst, X86_EAX, X86_EDX, 4);
+                       }
+                       break;
+               }
+       }
+       [reg, reg] -> {
+               unsigned char *patch, *patch2;
+               /* Test the divisor against zero and throw if it is */
+               x86_alu_reg_reg(inst, X86_OR, $2, $2);
+               patch = inst;
+               x86_branch8(inst, X86_CC_NE, 0, 0);
+               inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+               x86_patch(patch, inst);
+               /* Throw an arithmetic exception only for minint with a divisor
+                  of -1; either failed comparison branches to the divide */
+               x86_alu_reg_imm(inst, X86_CMP, $2, -1);
+               patch = inst;
+               x86_branch8(inst, X86_CC_NE, 0, 0);
+               x86_alu_reg_imm(inst, X86_CMP, $1, jit_min_int);
+               patch2 = inst;
+               x86_branch8(inst, X86_CC_NE, 0, 0);
+               inst = throw_builtin(inst, func, JIT_RESULT_ARITHMETIC);
+               x86_patch(patch, inst);
+               x86_patch(patch2, inst);
+               x86_cdq(inst);
+               x86_div_reg(inst, $2, 1);
+               /* Copy EDX over EAX so the result lands in EAX */
+               x86_mov_reg_reg(inst, X86_EAX, X86_EDX, 4);
+       }
+
+/* Unsigned 32-bit remainder.  A constant divisor of 0 always throws, 1
+   gives zero, and a power of two is reduced to a bit mask.  All other
+   divisors clear EDX, divide, and copy EDX into EAX. */
+JIT_OP_IREM_UN: binary, spill_before, more_space
+       [reg, imm] -> {
+               switch($2)
+               {
+                       case 0:
+                       {
+                               inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+                       }
+                       break;
+
+                       case 1:
+                       {
+                               x86_clear_reg(inst, $1);
+                       }
+                       break;
+
+                       case 2:
+                       case 4:
+                       case 8:
+                       case 16:
+                       case 32:
+                       case 64:
+                       case 128:
+                       case 256:
+                       case 512:
+                       case 1024:
+                       case 2048:
+                       case 4096:
+                       case 8192:
+                       case 16384:
+                       case 32768:
+                       case 65536:
+                       case 0x00020000:
+                       case 0x00040000:
+                       case 0x00080000:
+                       case 0x00100000:
+                       case 0x00200000:
+                       case 0x00400000:
+                       case 0x00800000:
+                       case 0x01000000:
+                       case 0x02000000:
+                       case 0x04000000:
+                       case 0x08000000:
+                       case 0x10000000:
+                       case 0x20000000:
+                       case 0x40000000:
+                       {
+                               /* Remainder by a power of two keeps the low bits */
+                               x86_alu_reg_imm(inst, X86_AND, $1, $2 - 1);
+                       }
+                       break;
+
+                       case (jit_nint)0x80000000:
+                       {
+                               /* Handled apart from the cases above: subtracting one
+                                  from this value would overflow if the immediate is a
+                                  signed 32-bit quantity (assumed from the jit_nint cast
+                                  on the label), so spell the mask out instead */
+                               x86_alu_reg_imm(inst, X86_AND, $1, 0x7FFFFFFF);
+                       }
+                       break;
+
+                       default:
+                       {
+                               x86_mov_reg_imm(inst, X86_ECX, $2);
+                               x86_clear_reg(inst, X86_EDX);
+                               x86_div_reg(inst, X86_ECX, 0);
+                               /* Copy EDX over EAX so the result lands in EAX */
+                               x86_mov_reg_reg(inst, X86_EAX, X86_EDX, 4);
+                       }
+                       break;
+               }
+       }
+       [reg, reg] -> {
+               unsigned char *patch;
+               /* Test the divisor against zero; a non-zero divisor branches
+                  over the throw sequence */
+               x86_alu_reg_reg(inst, X86_OR, $2, $2);
+               patch = inst;
+               x86_branch8(inst, X86_CC_NE, 0, 0);
+               inst = throw_builtin(inst, func, JIT_RESULT_DIVISION_BY_ZERO);
+               x86_patch(patch, inst);
+               x86_clear_reg(inst, X86_EDX);
+               x86_div_reg(inst, $2, 0);
+               x86_mov_reg_reg(inst, X86_EAX, X86_EDX, 4);
+       }
+
 JIT_OP_INEG: unary
        [reg] -> {
                x86_neg_reg(inst, $1);
index 5b445069b05503d0c1eafe5764ce58dc86c57b75..7c702158c0dfc08ba3c6c12a46c5073aad5259e7 100644 (file)
@@ -59,6 +59,7 @@ static char *gensel_inst_type = "unsigned char *";
  * Amount of space to reserve for the primary instruction output.
  */
 static int gensel_reserve_space = 32;
+static int gensel_reserve_more_space = 128;
 
 /*
  * First register in a stack arrangement.
@@ -79,6 +80,7 @@ static int gensel_first_stack_reg = 8;        /* st0 under x86 */
 #define        GENSEL_OPT_MANUAL                               0x0100
 #define        GENSEL_OPT_UNARY_NOTE                   0x0200
 #define        GENSEL_OPT_BINARY_NOTE                  0x0400
+#define        GENSEL_OPT_MORE_SPACE                   0x0800
 
 /*
  * Pattern values.
@@ -267,12 +269,13 @@ static void gensel_output_clause_code(gensel_clause_t clause)
 /*
  * Output a single clause for a rule.
  */
-static void gensel_output_clause(gensel_clause_t clause)
+static void gensel_output_clause(gensel_clause_t clause, int options)
 {
        /* Cache the instruction pointer into "inst" */
        printf("\t\tinst = (%s)(gen->posn.ptr);\n", gensel_inst_type);
        printf("\t\tif(!jit_cache_check_for_n(&(gen->posn), %d))\n",
-                  gensel_reserve_space);
+                  (((options & GENSEL_OPT_MORE_SPACE) == 0)
+                               ? gensel_reserve_space : gensel_reserve_more_space));
        printf("\t\t{\n");
        printf("\t\t\tjit_cache_mark_full(&(gen->posn));\n");
        printf("\t\t\treturn;\n");
@@ -587,7 +590,7 @@ static void gensel_output_clauses(gensel_clause_t clauses, int options)
                        /* Spill all other registers back to their original positions */
                        printf("\t\t_jit_regs_spill_all(gen);\n");
                }
-               gensel_output_clause(clause);
+               gensel_output_clause(clause, options);
                printf("\t}\n");
                first = 0;
                clause = clause->next;
@@ -681,6 +684,7 @@ static void gensel_output_supported(void)
 %token K_TERNARY                       "`ternary'"
 %token K_STACK                         "`stack'"
 %token K_ONLY                          "`only'"
+%token K_MORE_SPACE                    "`more_space'"
 %token K_MANUAL                                "`manual'"
 %token K_INST_TYPE                     "`%inst_type'"
 
@@ -764,6 +768,7 @@ Option
        | K_TERNARY                                     { $$ = GENSEL_OPT_TERNARY; }
        | K_STACK                                       { $$ = GENSEL_OPT_STACK; }
        | K_ONLY                                        { $$ = GENSEL_OPT_ONLY; }
+       | K_MORE_SPACE                          { $$ = GENSEL_OPT_MORE_SPACE; }
        | K_MANUAL                                      { $$ = GENSEL_OPT_MANUAL; }
        ;
 
index 5a9320feb738cdc5a76eb00f045d8d27467773b1..374a533f4282fe491964e8271fcfdc000410709f 100644 (file)
@@ -103,6 +103,7 @@ WHITE                                       [ \t\v\r\f]
 "manual"                               { RETURNTOK(K_MANUAL); }
 "stack"                                        { RETURNTOK(K_STACK); }
 "only"                                 { RETURNTOK(K_ONLY); }
+"more_space"                   { RETURNTOK(K_MORE_SPACE); }
 "%inst_type"                   { RETURNTOK(K_INST_TYPE); }
 
 {IDALPHA}({DIGIT}|{IDALPHA})*  {