From f52c62f478ead769935dbe984f0c9d09ff143e52 Mon Sep 17 00:00:00 2001 From: Toni Wilen Date: Sat, 2 Jan 2021 16:51:30 +0200 Subject: [PATCH] Another JIT shift instruction update. --- jit/compemu_midfunc_x86.cpp | 93 ++++++------ jit/compemu_midfunc_x86.h | 3 +- jit/gencomp.cpp | 279 +++++++++++++++++++++++++----------- 3 files changed, 251 insertions(+), 124 deletions(-) diff --git a/jit/compemu_midfunc_x86.cpp b/jit/compemu_midfunc_x86.cpp index 41303730..25b0ad3e 100644 --- a/jit/compemu_midfunc_x86.cpp +++ b/jit/compemu_midfunc_x86.cpp @@ -134,11 +134,13 @@ MIDFUNC(0,clear_overflow,(void)) raw_popfl(); } -MIDFUNC(3,setcc_for_cntzero,(RR4 /* cnt */, RR4 data, int size, int ov)) +// This is complex because x86 shift behavior is different than 680x0. +// - shift count 0: does not modify any flags | clears C, modifies Z and N. Does not modify X. +// - shift count larger or same than data size : C undefined | C always equals last bit shifted out. +// - shift count mask: masked by 31 (except if 64bit data size) | masked by 63. +MIDFUNC(6, setcc_for_cntzero, (RR4 /* cnt */, RR4 data, RR4 odata, int obit, int size, int ov)) { - uae_u8 *branchadd1a, *branchadd1b; - uae_u8* branchadd2; - uae_u8* branchadd3; + uae_u8 *branchadd1, *branchadd2, *branchadd3, *branchadd4; evict(FLAGX); make_flags_live_internal(); @@ -157,63 +159,61 @@ MIDFUNC(3,setcc_for_cntzero,(RR4 /* cnt */, RR4 data, int size, int ov)) emit_byte(0xff); } - /* - * shift count can only be in CL register; see shrl_b_rr - */ + // Shift count can only be in CL register; see shrl_b_rr + // Zero shift count? 
raw_test_b_rr(X86_CL, X86_CL); - /* if zero, leave X unaffected; carry flag will already be cleared */ raw_jz_b_oponly(); - branchadd1a = get_target(); + branchadd4 = get_target(); skip_byte(); - /* if >= 32, recalculate all flags */ - raw_cmp_b_ri(X86_CL, 31); - raw_jcc_b_oponly(NATIVE_CC_HI); - branchadd1b = get_target(); - skip_byte(); - - /* shift count was non-zero; update also x-flag */ - raw_popfl(); - COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem, NATIVE_CC_CS); - log_vwrite(FLAGX); - raw_jmp_b_oponly(); + // Shift count lower than data size? + raw_cmp_b_ri(X86_CL, size == 0 ? 7 : (size == 1 ? 15 : 31)); + raw_jcc_b_oponly(NATIVE_CC_LS); branchadd2 = get_target(); skip_byte(); - *branchadd1a = (uintptr)get_target() - ((uintptr)branchadd1a + 1); - - /* shift count was zero; need to set Z & N flags since the native flags were unaffected */ + *branchadd4 = (uintptr)get_target() - ((uintptr)branchadd4 + 1); + // Shift count: zero, same or larger than data size + // Need to update C, N and Z. raw_popfl(); - data = readreg(data, size); + data = readreg(data, 4); + /* Update Z and N (Clears also C). */ switch (size) { - case 1: raw_test_b_rr(data, data); break; - case 2: raw_test_w_rr(data, data); break; - case 4: raw_test_l_rr(data, data); break; + case 0: raw_test_b_rr(data, data); break; + case 1: raw_test_w_rr(data, data); break; + case 2: raw_test_l_rr(data, data); break; } + unlock2(data); + // Update C (BT does not modify other flags). + odata = readreg(odata, 4); + raw_bt_l_ri(odata, obit); + unlock2(odata); + raw_pushfl(); + // If zero shift count: X must not be modified. + raw_test_b_rr(X86_CL, X86_CL); + raw_jz_b_oponly(); + branchadd1 = get_target(); + skip_byte(); + + // Non-zero shift count. + // Do not modify C, N and Z. 
+ // C -> X + *branchadd2 = (uintptr)get_target() - ((uintptr)branchadd2 + 1); + raw_popfl(); + // Execute "duplicate_carry()" + COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem, NATIVE_CC_CS); + log_vwrite(FLAGX); raw_jmp_b_oponly(); branchadd3 = get_target(); skip_byte(); - *branchadd1b = (uintptr)get_target() - ((uintptr)branchadd1b + 1); - /* shift count was >=32, set all flags */ + // Zero shift count after CNZ adjustments + *branchadd1 = (uintptr)get_target() - ((uintptr)branchadd1 + 1); raw_popfl(); - /* Set Z and N */ - switch (size) - { - case 1: raw_test_b_rr(data, data); break; - case 2: raw_test_w_rr(data, data); break; - case 4: raw_test_l_rr(data, data); break; - } - /* Set C */ - raw_bt_l_ri(data, 0); *branchadd3 = (uintptr)get_target() - ((uintptr)branchadd3 + 1); - - unlock2(data); - - *branchadd2 = (uintptr)get_target() - ((uintptr)branchadd2 + 1); } /* @@ -2234,6 +2234,15 @@ MIDFUNC(2,cmp_b,(RR1 d, RR1 s)) unlock2(s); } +MIDFUNC(2, cmp_b_ri, (RR1 r, IMM i)) +{ + CLOBBER_CMP; + r = readreg(r, 1); + + raw_cmp_b_ri(r, i); + unlock2(r); +} + MIDFUNC(2,xor_l,(RW4 d, RR4 s)) { diff --git a/jit/compemu_midfunc_x86.h b/jit/compemu_midfunc_x86.h index a5b6df56..a25102ed 100644 --- a/jit/compemu_midfunc_x86.h +++ b/jit/compemu_midfunc_x86.h @@ -180,13 +180,14 @@ DECLARE_MIDFUNC(cmp_l(RR4 d, RR4 s)); DECLARE_MIDFUNC(cmp_l_ri(RR4 r, IMM i)); DECLARE_MIDFUNC(cmp_w(RR2 d, RR2 s)); DECLARE_MIDFUNC(cmp_b(RR1 d, RR1 s)); +DECLARE_MIDFUNC(cmp_b_ri(RR1 r, IMM i)); DECLARE_MIDFUNC(xor_l(RW4 d, RR4 s)); DECLARE_MIDFUNC(xor_w(RW2 d, RR2 s)); DECLARE_MIDFUNC(xor_b(RW1 d, RR1 s)); DECLARE_MIDFUNC(live_flags(void)); DECLARE_MIDFUNC(dont_care_flags(void)); DECLARE_MIDFUNC(duplicate_carry(void)); -DECLARE_MIDFUNC(setcc_for_cntzero(RR4 d, RR4 data, int size, int ov)); +DECLARE_MIDFUNC(setcc_for_cntzero(RR4 d, RR4 data, RR4 odata, int obit, int size, int ov)); DECLARE_MIDFUNC(clear_overflow(void)); DECLARE_MIDFUNC(restore_carry(void)); DECLARE_MIDFUNC(start_needflags(void)); 
diff --git a/jit/gencomp.cpp b/jit/gencomp.cpp index 55b10827..4142813a 100644 --- a/jit/gencomp.cpp +++ b/jit/gencomp.cpp @@ -2320,41 +2320,55 @@ gen_opcode(unsigned int opcode) if (curi->smode != immi) { uses_cmov; start_brace(); - comprintf("\tint zero = scratchie++;\n"); + comprintf("\tint cdata = scratchie++;\n"); comprintf("\tint tmpcnt = scratchie++;\n"); - comprintf("\tint minus1 = scratchie++;\n"); - comprintf("\tint cdata = minus1;\n"); - comprintf("\tmov_l_rr(tmpcnt,cnt);\n"); - comprintf("\tand_l_ri(tmpcnt,63);\n"); - comprintf("\tmov_l_ri(zero, 0);\n"); - comprintf("\tmov_l_ri(minus1, -1);\n"); + comprintf("\tint setval = scratchie++;\n"); + if (!noflags) { + comprintf("\tint odata = scratchie++;\n"); + } + comprintf("\tmov_l_ri(cdata, 0);\n"); + comprintf("\tmov_l_ri(setval, 0xffffffff);\n"); + // if high bit = 0: setval = 0x00000000, else setval = 0xffffffff + comprintf("\ttest_l_ri(data, 0x%08x);\n", curi->size == sz_byte ? 0x80 : (curi->size == sz_word ? 0x8000 : 0x80000000)); + comprintf("\tcmov_l_rr(setval, cdata, NATIVE_CC_EQ);\n"); + comprintf("\tmov_l_rr(cdata, setval);\n"); + if (!noflags) { + // setval -> odata + comprintf("\tmov_l_rr(odata, setval);\n"); + } + comprintf("\tmov_l_rr(tmpcnt, cnt);\n"); + comprintf("\tand_l_ri(tmpcnt, 63);\n"); + if (!noflags) { + // shift == 0: tmpcnt (0) -> odata (C is always zero) + comprintf("\tcmov_l_rr(odata, tmpcnt, NATIVE_CC_EQ);\n"); + } + switch (curi->size) { case sz_byte: - comprintf("\ttest_b_rr(data,data);\n"); - comprintf("\tcmov_l_rr(zero, minus1, NATIVE_CC_MI);\n"); - comprintf("\ttest_l_ri(tmpcnt, 0x38);\n"); - comprintf("\tmov_l_rr(cdata,data);\n"); - comprintf("\tcmov_l_rr(cdata, zero, NATIVE_CC_NE);\n"); - comprintf("\tshra_b_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_b_rr(data,cdata);\n"); + comprintf("\tcmp_b_ri(tmpcnt, 0x08);\n"); + // shift > 8: setval -> cdata + comprintf("\tcmov_l_rr(cdata, setval, NATIVE_CC_HI);\n"); + // shift <= 8: data -> cdata ("normal" shift) + 
comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshra_b_rr(cdata, tmpcnt);\n"); break; case sz_word: - comprintf("\ttest_w_rr(data,data);\n"); - comprintf("\tcmov_l_rr(zero, minus1, NATIVE_CC_MI);\n"); - comprintf("\ttest_l_ri(tmpcnt, 0x30);\n"); - comprintf("\tmov_l_rr(cdata,data);\n"); - comprintf("\tcmov_l_rr(cdata, zero, NATIVE_CC_NE);\n"); - comprintf("\tshra_w_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_w_rr(data,cdata);\n"); + comprintf("\tcmp_b_ri(tmpcnt, 0x10);\n"); + // shift > 16: setval -> cdata + comprintf("\tcmov_l_rr(cdata, setval, NATIVE_CC_HI);\n"); + // shift <= 16: data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshra_w_rr(cdata, tmpcnt);\n"); break; case sz_long: - comprintf("\ttest_l_rr(data,data);\n"); - comprintf("\tcmov_l_rr(zero, minus1, NATIVE_CC_MI);\n"); - comprintf("\ttest_l_ri(tmpcnt, 0x20);\n"); - comprintf("\tmov_l_rr(cdata,data);\n"); - comprintf("\tcmov_l_rr(cdata, zero, NATIVE_CC_NE);\n"); - comprintf("\tshra_l_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_l_rr(data,cdata);\n"); + comprintf("\tcmp_b_ri(tmpcnt, 0x20);\n"); + // shift > 32: setval -> cdata + comprintf("\tcmov_l_rr(cdata, setval, NATIVE_CC_HI);\n"); + // shift == 32: setval -> data, copied to cdata below (x86 masks count by 31, 680x0 uses mask 63) + comprintf("\tcmov_l_rr(data, setval, NATIVE_CC_EQ);\n"); + // shift <= 32: data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshra_l_rr(cdata, tmpcnt);\n"); break; default: assert(0); } @@ -2371,12 +2385,22 @@ gen_opcode(unsigned int opcode) if (!noflags) { comprintf("\tlive_flags();\n"); comprintf("\tend_needflags();\n"); - if (curi->smode != immi) - comprintf("\tsetcc_for_cntzero(tmpcnt, data, %d, 0);\n", curi->size == sz_byte ? 1 : curi->size == sz_word ? 
2 : 4); - else + if (curi->smode != immi) { + comprintf("\tsetcc_for_cntzero(tmpcnt, cdata, odata, 0, %d, 0);\n", curi->size); + } else { comprintf("\tduplicate_carry();\n"); + } comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); } + if (curi->smode != immi) { + switch (curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data, cdata);\n"); break; + case sz_word: comprintf("\tmov_w_rr(data, cdata);\n"); break; + case sz_long: comprintf("\tmov_l_rr(data, cdata);\n"); break; + default: assert(0); + } + } + genastore("data", curi->dmode, "dstreg", curi->size, "data"); break; @@ -2414,28 +2438,51 @@ gen_opcode(unsigned int opcode) uses_cmov; start_brace(); comprintf("\tint cdata = scratchie++;\n"); - comprintf("\tint tmpcnt=scratchie++;\n"); - comprintf("\tmov_l_rr(tmpcnt,cnt);\n"); - comprintf("\tand_l_ri(tmpcnt,63);\n"); + comprintf("\tint tmpcnt = scratchie++;\n"); + if (!noflags) { + comprintf("\tint odata = scratchie++;\n"); + comprintf("\tmov_l_rr(odata, data);\n"); + } comprintf("\tmov_l_ri(cdata, 0);\n"); + comprintf("\tmov_l_rr(tmpcnt, cnt);\n"); + comprintf("\tand_l_ri(tmpcnt, 63);\n"); + if (!noflags) { + // shift == 0? cdata (0) -> odata (C is always zero) + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_EQ);\n"); + } + switch (curi->size) { case sz_byte: - comprintf("\ttest_l_ri(tmpcnt, 0x38);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); - comprintf("\tshll_b_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_b_rr(data, cdata);\n"); + // shift > 8? 0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x08);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift <= 8? 
data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshll_b_rr(cdata, tmpcnt);\n"); break; case sz_word: - comprintf("\ttest_l_ri(tmpcnt, 0x30);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); - comprintf("\tshll_w_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_w_rr(data, cdata);\n"); + // shift > 16? 0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x10);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift <= 16? data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshll_w_rr(cdata, tmpcnt);\n"); break; case sz_long: - comprintf("\ttest_l_ri(tmpcnt, 0x20);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); - comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_l_rr(data, cdata);\n"); + // shift > 32? 0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x20);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift == 32? 0 -> data, copied to cdata below (x86 masks count by 31, 680x0 uses mask 63) + comprintf("\tcmov_l_rr(data, cdata, NATIVE_CC_EQ);\n"); + // shift <= 32? data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshll_l_rr(cdata, tmpcnt);\n"); break; default: assert(0); } @@ -2452,12 +2499,22 @@ gen_opcode(unsigned int opcode) if (!noflags) { comprintf("\tlive_flags();\n"); comprintf("\tend_needflags();\n"); - if (curi->smode != immi) - comprintf("\tsetcc_for_cntzero(tmpcnt, data, %d, 0);\n", curi->size == sz_byte ? 1 : curi->size == sz_word ? 
2 : 4); - else + if (curi->smode != immi) { + comprintf("\tsetcc_for_cntzero(tmpcnt, cdata, odata, 0, %d, 1);\n", curi->size); + } else { + comprintf("\tclear_overflow();\n"); comprintf("\tduplicate_carry();\n"); + } comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); } + if (curi->smode != immi) { + switch (curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data, cdata);\n"); break; + case sz_word: comprintf("\tmov_w_rr(data, cdata);\n"); break; + case sz_long: comprintf("\tmov_l_rr(data, cdata);\n"); break; + default: assert(0); + } + } genastore("data", curi->dmode, "dstreg", curi->size, "data"); break; @@ -2485,30 +2542,52 @@ gen_opcode(unsigned int opcode) if (curi->smode != immi) { uses_cmov; start_brace(); - comprintf("\tint cdata=scratchie++;\n"); - comprintf("\tint tmpcnt=scratchie++;\n"); - comprintf("\tmov_l_rr(tmpcnt,cnt);\n"); - comprintf("\tand_l_ri(tmpcnt,63);\n"); + comprintf("\tint cdata = scratchie++;\n"); + comprintf("\tint tmpcnt = scratchie++;\n"); + if (!noflags) { + comprintf("\tint odata = scratchie++;\n"); + comprintf("\tmov_l_rr(odata, data);\n"); + } comprintf("\tmov_l_ri(cdata, 0);\n"); - + comprintf("\tmov_l_rr(tmpcnt, cnt);\n"); + comprintf("\tand_l_ri(tmpcnt, 63);\n"); + if (!noflags) { + // shift == 0? cdata (0) -> odata (C is always zero) + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_EQ);\n"); + } + switch (curi->size) { case sz_byte: - comprintf("\ttest_l_ri(tmpcnt, 0x38);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); - comprintf("\tshrl_b_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_b_rr(data, cdata);\n"); + // shift > 8? 0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x08);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift <= 8? 
data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshrl_b_rr(cdata, tmpcnt);\n"); break; case sz_word: - comprintf("\ttest_l_ri(tmpcnt, 0x30);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); - comprintf("\tshrl_w_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_w_rr(data, cdata);\n"); + // shift > 16? 0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x10);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift <= 16? data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshrl_w_rr(cdata, tmpcnt);\n"); break; case sz_long: - comprintf("\ttest_l_ri(tmpcnt, 0x20);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); + // shift > 32? 0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x20);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift == 32? 0 -> data, copied to cdata below (x86 masks count by 31, 680x0 uses mask 63) + comprintf("\tcmov_l_rr(data, cdata, NATIVE_CC_EQ);\n"); + // shift <= 32? data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); comprintf("\tshrl_l_rr(cdata, tmpcnt);\n"); - comprintf("\tmov_l_rr(data, cdata);\n"); break; default: assert(0); } @@ -2526,12 +2605,20 @@ gen_opcode(unsigned int opcode) comprintf("\tlive_flags();\n"); comprintf("\tend_needflags();\n"); if (curi->smode != immi) { - comprintf("\tsetcc_for_cntzero(tmpcnt, data, %d, 0);\n", curi->size == sz_byte ? 1 : curi->size == sz_word ? 2 : 4); + comprintf("\tsetcc_for_cntzero(tmpcnt, cdata, odata, %d, %d, 1);\n", curi->size == sz_byte ? 7 : curi->size == sz_word ? 
15 : 31, curi->size); } else { comprintf("\tduplicate_carry();\n"); } comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); } + if (curi->smode != immi) { + switch (curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data, cdata);\n"); break; + case sz_word: comprintf("\tmov_w_rr(data, cdata);\n"); break; + case sz_long: comprintf("\tmov_l_rr(data, cdata);\n"); break; + default: assert(0); + } + } genastore("data", curi->dmode, "dstreg", curi->size, "data"); break; @@ -2561,28 +2648,50 @@ gen_opcode(unsigned int opcode) start_brace(); comprintf("\tint cdata = scratchie++;\n"); comprintf("\tint tmpcnt = scratchie++;\n"); - comprintf("\tmov_l_rr(tmpcnt,cnt);\n"); - comprintf("\tand_l_ri(tmpcnt,63);\n"); + if (!noflags) { + comprintf("\tint odata = scratchie++;\n"); + comprintf("\tmov_l_rr(odata, data);\n"); + } comprintf("\tmov_l_ri(cdata, 0);\n"); + comprintf("\tmov_l_rr(tmpcnt, cnt);\n"); + comprintf("\tand_l_ri(tmpcnt, 63);\n"); + if (!noflags) { + // shift == 0? cdata (0) -> odata (C is always zero) + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_EQ);\n"); + } switch (curi->size) { case sz_byte: - comprintf("\ttest_l_ri(tmpcnt, 0x38);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); - comprintf("\tshll_b_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_b_rr(data, cdata);\n"); + // shift > 8? 0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x08);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift <= 8? cdata -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshll_b_rr(cdata, tmpcnt);\n"); break; case sz_word: - comprintf("\ttest_l_ri(tmpcnt, 0x30);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); - comprintf("\tshll_w_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_w_rr(data, cdata);\n"); + // shift > 16? 
0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x10);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift <= 16? data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshll_w_rr(cdata, tmpcnt);\n"); break; case sz_long: - comprintf("\ttest_l_ri(tmpcnt, 0x20);\n"); - comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_EQ);\n"); - comprintf("\tshll_l_rr(cdata,tmpcnt);\n"); - comprintf("\tmov_l_rr(data, cdata);\n"); + // shift > 32? 0 -> odata (C is always zero) + comprintf("\tcmp_b_ri(tmpcnt, 0x20);\n"); + if (!noflags) { + comprintf("\tcmov_l_rr(odata, cdata, NATIVE_CC_HI);\n"); + } + // shift == 32? 0 -> data, copied to cdata below (x86 masks count by 31, 680x0 uses mask 63) + comprintf("\tcmov_l_rr(data, cdata, NATIVE_CC_EQ);\n"); + // shift <= 32? data -> cdata ("normal" shift) + comprintf("\tcmov_l_rr(cdata, data, NATIVE_CC_LS);\n"); + comprintf("\tshll_l_rr(cdata, tmpcnt);\n"); break; default: assert(0); } @@ -2600,13 +2709,21 @@ gen_opcode(unsigned int opcode) comprintf("\tlive_flags();\n"); comprintf("\tend_needflags();\n"); if (curi->smode != immi) { - comprintf("\tsetcc_for_cntzero(tmpcnt, data, %d, 1);\n", curi->size == sz_byte ? 1 : curi->size == sz_word ? 2 : 4); + comprintf("\tsetcc_for_cntzero(tmpcnt, cdata, odata, 0, %d, 1);\n", curi->size); } else { comprintf("\tclear_overflow();\n"); comprintf("\tduplicate_carry();\n"); } comprintf("if (!(needed_flags & FLAG_CZNV)) dont_care_flags();\n"); } + if (curi->smode != immi) { + switch (curi->size) { + case sz_byte: comprintf("\tmov_b_rr(data, cdata);\n"); break; + case sz_word: comprintf("\tmov_w_rr(data, cdata);\n"); break; + case sz_long: comprintf("\tmov_l_rr(data, cdata);\n"); break; + default: assert(0); + } + } genastore("data", curi->dmode, "dstreg", curi->size, "data"); break; -- 2.47.3