From d75283111f93195a97a18aa758cb8832c613bdd4 Mon Sep 17 00:00:00 2001 From: Dimitris Panokostas Date: Sun, 31 May 2026 22:55:01 +0200 Subject: [PATCH] fix: address ARM64 JIT cputester regressions --- jit/arm/compemu_arm.cpp | 583 +++++++++++++++++++++++----- jit/arm/compemu_midfunc_arm.cpp | 7 + jit/arm/compemu_midfunc_arm.h | 1 + jit/arm/compemu_midfunc_arm64.cpp | 7 + jit/arm/compemu_midfunc_arm64_2.cpp | 128 ++++-- jit/arm/compemu_support_arm.cpp | 2 + jit/arm/gencomp_arm.c | 48 ++- 7 files changed, 633 insertions(+), 143 deletions(-) diff --git a/jit/arm/compemu_arm.cpp b/jit/arm/compemu_arm.cpp index 56e0c80b..541f54c2 100644 --- a/jit/arm/compemu_arm.cpp +++ b/jit/arm/compemu_arm.cpp @@ -12503,11 +12503,24 @@ uae_u32 REGPARAM2 op_4c00_0_comp_ff(uae_u32 opcode) { int dst = dstreg; if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12533,11 +12546,24 @@ uae_u32 REGPARAM2 op_4c10_0_comp_ff(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12564,11 +12590,24 @@ uae_u32 REGPARAM2 op_4c18_0_comp_ff(uae_u32 opcode) { arm_ADD_l_ri8(dstreg + 8, 4); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12595,11 +12634,24 @@ uae_u32 REGPARAM2 op_4c20_0_comp_ff(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12626,11 +12678,24 @@ uae_u32 REGPARAM2 op_4c28_0_comp_ff(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12657,11 +12722,24 @@ uae_u32 REGPARAM2 op_4c30_0_comp_ff(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12687,11 +12765,24 @@ uae_u32 REGPARAM2 op_4c38_0_comp_ff(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12717,11 +12808,24 @@ uae_u32 REGPARAM2 op_4c39_0_comp_ff(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12750,11 +12854,24 @@ uae_u32 REGPARAM2 op_4c3a_0_comp_ff(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12785,11 +12902,24 @@ uae_u32 REGPARAM2 op_4c3b_0_comp_ff(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12813,11 +12943,24 @@ uae_u32 REGPARAM2 op_4c3c_0_comp_ff(uae_u32 opcode) { mov_l_ri(dst, comp_get_ilong((m68k_pc_offset += 4) - 4)); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jff_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(tmp, r3); + } else { + jff_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jff_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jff_MULS64(r2, r3); + } else { + jff_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -12844,6 +12987,11 @@ uae_u32 REGPARAM2 op_4c40_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dst=dstreg; register_possible_exception(); if (extra & 0x0400) { @@ -12876,6 +13024,11 @@ uae_u32 REGPARAM2 op_4c50_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=dstreg+8; int dst=alloc_scratch(); readlong(dsta,dst); @@ -12910,6 +13063,11 @@ uae_u32 REGPARAM2 op_4c58_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=dstreg+8; int dst=alloc_scratch(); readlong(dsta,dst); @@ -12945,6 +13103,11 @@ uae_u32 REGPARAM2 op_4c60_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=dstreg+8; arm_SUB_l_ri8(dstreg+8,4); int dst=alloc_scratch(); @@ -12980,6 +13143,11 @@ uae_u32 REGPARAM2 op_4c68_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); lea_l_brr(dsta,8+dstreg,(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2)); int dst=alloc_scratch(); @@ -13015,6 +13183,11 @@ uae_u32 REGPARAM2 op_4c70_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); calc_disp_ea_020(dstreg+8,comp_get_iword((m68k_pc_offset+=2)-2),dsta); int dst=alloc_scratch(); @@ -13049,6 +13222,11 @@ uae_u32 REGPARAM2 op_4c78_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); mov_l_ri(dsta,(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2)); int dst=alloc_scratch(); @@ -13083,6 +13261,11 @@ uae_u32 REGPARAM2 op_4c79_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ int dst=alloc_scratch(); @@ -13118,6 +13301,11 @@ uae_u32 REGPARAM2 op_4c7a_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset; uae_s32 PC16off = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); @@ -13155,6 +13343,11 @@ uae_u32 REGPARAM2 op_4c7b_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); int pctmp=alloc_scratch(); uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset; @@ -13193,6 +13386,11 @@ uae_u32 REGPARAM2 op_4c7c_0_comp_ff(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dst=alloc_scratch(); mov_l_ri(dst,comp_get_ilong((m68k_pc_offset+=4)-4)); register_possible_exception(); @@ -18847,6 +19045,7 @@ uae_u32 REGPARAM2 op_80c0_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int src=srcreg; int dst=dstreg; @@ -18868,6 +19067,7 @@ uae_u32 REGPARAM2 op_80d0_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=srcreg+8; int src=alloc_scratch(); @@ -18891,6 +19091,7 @@ uae_u32 REGPARAM2 op_80d8_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=srcreg+8; int src=alloc_scratch(); @@ -18915,6 +19116,7 @@ uae_u32 REGPARAM2 op_80e0_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=srcreg+8; arm_SUB_l_ri8(srcreg+8,2); @@ -18939,6 +19141,7 @@ uae_u32 REGPARAM2 op_80e8_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=alloc_scratch(); lea_l_brr(srca,8+srcreg,(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2)); @@ -18963,6 +19166,7 @@ uae_u32 REGPARAM2 op_80f0_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=alloc_scratch(); calc_disp_ea_020(srcreg+8,comp_get_iword((m68k_pc_offset+=2)-2),srca); @@ -18986,6 +19190,7 @@ uae_u32 REGPARAM2 op_80f8_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=alloc_scratch(); mov_l_ri(srca,(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2)); @@ -19009,6 +19214,7 @@ uae_u32 REGPARAM2 op_80f9_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=alloc_scratch(); mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ @@ -19032,6 +19238,7 @@ uae_u32 REGPARAM2 op_80fa_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=alloc_scratch(); uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset; @@ -19057,6 +19264,7 @@ uae_u32 REGPARAM2 op_80fb_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int srca=alloc_scratch(); int pctmp=alloc_scratch(); @@ -19084,6 +19292,7 @@ uae_u32 REGPARAM2 op_80fc_0_comp_ff(uae_u32 opcode) { uae_u32 dstreg = (opcode >> 9) & 7; uae_u32 m68k_pc_offset_thisinst=m68k_pc_offset; m68k_pc_offset+=2; + save_flags(); dont_care_flags(); int src=alloc_scratch(); mov_l_ri(src,(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2)); @@ -39408,11 +39617,24 @@ uae_u32 REGPARAM2 op_4c00_0_comp_nf(uae_u32 opcode) { int dst = dstreg; if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39437,11 +39659,24 @@ uae_u32 REGPARAM2 op_4c10_0_comp_nf(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39467,11 +39702,24 @@ uae_u32 REGPARAM2 op_4c18_0_comp_nf(uae_u32 opcode) { arm_ADD_l_ri8(dstreg + 8, 4); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39497,11 +39745,24 @@ uae_u32 REGPARAM2 op_4c20_0_comp_nf(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39527,11 +39788,24 @@ uae_u32 REGPARAM2 op_4c28_0_comp_nf(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39557,11 +39831,24 @@ uae_u32 REGPARAM2 op_4c30_0_comp_nf(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39586,11 +39873,24 @@ uae_u32 REGPARAM2 op_4c38_0_comp_nf(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39615,11 +39915,24 @@ uae_u32 REGPARAM2 op_4c39_0_comp_nf(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39647,11 +39960,24 @@ uae_u32 REGPARAM2 op_4c3a_0_comp_nf(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39681,11 +40007,24 @@ uae_u32 REGPARAM2 op_4c3b_0_comp_nf(uae_u32 opcode) { readlong(dsta, dst); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39708,11 +40047,24 @@ uae_u32 REGPARAM2 op_4c3c_0_comp_nf(uae_u32 opcode) { mov_l_ri(dst, comp_get_ilong((m68k_pc_offset += 4) - 4)); if (extra & 0x0400) { int r3 = (extra & 7); - mov_l_rr(r3, dst); - if (extra & 0x0800) { - jnf_MULS64(r2, r3); + if (r2 == r3) { + int tmp = alloc_scratch(); + mov_l_rr(tmp, r2); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(tmp, r3); + } else { + jnf_MULU64(tmp, r3); + } + if (currprefs.cpu_model >= 68040) + mov_l_rr(r2, tmp); } else { - jnf_MULU64(r2, r3); + mov_l_rr(r3, dst); + if (extra & 0x0800) { + jnf_MULS64(r2, r3); + } else { + jnf_MULU64(r2, r3); + } } } else { if (extra & 0x0800) { @@ -39738,6 +40090,11 @@ uae_u32 REGPARAM2 op_4c40_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dst=dstreg; register_possible_exception(); if (extra & 0x0400) { @@ -39769,6 +40126,11 @@ uae_u32 REGPARAM2 op_4c50_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=dstreg+8; int dst=alloc_scratch(); readlong(dsta,dst); @@ -39802,6 +40164,11 @@ uae_u32 REGPARAM2 op_4c58_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=dstreg+8; int dst=alloc_scratch(); readlong(dsta,dst); @@ -39836,6 +40203,11 @@ uae_u32 REGPARAM2 op_4c60_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=dstreg+8; arm_SUB_l_ri8(dstreg+8,4); int dst=alloc_scratch(); @@ -39870,6 +40242,11 @@ uae_u32 REGPARAM2 op_4c68_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); lea_l_brr(dsta,8+dstreg,(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2)); int dst=alloc_scratch(); @@ -39904,6 +40281,11 @@ uae_u32 REGPARAM2 op_4c70_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); calc_disp_ea_020(dstreg+8,comp_get_iword((m68k_pc_offset+=2)-2),dsta); int dst=alloc_scratch(); @@ -39937,6 +40319,11 @@ uae_u32 REGPARAM2 op_4c78_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); mov_l_ri(dsta,(uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2)); int dst=alloc_scratch(); @@ -39970,6 +40357,11 @@ uae_u32 REGPARAM2 op_4c79_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ int dst=alloc_scratch(); @@ -40004,6 +40396,11 @@ uae_u32 REGPARAM2 op_4c7a_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset; uae_s32 PC16off = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2); @@ -40040,6 +40437,11 @@ uae_u32 REGPARAM2 op_4c7b_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dsta=alloc_scratch(); int pctmp=alloc_scratch(); uae_u32 address=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset; @@ -40077,6 +40479,11 @@ uae_u32 REGPARAM2 op_4c7c_0_comp_nf(uae_u32 opcode) { uae_u16 extra=comp_get_iword((m68k_pc_offset+=2)-2); int r2=(extra>>12)&7; int r3=extra&7; + if (extra & 0x0400) { + FAIL(1); + m68k_pc_offset=m68k_pc_offset_thisinst; + return 0; + } int dst=alloc_scratch(); mov_l_ri(dst,comp_get_ilong((m68k_pc_offset+=4)-4)); register_possible_exception(); diff --git a/jit/arm/compemu_midfunc_arm.cpp b/jit/arm/compemu_midfunc_arm.cpp index 891048f0..e55b7911 100644 --- a/jit/arm/compemu_midfunc_arm.cpp +++ b/jit/arm/compemu_midfunc_arm.cpp @@ -163,6 +163,13 @@ MIDFUNC(0,make_flags_live,(void)) } MENDFUNC(0,make_flags_live,(void)) +MIDFUNC(0,save_flags,(void)) +{ + make_flags_live_internal(); + flush_flags(); +} +MENDFUNC(0,save_flags,(void)) + MIDFUNC(2,mov_l_mi,(IMPTR d, IM32 s)) { /* d points always to memory in regs struct */ diff --git a/jit/arm/compemu_midfunc_arm.h b/jit/arm/compemu_midfunc_arm.h index f32e7c08..8724d8ba 100644 --- a/jit/arm/compemu_midfunc_arm.h +++ b/jit/arm/compemu_midfunc_arm.h @@ -59,6 +59,7 @@ DECLARE_MIDFUNC(sub_w_ri(RW2 d, IM8 i)); DECLARE_MIDFUNC(live_flags(void)); DECLARE_MIDFUNC(dont_care_flags(void)); DECLARE_MIDFUNC(make_flags_live(void)); +DECLARE_MIDFUNC(save_flags(void)); DECLARE_MIDFUNC(forget_about(W4 r)); DECLARE_MIDFUNC(f_forget_about(FW r)); diff --git a/jit/arm/compemu_midfunc_arm64.cpp b/jit/arm/compemu_midfunc_arm64.cpp index 7126d473..997e19f7 100644 --- a/jit/arm/compemu_midfunc_arm64.cpp +++ b/jit/arm/compemu_midfunc_arm64.cpp @@ -162,6 +162,13 @@ MIDFUNC(0,make_flags_live,(void)) } MENDFUNC(0,make_flags_live,(void)) +MIDFUNC(0,save_flags,(void)) +{ + make_flags_live_internal(); + flush_flags(); +} +MENDFUNC(0,save_flags,(void)) + MIDFUNC(2,mov_l_mi,(IMPTR d, IMPTR s)) { /* d usually points to memory in regs struct, but can also be a global diff --git a/jit/arm/compemu_midfunc_arm64_2.cpp b/jit/arm/compemu_midfunc_arm64_2.cpp index 5c95e21e..620079b9 100644 --- a/jit/arm/compemu_midfunc_arm64_2.cpp +++ b/jit/arm/compemu_midfunc_arm64_2.cpp @@ -1037,10 +1037,22 @@ MIDFUNC(2,jff_ASL_b_reg,(RW1 d, RR4 i)) SET_xxCflag(REG_WORK4, REG_WORK4); // Calculate V Flag + CMP_wi(REG_WORK1, 8); + uae_u32* branch_normal_v = (uae_u32*)get_target(); + BLT_i(0); + TST_ww(REG_WORK3, REG_WORK3); + uae_u32* branch_done_zero = (uae_u32*)get_target(); + BEQ_i(0); + SET_xxVflag(REG_WORK4, REG_WORK4); + uae_u32* branch_done_large = (uae_u32*)get_target(); + B_i(0); + write_jmp_target(branch_normal_v, (uintptr)get_target()); CLS_ww(REG_WORK2, REG_WORK3); CMP_ww(REG_WORK2, REG_WORK1); BGE_i(2); SET_xxVflag(REG_WORK4, REG_WORK4); + write_jmp_target(branch_done_zero, (uintptr)get_target()); + write_jmp_target(branch_done_large, (uintptr)get_target()); MSR_NZCV_x(REG_WORK4); } else { @@ -1095,10 +1107,22 @@ MIDFUNC(2,jff_ASL_w_reg,(RW2 d, RR4 i)) SET_xxCflag(REG_WORK4, REG_WORK4); // Calculate V Flag + CMP_wi(REG_WORK1, 16); + uae_u32* branch_normal_v = (uae_u32*)get_target(); + BLT_i(0); + TST_ww(REG_WORK3, REG_WORK3); + uae_u32* branch_done_zero = (uae_u32*)get_target(); + BEQ_i(0); + SET_xxVflag(REG_WORK4, REG_WORK4); + uae_u32* branch_done_large = (uae_u32*)get_target(); + B_i(0); + write_jmp_target(branch_normal_v, (uintptr)get_target()); CLS_ww(REG_WORK2, REG_WORK3); CMP_ww(REG_WORK2, REG_WORK1); BGE_i(2); SET_xxVflag(REG_WORK4, REG_WORK4); + write_jmp_target(branch_done_zero, (uintptr)get_target()); + write_jmp_target(branch_done_large, (uintptr)get_target()); MSR_NZCV_x(REG_WORK4); } else { @@ -1153,10 +1177,22 @@ MIDFUNC(2,jff_ASL_l_reg,(RW4 d, RR4 i)) SET_xxCflag(REG_WORK4, REG_WORK4); // Calculate V Flag + CMP_wi(REG_WORK1, 32); + uae_u32* branch_normal_v = (uae_u32*)get_target(); + BLT_i(0); + TST_ww(REG_WORK3, REG_WORK3); + uae_u32* branch_done_zero = (uae_u32*)get_target(); + BEQ_i(0); + SET_xxVflag(REG_WORK4, REG_WORK4); + uae_u32* branch_done_large = (uae_u32*)get_target(); + B_i(0); + write_jmp_target(branch_normal_v, (uintptr)get_target()); CLS_ww(REG_WORK2, REG_WORK3); CMP_ww(REG_WORK2, REG_WORK1); BGE_i(2); SET_xxVflag(REG_WORK4, REG_WORK4); + write_jmp_target(branch_done_zero, (uintptr)get_target()); + write_jmp_target(branch_done_large, (uintptr)get_target()); MSR_NZCV_x(REG_WORK4); } else { @@ -2039,14 +2075,13 @@ MIDFUNC(4,jnf_BFINS_di,(RW4 d, RR4 s, RR4 offs, IM8 width)) LSR_xxx(REG_WORK2, REG_WORK2, REG_WORK4); // 64-bit shift so width==32 -> mask 0 BFI_xxii(REG_WORK2, REG_WORK2, 32, 32); ROR_xxx(REG_WORK2, REG_WORK2, REG_WORK3); - AND_xxx(d, d, REG_WORK2); ROR_www(REG_WORK1, s, REG_WORK4); BFI_xxii(REG_WORK1, REG_WORK1, 32, 32); ROR_xxx(REG_WORK1, REG_WORK1, REG_WORK3); - MVN_xx(REG_WORK2, REG_WORK2); - AND_xxx(REG_WORK1, REG_WORK1, REG_WORK2); + BIC_xxx(REG_WORK1, REG_WORK1, REG_WORK2); + AND_xxx(d, d, REG_WORK2); ORR_xxx(d, d, REG_WORK1); ROR_xxi(d, d, 32); MOV_ww(d, d); // Clean upper 32 bits after 64-bit BFINS operations @@ -2070,22 +2105,23 @@ MIDFUNC(4,jff_BFINS_di,(RW4 d, RR4 s, RR4 offs, IM8 width)) LSR_xxx(REG_WORK2, REG_WORK2, REG_WORK4); // 64-bit shift so width==32 -> mask 0 BFI_xxii(REG_WORK2, REG_WORK2, 32, 32); ROR_xxx(REG_WORK2, REG_WORK2, REG_WORK3); - AND_xxx(d, d, REG_WORK2); ROR_www(REG_WORK1, s, REG_WORK4); BFI_xxii(REG_WORK1, REG_WORK1, 32, 32); ROR_xxx(REG_WORK1, REG_WORK1, REG_WORK3); - MVN_xx(REG_WORK2, REG_WORK2); - AND_xxx(REG_WORK1, REG_WORK1, REG_WORK2); + BIC_xxx(REG_WORK1, REG_WORK1, REG_WORK2); + // Flags come from the source field, not the positioned/masked value: + // N = source bit (width-1), Z = (low `width` bits of source == 0). + SBFX_wwii(REG_WORK3, s, 0, width); + TST_ww(REG_WORK3, REG_WORK3); + MRS_NZCV_x(REG_WORK4); + + AND_xxx(d, d, REG_WORK2); ORR_xxx(d, d, REG_WORK1); ROR_xxi(d, d, 32); MOV_ww(d, d); // Clean upper 32 bits after 64-bit BFINS operations - - // Flags come from the source field, not the positioned/masked value: - // N = source bit (width-1), Z = (low `width` bits of source == 0). - SBFX_wwii(REG_WORK1, s, 0, width); - TST_ww(REG_WORK1, REG_WORK1); + MSR_NZCV_x(REG_WORK4); flags_carry_inverted = false; unlock2(offs); @@ -2111,14 +2147,13 @@ MIDFUNC(4,jnf_BFINS_id,(RW4 d, RR4 s, IM8 offs, RR4 width)) LSR_xxx(REG_WORK2, REG_WORK2, REG_WORK4); // 64-bit shift so width==32 -> mask 0 BFI_xxii(REG_WORK2, REG_WORK2, 32, 32); ROR_xxx(REG_WORK2, REG_WORK2, REG_WORK3); - AND_xxx(d, d, REG_WORK2); ROR_www(REG_WORK1, s, REG_WORK4); BFI_xxii(REG_WORK1, REG_WORK1, 32, 32); ROR_xxx(REG_WORK1, REG_WORK1, REG_WORK3); - MVN_xx(REG_WORK2, REG_WORK2); - AND_xxx(REG_WORK1, REG_WORK1, REG_WORK2); + BIC_xxx(REG_WORK1, REG_WORK1, REG_WORK2); + AND_xxx(d, d, REG_WORK2); ORR_xxx(d, d, REG_WORK1); ROR_xxi(d, d, 32); MOV_ww(d, d); // Clean upper 32 bits after 64-bit BFINS operations @@ -2144,25 +2179,26 @@ MIDFUNC(4,jff_BFINS_id,(RW4 d, RR4 s, IM8 offs, RR4 width)) LSR_xxx(REG_WORK2, REG_WORK2, REG_WORK4); // 64-bit shift so width==32 -> mask 0 BFI_xxii(REG_WORK2, REG_WORK2, 32, 32); ROR_xxx(REG_WORK2, REG_WORK2, REG_WORK3); - AND_xxx(d, d, REG_WORK2); ROR_www(REG_WORK1, s, REG_WORK4); BFI_xxii(REG_WORK1, REG_WORK1, 32, 32); ROR_xxx(REG_WORK1, REG_WORK1, REG_WORK3); - MVN_xx(REG_WORK2, REG_WORK2); - AND_xxx(REG_WORK1, REG_WORK1, REG_WORK2); - - ORR_xxx(d, d, REG_WORK1); - ROR_xxi(d, d, 32); - MOV_ww(d, d); // Clean upper 32 bits after 64-bit BFINS operations + BIC_xxx(REG_WORK1, REG_WORK1, REG_WORK2); // Flags come from the source field, not the positioned/masked value. // Shift the source left by (32 - width) so bit 31 = source bit (width-1): // N = that bit, Z = (low `width` bits of source == 0). - MOV_wi(REG_WORK2, 32); - SUB_www(REG_WORK2, REG_WORK2, REG_WORK4); - LSL_www(REG_WORK1, s, REG_WORK2); - TST_ww(REG_WORK1, REG_WORK1); + MOV_wi(REG_WORK3, 32); + SUB_www(REG_WORK3, REG_WORK3, REG_WORK4); + LSL_www(REG_WORK3, s, REG_WORK3); + TST_ww(REG_WORK3, REG_WORK3); + MRS_NZCV_x(REG_WORK4); + + AND_xxx(d, d, REG_WORK2); + ORR_xxx(d, d, REG_WORK1); + ROR_xxi(d, d, 32); + MOV_ww(d, d); // Clean upper 32 bits after 64-bit BFINS operations + MSR_NZCV_x(REG_WORK4); flags_carry_inverted = false; unlock2(width); @@ -2189,14 +2225,13 @@ MIDFUNC(4,jnf_BFINS_dd,(RW4 d, RR4 s, RR4 offs, RR4 width)) LSR_xxx(REG_WORK2, REG_WORK2, REG_WORK4); // 64-bit shift so width==32 -> mask 0 BFI_xxii(REG_WORK2, REG_WORK2, 32, 32); ROR_xxx(REG_WORK2, REG_WORK2, REG_WORK3); - AND_xxx(d, d, REG_WORK2); ROR_www(REG_WORK1, s, REG_WORK4); BFI_xxii(REG_WORK1, REG_WORK1, 32, 32); ROR_xxx(REG_WORK1, REG_WORK1, REG_WORK3); - MVN_xx(REG_WORK2, REG_WORK2); - AND_xxx(REG_WORK1, REG_WORK1, REG_WORK2); + BIC_xxx(REG_WORK1, REG_WORK1, REG_WORK2); + AND_xxx(d, d, REG_WORK2); ORR_xxx(d, d, REG_WORK1); ROR_xxi(d, d, 32); MOV_ww(d, d); // Clean upper 32 bits after 64-bit BFINS operations @@ -2224,25 +2259,26 @@ MIDFUNC(4,jff_BFINS_dd,(RW4 d, RR4 s, RR4 offs, RR4 width)) LSR_xxx(REG_WORK2, REG_WORK2, REG_WORK4); // 64-bit shift so width==32 -> mask 0 BFI_xxii(REG_WORK2, REG_WORK2, 32, 32); ROR_xxx(REG_WORK2, REG_WORK2, REG_WORK3); - AND_xxx(d, d, REG_WORK2); ROR_www(REG_WORK1, s, REG_WORK4); BFI_xxii(REG_WORK1, REG_WORK1, 32, 32); ROR_xxx(REG_WORK1, REG_WORK1, REG_WORK3); - MVN_xx(REG_WORK2, REG_WORK2); - AND_xxx(REG_WORK1, REG_WORK1, REG_WORK2); - - ORR_xxx(d, d, REG_WORK1); - ROR_xxi(d, d, 32); - MOV_ww(d, d); // Clean upper 32 bits after 64-bit BFINS operations + BIC_xxx(REG_WORK1, REG_WORK1, REG_WORK2); // Flags come from the source field, not the positioned/masked value. // Shift the source left by (32 - width) so bit 31 = source bit (width-1): // N = that bit, Z = (low `width` bits of source == 0). - MOV_wi(REG_WORK2, 32); - SUB_www(REG_WORK2, REG_WORK2, REG_WORK4); - LSL_www(REG_WORK1, s, REG_WORK2); - TST_ww(REG_WORK1, REG_WORK1); + MOV_wi(REG_WORK3, 32); + SUB_www(REG_WORK3, REG_WORK3, REG_WORK4); + LSL_www(REG_WORK3, s, REG_WORK3); + TST_ww(REG_WORK3, REG_WORK3); + MRS_NZCV_x(REG_WORK4); + + AND_xxx(d, d, REG_WORK2); + ORR_xxx(d, d, REG_WORK1); + ROR_xxi(d, d, 32); + MOV_ww(d, d); // Clean upper 32 bits after 64-bit BFINS operations + MSR_NZCV_x(REG_WORK4); flags_carry_inverted = false; unlock2(width); @@ -3176,7 +3212,9 @@ MIDFUNC(2,jff_DIVU,(RW4 d, RR4 s)) TBZ_wii(d, 31, 2); MOV_wish(REG_WORK1, 0x9000, 16); // Set V and N (if d < 0) } else if (currprefs.cpu_model >= 68040) { - MRS_NZCV_x(REG_WORK1); + int saved_flags = readreg(FLAGTMP); + MOV_ww(REG_WORK1, saved_flags); + unlock2(saved_flags); CLEAR_xxCflag(REG_WORK1, REG_WORK1); } else { // 68000/010 @@ -3198,12 +3236,16 @@ MIDFUNC(2,jff_DIVU,(RW4 d, RR4 s)) // Overflow: V set, C cleared; N/Z depend on CPU model (setdivuflags()). if (currprefs.cpu_model >= 68040) { // V set, C cleared, N and Z unchanged - MRS_NZCV_x(REG_WORK1); + int saved_flags = readreg(FLAGTMP); + MOV_ww(REG_WORK1, saved_flags); + unlock2(saved_flags); SET_xxVflag(REG_WORK1, REG_WORK1); CLEAR_xxCflag(REG_WORK1, REG_WORK1); } else if (currprefs.cpu_model >= 68020) { - // V set, N set if dividend < 0, Z and C unchanged - MRS_NZCV_x(REG_WORK1); + // 68020/030 preserve incoming Z/C and N, except a negative dividend sets N. + int saved_flags = readreg(FLAGTMP); + MOV_ww(REG_WORK1, saved_flags); + unlock2(saved_flags); SET_xxVflag(REG_WORK1, REG_WORK1); TBZ_wii(d, 31, 2); SET_xxNflag(REG_WORK1, REG_WORK1); diff --git a/jit/arm/compemu_support_arm.cpp b/jit/arm/compemu_support_arm.cpp index af327a33..156a0550 100644 --- a/jit/arm/compemu_support_arm.cpp +++ b/jit/arm/compemu_support_arm.cpp @@ -3701,6 +3701,8 @@ void build_comp(void) } prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead; prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive; + if (table68k[opcode].mnemo == i_DIVU) + prop[cft_map(opcode)].use_flags |= FLAG_CZNV; /* Unconditional jumps don't evaluate condition codes, so they * don't actually use any flags themselves */ if (prop[cft_map(opcode)].cflow & fl_const_jump) diff --git a/jit/arm/gencomp_arm.c b/jit/arm/gencomp_arm.c index 63d60fc1..90ef1374 100644 --- a/jit/arm/gencomp_arm.c +++ b/jit/arm/gencomp_arm.c @@ -3097,12 +3097,24 @@ static void gen_mull(uae_u32 opcode, struct instr *curi, const char* ssize) { if (!noflags) { comprintf("\t if (extra & 0x0400) {\n"); /* Need full 64 bit result */ comprintf("\t int r3=(extra & 7);\n"); - comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ - comprintf("\t if (extra & 0x0800) { \n"); /* signed */ - comprintf("\t\t jff_MULS64(r2,r3);\n"); - comprintf("\t } else { \n"); - comprintf("\t\t jff_MULU64(r2,r3);\n"); - comprintf("\t } \n"); /* The result is in r2/r3, with r2 holding the lower 32 bits */ + comprintf("\t if (r2 == r3) {\n"); + comprintf("\t mov_l_rr(tmp,r2);\n"); + comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and tmp */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t jff_MULS64(tmp,r3);\n"); + comprintf("\t } else { \n"); + comprintf("\t jff_MULU64(tmp,r3);\n"); + comprintf("\t } \n"); + comprintf("\t if (currprefs.cpu_model >= 68040)\n"); + comprintf("\t mov_l_rr(r2,tmp);\n"); + comprintf("\t } else {\n"); + comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t jff_MULS64(r2,r3);\n"); + comprintf("\t } else { \n"); + comprintf("\t jff_MULU64(r2,r3);\n"); + comprintf("\t } \n"); /* The result is in r2/r3, with r2 holding the lower 32 bits */ + comprintf("\t }\n"); comprintf("\t } else {\n"); /* Only want 32 bit result */ /* operands in dst and r2, result goes into r2 */ /* shouldn't matter whether it's signed or unsigned?!? */ @@ -3115,12 +3127,24 @@ static void gen_mull(uae_u32 opcode, struct instr *curi, const char* ssize) { } else { comprintf("\t if (extra & 0x0400) {\n"); /* Need full 64 bit result */ comprintf("\t int r3=(extra & 7);\n"); - comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ - comprintf("\t if (extra & 0x0800) { \n"); /* signed */ - comprintf("\t\t jnf_MULS64(r2,r3);\n"); - comprintf("\t } else { \n"); - comprintf("\t\t jnf_MULU64(r2,r3);\n"); - comprintf("\t } \n"); /* The result is in r2/r3, with r2 holding the lower 32 bits */ + comprintf("\t if (r2 == r3) {\n"); + comprintf("\t mov_l_rr(tmp,r2);\n"); + comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and tmp */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t jnf_MULS64(tmp,r3);\n"); + comprintf("\t } else { \n"); + comprintf("\t jnf_MULU64(tmp,r3);\n"); + comprintf("\t } \n"); + comprintf("\t if (currprefs.cpu_model >= 68040)\n"); + comprintf("\t mov_l_rr(r2,tmp);\n"); + comprintf("\t } else {\n"); + comprintf("\t mov_l_rr(r3,dst);\n"); /* operands now in r3 and r2 */ + comprintf("\t if (extra & 0x0800) { \n"); /* signed */ + comprintf("\t jnf_MULS64(r2,r3);\n"); + comprintf("\t } else { \n"); + comprintf("\t jnf_MULU64(r2,r3);\n"); + comprintf("\t } \n"); /* The result is in r2/r3, with r2 holding the lower 32 bits */ + comprintf("\t }\n"); comprintf("\t } else {\n"); /* Only want 32 bit result */ /* operands in dst and r2, result foes into r2 */ /* shouldn't matter whether it's signed or unsigned?!? */ -- 2.47.3