From 7d856060dc8d981b93fa8a237a7f78cb7bfbdb18 Mon Sep 17 00:00:00 2001 From: Frode Solheim Date: Fri, 18 Sep 2015 18:18:01 +0200 Subject: [PATCH] JIT: Updated cpuid, flush_cpu_icache --- jit/codegen_x86.cpp | 146 +++++++++++++++++++++--------------- jit/compemu_midfunc_x86.cpp | 41 +++++++++- 2 files changed, 127 insertions(+), 60 deletions(-) diff --git a/jit/codegen_x86.cpp b/jit/codegen_x86.cpp index 3229e09d..8037c139 100644 --- a/jit/codegen_x86.cpp +++ b/jit/codegen_x86.cpp @@ -3487,8 +3487,10 @@ struct cpuinfo_x86 { uae_u32 x86_hwcap; uae_u8 x86_model; uae_u8 x86_mask; - int cpuid_level; // Maximum supported CPUID level, -1=no CPUID - char x86_vendor_id[16]; + bool x86_has_xmm2; + int cpuid_level; // Maximum supported CPUID level, -1=no CPUID + char x86_vendor_id[16]; + uintptr x86_clflush_size; }; struct cpuinfo_x86 cpuinfo; @@ -3574,55 +3576,66 @@ static void c->x86_vendor = X86_VENDOR_UNKNOWN; } -static void -cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. + */ +/* Some CPUID calls want 'count' to be placed in ecx */ +#ifdef __GNUC__ +static void cpuid_count(uae_u32 op, uae_u32 count, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +{ + uae_u32 _eax, _ebx, _ecx, _edx; + _eax = op; + _ecx = count; + __asm__ __volatile__( + " movl %0,%%eax \n" + " movl %2,%%ecx \n" + " cpuid \n" + " movl %%eax,%0 \n" + " movl %%ebx,%1 \n" + " movl %%ecx,%2 \n" + " movl %%edx,%3 \n" + : "+m" (_eax), + "=m" (_ebx), + "+m" (_ecx), + "=m" (_edx) + : + : "eax", "ebx", "ecx", "edx"); + *eax = _eax; + *ebx = _ebx; + *ecx = _ecx; + *edx = _edx; +} +#endif + +#ifdef _MSC_VER +static void cpuid_count(uae_u32 op, uae_u32 count, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) { - const int CPUID_SPACE = 4096; - uae_u8* cpuid_space = (uae_u8 *)vm_acquire(CPUID_SPACE); - if (cpuid_space == VM_MAP_FAILED) - jit_abort("Could not allocate cpuid_space"); - vm_protect(cpuid_space, CPUID_SPACE, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE); - - static uae_u32 s_op, s_eax, s_ebx, s_ecx, s_edx; - uae_u8* tmp=get_target(); - - s_op = op; - set_target(cpuid_space); - raw_push_l_r(0); /* eax */ - raw_push_l_r(1); /* ecx */ - raw_push_l_r(2); /* edx */ - raw_push_l_r(3); /* ebx */ - raw_mov_l_rm(0,(uintptr)&s_op); - raw_cpuid(0); - raw_mov_l_mr((uintptr)&s_eax,0); - raw_mov_l_mr((uintptr)&s_ebx,3); - raw_mov_l_mr((uintptr)&s_ecx,1); - raw_mov_l_mr((uintptr)&s_edx,2); - raw_pop_l_r(3); - raw_pop_l_r(2); - raw_pop_l_r(1); - raw_pop_l_r(0); - raw_ret(); -#ifdef USE_UDIS86 - if (!op) { /* Only disassemble once! 
*/ - UDISFN(cpuid_space, target) + int cpuinfo[4]; + cpuinfo[0] = op; + cpuinfo[1] = 0; + cpuinfo[2] = count; + cpuinfo[3] = 0; + __cpuidex(cpuinfo, op, count); + *eax = cpuinfo[0]; + *ebx = cpuinfo[1]; + *ecx = cpuinfo[2]; + *edx = cpuinfo[3]; } #endif - set_target(tmp); - - ((cpuop_func*)cpuid_space)(0); - if (eax != NULL) *eax = s_eax; - if (ebx != NULL) *ebx = s_ebx; - if (ecx != NULL) *ecx = s_ecx; - if (edx != NULL) *edx = s_edx; - vm_release(cpuid_space, CPUID_SPACE); +static void +cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx) +{ + cpuid_count(op, 0, eax, ebx, ecx, edx); } static void raw_init_cpu(void) { struct cpuinfo_x86 *c = &cpuinfo; + uae_u32 dummy; /* Defaults */ c->x86_processor = X86_PROCESSOR_max; @@ -3631,6 +3644,11 @@ raw_init_cpu(void) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_hwcap = 0; +#ifdef CPU_x86_64 + c->x86_clflush_size = 64; +#else + c->x86_clflush_size = 32; +#endif /* Get vendor name */ c->x86_vendor_id[12] = '\0'; @@ -3645,7 +3663,7 @@ raw_init_cpu(void) c->x86_brand_id = 0; if ( c->cpuid_level >= 0x00000001 ) { uae_u32 tfms, brand_id; - cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap); + cpuid(0x00000001, &tfms, &brand_id, &dummy, &c->x86_hwcap); c->x86 = (tfms >> 8) & 15; if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; /* extended family */ @@ -3654,6 +3672,10 @@ raw_init_cpu(void) c->x86_model |= (tfms >> 12) & 0xf0; /* extended model */ c->x86_brand_id = brand_id & 0xff; c->x86_mask = tfms & 15; + if (c->x86_hwcap & (1 << 19)) + { + c->x86_clflush_size = ((brand_id >> 8) & 0xff) * 8; + } } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -3661,11 +3683,11 @@ raw_init_cpu(void) /* AMD-defined flags: level 0x80000001 */ uae_u32 xlvl; - cpuid(0x80000000, &xlvl, NULL, NULL, NULL); + cpuid(0x80000000, &xlvl, &dummy, &dummy, &dummy); if ( (xlvl & 0xffff0000) == 0x80000000 ) { if ( xlvl >= 0x80000001 ) { uae_u32 features, extra_features; - cpuid(0x80000001, NULL, NULL, &extra_features, &features); + cpuid(0x80000001, &dummy, &dummy, &extra_features, &features); if (features & (1 << 29)) { /* Assume x86-64 if long mode is supported */ c->x86_processor = X86_PROCESSOR_X86_64; @@ -3711,23 +3733,25 @@ raw_init_cpu(void) } if (c->x86_processor == X86_PROCESSOR_max) { c->x86_processor = X86_PROCESSOR_I386; - jit_log("Error: unknown processor type\n"); - jit_log(" Family : %d\n", c->x86); - jit_log(" Model : %d\n", c->x86_model); - jit_log(" Mask : %d\n", c->x86_mask); - jit_log(" Vendor : %s [%d]\n", c->x86_vendor_id, c->x86_vendor); + jit_log("Error: unknown processor type"); + jit_log(" Family : %d", c->x86); + jit_log(" Model : %d", c->x86_model); + jit_log(" Mask : %d", c->x86_mask); + jit_log(" Vendor : %s [%d]", c->x86_vendor_id, c->x86_vendor); if (c->x86_brand_id) - fprintf(stderr, " BrandID : %02x\n", c->x86_brand_id); + jit_log(" BrandID : %02x", c->x86_brand_id); } /* Have CMOV support? */ - have_cmov = c->x86_hwcap & (1 << 15); + have_cmov = (c->x86_hwcap & (1 << 15)) != 0; #if defined(CPU_x86_64) if (!have_cmov) { jit_abort("x86-64 implementations are bound to have CMOV!"); } #endif + c->x86_has_xmm2 = (c->x86_hwcap & (1 << 26)) != 0; + /* Can the host CPU suffer from partial register stalls? 
*/ have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL); #if 1 @@ -3743,16 +3767,17 @@ raw_init_cpu(void) align_loops = x86_alignments[c->x86_processor].align_loop; align_jumps = x86_alignments[c->x86_processor].align_jump; } - { - TCHAR *s = au (c->x86_vendor_id); - write_log (_T("CPUID level=%d, Family=%d, Model=%d, Mask=%d, Vendor=%s [%d]\n"), - c->cpuid_level, c->x86, c->x86_model, c->x86_mask, s, c->x86_vendor); - xfree (s); - } + + jit_log("Max CPUID level=%d Processor is %s [%s]", + c->cpuid_level, c->x86_vendor_id, + x86_processor_string_table[c->x86_processor]); + raw_flags_init(); } #if 0 +static void __attribute_noinline__ prevent_redzone_use(void) {} + static bool target_check_bsf(void) { bool mismatch = false; @@ -3761,8 +3786,9 @@ static bool target_check_bsf(void) for (int g_OF = 0; g_OF <= 1; g_OF++) { for (int g_SF = 0; g_SF <= 1; g_SF++) { for (int value = -1; value <= 1; value++) { - unsigned long flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF; - long tmp = value; + uintptr flags = (g_SF << 7) | (g_OF << 11) | (g_ZF << 6) | g_CF; + intptr tmp = value; + prevent_redzone_use(); __asm__ __volatile__ ("push %0; popf; bsf %1,%1; pushf; pop %0" : "+r" (flags), "+r" (tmp) : : "cc"); int OF = (flags >> 11) & 1; @@ -3775,7 +3801,9 @@ static bool target_check_bsf(void) } }}}} if (mismatch) + { jit_log("Target CPU defines all flags on BSF instruction"); + } return !mismatch; } #endif diff --git a/jit/compemu_midfunc_x86.cpp b/jit/compemu_midfunc_x86.cpp index 21610fab..9774a94e 100644 --- a/jit/compemu_midfunc_x86.cpp +++ b/jit/compemu_midfunc_x86.cpp @@ -2889,14 +2889,53 @@ MIDFUNC(2,fmul_rr,(FRW d, FR s)) } MENDFUNC(2,fmul_rr,(FRW d, FR s)) +#ifdef __GNUC__ + +static inline void mfence(void) +{ +#ifdef CPU_i386 + if (!cpuinfo.x86_has_xmm2) + __asm__ __volatile__("lock; addl $0,0(%%esp)":::"memory"); + else +#endif + __asm__ __volatile__("mfence":::"memory"); +} + +static inline void clflush(volatile void *__p) +{ + __asm__ __volatile__("clflush %0" : "+m" (*(volatile char *)__p)); +} + +static inline void flush_cpu_icache(void *start, void *stop) +{ + mfence(); + if (cpuinfo.x86_clflush_size != 0) + { + volatile char *vaddr = (volatile char *)(((uintptr)start / cpuinfo.x86_clflush_size) * cpuinfo.x86_clflush_size); + volatile char *vend = (volatile char *)((((uintptr)stop + cpuinfo.x86_clflush_size - 1) / cpuinfo.x86_clflush_size) * cpuinfo.x86_clflush_size); + while (vaddr < vend) + { + clflush(vaddr); + vaddr += cpuinfo.x86_clflush_size; + } + } + mfence(); +} + +#else + static inline void flush_cpu_icache(void *start, void *stop) { UNUSED(start); UNUSED(stop); } +#endif + static inline void write_jmp_target(uae_u32 *jmpaddr, cpuop_func* a) { - *(jmpaddr)=(uintptr)a-((uintptr)jmpaddr+4); + uintptr rel = (uintptr) a - ((uintptr) jmpaddr + 4); + *(jmpaddr) = (uae_u32) rel; + flush_cpu_icache((void *) jmpaddr, (void *) &jmpaddr[1]); } static inline void emit_jmp_target(uae_u32 a) { -- 2.47.3
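
A few notes on the bit twiddling in this patch, with standalone sketches for reviewers. First, the CLFLUSH line-size detection added to raw_init_cpu() reads CPUID leaf 1: EDX bit 19 advertises CLFLUSH support, and EBX bits 15:8 give the flush line size in 8-byte units. A minimal sketch using GCC/clang's <cpuid.h> helper; the function name clflush_line_size is illustrative, not part of the patch:

#include <stdint.h>
#include <stdio.h>
#include <cpuid.h>

/* Derive the CLFLUSH line size from CPUID leaf 1, mirroring the
 * detection this patch adds to raw_init_cpu(). */
static unsigned int clflush_line_size(void)
{
    unsigned int eax, ebx, ecx, edx;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 0;                   /* CPUID leaf 1 unavailable */
    if (!(edx & (1u << 19)))
        return 0;                   /* CLFLUSH not supported */
    return ((ebx >> 8) & 0xff) * 8; /* EBX[15:8] counts 8-byte units */
}

int main(void)
{
    printf("clflush line size: %u bytes\n", clflush_line_size());
    return 0;
}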
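
Second, flush_cpu_icache() rounds the start address down and the stop address up to cache-line boundaries, so every line overlapping [start, stop) receives a CLFLUSH. The arithmetic in isolation; line_bounds is a hypothetical helper, and the division-based rounding works for any positive line size, as in the patch:

#include <stdint.h>
#include <assert.h>

/* Round [start, stop) outward to cache-line boundaries, as the new
 * flush_cpu_icache() does before issuing CLFLUSH line by line. */
static void line_bounds(uintptr_t start, uintptr_t stop, uintptr_t line,
                        uintptr_t *first, uintptr_t *end)
{
    *first = (start / line) * line;           /* round down */
    *end = ((stop + line - 1) / line) * line; /* round up */
}

int main(void)
{
    uintptr_t lo, hi;
    line_bounds(0x1005, 0x1043, 64, &lo, &hi);
    /* Lines at 0x1000 and 0x1040 both overlap the range, so the
     * flush loop must visit exactly those two. */
    assert(lo == 0x1000 && hi == 0x1080);
    return 0;
}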
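
Finally, write_jmp_target() stores a rel32 displacement (the target minus the address of the next instruction, i.e. jmpaddr + 4, truncated to 32 bits) and now flushes the patched word so the CPU does not keep executing a stale jump. The displacement math on its own; the addresses are hypothetical and rel32 is an illustrative name:

#include <stdint.h>
#include <assert.h>

/* rel32 operand of a JMP/CALL whose 4-byte displacement field sits at
 * 'field': the CPU adds it to the address of the *next* instruction. */
static uint32_t rel32(uintptr_t field, uintptr_t target)
{
    return (uint32_t)(target - (field + 4));
}

int main(void)
{
    /* Forward jump: field at 0x401000, target 0x401020 -> 0x1c. */
    assert(rel32(0x401000, 0x401020) == 0x1cu);
    /* Backward jumps wrap modulo 2^32, matching the uae_u32
     * truncation in write_jmp_target(). */
    assert(rel32(0x401020, 0x401000) == 0xffffffdcu);
    return 0;
}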