From: Toni Wilen
Date: Fri, 17 Aug 2018 20:16:55 +0000 (+0300)
Subject: 64-bit wide pfield_doline(). Not yet complete.
X-Git-Tag: 4100~120
X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=4cdb1bc08bdeec88a0cb5a6df61a54c764bdf9a0;p=francis%2Fwinuae.git

64-bit wide pfield_doline(). Not yet complete.
---

diff --git a/drawing.cpp b/drawing.cpp
index edbcca04..65dcefe8 100644
--- a/drawing.cpp
+++ b/drawing.cpp
@@ -177,12 +177,9 @@ static xcolnr *p_xcolors;
    than enough". The coordinates used for indexing into these arrays are
    almost, but not quite, Amiga coordinates (there's a constant offset). */
 static union {
-	/* Let's try to align this thing. */
-	double uupzuq;
-	long int cruxmedo;
-	uae_u8 apixels[MAX_PIXELS_PER_LINE * 2];
-	uae_u16 apixels_w[MAX_PIXELS_PER_LINE * 2 / sizeof (uae_u16)];
-	uae_u32 apixels_l[MAX_PIXELS_PER_LINE * 2 / sizeof (uae_u32)];
+	uae_u64 apixels_q[MAX_PIXELS_PER_LINE * 2 / sizeof(uae_u64)];
+	uae_u32 apixels_l[MAX_PIXELS_PER_LINE * 2 / sizeof(uae_u32)];
+	uae_u8 apixels[MAX_PIXELS_PER_LINE * 2];
 } pixdata;
 
 static uae_u8 *refresh_indicator_buffer;
@@ -1047,7 +1044,7 @@ static void pfield_init_linetoscr (bool border)
 	leftborderhidden = playfield_start - native_ddf_left2;
 	if (hblank_left_start > playfield_start)
 		leftborderhidden += hblank_left_start - playfield_start;
-	src_pixel = MAX_PIXELS_PER_LINE + res_shift_from_window (leftborderhidden);
+	src_pixel = MAX_PIXELS_PER_LINE + res_shift_from_window(leftborderhidden);
 
 	if (dip_for_drawing->nr_sprites == 0 && !expanded)
 		return;
@@ -2362,7 +2359,15 @@ Don't touch this if you don't know what you are doing. */
 	b ^= (tmp << shift); \
 } while (0)
 
+#define MERGE64(a,b,mask,shift) do {\
+	uae_u64 tmp = mask & (a ^ (b >> shift)); \
+	a ^= tmp; \
+	b ^= (tmp << shift); \
+} while (0)
+
+
 #define GETLONG(P) (*(uae_u32 *)P)
+#define GETLONG64(P) (*(uae_u64 *)P)
 
 STATIC_INLINE void pfield_doline_1 (uae_u32 *pixels, int wordcount, int planes)
 {
@@ -2404,7 +2409,7 @@ STATIC_INLINE void pfield_doline_1 (uae_u32 *pixels, int wordcount, int planes)
 		MERGE (b6, b7, 0x00ff00ff, 8);
 
 		MERGE (b0, b2, 0x0000ffff, 16);
-		do_put_mem_long (pixels, b0);
+		do_put_mem_long (pixels + 0, b0);
 		do_put_mem_long (pixels + 4, b2);
 		MERGE (b1, b3, 0x0000ffff, 16);
 		do_put_mem_long (pixels + 2, b1);
@@ -2419,6 +2424,62 @@ STATIC_INLINE void pfield_doline_1 (uae_u32 *pixels, int wordcount, int planes)
 	}
 }
 
+
+STATIC_INLINE void pfield_doline64_1(uae_u64 *pixels, int wordcount, int planes)
+{
+	while (wordcount-- > 0) {
+		uae_u64 b0, b1, b2, b3, b4, b5, b6, b7;
+
+		b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0;
+		switch (planes) {
+#ifdef AGA
+		case 8: b0 = GETLONG64(real_bplpt[7]); real_bplpt[7] += 8;
+		case 7: b1 = GETLONG64(real_bplpt[6]); real_bplpt[6] += 8;
+#endif
+		case 6: b2 = GETLONG64(real_bplpt[5]); real_bplpt[5] += 8;
+		case 5: b3 = GETLONG64(real_bplpt[4]); real_bplpt[4] += 8;
+		case 4: b4 = GETLONG64(real_bplpt[3]); real_bplpt[3] += 8;
+		case 3: b5 = GETLONG64(real_bplpt[2]); real_bplpt[2] += 8;
+		case 2: b6 = GETLONG64(real_bplpt[1]); real_bplpt[1] += 8;
+		case 1: b7 = GETLONG64(real_bplpt[0]); real_bplpt[0] += 8;
+		}
+
+		MERGE64(b0, b1, 0x5555555555555555, 1);
+		MERGE64(b2, b3, 0x5555555555555555, 1);
+		MERGE64(b4, b5, 0x5555555555555555, 1);
+		MERGE64(b6, b7, 0x5555555555555555, 1);
+
+		MERGE64(b0, b2, 0x3333333333333333, 2);
+		MERGE64(b1, b3, 0x3333333333333333, 2);
+		MERGE64(b4, b6, 0x3333333333333333, 2);
+		MERGE64(b5, b7, 0x3333333333333333, 2);
+
+		MERGE64(b0, b4, 0x0f0f0f0f0f0f0f0f, 4);
+		MERGE64(b1, b5, 0x0f0f0f0f0f0f0f0f, 4);
+		MERGE64(b2, b6, 0x0f0f0f0f0f0f0f0f, 4);
+		MERGE64(b3, b7, 0x0f0f0f0f0f0f0f0f, 4);
+
+		MERGE64(b0, b1, 0x00ff00ff00ff00ff, 8);
+		MERGE64(b2, b3, 0x00ff00ff00ff00ff, 8);
+		MERGE64(b4, b5, 0x00ff00ff00ff00ff, 8);
+		MERGE64(b6, b7, 0x00ff00ff00ff00ff, 8);
+
+		MERGE64(b0, b2, 0x0000ffff0000ffff, 16);
+		do_put_mem_quad(pixels + 0, b0);
+		do_put_mem_quad(pixels + 4, b2);
+		MERGE64(b1, b3, 0x0000ffff0000ffff, 16);
+		do_put_mem_quad(pixels + 2, b1);
+		do_put_mem_quad(pixels + 6, b3);
+		MERGE64(b4, b6, 0x0000ffff0000ffff, 16);
+		do_put_mem_quad(pixels + 1, b4);
+		do_put_mem_quad(pixels + 5, b6);
+		MERGE64(b5, b7, 0x0000ffff0000ffff, 16);
+		do_put_mem_quad(pixels + 3, b5);
+		do_put_mem_quad(pixels + 7, b7);
+		pixels += 8;
+	}
+}
+
 /* See above for comments on inlining. These functions should _not_ be
    inlined themselves. */
 static void NOINLINE pfield_doline_n1 (uae_u32 *data, int count) { pfield_doline_1 (data, count, 1); }
@@ -2432,10 +2493,52 @@ static void NOINLINE pfield_doline_n7 (uae_u32 *data, int count) { pfield_doline
 static void NOINLINE pfield_doline_n8 (uae_u32 *data, int count) { pfield_doline_1 (data, count, 8); }
 #endif
 
+static void NOINLINE pfield_doline64_n1(uae_u64 *data, int count) { pfield_doline64_1(data, count, 1); }
+static void NOINLINE pfield_doline64_n2(uae_u64 *data, int count) { pfield_doline64_1(data, count, 2); }
+static void NOINLINE pfield_doline64_n3(uae_u64 *data, int count) { pfield_doline64_1(data, count, 3); }
+static void NOINLINE pfield_doline64_n4(uae_u64 *data, int count) { pfield_doline64_1(data, count, 4); }
+static void NOINLINE pfield_doline64_n5(uae_u64 *data, int count) { pfield_doline64_1(data, count, 5); }
+static void NOINLINE pfield_doline64_n6(uae_u64 *data, int count) { pfield_doline64_1(data, count, 6); }
+#ifdef AGA
+static void NOINLINE pfield_doline64_n7(uae_u64 *data, int count) { pfield_doline64_1(data, count, 7); }
+static void NOINLINE pfield_doline64_n8(uae_u64 *data, int count) { pfield_doline64_1(data, count, 8); }
+#endif
+
 static void pfield_doline (int lineno)
 {
+#if 0
+	int wordcount = (dp_for_drawing->plflinelen + 1) / 2;
+	uae_u64 *data = pixdata.apixels_q + MAX_PIXELS_PER_LINE / sizeof(uae_u64);
+
+#define DATA_POINTER(n) ((debug_bpl_mask & (1 << n)) ? (line_data[lineno] + (n) * MAX_WORDS_PER_LINE * 2) : (debug_bpl_mask_one ? all_ones : all_zeros))
+	real_bplpt[0] = DATA_POINTER(0);
+	real_bplpt[1] = DATA_POINTER(1);
+	real_bplpt[2] = DATA_POINTER(2);
+	real_bplpt[3] = DATA_POINTER(3);
+	real_bplpt[4] = DATA_POINTER(4);
+	real_bplpt[5] = DATA_POINTER(5);
+#ifdef AGA
+	real_bplpt[6] = DATA_POINTER(6);
+	real_bplpt[7] = DATA_POINTER(7);
+#endif
+
+	switch (bplplanecnt) {
+	default: break;
+	case 0: memset(data, 0, wordcount * 64); break;
+	case 1: pfield_doline64_n1(data, wordcount); break;
+	case 2: pfield_doline64_n2(data, wordcount); break;
+	case 3: pfield_doline64_n3(data, wordcount); break;
+	case 4: pfield_doline64_n4(data, wordcount); break;
+	case 5: pfield_doline64_n5(data, wordcount); break;
+	case 6: pfield_doline64_n6(data, wordcount); break;
+#ifdef AGA
+	case 7: pfield_doline64_n7(data, wordcount); break;
+	case 8: pfield_doline64_n8(data, wordcount); break;
+#endif
+	}
+#else
 	int wordcount = dp_for_drawing->plflinelen;
-	uae_u32 *data = pixdata.apixels_l + MAX_PIXELS_PER_LINE / 4;
+	uae_u32 *data = pixdata.apixels_l + MAX_PIXELS_PER_LINE / sizeof(uae_u32);
 
 #define DATA_POINTER(n) ((debug_bpl_mask & (1 << n)) ? (line_data[lineno] + (n) * MAX_WORDS_PER_LINE * 2) : (debug_bpl_mask_one ? all_ones : all_zeros))
 	real_bplpt[0] = DATA_POINTER (0);
@@ -2463,6 +2566,7 @@ static void pfield_doline (int lineno)
 	case 8: pfield_doline_n8 (data, wordcount); break;
 #endif
 	}
+#endif
 
 	if (refresh_indicator_buffer && refresh_indicator_height > lineno) {
 		uae_u8 *opline = refresh_indicator_buffer + lineno * MAX_PIXELS_PER_LINE * 2;