]> git.unchartedbackwaters.co.uk Git - francis/winuae.git/commitdiff
uaegfx masked and overlapping blit fix
authorToni Wilen <twilen@winuae.net>
Sat, 3 Dec 2022 12:24:36 +0000 (14:24 +0200)
committerToni Wilen <twilen@winuae.net>
Sat, 3 Dec 2022 12:24:36 +0000 (14:24 +0200)
od-win32/picasso96_win.cpp
p96_blit.cpp

index de8dfb7e92e2b1dd87df3aafded3dc130f71fa95..76ed7a64147685b50bfae3a366d0a0ce988bb6b6 100644 (file)
@@ -1555,7 +1555,7 @@ static void picasso_handle_hsync(void)
 #define BLT_NAME BLIT_SRC_8
 #define BLT_NAME_MASK BLIT_SRC_MASK_8
 #define BLT_FUNC(s,d) *d = *s
-#define BLT_FUNC_MASK(s,d,mask) *d = ((*d) & ~mask) | (((*s) | (*d)) & mask)
+#define BLT_FUNC_MASK(s,d,mask) *d = ((*d) & ~mask) | ((*s) & mask)
 #include "../p96_blit.cpp"
 #define BLT_NAME BLIT_NOTONLYDST_8
 #define BLT_NAME_MASK BLIT_NOTONLYDST_MASK_8
index a5de5593ca3a434cfde886d01bc7f9b46d3e2ffe..e43683abf9603767f2581e5e4d463e887114666e 100644 (file)
@@ -10,27 +10,54 @@ static void NOINLINE BLT_NAME(unsigned int w, unsigned int h, uae_u8 *src, uae_u
        w *= BLT_SIZE;
        ww = w / 4;
        xxd = w - (ww * 4);
-       for(y = 0; y < h; y++) {
-               uae_u8 *src_8;
-               uae_u8 *dst_8;
-               uae_u32 *src_32 = (uae_u32*)src2;
-               uae_u32 *dst_32 = (uae_u32*)dst2;
-               for (x = 0; x < ww; x++) {
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
+       if (src < dst && src + h * srcpitch > dst) {
+               dst2 += h * dstpitch + w;
+               src2 += h * srcpitch + w;
+               for (y = 0; y < h; y++) {
+                       dst2 -= dstpitch;
+                       src2 -= srcpitch;
+                       uae_u8 *src_8;
+                       uae_u8 *dst_8;
+                       src_8 = (uae_u8*)src2;
+                       dst_8 = (uae_u8*)dst2;
+                       for (x = 0; x < xxd; x++) {
+                               src_8--;
+                               dst_8--;
+                               uae_u32 sv = *src_8;
+                               uae_u32 dv = *dst_8;
+                               BLT_FUNC(&sv, &dv);
+                               *dst_8 = (uae_u8)dv;
+                       }
+                       uae_u32 *src_32 = (uae_u32*)src_8;
+                       uae_u32 *dst_32 = (uae_u32*)dst_8;
+                       for (x = 0; x < ww; x++) {
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                       }
                }
-               src_8 = (uae_u8*)src_32;
-               dst_8 = (uae_u8*)dst_32;
-               for (x = 0; x < xxd; x++) {
-                       uae_u32 sv = *src_8;
-                       uae_u32 dv = *dst_8;
-                       BLT_FUNC(&sv, &dv);
-                       *dst_8 = (uae_u8)dv;
-                       src_8++;
-                       dst_8++;
+       } else {
+               for (y = 0; y < h; y++) {
+                       uae_u8 *src_8;
+                       uae_u8 *dst_8;
+                       uae_u32 *src_32 = (uae_u32*)src2;
+                       uae_u32 *dst_32 = (uae_u32*)dst2;
+                       for (x = 0; x < ww; x++) {
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                       }
+                       src_8 = (uae_u8 *)src_32;
+                       dst_8 = (uae_u8 *)dst_32;
+                       for (x = 0; x < xxd; x++) {
+                               uae_u32 sv = *src_8;
+                               uae_u32 dv = *dst_8;
+                               BLT_FUNC(&sv, &dv);
+                               *dst_8 = (uae_u8)dv;
+                               src_8++;
+                               dst_8++;
+                       }
+                       dst2 += dstpitch;
+                       src2 += srcpitch;
                }
-               dst2 += dstpitch;
-               src2 += srcpitch;
        }
 }
 #else
@@ -44,10 +71,148 @@ static void NOINLINE BLT_NAME(unsigned int w, unsigned int h, uae_u8 *src, uae_u
 
        if (w < 8 * BLT_MULT) {
                ww = w / BLT_MULT;
-               for(y = 0; y < h; y++) {
+               if (src2 < dst2 && src2 + h * srcpitch > dst2) {
+                       dst2 += h * dstpitch + w * BLT_SIZE;
+                       src2 += h * srcpitch + w * BLT_SIZE;
+                       for (y = 0; y < h; y++) {
+                               dst2 -= dstpitch;
+                               src2 -= srcpitch;
+#if BLT_SIZE == 2
+                               if (w & 1) {
+                                       dst2 -= 2;
+                                       src2 -= 2;
+                                       uae_u16 *src_16 = (uae_u16*)src2;
+                                       uae_u16 *dst_16 = (uae_u16*)dst2;
+                                       BLT_FUNC(src_16, dst_16);
+                               }
+#elif BLT_SIZE == 1
+                               {
+                                       int wb = w & 3;
+                                       while (wb--) {
+                                               src2--;
+                                               dst2--;
+                                               uae_u8 *src_8 = (uae_u8*)src2;
+                                               uae_u8 *dst_8 = (uae_u8*)dst2;
+                                               BLT_FUNC(src_8, dst_8);
+                                       }
+                               }
+#endif
+                               uae_u32 *src_32 = (uae_u32*)src2;
+                               uae_u32 *dst_32 = (uae_u32*)dst2;
+                               for (x = 0; x < ww; x++) {
+                                       src_32--; dst_32--;
+                                       BLT_FUNC(src_32, dst_32);
+                               }
+                       }
+               } else {
+                       for (y = 0; y < h; y++) {
+                               uae_u32 *src_32 = (uae_u32*)src2;
+                               uae_u32 *dst_32 = (uae_u32*)dst2;
+                               for (x = 0; x < ww; x++) {
+                                       BLT_FUNC(src_32, dst_32);
+                                       src_32++; dst_32++;
+                               }
+#if BLT_SIZE == 2
+                               if (w & 1) {
+                                       uae_u16 *src_16 = (uae_u16*)src_32;
+                                       uae_u16 *dst_16 = (uae_u16*)dst_32;
+                                       BLT_FUNC(src_16, dst_16);
+                               }
+#elif BLT_SIZE == 1
+                               {
+                                       int wb = w & 3;
+                                       uae_u8 *src_8 = (uae_u8*)src_32;
+                                       uae_u8 *dst_8 = (uae_u8*)dst_32;
+                                       while (wb--) {
+                                               BLT_FUNC(src_8, dst_8);
+                                               src_8++;
+                                               dst_8++;
+                                       }
+                               }
+#endif
+                               dst2 += dstpitch;
+                               src2 += srcpitch;
+                       }
+               }
+               return;
+       }
+
+       ww = w / (8 * BLT_MULT);
+       xxd = (w - ww * (8 * BLT_MULT)) / BLT_MULT;
+       if (src2 < dst2 && src2 + h * srcpitch > dst2) {
+               dst2 += h * dstpitch + w * BLT_SIZE;
+               src2 += h * srcpitch + w * BLT_SIZE;
+               for (y = 0; y < h; y++) {
+                       dst2 -= dstpitch;
+                       src2 -= srcpitch;
+#if BLT_SIZE == 2
+                       if (w & 1) {
+                               src2 -= 2;
+                               dst2 -= 2;
+                               uae_u16 *src_16 = (uae_u16*)src2;
+                               uae_u16 *dst_16 = (uae_u16*)dst2;
+                               BLT_FUNC(src_16, dst_16);
+                       }
+#elif BLT_SIZE == 1
+                       {
+                               int wb = w & 3;
+                               while (wb--) {
+                                       src2--;
+                                       dst2--;
+                                       uae_u8 *src_8 = (uae_u8*)src2;
+                                       uae_u8 *dst_8 = (uae_u8*)dst2;
+                                       BLT_FUNC(src_8, dst_8);
+                               }
+                       }
+#endif
                        uae_u32 *src_32 = (uae_u32*)src2;
                        uae_u32 *dst_32 = (uae_u32*)dst2;
+                       for (x = 0; x < xxd; x++) {
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                       }
                        for (x = 0; x < ww; x++) {
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32--; dst_32--;
+                               BLT_FUNC(src_32, dst_32);
+                       }
+               }
+       } else {
+               for (y = 0; y < h; y++) {
+                       uae_u32 *src_32 = (uae_u32*)src2;
+                       uae_u32 *dst_32 = (uae_u32*)dst2;
+                       for (x = 0; x < ww; x++) {
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                               BLT_FUNC(src_32, dst_32);
+                               src_32++; dst_32++;
+                       }
+                       for (x = 0; x < xxd; x++) {
                                BLT_FUNC(src_32, dst_32);
                                src_32++; dst_32++;
                        }
@@ -72,56 +237,6 @@ static void NOINLINE BLT_NAME(unsigned int w, unsigned int h, uae_u8 *src, uae_u
                        dst2 += dstpitch;
                        src2 += srcpitch;
                }
-               return;
-       }
-
-       ww = w / (8 * BLT_MULT);
-       xxd = (w - ww * (8 * BLT_MULT)) / BLT_MULT;
-       for(y = 0; y < h; y++) {
-               uae_u32 *src_32 = (uae_u32*)src2;
-               uae_u32 *dst_32 = (uae_u32*)dst2;
-               for (x = 0; x < ww; x++) {
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-               }
-               for (x = 0; x < xxd; x++) {
-                       BLT_FUNC(src_32, dst_32);
-                       src_32++; dst_32++;
-               }
-#if BLT_SIZE == 2
-               if (w & 1) {
-                       uae_u16 *src_16 = (uae_u16*)src_32;
-                       uae_u16 *dst_16 = (uae_u16*)dst_32;
-                       BLT_FUNC (src_16, dst_16);
-               }
-#elif BLT_SIZE == 1
-               {
-                       int wb = w & 3;
-                       uae_u8 *src_8 = (uae_u8*)src_32;
-                       uae_u8 *dst_8 = (uae_u8*)dst_32;
-                       while (wb--) {
-                               BLT_FUNC(src_8, dst_8);
-                               src_8++;
-                               dst_8++;
-                       }
-               }
-#endif
-               dst2 += dstpitch;
-               src2 += srcpitch;
        }
 }
 #endif
@@ -132,17 +247,48 @@ static void NOINLINE BLT_NAME_MASK(unsigned int w, unsigned int h, uae_u8 *src,
        uae_u8 *src2 = src;
        uae_u8 *dst2 = dst;
        unsigned int y, x;
+       uae_u32 mask32 = mask * 0x01010101;
 
-       for (y = 0; y < h; y++) {
-               uae_u8 *src_8 = src2;
-               uae_u8 *dst_8 = dst2;
-               for (x = 0; x < w; x++) {
-                       BLT_FUNC_MASK(src_8, dst_8, mask);
-                       src_8++;
-                       dst_8++;
+       if (src < dst && src + h * srcpitch > dst) {
+               dst2 += h * dstpitch + w;
+               src2 += h * srcpitch + w;
+               for (y = 0; y < h; y++) {
+                       dst2 -= dstpitch;
+                       src2 -= srcpitch;
+                       uae_u32 *src_32 = (uae_u32*)src2;
+                       uae_u32 *dst_32 = (uae_u32*)dst2;
+                       for (x = 0; x < (w & ~3); x += 4) {
+                               src_32--;
+                               dst_32--;
+                               BLT_FUNC_MASK(src_32, dst_32, mask32);
+                       }
+                       uae_u8 *src_8 = (uae_u8*)src_32;
+                       uae_u8 *dst_8 = (uae_u8*)dst_32;
+                       for (x = 0; x < (w & 3); x++) {
+                               src_8--;
+                               dst_8--;
+                               BLT_FUNC_MASK(src_8, dst_8, mask);
+                       }
+               }
+       } else {
+               for (y = 0; y < h; y++) {
+                       uae_u32 *src_32 = (uae_u32*)src2;
+                       uae_u32 *dst_32 = (uae_u32*)dst2;
+                       for (x = 0; x < (w & ~3); x += 4) {
+                               BLT_FUNC_MASK(src_32, dst_32, mask32);
+                               src_32++;
+                               dst_32++;
+                       }
+                       uae_u8 *src_8 = (uae_u8*)src_32;
+                       uae_u8 *dst_8 = (uae_u8*)dst_32;
+                       for (x = 0; x < (w & 3); x++) {
+                               BLT_FUNC_MASK(src_8, dst_8, mask);
+                               src_8++;
+                               dst_8++;
+                       }
+                       dst2 += dstpitch;
+                       src2 += srcpitch;
                }
-               dst2 += dstpitch;
-               src2 += srcpitch;
        }
 }
 #endif