]> git.unchartedbackwaters.co.uk Git - francis/winuae.git/commitdiff
imported winuaesrc1500b7.zip
author Toni Wilen <twilen@winuae.net>
Sun, 9 Mar 2008 11:49:57 +0000 (13:49 +0200)
committer Toni Wilen <twilen@winuae.net>
Mon, 22 Feb 2010 19:39:12 +0000 (21:39 +0200)
42 files changed:
cfgfile.c
cia.c
custom.c
debug.c
disk.c
drawing.c
expansion.c
gayle.c
gfxutil.c
include/gfxfilter.h
include/options.h
include/statusline.h [new file with mode: 0755]
include/sysdeps.h
include/xwin.h
newcpu.c
od-win32/asm.cmd [moved from od-win32/asm.bat with 50% similarity]
od-win32/debug_win32.c
od-win32/dxwrap.c
od-win32/dxwrap.h
od-win32/hardfile_win32.c
od-win32/hq3x16.asm [new file with mode: 0755]
od-win32/hq3x32.asm [new file with mode: 0755]
od-win32/hq4x16.asm [new file with mode: 0755]
od-win32/hq4x32.asm [new file with mode: 0755]
od-win32/mman.c
od-win32/picasso96_win.c
od-win32/picasso96_win.h
od-win32/resources/resource
od-win32/resources/resource.h
od-win32/resources/winuae.rc
od-win32/serial_win32.c
od-win32/win32.h
od-win32/win32_scale2x.c
od-win32/win32gfx.c
od-win32/win32gfx.h
od-win32/win32gui.c
od-win32/winuae_msvc/winuae_msvc.vcproj
od-win32/winuaechangelog.txt
od-win32/writelog.c
sana2.c
savestate.c
uaelib.c

diff --git a/cfgfile.c b/cfgfile.c
index 2741d4939f0e48af4cc4f1d26c1d7901775fb8a2..05b838e3825700c6f0beafdf9bfb6c2f21212b75 100755 (executable)
--- a/cfgfile.c
+++ b/cfgfile.c
@@ -1350,6 +1350,7 @@ static int cfgfile_parse_hardware (struct uae_prefs *p, char *option, char *valu
        || cfgfile_intval (option, value, "bogomem_size", &p->bogomem_size, 0x40000)
        || cfgfile_intval (option, value, "gfxcard_size", &p->gfxmem_size, 0x100000)
        || cfgfile_intval (option, value, "floppy_speed", &p->floppy_speed, 1)
+       || cfgfile_intval (option, value, "floppy_write_length", &p->floppy_write_length, 1)
        || cfgfile_intval (option, value, "nr_floppies", &p->nr_floppies, 1)
        || cfgfile_intval (option, value, "floppy0type", &p->dfxtype[0], 1)
        || cfgfile_intval (option, value, "floppy1type", &p->dfxtype[1], 1)
@@ -2976,6 +2977,7 @@ void default_prefs (struct uae_prefs *p, int type)
     p->dfxtype[2] = DRV_NONE;
     p->dfxtype[3] = DRV_NONE;
     p->floppy_speed = 100;
+    p->floppy_write_length = 0;
     p->dfxclickvolume = 33;
 
     p->statecapturebuffersize = 20 * 1024 * 1024;
diff --git a/cia.c b/cia.c
index 4c17554d1486aef866185a7c9b9be49e7081e77e..2f581ec1688037f77e285050e1a312699e84a6fc 100755 (executable)
--- a/cia.c
+++ b/cia.c
@@ -1090,13 +1090,13 @@ void CIA_reset (void)
 
 void dumpcia (void)
 {
-    console_out ("A: CRA %02x CRB %02x ICR %02x IM %02x TA %04x (%04x) TB %04x (%04x)\n",
+    console_out_f ("A: CRA %02x CRB %02x ICR %02x IM %02x TA %04x (%04x) TB %04x (%04x)\n",
                 ciaacra, ciaacrb, ciaaicr, ciaaimask, ciaata, ciaala, ciaatb, ciaalb);
-    console_out ("TOD %06x (%06x) ALARM %06x %c%c\n",
+    console_out_f ("TOD %06x (%06x) ALARM %06x %c%c\n",
                 ciaatod, ciaatol, ciaaalarm, ciaatlatch ? 'L' : ' ', ciaatodon ? ' ' : 'S');
-    console_out ("B: CRA %02x CRB %02x ICR %02x IM %02x TA %04x (%04x) TB %04x (%04x)\n",
+    console_out_f ("B: CRA %02x CRB %02x ICR %02x IM %02x TA %04x (%04x) TB %04x (%04x)\n",
                 ciabcra, ciabcrb, ciaaicr, ciabimask, ciabta, ciabla, ciabtb, ciablb);
-    console_out ("TOD %06x (%06x) ALARM %06x %c%c\n",
+    console_out_f ("TOD %06x (%06x) ALARM %06x %c%c\n",
                 ciabtod, ciabtol, ciabalarm, ciabtlatch ? 'L' : ' ', ciabtodon ? ' ' : 'S');
 }
 
@@ -1119,6 +1119,11 @@ addrbank cia_bank = {
 };
 
 
+STATIC_INLINE isgayle (void)
+{
+    return (currprefs.cs_ide == 1 || currprefs.cs_pcmcia);
+}
+
 /* e-clock is 10 CPU cycles, 6 cycles low, 4 high
  * data transfer happens during 4 high cycles
  */
@@ -1157,22 +1162,23 @@ static uae_u32 REGPARAM2 cia_bget (uaecptr addr)
     cia_wait_pre ();
     v = 0xff;
     switch ((addr >> 12) & 3) {
-    case 0:
-       v = (addr & 1) ? ReadCIAA (r) : ReadCIAB (r);
+       case 0:
+           if (!isgayle ())
+               v = (addr & 1) ? ReadCIAA (r) : ReadCIAB (r);
        break;
-    case 1:
-       v = (addr & 1) ? 0xff : ReadCIAB (r);
+       case 1:
+           v = (addr & 1) ? 0xff : ReadCIAB (r);
        break;
-    case 2:
-       v = (addr & 1) ? ReadCIAA (r) : 0xff;
+       case 2:
+           v = (addr & 1) ? ReadCIAA (r) : 0xff;
        break;
-    case 3:
-       if (currprefs.cpu_model == 68000 && currprefs.cpu_compatible)
-           v = (addr & 1) ? regs.irc : regs.irc >> 8;
-       if (warned > 0) {
-           write_log ("cia_bget: unknown CIA address %x PC=%x\n", addr, M68K_GETPC);
-           warned--;
-       }
+       case 3:
+           if (currprefs.cpu_model == 68000 && currprefs.cpu_compatible)
+               v = (addr & 1) ? regs.irc : regs.irc >> 8;
+           if (warned > 0) {
+               write_log ("cia_bget: unknown CIA address %x PC=%x\n", addr, M68K_GETPC);
+               warned--;
+           }
        break;
     }
     cia_wait_post ();
@@ -1191,24 +1197,24 @@ static uae_u32 REGPARAM2 cia_wget (uaecptr addr)
     v = 0xffff;
     switch ((addr >> 12) & 3)
     {
-    case 0:
-       v = (ReadCIAB (r) << 8) | ReadCIAA (r);
+       case 0:
+           if (!isgayle ())
+               v = (ReadCIAB (r) << 8) | ReadCIAA (r);
        break;
-    case 1:
-       v = (ReadCIAB (r) << 8) | 0xff;
+       case 1:
+           v = (ReadCIAB (r) << 8) | 0xff;
        break;
-    case 2:
-       v = (0xff << 8) | ReadCIAA (r);
+       case 2:
+           v = (0xff << 8) | ReadCIAA (r);
        break;
-    case 3:
-       if (currprefs.cpu_model == 68000 && currprefs.cpu_compatible)
-           v = regs.irc;
-       if (warned > 0) {
-           write_log ("cia_wget: unknown CIA address %x PC=%x\n", addr, M68K_GETPC);
-           warned--;
-       }
+       case 3:
+           if (currprefs.cpu_model == 68000 && currprefs.cpu_compatible)
+               v = regs.irc;
+           if (warned > 0) {
+               write_log ("cia_wget: unknown CIA address %x PC=%x\n", addr, M68K_GETPC);
+               warned--;
+           }
        break;
-
     }
     cia_wait_post ();
     return v;
@@ -1246,13 +1252,15 @@ static void REGPARAM2 cia_bput (uaecptr addr, uae_u32 value)
     special_mem |= S_WRITE;
 #endif
     cia_wait_pre ();
-    if ((addr & 0x2000) == 0)
-       WriteCIAB (r, value);
-    if ((addr & 0x1000) == 0)
-       WriteCIAA (r, value);
-    if (((addr & 0x3000) == 0x3000) && warned > 0) {
-       write_log ("cia_bput: unknown CIA address %x %x\n", addr, value);
-       warned--;
+    if (!isgayle () || (addr & 0x3000) != 0) {
+       if ((addr & 0x2000) == 0)
+           WriteCIAB (r, value);
+       if ((addr & 0x1000) == 0)
+           WriteCIAA (r, value);
+       if (((addr & 0x3000) == 0x3000) && warned > 0) {
+           write_log ("cia_bput: unknown CIA address %x %x\n", addr, value);
+           warned--;
+       }
     }
     cia_wait_post ();
 }
@@ -1265,13 +1273,15 @@ static void REGPARAM2 cia_wput (uaecptr addr, uae_u32 value)
     special_mem |= S_WRITE;
 #endif
     cia_wait_pre ();
-    if ((addr & 0x2000) == 0)
-       WriteCIAB (r, value >> 8);
-    if ((addr & 0x1000) == 0)
-       WriteCIAA (r, value & 0xff);
-    if (((addr & 0x3000) == 0x3000) && warned > 0) {
-       write_log ("cia_wput: unknown CIA address %x %x\n", addr, value);
-       warned--;
+    if (!isgayle () || (addr & 0x3000) != 0) {
+       if ((addr & 0x2000) == 0)
+           WriteCIAB (r, value >> 8);
+       if ((addr & 0x1000) == 0)
+           WriteCIAA (r, value & 0xff);
+       if (((addr & 0x3000) == 0x3000) && warned > 0) {
+           write_log ("cia_wput: unknown CIA address %x %x\n", addr, value);
+           warned--;
+       }
     }
     cia_wait_post ();
 }
diff --git a/custom.c b/custom.c
index 8de418994a41fdeb3d223c6dcb820d27fd601071..c616777815e74384f5ae96d972b49e030c6c64f9 100755 (executable)
--- a/custom.c
+++ b/custom.c
@@ -1930,8 +1930,8 @@ static void record_sprite (int line, int num, int sprxp, uae_u16 *data, uae_u16
     }
     width = (sprite_width << sprite_buffer_res) >> sprres;
 
-    /* Try to coalesce entries if they aren't too far apart.  */
-    if (! next_sprite_forced && e[-1].max + 16 >= sprxp) {
+    /* Try to coalesce entries if they aren't too far apart, except AGA due to AGA sprite color selection.  */
+    if (! next_sprite_forced && e[-1].max + 16 >= sprxp && !(currprefs.chipset_mask & CSMASK_AGA)) {
        e--;
     } else {
        next_sprite_entry++;
@@ -3359,7 +3359,7 @@ void dump_aga_custom (void)
        rgb2 = current_colors.color_regs_aga[c2] | (color_regs_aga_genlock[c2] << 31);
        rgb3 = current_colors.color_regs_aga[c3] | (color_regs_aga_genlock[c3] << 31);
        rgb4 = current_colors.color_regs_aga[c4] | (color_regs_aga_genlock[c4] << 31);
-       console_out("%3d %08.8X %3d %08.8X %3d %08.8X %3d %08.8X\n",
+       console_out_f ("%3d %08.8X %3d %08.8X %3d %08.8X %3d %08.8X\n",
            c1, rgb1, c2, rgb2, c3, rgb3, c4, rgb4);
     }
 }
@@ -4854,17 +4854,17 @@ void customreset (int hardreset)
 
 void dumpcustom (void)
 {
-    console_out ("DMACON: %x INTENA: %x INTREQ: %x VPOS: %x HPOS: %x\n", DMACONR(),
+    console_out_f ("DMACON: %x INTENA: %x INTREQ: %x VPOS: %x HPOS: %x\n", DMACONR(),
            (unsigned int)intena, (unsigned int)intreq, (unsigned int)vpos, (unsigned int)current_hpos());
-    console_out ("COP1LC: %08lx, COP2LC: %08lx COPPTR: %08lx\n", (unsigned long)cop1lc, (unsigned long)cop2lc, cop_state.ip);
-    console_out ("DIWSTRT: %04x DIWSTOP: %04x DDFSTRT: %04x DDFSTOP: %04x\n",
+    console_out_f ("COP1LC: %08lx, COP2LC: %08lx COPPTR: %08lx\n", (unsigned long)cop1lc, (unsigned long)cop2lc, cop_state.ip);
+    console_out_f ("DIWSTRT: %04x DIWSTOP: %04x DDFSTRT: %04x DDFSTOP: %04x\n",
            (unsigned int)diwstrt, (unsigned int)diwstop, (unsigned int)ddfstrt, (unsigned int)ddfstop);
-    console_out ("BPLCON 0: %04x 1: %04x 2: %04x 3: %04x 4: %04x\n", bplcon0, bplcon1, bplcon2, bplcon3, bplcon4);
+    console_out_f ("BPLCON 0: %04x 1: %04x 2: %04x 3: %04x 4: %04x\n", bplcon0, bplcon1, bplcon2, bplcon3, bplcon4);
     if (timeframes) {
-       console_out ("Average frame time: %.2f ms [frames: %d time: %d]\n",
+       console_out_f ("Average frame time: %.2f ms [frames: %d time: %d]\n",
                    (double)frametime / timeframes, timeframes, frametime);
        if (total_skipped)
-           console_out ("Skipped frames: %d\n", total_skipped);
+           console_out_f ("Skipped frames: %d\n", total_skipped);
     }
 }
 
diff --git a/debug.c b/debug.c
index 361ab4ea0939616be3e23303f66b7b357535d09e..05a8274e958a8135388e1fec6a7f321034337bf6 100755 (executable)
--- a/debug.c
+++ b/debug.c
@@ -396,7 +396,7 @@ static void converter (char **c)
     for (i = 0; i < 32; i++)
        s[i] = (v & (1 << (31 - i))) ? '1' : '0';
     s[i] = 0;
-    console_out ("0x%08X = %%%s = %u = %d\n", v, s, v, (uae_s32)v);
+    console_out_f ("0x%08X = %%%s = %u = %d\n", v, s, v, (uae_s32)v);
 }
 
 static uae_u32 lastaddr (void)
@@ -497,7 +497,7 @@ static uaecptr nextaddr (uaecptr addr, uaecptr *end)
        uaecptr xa = addr;
        if (xa == 1)
            xa = 0;
-       console_out("%08X -> %08X (%08X)...\n", xa, xa + next - 1, next);
+       console_out_f ("%08X -> %08X (%08X)...\n", xa, xa + next - 1, next);
     }
 #endif
     return addr;
@@ -606,7 +606,7 @@ static void dump_custom_regs (int aga)
        addr2 = custd[j].adr & 0x1ff;
        v1 = (p1[addr1 + 0] << 8) | p1[addr1 + 1];
        v2 = (p1[addr2 + 0] << 8) | p1[addr2 + 1];
-       console_out ("%03.3X %s\t%04.4X\t%03.3X %s\t%04.4X\n",
+       console_out_f ("%03.3X %s\t%04.4X\t%03.3X %s\t%04.4X\n",
            addr1, custd[i].name, v1,
            addr2, custd[j].name, v2);
     }
@@ -622,14 +622,14 @@ static void dump_vectors (uaecptr addr)
 
     while (int_labels[i].name || trap_labels[j].name) {
        if (int_labels[i].name) {
-           console_out ("$%08X: %s  \t $%08X\t", int_labels[i].adr + addr,
+           console_out_f ("$%08X: %s  \t $%08X\t", int_labels[i].adr + addr,
                int_labels[i].name, get_long (int_labels[i].adr + addr));
            i++;
        } else {
            console_out ("\t\t\t\t");
        }
        if (trap_labels[j].name) {
-           console_out("$%08X: %s  \t $%08X", trap_labels[j].adr + addr,
+           console_out_f ("$%08X: %s  \t $%08X", trap_labels[j].adr + addr,
               trap_labels[j].name, get_long (trap_labels[j].adr + addr));
            j++;
        }
@@ -653,9 +653,9 @@ static void disassemble_wait (FILE *file, unsigned long insn)
     if (v_mask > 0) {
        console_out ("vpos ");
        if (ve != 0x7f) {
-           console_out ("& 0x%02x ", ve);
+           console_out_f ("& 0x%02x ", ve);
        }
-       console_out (">= 0x%02x", v_mask);
+       console_out_f (">= 0x%02x", v_mask);
     }
     if (he > 0) {
        if (v_mask > 0) {
@@ -663,14 +663,14 @@ static void disassemble_wait (FILE *file, unsigned long insn)
        }
        console_out (" hpos ");
        if (he != 0xfe) {
-           console_out ("& 0x%02x ", he);
+           console_out_f ("& 0x%02x ", he);
        }
-       console_out (">= 0x%02x", h_mask);
+       console_out_f (">= 0x%02x", h_mask);
     } else {
        console_out (", ignore horizontal");
     }
 
-    console_out (".\n                        \t; VP %02x, VE %02x; HP %02x, HE %02x; BFD %d\n",
+    console_out_f (".\n                        \t; VP %02x, VE %02x; HP %02x, HE %02x; BFD %d\n",
             vp, ve, hp, he, bfd);
 }
 
@@ -743,7 +743,7 @@ static void decode_copper_insn (FILE* file, unsigned long insn, unsigned long ad
     if (get_copper_address(-1) >= addr && get_copper_address(-1) <= addr + 3)
        here = '*';
 
-    console_out ("%c%08lx: %04lx %04lx%s\t; ", here, addr, insn >> 16, insn & 0xFFFF, record);
+    console_out_f ("%c%08lx: %04lx %04lx%s\t; ", here, addr, insn >> 16, insn & 0xFFFF, record);
 
     switch (insn_type) {
     case 0x00010000: /* WAIT insn */
@@ -771,9 +771,9 @@ static void decode_copper_insn (FILE* file, unsigned long insn, unsigned long ad
                i++;
            }
            if (custd[i].name)
-               console_out ("%s := 0x%04lx\n", custd[i].name, insn & 0xffff);
+               console_out_f ("%s := 0x%04lx\n", custd[i].name, insn & 0xffff);
            else
-               console_out ("%04x := 0x%04lx\n", addr, insn & 0xffff);
+               console_out_f ("%04x := 0x%04lx\n", addr, insn & 0xffff);
        }
        break;
 
@@ -809,7 +809,7 @@ static int copper_debugger (char **c)
            debug_copper = 0;
        else
            debug_copper = 1;
-       console_out ("Copper debugger %s.\n", debug_copper ? "enabled" : "disabled");
+       console_out_f ("Copper debugger %s.\n", debug_copper ? "enabled" : "disabled");
     } else if(**c == 't') {
        debug_copper = 1|2;
        return 1;
@@ -818,7 +818,7 @@ static int copper_debugger (char **c)
        debug_copper = 1|4;
        if (more_params(c)) {
            debug_copper_pc = readhex(c);
-           console_out ("Copper breakpoint @0x%08.8x\n", debug_copper_pc);
+           console_out_f ("Copper breakpoint @0x%08.8x\n", debug_copper_pc);
        } else {
            debug_copper &= ~4;
        }
@@ -887,11 +887,11 @@ static void listcheater(int mode, int size)
            b = get_word (ts->addr);
        }
        if (mode)
-           console_out("%08X=%04X ", ts->addr, b);
+           console_out_f ("%08X=%04X ", ts->addr, b);
        else
-           console_out("%08X ", ts->addr);
+           console_out_f ("%08X ", ts->addr);
        if ((i % skip) == skip)
-           console_out("\n");
+           console_out ("\n");
     }
 }
 
@@ -1013,7 +1013,7 @@ static void deepcheatsearch (char **c)
        }
     }
 
-    console_out ("%d addresses found\n", cnt);
+    console_out_f ("%d addresses found\n", cnt);
     if (cnt <= MAX_CHEAT_VIEW) {
        clearcheater();
        cnt = 0;
@@ -1029,7 +1029,7 @@ static void deepcheatsearch (char **c)
        }
        listcheater(1, size);
     } else {
-       console_out("Now continue with 'g' and use 'D' again after you have lost another life\n");
+       console_out ("Now continue with 'g' and use 'D' again after you have lost another life\n");
     }
 }
 
@@ -1129,7 +1129,7 @@ static void cheatsearch (char **c)
        }
        listcheater (0, size);
     }
-    console_out ("Found %d possible addresses with 0x%X (%u) (%d bytes)\n", count, val, val, size);
+    console_out_f ("Found %d possible addresses with 0x%X (%u) (%d bytes)\n", count, val, val, size);
     if (count > 0)
        console_out ("Now continue with 'g' and use 'C' with a different value\n");
     first = 0;
@@ -1242,17 +1242,17 @@ static void illg_debug_do (uaecptr addr, int rwi, int size, uae_u32 val)
            illg_debug_check (ad, rwi, size, val);
        } else if ((mask & 3) == 0) {
            if (rwi & 2)
-               console_out ("W: %08.8X=%02.2X PC=%08.8X\n", ad, v, pc);
+               console_out_f ("W: %08.8X=%02.2X PC=%08.8X\n", ad, v, pc);
            else if (rwi & 1)
-               console_out ("R: %08.8X    PC=%08.8X\n", ad, pc);
+               console_out_f ("R: %08.8X    PC=%08.8X\n", ad, pc);
            if (illgdebug_break)
                activate_debugger ();
        } else if (!(mask & 1) && (rwi & 1)) {
-           console_out ("RO: %08.8X=%02.2X PC=%08.8X\n", ad, v, pc);
+           console_out_f ("RO: %08.8X=%02.2X PC=%08.8X\n", ad, v, pc);
            if (illgdebug_break)
                activate_debugger ();
        } else if (!(mask & 2) && (rwi & 2)) {
-           console_out ("WO: %08.8X    PC=%08.8X\n", ad, pc);
+           console_out_f ("WO: %08.8X    PC=%08.8X\n", ad, pc);
            if (illgdebug_break)
                activate_debugger ();
        }
@@ -1275,7 +1275,7 @@ static struct smc_item *smc_table;
 static void smc_free (void)
 {
     if (smc_table)
-       console_out("SMCD disabled\n");
+       console_out ("SMCD disabled\n");
     xfree(smc_table);
     smc_mode = 0;
     smc_table = NULL;
@@ -1304,7 +1304,7 @@ static void smc_detect_init (char **c)
        initialize_memwatch (0);
     if (v)
        smc_mode = 1;
-    console_out ("SMCD enabled. Break=%d\n", smc_mode);
+    console_out_f ("SMCD enabled. Break=%d\n", smc_mode);
 }
 
 #define SMC_MAXHITS 8
@@ -1346,12 +1346,12 @@ static void smc_detector (uaecptr addr, int rwi, int size, uae_u32 *valp)
     }
     if (hitcnt < 100) {
        smc_table[hitaddr].cnt++;
-       console_out ("SMC at %08.8X - %08.8X (%d) from %08.8X\n",
+       console_out_f ("SMC at %08.8X - %08.8X (%d) from %08.8X\n",
            hitaddr, hitaddr + hitcnt, hitcnt, hitpc);
        if (smc_mode)
            activate_debugger ();
        if (smc_table[hitaddr].cnt >= SMC_MAXHITS)
-           console_out ("* hit count >= %d, future hits ignored\n", SMC_MAXHITS);
+           console_out_f ("* hit count >= %d, future hits ignored\n", SMC_MAXHITS);
     }
 }
 
@@ -1718,7 +1718,7 @@ static void memwatch (char **c)
                uae_u32 len = 1;
                if (more_params (c))
                    len = readhex (c);
-               console_out ("cleared logging addresses %08.8X - %08.8X\n", addr, addr + len);
+               console_out_f ("cleared logging addresses %08.8X - %08.8X\n", addr, addr + len);
                while (len > 0) {
                    addr &= 0xffffff;
                    illgdebug[addr] = 7;
@@ -1727,7 +1727,7 @@ static void memwatch (char **c)
                }
            } else {
                illg_free();
-               console_out("Illegal memory access logging disabled\n");
+               console_out ("Illegal memory access logging disabled\n");
            }
        } else {
            illg_init ();
@@ -1735,7 +1735,7 @@ static void memwatch (char **c)
            illgdebug_break = 0;
            if (more_params (c))
                illgdebug_break = 1;
-           console_out ("Illegal memory access logging enabled. Break=%d\n", illgdebug_break);
+           console_out_f ("Illegal memory access logging enabled. Break=%d\n", illgdebug_break);
        }
        return;
     }
@@ -1746,7 +1746,7 @@ static void memwatch (char **c)
     mwn->size = 0;
     ignore_ws (c);
     if (!more_params (c)) {
-       console_out ("Memwatch %d removed\n", num);
+       console_out_f ("Memwatch %d removed\n", num);
        return;
     }
     mwn->addr = readhex (c);
@@ -1825,7 +1825,7 @@ static void writeintomem (char **c)
        put_byte (addr, val);
        cc = 'B';
     }
-    console_out ("Wrote %X (%u) at %08X.%c\n", val, val, addr, cc);
+    console_out_f ("Wrote %X (%u) at %08X.%c\n", val, val, addr, cc);
 }
 
 static uae_u8 *dump_xlate(uae_u32 addr)
@@ -1930,15 +1930,15 @@ static char* BSTR2CSTR(uae_u8 *bstr)
 static void print_task_info(uaecptr node)
 {
     int process = get_byte (node + 8) == 13 ? 1 : 0;
-    console_out ("%08X: %08X", node, 0);
-    console_out (process ? " PROCESS '%s'" : " TASK    '%s'\n", get_real_address (get_long (node + 10)));
+    console_out_f ("%08X: %08X", node, 0);
+    console_out_f (process ? " PROCESS '%s'" : " TASK    '%s'\n", get_real_address (get_long (node + 10)));
     if (process) {
        uaecptr cli = BPTR2APTR (get_long (node + 172));
        int tasknum = get_long (node + 140);
        if (cli && tasknum) {
            uae_u8 *command_bstr = get_real_address (BPTR2APTR (get_long (cli + 16)));
            char *command = BSTR2CSTR (command_bstr);
-           console_out (" [%d, '%s']\n", tasknum, command);
+           console_out_f (" [%d, '%s']\n", tasknum, command);
            xfree (command);
        } else {
            console_out ("\n");
@@ -1952,11 +1952,11 @@ static void show_exec_tasks (void)
     uaecptr taskready = get_long (execbase + 406);
     uaecptr taskwait = get_long (execbase + 420);
     uaecptr node, end;
-    console_out ("execbase at 0x%08X\n", (unsigned long) execbase);
+    console_out_f ("execbase at 0x%08X\n", (unsigned long) execbase);
     console_out ("Current:\n");
     node = get_long (execbase + 276);
     print_task_info (node);
-    console_out ("Ready:\n");
+    console_out_f ("Ready:\n");
     node = get_long (taskready);
     end = get_long (taskready + 4);
     while (node) {
@@ -1995,7 +1995,7 @@ int instruction_breakpoint (char **c)
                if (more_params (c))
                    sr_bpmask = readhex (c);
            }
-           console_out ("SR breakpoint, value=%04X, mask=%04X\n", sr_bpvalue, sr_bpmask);
+           console_out_f ("SR breakpoint, value=%04X, mask=%04X\n", sr_bpvalue, sr_bpmask);
            return 0;
        } else if (nc == 'I') {
            next_char (c);
@@ -2017,7 +2017,7 @@ int instruction_breakpoint (char **c)
                bpn = &bpnodes[i];
                if (!bpn->enabled)
                    continue;
-               console_out ("%8X ", bpn->addr);
+               console_out_f ("%8X ", bpn->addr);
                got = 1;
            }
            if (!got)
@@ -2113,7 +2113,7 @@ static void savemem (char **cc)
     len2 = len = readhex (cc);
     fp = fopen (name, "wb");
     if (fp == NULL) {
-       console_out ("Couldn't open file '%s'\n", name);
+       console_out_f ("Couldn't open file '%s'\n", name);
        return;
     }
     while (len > 0) {
@@ -2127,7 +2127,7 @@ static void savemem (char **cc)
     }
     fclose (fp);
     if (len == 0)
-       console_out ("Wrote %08X - %08X (%d bytes) to '%s'\n",
+       console_out_f ("Wrote %08X - %08X (%d bytes) to '%s'\n",
            src2, src2 + len2, len2, name);
     return;
 S_argh:
@@ -2192,7 +2192,7 @@ static void searchmem (char **cc)
        if (more_params (cc))
            endaddr = readhex (cc);
     }
-    console_out ("Searching from %08X to %08X..\n", addr, endaddr);
+    console_out_f ("Searching from %08X to %08X..\n", addr, endaddr);
     while ((addr = nextaddr (addr, NULL)) != 0xffffffff) {
        if (addr == endaddr)
            break;
@@ -2208,7 +2208,7 @@ static void searchmem (char **cc)
        }
        if (i == sslen) {
            got++;
-           console_out (" %08X", addr);
+           console_out_f (" %08X", addr);
            if (got > 100) {
                console_out ("\nMore than 100 results, aborting..");
                break;
@@ -2288,7 +2288,7 @@ static void debugtest_set (char **inptr)
            debugtest_modes[val] = 0;
        else
            debugtest_modes[val] = val2;
-       console_out ("debugtest '%s': %s. break = %s\n",
+       console_out_f ("debugtest '%s': %s. break = %s\n",
            debugtest_names[val], debugtest_modes[val] ? "on" :"off", val2 == 2 ? "on" : "off");
     }
 }
@@ -2402,13 +2402,13 @@ static void debug_sprite (char **inptr)
            }
        }
        tmp[width] = 0;
-       console_out ("%3d: %s\n", y, tmp);
+       console_out_f ("%3d: %s\n", y, tmp);
     }
 
-    console_out ("Sprite address %08X, width = %d\n", addr, size * 16);
-    console_out ("OCS: StartX=%d StartY=%d EndY=%d\n", xpos, ypos, ypose);
-    console_out ("ECS: StartX=%d (%d.%d) StartY=%d EndY=%d%s\n", xpos_ecs, xpos_ecs / 4, xpos_ecs & 3, ypos_ecs, ypose_ecs, ecs ? " (*)" : "");
-    console_out ("Attach: %d. AGA SSCAN/SH10 bit: %d\n", attach, sh10);
+    console_out_f ("Sprite address %08X, width = %d\n", addr, size * 16);
+    console_out_f ("OCS: StartX=%d StartY=%d EndY=%d\n", xpos, ypos, ypose);
+    console_out_f ("ECS: StartX=%d (%d.%d) StartY=%d EndY=%d%s\n", xpos_ecs, xpos_ecs / 4, xpos_ecs & 3, ypos_ecs, ypose_ecs, ecs ? " (*)" : "");
+    console_out_f ("Attach: %d. AGA SSCAN/SH10 bit: %d\n", attach, sh10);
 }
 
 static void disk_debug (char **inptr)
@@ -2420,7 +2420,7 @@ static void disk_debug (char **inptr)
        (*inptr)++;
        ignore_ws (inptr);
        disk_debug_logging = readint (inptr);
-       console_out ("disk logging level %d\n", disk_debug_logging);
+       console_out_f ("disk logging level %d\n", disk_debug_logging);
        return;
     }
     disk_debug_mode = 0;
@@ -2443,7 +2443,7 @@ static void disk_debug (char **inptr)
     if (disk_debug_logging == 0)
        disk_debug_logging = 1;
 end:
-    console_out ("disk breakpoint mode %c%c%c track %d\n",
+    console_out_f ("disk breakpoint mode %c%c%c track %d\n",
        disk_debug_mode & DISK_DEBUG_DMA_READ ? 'R' : '-',
        disk_debug_mode & DISK_DEBUG_DMA_WRITE ? 'W' : '-',
        disk_debug_mode & DISK_DEBUG_PIO ? 'P' : '-',
@@ -2464,7 +2464,7 @@ static void find_ea (char **inptr)
        if (more_params(inptr))
            end = readhex (inptr);
     }
-    console_out ("Searching from %08X to %08X\n", addr, end);
+    console_out_f ("Searching from %08X to %08X\n", addr, end);
     while((addr = nextaddr(addr, &end)) != 0xffffffff) {
        if ((addr & 1) == 0 && addr + 6 <= end) {
            sea = 0xffffffff;
@@ -2598,7 +2598,7 @@ static void debug_1 (void)
                    next_char (&inptr);
                    if (more_params (&inptr))
                        debug_sprite_mask = readint (&inptr);
-                   console_out ("sprite mask: %02.2X\n", debug_sprite_mask);
+                   console_out_f ("sprite mask: %02.2X\n", debug_sprite_mask);
                }
            } else {
                searchmem (&inptr);
@@ -2614,7 +2614,7 @@ static void debug_1 (void)
                inputdevice_logging = 1 | 2;
                if (more_params (&inptr))
                    inputdevice_logging = readint(&inptr);
-               console_out("input logging level %d\n", inputdevice_logging);
+               console_out_f ("input logging level %d\n", inputdevice_logging);
            } else if (*inptr == 'm') {
                memory_map_dump_2 (0);
            } else if (*inptr == 't') {
@@ -2748,16 +2748,16 @@ static void debug_1 (void)
                break;
            }
 #endif
-           if (more_params(&inptr)) {
-               maddr = readhex(&inptr);
+           if (more_params (&inptr)) {
+               maddr = readhex (&inptr);
            } else {
                maddr = nxmem;
            }
-           if (more_params(&inptr))
-               lines = readhex(&inptr);
+           if (more_params (&inptr))
+               lines = readhex (&inptr);
            else
                lines = 20;
-           dumpmem(maddr, &nxmem, lines);
+           dumpmem (maddr, &nxmem, lines);
        }
        break;
        case 'o':
@@ -2778,7 +2778,7 @@ static void debug_1 (void)
            } else {
                int i;
                for (i = 0; i < 8; i++)
-                   console_out ("Plane %d offset %d\n", i, bpl_off[i]);
+                   console_out_f ("Plane %d offset %d\n", i, bpl_off[i]);
            }
            break;
        case 'b':
@@ -2857,7 +2857,7 @@ void debug (void)
                    continue;
                if (bpnodes[i].addr == pc) {
                    bp = 1;
-                   console_out ("Breakpoint at %08.8X\n", pc);
+                   console_out_f ("Breakpoint at %08.8X\n", pc);
                    break;
                }
            }
@@ -2922,7 +2922,7 @@ void debug (void)
            }
        }
     } else {
-       console_out ("Memwatch %d: break at %08X.%c %c%c%c %08.8X PC=%08X\n", memwatch_triggered - 1, mwhit.addr,
+       console_out_f ("Memwatch %d: break at %08X.%c %c%c%c %08.8X PC=%08X\n", memwatch_triggered - 1, mwhit.addr,
            mwhit.size == 1 ? 'B' : (mwhit.size == 2 ? 'W' : 'L'),
            (mwhit.rwi & 1) ? 'R' : ' ', (mwhit.rwi & 2) ? 'W' : ' ', (mwhit.rwi & 4) ? 'I' : ' ',
            mwhit.val, mwhit.pc);
@@ -3093,7 +3093,7 @@ static void mmu_do_hit_pre (struct mmudata *md, uaecptr addr, int size, int rwi,
     mmur = regs;
     pc = m68k_getpc (&regs);
     if (mmu_logging)
-       console_out ("MMU: hit %08.8X SZ=%d RW=%d V=%08.8X PC=%08.8X\n", addr, size, rwi, v, pc);
+       console_out_f ("MMU: hit %08.8X SZ=%d RW=%d V=%08.8X PC=%08.8X\n", addr, size, rwi, v, pc);
 
     p = mmu_regs;
     put_long (p, 0); p += 4;
@@ -3156,7 +3156,7 @@ static int mmu_hit (uaecptr addr, int size, int rwi, uae_u32 *v)
                    if (maddr == addr) /* infinite mmu hit loop? no thanks.. */
                        return 1;
                    if (mmu_logging)
-                       console_out ("MMU: remap %08.8X -> %08.8X SZ=%d RW=%d\n", addr, maddr, size, rwi);
+                       console_out_f ("MMU: remap %08.8X -> %08.8X SZ=%d RW=%d\n", addr, maddr, size, rwi);
                    if ((rwi & 2)) {
                        switch (size)
                        {
@@ -3282,7 +3282,7 @@ int mmu_init(int mode, uaecptr parm, uaecptr parm2)
     p = parm;
     mmu_struct = p;
     if (get_long (p) != 1) {
-       console_out ("MMU: version mismatch %d <> %d\n", get_long (p), 1);
+       console_out_f ("MMU: version mismatch %d <> %d\n", get_long (p), 1);
        return 0;
     }
     p += 4;
@@ -3304,7 +3304,7 @@ int mmu_init(int mode, uaecptr parm, uaecptr parm2)
            if (mn->mmubank->p_addr == parm2) {
                getmmubank(mn->mmubank, parm2);
                if (mmu_logging)
-                   console_out ("MMU: bank update %08.8X: %08.8X - %08.8X %08.8X\n",
+                   console_out_f ("MMU: bank update %08.8X: %08.8X - %08.8X %08.8X\n",
                        mn->mmubank->flags, mn->mmubank->addr, mn->mmubank->len + mn->mmubank->addr,
                        mn->mmubank->remap);
            }
@@ -3342,7 +3342,7 @@ int mmu_init(int mode, uaecptr parm, uaecptr parm2)
     }
 
     initialize_memwatch(1);
-    console_out ("MMU: enabled, %d banks, CB=%08.8X S=%08.8X BNK=%08.8X SF=%08.8X, %d*%d\n",
+    console_out_f ("MMU: enabled, %d banks, CB=%08.8X S=%08.8X BNK=%08.8X SF=%08.8X, %d*%d\n",
        size - 1, mmu_callback, parm, banks, mmu_regs, mmu_slots, 1 << MMU_PAGE_SHIFT);
     set_special (&regs, SPCFLAG_BRK);
     return 1;
diff --git a/disk.c b/disk.c
index 09e41c3ac3c11b353af1ab881b40a89b8c46be45..100fdf30b0d4eb00b3ef6cbaf4abb8dd3f8acbba 100755 (executable)
--- a/disk.c
+++ b/disk.c
@@ -50,8 +50,8 @@ static int longwritemode = 0;
 
 /* support HD floppies */
 #define FLOPPY_DRIVE_HD
-/* writable track length with normal 2us bitcell/300RPM motor (PAL) */
-#define FLOPPY_WRITE_LEN (currprefs.ntscmode ? (12798 / 2) : (12668 / 2)) /* 12667 PAL, 12797 NTSC */
+/* writable track length with normal 2us bitcell/300RPM motor, 12667 PAL, 12797 NTSC */
+#define FLOPPY_WRITE_LEN (currprefs.floppy_write_length > 256 ? currprefs.floppy_write_length / 2 : (currprefs.ntscmode ? (12798 / 2) : (12668 / 2)))
 #define FLOPPY_WRITE_MAXLEN 0x3800
 /* This works out to 350 */
 #define FLOPPY_GAP_LEN (FLOPPY_WRITE_LEN - 11 * 544)
@@ -2312,12 +2312,12 @@ void dumpdisk (void)
     for (i = 0; i < MAX_FLOPPY_DRIVES; i++) {
        drive *drv = &floppy[i];
        if (!(disabled & (1 << i))) {
-           console_out ("Drive %d: motor %s cylinder %2d sel %s %s mfmpos %d/%d\n",
+           console_out_f ("Drive %d: motor %s cylinder %2d sel %s %s mfmpos %d/%d\n",
                i, drv->motoroff ? "off" : " on", drv->cyl, (selected & (1 << i)) ? "no" : "yes",
                drive_writeprotected(drv) ? "ro" : "rw", drv->mfmpos, drv->tracklen);
            w = word;
            for (j = 0; j < 15; j++) {
-               console_out ("%04.4X ", w);
+               console_out_f ("%04.4X ", w);
                for (k = 0; k < 16; k++) {
                    w <<= 1;
                    w |= getonebit (drv->bigmfmbuf, drv->mfmpos + j * 16 + k);
@@ -2326,7 +2326,7 @@ void dumpdisk (void)
            console_out ("\n");
        }
     }
-    console_out ("side %d, dma %d, bitoffset %d, word %04.4X, dskbytr %04.4X adkcon %04.4X dsksync %04.4X\n", side, dskdmaen, bitoffset, word, dskbytr_val, adkcon, dsksync);
+    console_out_f ("side %d, dma %d, bitoffset %d, word %04.4X, dskbytr %04.4X adkcon %04.4X dsksync %04.4X\n", side, dskdmaen, bitoffset, word, dskbytr_val, adkcon, dsksync);
 }
 
 static void disk_dmafinished (void)
index 5c90557375853617d473d122a456db7165a39e0c..6d8a0372b7f7b324140fae28987fb26020749ecc 100755 (executable)
--- a/drawing.c
+++ b/drawing.c
@@ -49,6 +49,7 @@
 #include "jit/compemu.h"
 #endif
 #include "savestate.h"
+#include "statusline.h"
 
 extern int sprite_buffer_res;
 
@@ -2134,26 +2135,6 @@ static void init_drawing_frame (void)
  * Some code to put status information on the screen.
  */
 
-#define TD_PADX 10
-#define TD_PADY 2
-#define TD_WIDTH 32
-#define TD_LED_WIDTH 24
-#define TD_LED_HEIGHT 4
-
-#define TD_RIGHT 1
-#define TD_BOTTOM 2
-
-static int td_pos = (TD_RIGHT|TD_BOTTOM);
-
-#define TD_NUM_WIDTH 7
-#define TD_NUM_HEIGHT 7
-
-#define TD_TOTAL_HEIGHT (TD_PADY * 2 + TD_NUM_HEIGHT)
-
-#define NUMBERS_NUM 16
-
-#define TD_BORDER 0x333
-
 static const char *numbers = { /* ugly  0123456789CHD%+- */
 "+++++++--++++-+++++++++++++++++-++++++++++++++++++++++++++++++++++++++++++++-++++++-++++----++---+--------------"
 "+xxxxx+--+xx+-+xxxxx++xxxxx++x+-+x++xxxxx++xxxxx++xxxxx++xxxxx++xxxxx++xxxx+-+x++x+-+xxx++-+xx+-+x---+----------"
@@ -2164,18 +2145,18 @@ static const char *numbers = { /* ugly  0123456789CHD%+- */
 "+++++++---+++-++++++++++++++----+++++++++++++++++--+++--++++++++++++++++++++-++++++-++++------------------------"
 };
 
-STATIC_INLINE void putpixel (int x, xcolnr c8)
+STATIC_INLINE void putpixel (uae_u8 *buf, int bpp, int x, xcolnr c8)
 {
     if (x <= 0)
        return;
 
-    switch (gfxvidinfo.pixbytes) {
+    switch (bpp) {
     case 1:
-       xlinebuffer[x] = (uae_u8)c8;
+       buf[x] = (uae_u8)c8;
        break;
     case 2:
     {
-       uae_u16 *p = (uae_u16 *)xlinebuffer + x;
+       uae_u16 *p = (uae_u16 *)buf + x;
        *p = (uae_u16)c8;
        break;
     }
@@ -2184,14 +2165,19 @@ STATIC_INLINE void putpixel (int x, xcolnr c8)
        break;
     case 4:
     {
-       uae_u32 *p = (uae_u32 *)xlinebuffer + x;
+       uae_u32 *p = (uae_u32 *)buf + x;
        *p = c8;
        break;
     }
     }
 }
 
-static void write_tdnumber (int x, int y, int num)
+/* Build a pixel value for the 24-bit 0xRRGGBB colour c by OR-ing together the
+ * entries of the three per-channel lookup tables selected by its red, green
+ * and blue bytes. */
+STATIC_INLINE uae_u32 ledcolor (uae_u32 c, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc)
+{
+    uae_u32 red = rc[(c >> 16) & 0xff];
+    uae_u32 green = gc[(c >> 8) & 0xff];
+    uae_u32 blue = bc[c & 0xff];
+
+    return red | green | blue;
+}
+
+static void write_tdnumber (uae_u8 *buf, int bpp, int x, int y, int num, uae_u32 c1, uae_u32 c2)
 {
     int j;
     const char *numptr;
@@ -2199,71 +2185,73 @@ static void write_tdnumber (int x, int y, int num)
     numptr = numbers + num * TD_NUM_WIDTH + NUMBERS_NUM * TD_NUM_WIDTH * y;
     for (j = 0; j < TD_NUM_WIDTH; j++) {
        if (*numptr == 'x')
-           putpixel (x + j, xcolors[0xfff]);
+           putpixel (buf, bpp, x + j, c1);
        else if (*numptr == '+')
-           putpixel (x + j, xcolors[0x000]);
+           putpixel (buf, bpp, x + j, c2);
        numptr++;
     }
 }
 
-static void draw_status_line (int line)
+void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc)
 {
-    int x_start, y, j, led;
+    int x_start, j, led;
+    uae_u32 c1, c2, cb;
+
+    c1 = ledcolor (0xffffff, rc, gc, bc);
+    c2 = ledcolor (0x000000, rc, gc, bc);
+    cb = ledcolor (TD_BORDER, rc, gc, bc);
 
     if (td_pos & TD_RIGHT)
-       x_start = gfxvidinfo.width - TD_PADX - NUM_LEDS * TD_WIDTH;
+       x_start = totalwidth - TD_PADX - NUM_LEDS * TD_WIDTH;
     else
        x_start = TD_PADX;
 
-    y = line - (gfxvidinfo.height - TD_TOTAL_HEIGHT);
-    xlinebuffer = gfxvidinfo.linemem;
-    if (xlinebuffer == 0)
-       xlinebuffer = row_map[line];
-
     for (led = 0; led < NUM_LEDS; led++) {
        int side, pos, num1 = -1, num2 = -1, num3 = -1, num4 = -1;
-       int x, off_rgb, on_rgb, c, on = 0, am = 2;
+       int x, c, on = 0, am = 2;
+       xcolnr on_rgb, off_rgb;
+
        if (led >= 1 && led <= 4) {
            int pled = led - 1;
            int track = gui_data.drive_track[pled];
            pos = 6 + pled;
-           on_rgb = 0x0c0;
-           off_rgb = 0x030;
+           on_rgb = 0x00cc00;
+           off_rgb = 0x003300;
            if (!gui_data.drive_disabled[pled]) {
                num1 = -1;
                num2 = track / 10;
                num3 = track % 10;
                on = gui_data.drive_motor[pled];
                if (gui_data.drive_writing[pled])
-                   on_rgb = 0xc00;
+                   on_rgb = 0xcc0000;
            }
            side = gui_data.drive_side;
        } else if (led == 0) {
            pos = 3;
            on = gui_data.powerled;
-           on_rgb = 0xc00;
-           off_rgb = 0x300;
+           on_rgb = 0xcc0000;
+           off_rgb = 0x330000;
        } else if (led == 5) {
            pos = 5;
            on = gui_data.cd;
-           on_rgb = 0x00c;
-           off_rgb = 0x003;
+           on_rgb = 0x0000cc;
+           off_rgb = 0x000033;
            num1 = -1;
            num2 = 10;
            num3 = 12;
        } else if (led == 6) {
            pos = 4;
            on = gui_data.hd;
-           on_rgb = on == 2 ? 0xc00 : 0x00c;
-           off_rgb = 0x003;
+           on_rgb = on == 2 ? 0xcc0000 : 0x0000cc;
+           off_rgb = 0x000033;
            num1 = -1;
            num2 = 11;
            num3 = 12;
        } else if (led == 7) {
            int fps = (gui_data.fps + 5) / 10;
            pos = 2;
-           on_rgb = 0x000;
-           off_rgb = 0x000;
+           on_rgb = 0x000000;
+           off_rgb = 0x000000;
            num1 = fps / 100;
            num2 = (fps - num1 * 100) / 10;
            num3 = fps % 10;
@@ -2274,8 +2262,8 @@ static void draw_status_line (int line)
            int idle = (gui_data.idle + 5) / 10;
            pos = 1;
            on = framecnt;
-           on_rgb = 0xc00;
-           off_rgb = 0x000;
+           on_rgb = 0xcc0000;
+           off_rgb = 0x000000;
            num1 = idle / 100;
            num2 = (idle - num1 * 100) / 10;
            num3 = idle % 10;
@@ -2292,44 +2280,60 @@ static void draw_status_line (int line)
                num2 = snd / 10;
                num3 = snd % 10;
            }
-           on_rgb = 0x000;
+           on_rgb = 0x000000;
            if (on < 0)
-               on_rgb = 0xcc0; // underflow
+               on_rgb = 0xcccc00; // underflow
            else if (on == 2)
-               on_rgb = 0xc00; // really big overflow
+               on_rgb = 0xcc0000; // really big overflow
            else if (on == 1)
-               on_rgb = 0x00c; // "normal" overflow
-           off_rgb = 0x000;
+               on_rgb = 0x0000cc; // "normal" overflow
+           off_rgb = 0x000000;
            am = 3;
        }
-       c = xcolors[on ? on_rgb : off_rgb];
+       c = ledcolor (on ? on_rgb : off_rgb, rc, gc, bc);
        if (y == 0 || y == TD_TOTAL_HEIGHT - 1)
-           c = xcolors[TD_BORDER];
+           c = ledcolor (TD_BORDER, rc, gc, bc);
 
        x = x_start + pos * TD_WIDTH;
-       putpixel (x - 1, xcolors[TD_BORDER]);
+       putpixel (buf, bpp, x - 1, cb);
        for (j = 0; j < TD_LED_WIDTH; j++)
-           putpixel (x + j, c);
-       putpixel (x + j, xcolors[TD_BORDER]);
+           putpixel (buf, bpp, x + j, c);
+       putpixel (buf, bpp, x + j, cb);
 
        if (y >= TD_PADY && y - TD_PADY < TD_NUM_HEIGHT) {
            if (num3 >= 0) {
                x += (TD_LED_WIDTH - am * TD_NUM_WIDTH) / 2;
                if (num1 > 0) {
-                   write_tdnumber (x, y - TD_PADY, num1);
+                   write_tdnumber (buf, bpp, x, y - TD_PADY, num1, c1, c2);
                    x += TD_NUM_WIDTH;
                }
-               write_tdnumber (x, y - TD_PADY, num2);
+               write_tdnumber (buf, bpp, x, y - TD_PADY, num2, c1, c2);
                x += TD_NUM_WIDTH;
-               write_tdnumber (x, y - TD_PADY, num3);
+               write_tdnumber (buf, bpp, x, y - TD_PADY, num3, c1, c2);
                x += TD_NUM_WIDTH;
                if (num4 > 0)
-                   write_tdnumber (x, y - TD_PADY, num4);
+                   write_tdnumber (buf, bpp, x, y - TD_PADY, num4, c1, c2);
            }
        }
     }
 }
 
+/* Draw the status bar row that falls on display line `line` into the current
+ * line buffer.  Does nothing unless the built-in status line is selected. */
+static void draw_status_line (int line)
+{
+    int row;
+
+    if (currprefs.leds_on_screen != STATUSLINE_BUILTIN)
+       return;
+    /* row 0 is the first of the last TD_TOTAL_HEIGHT lines of the display */
+    row = line - (gfxvidinfo.height - TD_TOTAL_HEIGHT);
+    /* use the single line buffer when there is one, else the row of this line */
+    xlinebuffer = gfxvidinfo.linemem;
+    if (xlinebuffer == 0)
+       xlinebuffer = row_map[line];
+    draw_status_line_single (xlinebuffer, gfxvidinfo.pixbytes, row, gfxvidinfo.width,
+       xredcolors, xgreencolors, xbluecolors);
+}
+
 #define LIGHTPEN_HEIGHT 12
 #define LIGHTPEN_WIDTH 17
 
@@ -2363,7 +2367,7 @@ static void draw_lightpen_cursor (int x, int y, int line, int onscreen)
     for (i = 0; i < LIGHTPEN_WIDTH; i++) {
        int xx = x + i - LIGHTPEN_WIDTH / 2;
        if (*p != '-' && xx >= 0 && xx < gfxvidinfo.width)
-           putpixel(xx, *p == 'x' ? xcolors[color1] : xcolors[color2]);
+           putpixel(xlinebuffer, gfxvidinfo.pixbytes, xx, *p == 'x' ? xcolors[color1] : xcolors[color2]);
        p++;
     }
 }
index 8b29d54e84886f4234fa22c07cd2e66aded9a21b..8dde2d68b0401ef6a2c9f452cf09eb1ea861a61f 100755 (executable)
@@ -27,6 +27,9 @@
 #include "ncr_scsi.h"
 #include "debug.h"
 
+#define RTAREA_DEFAULT 0xf00000
+#define RTAREA_BACKUP  0xef0000
+
 #define MAX_EXPANSION_BOARDS 8
 
 /* ********************************************************** */
@@ -1089,15 +1092,15 @@ static void allocate_expamem (void)
 static uaecptr check_boot_rom (void)
 {
     int i;
-    uaecptr b = 0xf00000;
+    uaecptr b = RTAREA_DEFAULT;
     addrbank *ab;
 
     if (currprefs.cs_cdtvcd || currprefs.cs_cdtvscsi)
-       b = 0xe70000;
-    ab = &get_mem_bank (0xf00000);
+       b = RTAREA_BACKUP;
+    ab = &get_mem_bank (RTAREA_DEFAULT);
     if (ab) {
-       if (valid_address (0xf00000, 65536))
-           b = 0xe70000;
+       if (valid_address (RTAREA_DEFAULT, 65536))
+           b = RTAREA_BACKUP;
     }
     for (i = 0; i < currprefs.mountitems; i++) {
        struct uaedev_config_info *uci = &currprefs.mountconfig[i];
diff --git a/gayle.c b/gayle.c
index e917e3dcf1e7b31445d285b7a088954cdd84ed87..fed34d0c0b505c91ad8dd13ace922bb71826c7e4 100755 (executable)
--- a/gayle.c
+++ b/gayle.c
@@ -1351,7 +1351,7 @@ static void checkflush (int addr)
     if (pcmcia_card == 0 || pcmcia_sram == 0)
        return;
     if (pcmcia_write_min >= 0) {
-       if (addr < 0 || abs (pcmcia_write_min - addr) > 60000 || abs (pcmcia_write_max - addr) > 60000) {
+       if (addr < 0 || abs (pcmcia_write_min - addr) >= 512 || abs (pcmcia_write_max - addr) >= 512) {
            int blocksize = pcmcia_sram->hfd.blocksize;
            int mask = ~(blocksize - 1);
            int start = pcmcia_write_min & mask;
@@ -1418,6 +1418,7 @@ static int initpcmcia (const char *path, int readonly, int reset)
     pcmcia_attrs = xcalloc (pcmcia_attrs_size, 1);
     if (!pcmcia_sram->hfd.drive_empty) {
        pcmcia_common_size = pcmcia_sram->hfd.size;
+       pcmcia_common_size = 16384;
        if (pcmcia_sram->hfd.size > 4 * 1024 * 1024) {
            write_log ("PCMCIA SRAM: too large device, %d bytes\n", pcmcia_sram->hfd.size);
            pcmcia_common_size = 4 * 1024 * 1024;
index 2e36734835c9c64cf84b022ad98ee92b397696fd..b230da5a287d42bdf4a21dc3dfcdb981eda98863 100755 (executable)
--- a/gfxutil.c
+++ b/gfxutil.c
@@ -205,7 +205,37 @@ void alloc_colors_picasso (int rw, int gw, int bw, int rs, int gs, int bs, int a
        p96_rgbx16[i] = doMask(r, rw, rs) | doMask(g, gw, gs) | doMask(b, bw, bs);
     }
 }
-    
+
+/* Fill the three 256-entry lookup tables rc, gc and bc.  Entry i of each table
+ * is gamma[i + 256] placed into that channel's bit field (width rw/gw/bw,
+ * shift rs/gs/bs) combined with the alpha field (alpha, aw, as).  When
+ * byte_swap is set each entry is byte swapped, and for formats of 16 bits or
+ * less the value is duplicated into the upper 16 bits. */
+void alloc_colors_rgb (int rw, int gw, int bw, int rs, int gs, int bs, int aw, int as, int alpha, int byte_swap,
+                      uae_u32 *rc, uae_u32 *gc, uae_u32 *bc)
+{
+    const int narrow = (rw + gw + bw + aw) <= 16;
+    int idx;
+
+    for (idx = 0; idx < 256; idx++) {
+       uae_u32 r = doColor (gamma[idx + 256], rw, rs) | doAlpha (alpha, aw, as);
+       uae_u32 g = doColor (gamma[idx + 256], gw, gs) | doAlpha (alpha, aw, as);
+       uae_u32 b = doColor (gamma[idx + 256], bw, bs) | doAlpha (alpha, aw, as);
+
+       if (byte_swap && narrow) {
+           r = bswap_16 (r);
+           g = bswap_16 (g);
+           b = bswap_16 (b);
+       } else if (byte_swap) {
+           r = bswap_32 (r);
+           g = bswap_32 (g);
+           b = bswap_32 (b);
+       }
+       if (narrow) {
+           /* copy the 16-bit colour into the upper half of the 32-bit entry */
+           r *= 0x00010001;
+           g *= 0x00010001;
+           b *= 0x00010001;
+       }
+       rc[idx] = r;
+       gc[idx] = g;
+       bc[idx] = b;
+    }
+}
 
 void alloc_colors64k (int rw, int gw, int bw, int rs, int gs, int bs, int aw, int as, int alpha, int byte_swap)
 {
@@ -235,31 +265,7 @@ void alloc_colors64k (int rw, int gw, int bw, int rs, int gs, int bs, int aw, in
        }
     }
 #if defined(AGA) || defined(GFXFILTER)
-    /* create AGA color tables */
-    for(i = 0; i < 256; i++) {
-       j = i + 256;
-       xredcolors[i] = doColor (gamma[j], rw, rs) | doAlpha (alpha, aw, as);
-       xgreencolors[i] = doColor (gamma[j], gw, gs) | doAlpha (alpha, aw, as);
-       xbluecolors[i] = doColor (gamma[j], bw, bs) | doAlpha (alpha, aw, as);
-       if (byte_swap) {
-           if (bpp <= 16) {
-               xredcolors  [i] = bswap_16 (xredcolors[i]);
-               xgreencolors[i] = bswap_16 (xgreencolors[i]);
-               xbluecolors [i] = bswap_16 (xbluecolors[i]);
-           } else {
-               xredcolors  [i] = bswap_32 (xredcolors[i]);
-               xgreencolors[i] = bswap_32 (xgreencolors[i]);
-               xbluecolors [i] = bswap_32 (xbluecolors[i]);
-           }
-       }
-       if (bpp <= 16) {
-           /* Fill upper 16 bits of each colour value with
-            * a copy of the colour. */
-           xredcolors  [i] = xredcolors  [i] * 0x00010001;
-           xgreencolors[i] = xgreencolors[i] * 0x00010001;
-           xbluecolors [i] = xbluecolors [i] * 0x00010001;
-       }
-    }
+    alloc_colors_rgb (rw, gw, bw, rs, gs, bs, aw, as, alpha, byte_swap, xredcolors, xgreencolors, xbluecolors);
     /* copy original color table */
     for (i = 0; i < 256; i++) {
        redc[0 * 256 + i] = xredcolors[0];
index 4adaaa8aef3cbcc712dd815f165da50b5b8ed7f8..fdd5daf195907ef28f6bb7588ba58bbe421e43a2 100755 (executable)
@@ -34,6 +34,10 @@ extern void AdMame2x32(u8 *srcPtr, u32 srcPitch, /* u8 deltaPtr, */
 extern void hq_init(int rb, int gb, int bb, int rs, int gs, int bs);
 extern void hq2x_16(unsigned char*, unsigned char*, DWORD, DWORD, DWORD);
 extern void hq2x_32(unsigned char*, unsigned char*, DWORD, DWORD, DWORD);
+extern void hq3x_16(unsigned char*, unsigned char*, DWORD, DWORD, DWORD);
+extern void hq3x_32(unsigned char*, unsigned char*, DWORD, DWORD, DWORD);
+extern void hq4x_16(unsigned char*, unsigned char*, DWORD, DWORD, DWORD);
+extern void hq4x_32(unsigned char*, unsigned char*, DWORD, DWORD, DWORD);
 
 #define UAE_FILTER_NULL 1
 #define UAE_FILTER_DIRECT3D 2
@@ -57,7 +61,7 @@ struct uae_filter
 {
     int type, yuv;
     char *name, *cfgname;
-    int x[5];
+    int x[6];
 };
 
 extern struct uae_filter uaefilters[];
index cb2eea51c40bdfd9666e4d2ac96503e2953a75fa..25ea47d6eaaa9abc4895469967910f5d4a0b17eb 100755 (executable)
@@ -199,6 +199,7 @@ struct uae_prefs {
     int cpu_cycle_exact;
     int blitter_cycle_exact;
     int floppy_speed;
+    int floppy_write_length;
     int tod_hack;
     uae_u32 maprom;
 
diff --git a/include/statusline.h b/include/statusline.h
new file mode 100755 (executable)
index 0000000..90c016c
--- /dev/null
@@ -0,0 +1,25 @@
+
+#ifndef UAE_STATUSLINE_H
+#define UAE_STATUSLINE_H
+
+/* Geometry of the on-screen LED row, in pixels. */
+#define TD_PADX 10             /* horizontal margin used when placing the row */
+#define TD_PADY 2              /* rows above and below the digit glyphs */
+#define TD_WIDTH 32            /* horizontal pitch from one LED to the next */
+#define TD_LED_WIDTH 24        /* pixels filled for each LED */
+#define TD_LED_HEIGHT 4
+
+/* Flag bits for td_pos. */
+#define TD_RIGHT 1
+#define TD_BOTTOM 2
+
+/* Placement of the LED row.  Declared static, so every source file that
+ * includes this header gets its own copy. */
+static int td_pos = (TD_RIGHT|TD_BOTTOM);
+
+/* Size of one glyph of the digit font. */
+#define TD_NUM_WIDTH 7
+#define TD_NUM_HEIGHT 7
+
+/* Height of the whole status bar: the glyphs plus padding on both sides. */
+#define TD_TOTAL_HEIGHT (TD_PADY * 2 + TD_NUM_HEIGHT)
+
+/* Number of glyphs in each row of the digit font table. */
+#define NUMBERS_NUM 16
+
+/* Border colour, as a 24-bit 0xRRGGBB value. */
+#define TD_BORDER 0x333333
+
+/* Values compared against the leds_on_screen preference.  The built-in status
+ * line is drawn by the emulator's own drawing code.
+ * NOTE(review): STATUSLINE_TARGET is presumably drawn by target-specific
+ * code -- confirm. */
+#define STATUSLINE_BUILTIN 1
+#define STATUSLINE_TARGET 2
+
+#endif /* UAE_STATUSLINE_H */
+
index 521b0fef2d83a67dc4e17ba01ee76cb98ed6acbd..ac3b014f376731482c7f1def52cc6507a161b5d9 100755 (executable)
@@ -457,7 +457,8 @@ extern void write_dlog (const char *, ...);
 
 extern void close_console (void);
 extern void reopen_console (void);
-extern void console_out (const char *, ...);
+extern void console_out (const char *);
+extern void console_out_f (const char *, ...);
 extern void console_flush (void);
 extern int console_get (char *, int);
 extern void f_out (void *, const char *, ...);
index b4b83084b713c54d7ce4e3530fa571db46979539..ab0c5cba7586664329c8f094334459520a623df2 100755 (executable)
@@ -11,6 +11,8 @@ typedef uae_u32 xcolnr;
 typedef int (*allocfunc_type)(int, int, int, xcolnr *);
 
 extern xcolnr xcolors[4096];
+extern xcolnr xcolors_16[4096];
+extern xcolnr xcolors_32[4096];
 extern uae_u32 p96_rgbx16[65536];
 
 extern int graphics_setup (void);
@@ -41,6 +43,8 @@ extern unsigned int doMask256 (int p, int bits, int shift);
 extern void setup_maxcol (int);
 extern void alloc_colors256 (int (*)(int, int, int, xcolnr *));
 extern void alloc_colors64k (int, int, int, int, int, int, int, int, int, int);
+extern void alloc_colors_rgb (int rw, int gw, int bw, int rs, int gs, int bs, int aw, int as, int alpha, int byte_swap,
+                             uae_u32 *rc, uae_u32 *gc, uae_u32 *bc);
 extern void alloc_colors_picasso (int, int, int, int, int, int, int, int, int, int);
 extern void setup_greydither (int bits, allocfunc_type allocfunc);
 extern void setup_greydither_maxcol (int maxcol, allocfunc_type allocfunc);
index 067a47f26da03c9a7059a3d0ad03a727da08a75e..2f288a79b79c9d6305571bd4148918fdc0751940 100755 (executable)
--- a/newcpu.c
+++ b/newcpu.c
@@ -866,7 +866,7 @@ static void exception_debug (int nr)
 #ifdef DEBUGGER
     if (!exception_debugging)
        return;
-    console_out ("Exception %d, PC=%08.8X\n", nr, m68k_getpc (&regs));
+    console_out_f ("Exception %d, PC=%08.8X\n", nr, m68k_getpc (&regs));
 #endif
 }
 
similarity index 50%
rename from od-win32/asm.bat
rename to od-win32/asm.cmd
index ec0326bb877f66ec381726b509b54ca2b6bbc62d..4a67210499a22c404eb78b9c65d7a7061dc1c380 100755 (executable)
@@ -1,3 +1,6 @@
 nasm -O1 -f win32 hq2x32.asm
 nasm -O1 -f win32 hq3x32.asm
 nasm -O1 -f win32 hq4x32.asm
+nasm -O1 -f win32 hq2x16.asm
+nasm -O1 -f win32 hq3x16.asm
+nasm -O1 -f win32 hq4x16.asm
index 0378e47ca2e6322a382dc2159e66d6375b4b513b..344cd7acb416075fadccedd73eaf671668bccf94 100755 (executable)
@@ -978,7 +978,7 @@ static void ToggleBreakpoint(HWND hwnd)
        SendMessage(hwnd, LB_GETTEXT, index, (LPARAM)addrstr);
        addrstr[8] = '\0';
        ptr = addrstr;
-       console_out("\nf %s\n", addrstr);
+       console_out_f ("\nf %s\n", addrstr);
        instruction_breakpoint(&ptr);
        RedrawWindow(hwnd, 0, 0, RDW_INVALIDATE);
 }
index 6de36be44d636ece503f803396070ca9a895a50f..a15e88e06491a99b50dff1260812566a5615e0c3 100755 (executable)
@@ -564,6 +564,8 @@ static void DirectDraw_Blt (LPDIRECTDRAWSURFACE7 dst, RECT *dstrect, LPDIRECTDRA
     HRESULT ddrval;
     if (dst == NULL)
        dst = getlocksurface ();
+    if (src == NULL)
+       src = getlocksurface ();
     while (FAILED(ddrval = IDirectDrawSurface7_Blt (dst, dstrect, src, srcrect, DDBLT_WAIT, NULL))) {
        if (ddrval == DDERR_SURFACELOST) {
            ddrval = restoresurface (dst);
@@ -586,6 +588,28 @@ void DirectDraw_BlitRect (LPDIRECTDRAWSURFACE7 dst, RECT *dstrect, LPDIRECTDRAWS
     DirectDraw_Blt (dst, dstrect, src, scrrect);
 }
 
+/* Fill `rect` of the surface returned by getlocksurface() with `color` using
+ * a colour-fill blit.  A lost surface is restored and the blit retried, a busy
+ * surface is retried, and any other failure is logged and ends the attempt. */
+void DirectDraw_Fill (RECT *rect, uae_u32 color)
+{
+    LPDIRECTDRAWSURFACE7 target = getlocksurface ();
+    DDBLTFX fx;
+    HRESULT hr;
+
+    memset (&fx, 0, sizeof (fx));
+    fx.dwSize = sizeof (fx);
+    fx.dwFillColor = color;
+    for (;;) {
+       hr = IDirectDrawSurface7_Blt (target, rect, NULL, NULL, DDBLT_WAIT | DDBLT_COLORFILL, &fx);
+       if (!FAILED (hr))
+           break;
+       if (hr == DDERR_SURFACELOST) {
+           /* give up if the surface cannot be restored */
+           hr = restoresurface (target);
+           if (FAILED (hr))
+               break;
+           continue;
+       }
+       if (hr != DDERR_SURFACEBUSY) {
+           write_log ("DirectDraw_Fill: %s\n", DXError (hr));
+           break;
+       }
+    }
+
+}
 
 extern int vblank_skip;
 static void flip (void)
index 43b6341159749266c04fb5086f0c2bac550cd9ce..8cd74cd624e2c94b8402b245c9e6b344fac89336 100755 (executable)
@@ -169,6 +169,7 @@ int DirectDraw_Flip (int wait);
 int DirectDraw_BlitToPrimary (RECT *rect);
 void DirectDraw_Blit (LPDIRECTDRAWSURFACE7 dst, LPDIRECTDRAWSURFACE7 src);
 void DirectDraw_BlitRect (LPDIRECTDRAWSURFACE7 dst, RECT *dstrect, LPDIRECTDRAWSURFACE7 src, RECT *scrrect);
+void DirectDraw_Fill (RECT *rect, uae_u32 color);
 
 HRESULT DirectDraw_SetPaletteEntries (int start, int count, PALETTEENTRY *palette);
 HRESULT DirectDraw_SetPalette (int remove);
index 57e33044eadc3dfa5c1569d821d4470fcbd4649c..eada136172139621ad72dbdc2e22df10644cb2bf 100755 (executable)
@@ -27,6 +27,8 @@
 #include <cfgmgr32.h>   // for SetupDiXxx functions.
 #endif
 
+static int usefloppydrives = 0;
+
 struct uae_driveinfo {
     uae_u64 offset2;
     uae_u64 size2;
@@ -42,6 +44,7 @@ struct uae_driveinfo {
     int removablemedia;
     int nomedia;
     int dangerous;
+    int readonly;
 };
 
 #define HDF_HANDLE_WIN32 1
@@ -64,7 +67,8 @@ static int isnomediaerr (DWORD err)
        err == ERROR_MEDIA_CHANGED ||
        err == ERROR_NO_MEDIA_IN_DRIVE ||
        err == ERROR_DEV_NOT_EXIST ||
-       err == ERROR_BAD_NET_NAME)
+       err == ERROR_BAD_NET_NAME ||
+       err == ERROR_WRONG_DISK)
        return 1;
     return 0;
 }
@@ -190,6 +194,8 @@ int hdf_open (struct hardfiledata *hfd, const char *pname)
            hfd->flags = HFD_FLAGS_REALDRIVE;
            if (udi->nomedia)
                hfd->drive_empty = -1;
+           if (udi->readonly)
+               hfd->readonly = 1;
            flags = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS;
            h = CreateFile (udi->device_path,
                GENERIC_READ | (hfd->readonly ? 0 : GENERIC_WRITE),
@@ -641,12 +647,89 @@ int hdf_write (struct hardfiledata *hfd, void *buffer, uae_u64 offset, int len)
 
 #ifdef WINDDK
 
+/* Give udi placeholder identification: fixed vendor, product and revision
+ * strings, the device path as the device name, and the removable-media flag
+ * set.  ignoreduplicates is not used. */
+static void generatestorageproperty (struct uae_driveinfo *udi, int ignoreduplicates)
+{
+    udi->removablemedia = 1;
+    strcpy (udi->device_name, udi->device_path);
+    strcpy (udi->product_rev, "1.0");
+    strcpy (udi->product_id, "DISK");
+    strcpy (udi->vendor_id, "UAE");
+}
 
-static BOOL GetDevicePropertyFromName(const char *DevicePath, DWORD Index, DWORD *index2, uae_u8 *buffer, int ignoreduplicates)
+/*
+ * Fill in the identification fields of udi from the STORAGE_DEVICE_DESCRIPTOR
+ * in outBuf, of which returnedLength bytes are valid.
+ *
+ * The vendor, product, revision and serial strings are copied out of the
+ * buffer and appended, separated by spaces, to udi->device_name.  If nothing
+ * was appended, the device path is used as the name instead.
+ *
+ * Returns 0 when the device is accepted.  Returns 1 when it is to be ignored:
+ * its DeviceType is not INQ_DASD, INQ_ROMD or INQ_OPTD, or ignoreduplicates
+ * is set and the device is either reported as known by isharddrive() or is
+ * not removable.
+ *
+ * NOTE(review): the destination fields of udi are neither bounds-checked nor
+ * explicitly terminated here; presumably the caller zero-fills udi and the
+ * fields are large enough -- confirm.
+ */
+static int getstorageproperty (PUCHAR outBuf, int returnedLength, struct uae_driveinfo *udi, int ignoreduplicates)
 {
+    PSTORAGE_DEVICE_DESCRIPTOR devDesc;
+    char orgname[1024];
+    PUCHAR p;
+    /* number of valid bytes in outBuf; no string byte at or past it is read */
+    ULONG len = returnedLength > 0 ? (ULONG) returnedLength : 0;
     int i, j;
+
+    devDesc = (PSTORAGE_DEVICE_DESCRIPTOR) outBuf;
+    p = (PUCHAR) outBuf;
+    if (devDesc->DeviceType != INQ_DASD && devDesc->DeviceType != INQ_ROMD && devDesc->DeviceType != INQ_OPTD) {
+       write_log ("not a direct access device, ignored (type=%d)\n", devDesc->DeviceType);
+       return 1;
+    }
+    /* An offset of zero means the string is absent.  Each offset is checked
+     * against len before it is dereferenced, and each copy loop tests the
+     * bound before reading the byte. */
+    if (devDesc->VendorIdOffset && devDesc->VendorIdOffset < len && p[devDesc->VendorIdOffset]) {
+       j = 0;
+       for (i = devDesc->VendorIdOffset; (ULONG) i < len && p[i] != 0; i++)
+           udi->vendor_id[j++] = p[i];
+    }
+    if (devDesc->ProductIdOffset && devDesc->ProductIdOffset < len && p[devDesc->ProductIdOffset]) {
+       j = 0;
+       for (i = devDesc->ProductIdOffset; (ULONG) i < len && p[i] != 0; i++)
+           udi->product_id[j++] = p[i];
+    }
+    if (devDesc->ProductRevisionOffset && devDesc->ProductRevisionOffset < len && p[devDesc->ProductRevisionOffset]) {
+       j = 0;
+       for (i = devDesc->ProductRevisionOffset; (ULONG) i < len && p[i] != 0; i++)
+           udi->product_rev[j++] = p[i];
+    }
+    if (devDesc->SerialNumberOffset && devDesc->SerialNumberOffset < len && p[devDesc->SerialNumberOffset]) {
+       j = 0;
+       for (i = devDesc->SerialNumberOffset; (ULONG) i < len && p[i] != 0; i++)
+           udi->product_serial[j++] = p[i];
+    }
+    /* build "vendor product revision serial", skipping empty parts */
+    if (udi->vendor_id[0])
+       strcat (udi->device_name, udi->vendor_id);
+    if (udi->product_id[0]) {
+       if (udi->device_name[0])
+           strcat (udi->device_name, " ");
+       strcat (udi->device_name, udi->product_id);
+    }
+    if (udi->product_rev[0]) {
+       if (udi->device_name[0])
+           strcat (udi->device_name, " ");
+       strcat (udi->device_name, udi->product_rev);
+    }
+    if (udi->product_serial[0]) {
+       if (udi->device_name[0])
+           strcat (udi->device_name, " ");
+       strcat (udi->device_name, udi->product_serial);
+    }
+    if (!udi->device_name[0]) {
+       write_log ("empty device id?!?, replacing with device path\n");
+       strcpy (udi->device_name, udi->device_path);
+    }
+    udi->removablemedia = devDesc->RemovableMedia;
+    write_log ("device id string: '%s'\n", udi->device_name);
+    if (ignoreduplicates) {
+       sprintf (orgname, "HD_%s", udi->device_name);
+       if (isharddrive (orgname) >= 0) {
+           write_log ("duplicate device, ignored\n");
+           return 1;
+       }
+       if (!udi->removablemedia) {
+           write_log ("drive letter not removable, ignored\n");
+           return 1;
+       }
+    }
+    return 0;
+}
+
+static BOOL GetDevicePropertyFromName(const char *DevicePath, DWORD Index, DWORD *index2, uae_u8 *buffer, int ignoreduplicates)
+{
+    int i, nosp;
     int ret = -1;
-    PUCHAR p;
     STORAGE_PROPERTY_QUERY query;
     DRIVE_LAYOUT_INFORMATION           *dli;
     struct uae_driveinfo *udi;
@@ -656,7 +739,6 @@ static BOOL GetDevicePropertyFromName(const char *DevicePath, DWORD Index, DWORD
     DISK_GEOMETRY                      dg;
     GET_LENGTH_INFORMATION             gli;
     PSTORAGE_ADAPTER_DESCRIPTOR         adpDesc;
-    PSTORAGE_DEVICE_DESCRIPTOR          devDesc;
     int gli_ok;
     BOOL                                status;
     ULONG                               length = 0,
@@ -707,12 +789,11 @@ static BOOL GetDevicePropertyFromName(const char *DevicePath, DWORD Index, DWORD
                        NULL
                        );
     if (!status) {
-       write_log ("IOCTL_STORAGE_QUERY_PROPERTY failed with error code%d.\n", GetLastError());
-       ret = 1;
-       goto end;
+       write_log ("IOCTL_STORAGE_QUERY_PROPERTY failed with error code %d.\n", GetLastError());
+    } else {
+        adpDesc = (PSTORAGE_ADAPTER_DESCRIPTOR) outBuf;
     }
 
-    adpDesc = (PSTORAGE_ADAPTER_DESCRIPTOR) outBuf;
     query.PropertyId = StorageDeviceProperty;
     query.QueryType = PropertyStandardQuery;
     status = DeviceIoControl(
@@ -725,74 +806,25 @@ static BOOL GetDevicePropertyFromName(const char *DevicePath, DWORD Index, DWORD
                        &returnedLength,
                        NULL);
     if (!status) {
-        write_log ("IOCTL_STORAGE_QUERY_PROPERTY failed with error code %d.\n", GetLastError());
-        ret = 1;
-        goto end;
-    }
-    devDesc = (PSTORAGE_DEVICE_DESCRIPTOR) outBuf;
-    p = (PUCHAR) outBuf;
-    if (devDesc->DeviceType != INQ_DASD && devDesc->DeviceType != INQ_ROMD && devDesc->DeviceType != INQ_OPTD) {
-        ret = 1;
-        write_log ("not a direct access device, ignored (type=%d)\n", devDesc->DeviceType);
-        goto end;
-    }
-    if (devDesc->VendorIdOffset && p[devDesc->VendorIdOffset]) {
-        j = 0;
-        for (i = devDesc->VendorIdOffset; p[i] != (UCHAR) NULL && i < returnedLength; i++)
-           udi->vendor_id[j++] = p[i];
-    }
-    if (devDesc->ProductIdOffset && p[devDesc->ProductIdOffset]) {
-       j = 0;
-       for (i = devDesc->ProductIdOffset; p[i] != (UCHAR) NULL && i < returnedLength; i++)
-           udi->product_id[j++] = p[i];
-    }
-    if (devDesc->ProductRevisionOffset && p[devDesc->ProductRevisionOffset]) {
-        j = 0;
-        for (i = devDesc->ProductRevisionOffset; p[i] != (UCHAR) NULL && i < returnedLength; i++)
-           udi->product_rev[j++] = p[i];
-    }
-    if (devDesc->SerialNumberOffset && p[devDesc->SerialNumberOffset]) {
-        j = 0;
-        for (i = devDesc->SerialNumberOffset; p[i] != (UCHAR) NULL && i < returnedLength; i++)
-           udi->product_serial[j++] = p[i];
-    }
-    if (udi->vendor_id[0])
-        strcat (udi->device_name, udi->vendor_id);
-    if (udi->product_id[0]) {
-        if (udi->device_name[0])
-           strcat (udi->device_name, " ");
-       strcat (udi->device_name, udi->product_id);
-    }
-    if (udi->product_rev[0]) {
-        if (udi->device_name[0])
-           strcat (udi->device_name, " ");
-       strcat (udi->device_name, udi->product_rev);
-    }
-    if (udi->product_serial[0]) {
-        if (udi->device_name[0])
-           strcat (udi->device_name, " ");
-       strcat (udi->device_name, udi->product_serial);
-    }
-    if (!udi->device_name[0]) {
-        write_log ("empty device id?!?, replacing with device path\n");
-        strcpy (udi->device_name, udi->device_path);
-    }
-    udi->removablemedia = devDesc->RemovableMedia;
-    write_log ("device id string: '%s'\n", udi->device_name);
-    if (ignoreduplicates) {
-       sprintf (orgname, "HD_%s", udi->device_name);
-       if (isharddrive (orgname) >= 0) {
-           write_log ("duplicate device, ignored\n");
+       DWORD err = GetLastError ();
+        write_log ("IOCTL_STORAGE_QUERY_PROPERTY failed with error code %d.\n", err);
+       if (err != ERROR_INVALID_FUNCTION) {
            ret = 1;
            goto end;
        }
-       if (!udi->removablemedia) {
-           write_log ("drive letter not removable, ignored\n");
-           ret = 1;
+       nosp = 1;
+       generatestorageproperty (udi, ignoreduplicates);
+    } else {
+       int r;
+       nosp = 0;
+       r = getstorageproperty (outBuf, returnedLength, udi, ignoreduplicates);
+       if (r) {
+           ret = r;
            goto end;
        }
     }
     strcpy (orgname, udi->device_name);
+    udi->bytespersector = 512;
     if (!DeviceIoControl (hDevice, IOCTL_DISK_GET_DRIVE_GEOMETRY, NULL, 0, (void*)&dg, sizeof (dg), &returnedLength, NULL)) {
        DWORD err = GetLastError();
        if (isnomediaerr (err)) {
@@ -803,11 +835,19 @@ static BOOL GetDevicePropertyFromName(const char *DevicePath, DWORD Index, DWORD
        ret = 1;
        goto end;
     }
+    udi->readonly = 0;
+    if (!DeviceIoControl (hDevice, IOCTL_DISK_IS_WRITABLE, NULL, 0, NULL, 0, &returnedLength, NULL)) {
+       DWORD err = GetLastError ();
+       if (err == ERROR_WRITE_PROTECT)
+           udi->readonly = 1;
+    }
+
     gli_ok = 1;
     if (!DeviceIoControl (hDevice, IOCTL_DISK_GET_LENGTH_INFO, NULL, 0, (void*)&gli, sizeof (gli), &returnedLength, NULL)) {
-       write_log ("IOCTL_DISK_GET_LENGTH_INFO failed with error code %d.\n", GetLastError());
        gli_ok = 0;
-       write_log ("IOCTL_DISK_GET_LENGTH_INFO not supported, detected disk size may not be correct.\n");
+       write_log ("IOCTL_DISK_GET_LENGTH_INFO failed with error code %d.\n", GetLastError());
+       if (!nosp)
+           write_log ("IOCTL_DISK_GET_LENGTH_INFO not supported, detected disk size may not be correct.\n");
     }
     udi->bytespersector = dg.BytesPerSector;
     if (dg.BytesPerSector < 512) {
@@ -821,23 +861,27 @@ static BOOL GetDevicePropertyFromName(const char *DevicePath, DWORD Index, DWORD
        goto end;
     }
     udi->offset = udi->offset2 = 0;
-    write_log ("BytesPerSector=%d Cyls=%I64d TracksPerCyl=%d SecsPerTrack=%d\n",
-       dg.BytesPerSector, dg.Cylinders.QuadPart, dg.TracksPerCylinder, dg.SectorsPerTrack);
+    write_log ("BPS=%d Cyls=%I64d TPC=%d SPT=%d MediaType=%d\n",
+       dg.BytesPerSector, dg.Cylinders.QuadPart, dg.TracksPerCylinder, dg.SectorsPerTrack, dg.MediaType);
     udi->size = udi->size2 = (uae_u64)dg.BytesPerSector * (uae_u64)dg.Cylinders.QuadPart *
        (uae_u64)dg.TracksPerCylinder * (uae_u64)dg.SectorsPerTrack;
     if (gli_ok)
        udi->size = udi->size2 = gli.Length.QuadPart;
     write_log ("device size %I64d (0x%I64x) bytes\n", udi->size, udi->size);
+    trim (orgname);
 
     memset (outBuf, 0, sizeof (outBuf));
     status = DeviceIoControl(hDevice, IOCTL_DISK_GET_DRIVE_LAYOUT, NULL, 0,
        &outBuf, sizeof (outBuf), &returnedLength, NULL);
     if (!status) {
-       write_log ("IOCTL_DISK_GET_DRIVE_LAYOUT failed with error code%d.\n", GetLastError());
-       ret = 1;
-       goto end;
+       DWORD err = GetLastError();
+       write_log ("IOCTL_DISK_GET_DRIVE_LAYOUT failed with error code %d.\n", err);
+       if (err != ERROR_INVALID_FUNCTION) {
+           ret = 1;
+           goto end;
+       }
+       goto amipartfound;
     }
-    trim (orgname);
     dli = (DRIVE_LAYOUT_INFORMATION*)outBuf;
     if (dli->PartitionCount) {
        struct uae_driveinfo *udi2 = udi;
@@ -1059,7 +1103,7 @@ static int hdf_init2 (int force)
        errormode = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
        dwDriveMask = GetLogicalDrives();
         for(drive = 'A'; drive <= 'Z'; drive++) {
-           if((dwDriveMask & 1) && drive >= 'C') {
+           if((dwDriveMask & 1) && (drive >= 'C' || usefloppydrives)) {
                char tmp1[20], tmp2[20];
                DWORD drivetype;
                sprintf (tmp1, "%c:\\", drive);
@@ -1109,6 +1153,7 @@ char *hdf_getnameharddrive (int index, int flags, int *sectorsize)
     uae_u64 size = uae_drives[index].size;
     int nomedia = uae_drives[index].nomedia;
     char *dang = "?";
+    char *rw = "RW";
 
     switch (uae_drives[index].dangerous)
     {
@@ -1127,6 +1172,8 @@ char *hdf_getnameharddrive (int index, int flags, int *sectorsize)
     }
     if (nomedia)       
        dang = "NO MEDIA";
+    if (uae_drives[index].readonly)
+       rw = "RO";
 
     if (sectorsize)
        *sectorsize = uae_drives[index].bytespersector;
@@ -1136,10 +1183,12 @@ char *hdf_getnameharddrive (int index, int flags, int *sectorsize)
        } else {
            if (size >= 1024 * 1024 * 1024)
                sprintf (tmp, "%.1fG", ((double)(uae_u32)(size / (1024 * 1024))) / 1024.0);
+           else if (size < 10 * 1024 * 1024)
+               sprintf (tmp, "%dK", (int)(size / 1024)); /* size is uae_u64: narrow it so the argument matches %d (value is < 10240 in this branch) */
            else
                sprintf (tmp, "%.1fM", ((double)(uae_u32)(size / (1024))) / 1024.0);
        }
-       sprintf (name, "%10s [%s] %s", dang, tmp, uae_drives[index].device_name + 3);
+       sprintf (name, "%10s [%s,%s] %s", dang, tmp, rw, uae_drives[index].device_name + 3);
        return name;
     }
     if (flags & 2)
@@ -1154,16 +1203,19 @@ static int hmc (struct hardfiledata *hfd)
     int first = 1;
 
     while (hfd->handle_valid) {
-//     write_log ("testing if %s:%d has media inserted\n", hfd->emptyname, nr);
+       DWORD errormode;
+       write_log ("testing if %s has media inserted\n", hfd->emptyname);
        status = 0;
+       errormode = SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
        SetFilePointer (hfd->handle, 0, NULL, FILE_BEGIN);
        ret = ReadFile (hfd->handle, buf, hfd->blocksize, &got, NULL);
        err = GetLastError ();
-//     if (ret)
-//         write_log ("read ok\n");
-//     else
-//         write_log ("=%d\n", err);
-       if (!ret && err == ERROR_DEV_NOT_EXIST) {
+       SetErrorMode(errormode);
+       if (ret)
+           write_log ("read ok\n");
+       else
+           write_log ("=%d\n", err);
+       if (!ret && (err == ERROR_DEV_NOT_EXIST || err == ERROR_WRONG_DISK)) {
            if (!first)
                break;
            first = 0;
@@ -1183,7 +1235,7 @@ static int hmc (struct hardfiledata *hfd)
        status = -1;
 end:
     xfree (buf);
-    //write_log("hmc returned %d\n", status);
+    write_log("hmc returned %d\n", status);
     return status;
 }
 
diff --git a/od-win32/hq3x16.asm b/od-win32/hq3x16.asm
new file mode 100755 (executable)
index 0000000..c76a3a1
--- /dev/null
@@ -0,0 +1,2520 @@
+;hq3x filter
+;16bpp output
+;----------------------------------------------------------
+;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
+;
+;This program is free software; you can redistribute it and/or
+;modify it under the terms of the GNU General Public License
+;as published by the Free Software Foundation; either
+;version 2 of the License, or (at your option) any later
+;version.
+;
+;This program is distributed in the hope that it will be useful,
+;but WITHOUT ANY WARRANTY; without even the implied warranty of
+;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;GNU General Public License for more details.
+;
+;You should have received a copy of the GNU General Public License
+;along with this program; if not, write to the Free Software
+;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+GLOBAL _hq3x_16                 ; export the filter entry point
+
+EXTERN _LUT16to32               ; external dword table indexed by a 16-bit pixel value (read by Interp3/Interp4)
+EXTERN _RGBtoYUV                ; external dword table indexed by a 16-bit pixel value (read by TestDiff and the flag tests)
+
+SECTION .bss
+linesleft resd 1                ; loaded with the Yres argument on entry
+xcounter  resd 1                ; set to Xres-2 at the start of every line
+cross     resd 1                ; bit mask: 1/2/4/8 set when w2/w4/w6/w8 is not bit-identical to w5
+nextline  resd 1                ; byte offset from esi to the line below (starts at Xres*2)
+prevline  resd 1                ; byte offset from esi to the line above (starts at 0)
+w1        resd 1                ; w1..w3: top row of the 3x3 input window (read at esi+prevline)
+w2        resd 1
+w3        resd 1
+w4        resd 1                ; w4..w6: middle row of the window (read at esi)
+w5        resd 1                ; centre pixel; kept in eax while the output pixels are written
+w6        resd 1
+w7        resd 1                ; w7..w9: bottom row of the window (read at esi+nextline)
+w8        resd 1
+w9        resd 1
+
+SECTION .data
+
+reg_blank    dd  0,0                    ; 8 zero bytes: used to widen bytes to words and to pack back
+const7       dd  0x00070007,0x00000007  ; words 7,7,7,0: per-channel multiplier for Interp3/Interp4
+threshold    dd  0x00300706,0x00000000  ; per-byte limits 0x06,0x07,0x30 subtracted from _RGBtoYUV differences (presumably V,U,Y - confirm table layout)
+zerolowbits  dd  0xF7DEF7DE             ; clears bits 0, 5 and 11 of each 16-bit half so halving cannot leak between 5-6-5 fields
+
+SECTION .text
+
+%macro TestDiff 2               ; ecx = nonzero if pixels [%1] and [%2] differ by more than threshold, else 0; clobbers ecx, edx, mm1, mm2, mm5
+    xor     ecx,ecx             ; default result: "same"
+    mov     edx,[%1]
+    cmp     edx,[%2]
+    je      %%fin               ; bit-identical pixels need no table lookup
+    mov     ecx,_RGBtoYUV
+    movd    mm1,[ecx+edx*4]     ; table entry for pixel %1
+    movq    mm5,mm1             ; keep a copy for the reverse subtraction
+    mov     edx,[%2]
+    movd    mm2,[ecx+edx*4]     ; table entry for pixel %2
+    psubusb mm1,mm2             ; saturating a-b ...
+    psubusb mm2,mm5             ; ... and b-a ...
+    por     mm1,mm2             ; ... OR-ed together give the per-byte absolute difference
+    psubusb mm1,[threshold]     ; bytes within their limit saturate to 0
+    movd    ecx,mm1             ; nonzero if any byte exceeded its limit
+%%fin:
+%endmacro
+
+%macro DiffOrNot 4              ; if TestDiff(%1,%2) reports a difference emit %3, otherwise emit %4
+   TestDiff %1,%2
+   test ecx,ecx                 ; ecx == 0 means the two pixels are within threshold
+   jz   %%same
+   %3
+   jmp %%fin
+%%same:
+   %4
+%%fin
+%endmacro
+
+%macro DiffOrNot 6              ; if TestDiff(%1,%2) reports a difference emit %3,%4, otherwise emit %5,%6
+   TestDiff %1,%2
+   test ecx,ecx                 ; ecx == 0 means the two pixels are within threshold
+   jz   %%same
+   %3
+   %4
+   jmp %%fin
+%%same:
+   %5
+   %6
+%%fin
+%endmacro
+
+%macro DiffOrNot 8              ; if TestDiff(%1,%2) reports a difference emit %3..%5, otherwise emit %6..%8
+   TestDiff %1,%2
+   test ecx,ecx                 ; ecx == 0 means the two pixels are within threshold
+   jz   %%same
+   %3
+   %4
+   %5
+   jmp %%fin
+%%same:
+   %6
+   %7
+   %8
+%%fin
+%endmacro
+
+%macro DiffOrNot 10             ; if TestDiff(%1,%2) reports a difference emit %3..%6, otherwise emit %7..%10
+   TestDiff %1,%2
+   test ecx,ecx                 ; ecx == 0 means the two pixels are within threshold
+   jz %%same
+   %3
+   %4
+   %5
+   %6
+   jmp %%fin
+%%same:
+   %7
+   %8
+   %9
+   %10
+%%fin
+%endmacro
+
+%macro Interp1 3                ; store to %1 a 16-bit blend of roughly (3*%2 + %3)/4 per colour field; clobbers ecx, edx
+    mov edx,%2
+    mov ecx,%3
+    cmp edx,ecx
+    je  %%fin                   ; equal inputs: result is %2 unchanged
+    and edx,[zerolowbits]       ; drop each field's low bit so the shifts below cannot carry into a neighbour
+    and ecx,[zerolowbits]
+    add ecx,edx
+    shr ecx,1                   ; ecx = (%2 + %3)/2
+    add ecx,0x0821              ; add 1 to each field (bits 0, 5, 11) before masking again
+    and ecx,[zerolowbits]
+    add edx,ecx
+    shr edx,1                   ; edx = (%2 + (%2 + %3)/2)/2
+%%fin
+    mov %1,dx
+%endmacro
+
+%macro Interp2 4                ; store to %1 a 16-bit blend of roughly (2*%2 + %3 + %4)/4 per colour field; clobbers ecx, edx
+    mov edx,%3
+    mov ecx,%4
+    cmp edx,ecx
+    je  %%fin1                  ; %3 == %4: their average is just ecx
+    and edx,[zerolowbits]       ; drop each field's low bit so the shift cannot carry into a neighbour
+    and ecx,[zerolowbits]
+    add ecx,edx
+    shr ecx,1                   ; ecx = (%3 + %4)/2
+    add ecx,0x0821              ; add 1 to each field (bits 0, 5, 11)
+%%fin1
+    mov edx,%2
+    cmp edx,ecx
+    je  %%fin2                  ; %2 already equals the average: store it as is
+    and ecx,[zerolowbits]
+    and edx,[zerolowbits]
+    add edx,ecx
+    shr edx,1                   ; edx = (%2 + average)/2
+%%fin2
+    mov %1,dx
+%endmacro
+
+%macro Interp3 2                ; store to %1 a blend of 7 parts pixel eax and 1 part pixel %2, computed per byte via _LUT16to32; clobbers ecx, edx, mm1, mm2
+    mov        ecx, _LUT16to32
+    movd       mm1, [ecx+eax*4]     ; table entry for the pixel in eax (implicit input)
+    mov        edx, %2
+    movd       mm2, [ecx+edx*4]     ; table entry for pixel %2
+    punpcklbw  mm1, [reg_blank]     ; widen each byte to a word
+    punpcklbw  mm2, [reg_blank]
+    pmullw     mm1, [const7]        ; 7 * eax-pixel channels
+    paddw      mm1, mm2             ; + %2 channels
+    psrlw      mm1, 5               ; divide by 8, then by 4 more: 6 significant bits per channel
+    packuswb   mm1, [reg_blank]     ; back to one byte per channel
+    movd       edx, mm1
+    shl        dl,  2               ; the next four shifts squeeze the three 6-bit bytes into one 5-6-5 word
+    shr        edx, 1
+    shl        dx,  3
+    shr        edx, 5
+    mov        %1,  dx
+%endmacro
+
+%macro Interp4 3                ; store to %1 a blend of 2 parts pixel eax and 7 parts each of %2 and %3 (sum/16), per byte via _LUT16to32; clobbers ecx, edx, mm1-mm3
+    mov        ecx, _LUT16to32
+    movd       mm1, [ecx+eax*4]     ; table entry for the pixel in eax (implicit input)
+    mov        edx, %2
+    movd       mm2, [ecx+edx*4]     ; table entry for pixel %2
+    mov        edx, %3
+    movd       mm3, [ecx+edx*4]     ; table entry for pixel %3
+    punpcklbw  mm1, [reg_blank]     ; widen each byte to a word
+    punpcklbw  mm2, [reg_blank]
+    punpcklbw  mm3, [reg_blank]
+    psllw      mm1, 1               ; 2 * eax-pixel channels
+    paddw      mm2, mm3
+    pmullw     mm2, [const7]        ; 7 * (%2 + %3) channels
+    paddw      mm1, mm2
+    psrlw      mm1, 6               ; divide by 16, then by 4 more: 6 significant bits per channel
+    packuswb   mm1, [reg_blank]     ; back to one byte per channel
+    movd       edx, mm1
+    shl        dl,  2               ; the next four shifts squeeze the three 6-bit bytes into one 5-6-5 word
+    shr        edx, 1
+    shl        dx,  3
+    shr        edx, 5
+    mov        %1,  dx
+%endmacro
+
+%macro Interp5 3                ; store to %1 the 16-bit average (%2 + %3)/2 per colour field; clobbers ecx, edx
+    mov edx,%2
+    mov ecx,%3
+    cmp edx,ecx
+    je  %%fin                   ; equal inputs: result is %2 unchanged
+    and edx,[zerolowbits]       ; drop each field's low bit so the shift cannot carry into a neighbour
+    and ecx,[zerolowbits]
+    add edx,ecx
+    shr edx,1
+%%fin
+    mov %1,dx
+%endmacro
+
+%macro PIXEL00_1M 0             ; PIXEL00_* write the output pixel at [edi] (row 0, column 0); eax holds the centre pixel w5. This one: Interp1(centre, w1)
+    Interp1 [edi],eax,[w1]
+%endmacro
+
+%macro PIXEL00_1U 0             ; Interp1(centre, w2)
+    Interp1 [edi],eax,[w2]
+%endmacro
+
+%macro PIXEL00_1L 0             ; Interp1(centre, w4)
+    Interp1 [edi],eax,[w4]
+%endmacro
+
+%macro PIXEL00_2 0              ; Interp2(centre, w4, w2)
+    Interp2 [edi],eax,[w4],[w2]
+%endmacro
+
+%macro PIXEL00_4 0              ; Interp4(w4, w2), centre taken implicitly from eax
+    Interp4 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_5 0              ; Interp5(w4, w2): centre not used
+    Interp5 [edi],[w4],[w2]
+%endmacro
+
+%macro PIXEL00_C 0              ; plain copy of the centre pixel
+    mov [edi],ax
+%endmacro
+
+%macro PIXEL01_1 0              ; PIXEL01_* write the output pixel at [edi+2] (row 0, column 1); eax holds the centre pixel w5. This one: Interp1(centre, w2)
+    Interp1 [edi+2],eax,[w2]
+%endmacro
+
+%macro PIXEL01_3 0              ; Interp3(w2), centre taken implicitly from eax
+    Interp3 [edi+2],[w2]
+%endmacro
+
+%macro PIXEL01_6 0              ; Interp1 with the roles swapped: w2 is the heavier input, centre the lighter
+    Interp1 [edi+2],[w2],eax
+%endmacro
+
+%macro PIXEL01_C 0              ; plain copy of the centre pixel
+    mov [edi+2],ax
+%endmacro
+
+%macro PIXEL02_1M 0             ; PIXEL02_* write the output pixel at [edi+4] (row 0, column 2); eax holds the centre pixel w5. This one: Interp1(centre, w3)
+    Interp1 [edi+4],eax,[w3]
+%endmacro
+
+%macro PIXEL02_1U 0             ; Interp1(centre, w2)
+    Interp1 [edi+4],eax,[w2]
+%endmacro
+
+%macro PIXEL02_1R 0             ; Interp1(centre, w6)
+    Interp1 [edi+4],eax,[w6]
+%endmacro
+
+%macro PIXEL02_2 0              ; Interp2(centre, w2, w6)
+    Interp2 [edi+4],eax,[w2],[w6]
+%endmacro
+
+%macro PIXEL02_4 0              ; Interp4(w2, w6), centre taken implicitly from eax
+    Interp4 [edi+4],[w2],[w6]
+%endmacro
+
+%macro PIXEL02_5 0              ; Interp5(w2, w6): centre not used
+    Interp5 [edi+4],[w2],[w6]
+%endmacro
+
+%macro PIXEL02_C 0              ; plain copy of the centre pixel
+    mov [edi+4],ax
+%endmacro
+
+%macro PIXEL10_1 0              ; PIXEL10_* write the output pixel at [edi+ebx] (row 1, column 0); ebx is loaded from the pitch argument, eax holds the centre pixel w5. This one: Interp1(centre, w4)
+    Interp1 [edi+ebx],eax,[w4]
+%endmacro
+
+%macro PIXEL10_3 0              ; Interp3(w4), centre taken implicitly from eax
+    Interp3 [edi+ebx],[w4]
+%endmacro
+
+%macro PIXEL10_6 0              ; Interp1 with the roles swapped: w4 is the heavier input, centre the lighter
+    Interp1 [edi+ebx],[w4],eax
+%endmacro
+
+%macro PIXEL10_C 0              ; plain copy of the centre pixel
+    mov [edi+ebx],ax
+%endmacro
+
+%macro PIXEL11 0                ; middle output pixel (row 1, column 1): always a copy of the centre pixel in ax; ebx is loaded from the pitch argument
+    mov [edi+ebx+2],ax
+%endmacro
+
+%macro PIXEL12_1 0              ; PIXEL12_* write the output pixel at [edi+ebx+4] (row 1, column 2); ebx is loaded from the pitch argument, eax holds the centre pixel w5. This one: Interp1(centre, w6)
+    Interp1 [edi+ebx+4],eax,[w6]
+%endmacro
+
+%macro PIXEL12_3 0              ; Interp3(w6), centre taken implicitly from eax
+    Interp3 [edi+ebx+4],[w6]
+%endmacro             
+
+%macro PIXEL12_6 0              ; Interp1 with the roles swapped: w6 is the heavier input, centre the lighter
+    Interp1 [edi+ebx+4],[w6],eax
+%endmacro
+
+%macro PIXEL12_C 0              ; plain copy of the centre pixel
+    mov [edi+ebx+4],ax
+%endmacro
+
+%macro PIXEL20_1M 0             ; PIXEL20_* write the output pixel at [edi+ebx*2] (row 2, column 0); ebx is loaded from the pitch argument, eax holds the centre pixel w5. This one: Interp1(centre, w7)
+    Interp1 [edi+ebx*2],eax,[w7]
+%endmacro
+
+%macro PIXEL20_1D 0             ; Interp1(centre, w8)
+    Interp1 [edi+ebx*2],eax,[w8]
+%endmacro
+
+%macro PIXEL20_1L 0             ; Interp1(centre, w4)
+    Interp1 [edi+ebx*2],eax,[w4]
+%endmacro
+
+%macro PIXEL20_2 0              ; Interp2(centre, w8, w4)
+    Interp2 [edi+ebx*2],eax,[w8],[w4]
+%endmacro
+
+%macro PIXEL20_4 0              ; Interp4(w8, w4), centre taken implicitly from eax
+    Interp4 [edi+ebx*2],[w8],[w4]
+%endmacro
+
+%macro PIXEL20_5 0              ; Interp5(w8, w4): centre not used
+    Interp5 [edi+ebx*2],[w8],[w4]
+%endmacro
+
+%macro PIXEL20_C 0              ; plain copy of the centre pixel
+    mov [edi+ebx*2],ax
+%endmacro
+
+%macro PIXEL21_1 0              ; PIXEL21_* write the output pixel at [edi+ebx*2+2] (row 2, column 1); ebx is loaded from the pitch argument, eax holds the centre pixel w5. This one: Interp1(centre, w8)
+    Interp1 [edi+ebx*2+2],eax,[w8]
+%endmacro
+
+%macro PIXEL21_3 0              ; Interp3(w8), centre taken implicitly from eax
+    Interp3 [edi+ebx*2+2],[w8]
+%endmacro
+
+%macro PIXEL21_6 0              ; Interp1 with the roles swapped: w8 is the heavier input, centre the lighter
+    Interp1 [edi+ebx*2+2],[w8],eax
+%endmacro
+
+%macro PIXEL21_C 0              ; plain copy of the centre pixel
+    mov [edi+ebx*2+2],ax
+%endmacro
+
+%macro PIXEL22_1M 0             ; PIXEL22_* write the output pixel at [edi+ebx*2+4] (row 2, column 2); ebx is loaded from the pitch argument, eax holds the centre pixel w5. This one: Interp1(centre, w9)
+    Interp1 [edi+ebx*2+4],eax,[w9]
+%endmacro
+
+%macro PIXEL22_1D 0             ; Interp1(centre, w8)
+    Interp1 [edi+ebx*2+4],eax,[w8]
+%endmacro
+
+%macro PIXEL22_1R 0             ; Interp1(centre, w6)
+    Interp1 [edi+ebx*2+4],eax,[w6]
+%endmacro
+
+%macro PIXEL22_2 0              ; Interp2(centre, w6, w8)
+    Interp2 [edi+ebx*2+4],eax,[w6],[w8]
+%endmacro
+
+%macro PIXEL22_4 0              ; Interp4(w6, w8), centre taken implicitly from eax
+    Interp4 [edi+ebx*2+4],[w6],[w8]
+%endmacro
+
+%macro PIXEL22_5 0              ; Interp5(w6, w8): centre not used
+    Interp5 [edi+ebx*2+4],[w6],[w8]
+%endmacro
+
+%macro PIXEL22_C 0              ; plain copy of the centre pixel
+    mov [edi+ebx*2+4],ax
+%endmacro
+
+inbuffer     equ 8              ; ebp-relative offsets of the stack arguments of _hq3x_16 (after push ebp / mov ebp,esp); this one is loaded into esi
+outbuffer    equ 12             ; loaded into edi, the base address for the PIXEL* stores
+Xres         equ 16             ; pixels per input line; Xres*2 is used as the initial next-line byte offset
+Yres         equ 20             ; number of input lines, copied to linesleft
+pitch        equ 24             ; loaded into ebx and used as the byte stride between output rows
+
+_hq3x_16:
+    push ebp
+    mov ebp,esp
+    pushad
+
+    mov     esi,[ebp+inbuffer]
+    mov     edi,[ebp+outbuffer]
+    mov     edx,[ebp+Yres]
+    mov     [linesleft],edx
+    mov     ebx,[ebp+Xres]
+    shl     ebx,1
+    mov     dword[prevline],0
+    mov     dword[nextline],ebx
+.loopy
+    mov     ecx,[ebp+Xres]
+    sub     ecx,2                 ; x={Xres-2, Xres-1} are special cases.
+    mov     dword[xcounter],ecx
+    ; x=0 - special case
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx]
+    movq    mm6,[esi]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx]
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w1],edx
+    mov     [w2],edx
+    shr     eax,16
+    mov     [w3],eax
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx
+    mov     [w5],edx
+    shr     eax,16
+    mov     [w6],eax
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx
+    mov     [w8],edx
+    shr     eax,16
+    mov     [w9],eax
+    jmp     .flags
+.loopx
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-2]
+    movq    mm6,[esi-2]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-2]
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w1],edx
+    shr     eax,16
+    mov     [w2],eax
+    psrlq   mm5,32
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w3],edx
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx
+    shr     eax,16
+    mov     [w5],eax
+    psrlq   mm6,32
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w6],edx
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx
+    shr     eax,16
+    mov     [w8],eax
+    psrlq   mm7,32
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w9],edx
+.flags
+    mov     ebx,_RGBtoYUV
+    mov     eax,[w5]
+    xor     ecx,ecx
+    movd    mm5,[ebx+eax*4]
+    mov     dword[cross],0
+
+    mov     edx,[w2]
+    cmp     eax,edx
+    je      .noflag2
+    or      dword[cross],1
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag2
+    or      ecx,2
+.noflag2
+    mov     edx,[w4]
+    cmp     eax,edx
+    je      .noflag4
+    or      dword[cross],2
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag4
+    or      ecx,8
+.noflag4
+    mov     edx,[w6]
+    cmp     eax,edx
+    je      .noflag6
+    or      dword[cross],4
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag6
+    or      ecx,16
+.noflag6
+    mov     edx,[w8]
+    cmp     eax,edx
+    je      .noflag8
+    or      dword[cross],8
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag8
+    or      ecx,64
+.noflag8
+    test    ecx,ecx
+    jnz     .testflag1
+    mov     ecx,[cross]
+    mov     ebx,[ebp+pitch]
+    jmp     [FuncTable2+ecx*4]
+.testflag1
+    mov     edx,[w1]
+    cmp     eax,edx
+    je      .noflag1
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag1
+    or      ecx,1
+.noflag1
+    mov     edx,[w3]
+    cmp     eax,edx
+    je      .noflag3
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag3
+    or      ecx,4
+.noflag3
+    mov     edx,[w7]
+    cmp     eax,edx
+    je      .noflag7
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag7
+    or      ecx,32
+.noflag7
+    mov     edx,[w9]
+    cmp     eax,edx
+    je      .noflag9
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag9
+    or      ecx,128
+.noflag9
+    mov  ebx,[ebp+pitch]
+    jmp  [FuncTable+ecx*4]
+
+..@flag0
+..@flag1
+..@flag4
+..@flag32
+..@flag128
+..@flag5
+..@flag132
+..@flag160
+..@flag33
+..@flag129
+..@flag36
+..@flag133
+..@flag164
+..@flag161
+..@flag37
+..@flag165
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag2
+..@flag34
+..@flag130
+..@flag162
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag16
+..@flag17
+..@flag48
+..@flag49
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag64
+..@flag65
+..@flag68
+..@flag69
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag8
+..@flag12
+..@flag136
+..@flag140
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag3
+..@flag35
+..@flag131
+..@flag163
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag6
+..@flag38
+..@flag134
+..@flag166
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag20
+..@flag21
+..@flag52
+..@flag53
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag144
+..@flag145
+..@flag176
+..@flag177
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag192
+..@flag193
+..@flag196
+..@flag197
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag96
+..@flag97
+..@flag100
+..@flag101
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag40
+..@flag44
+..@flag168
+..@flag172
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag9
+..@flag13
+..@flag137
+..@flag141
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag18
+..@flag50
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag80
+..@flag81
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag72
+..@flag76
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag10
+..@flag138
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag66
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag24
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag7
+..@flag39
+..@flag135
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag148
+..@flag149
+..@flag180
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag224
+..@flag228
+..@flag225
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag41
+..@flag169
+..@flag45
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag22
+..@flag54
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag208
+..@flag209
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag104
+..@flag108
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag11
+..@flag139
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag19
+..@flag51
+    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag146
+..@flag178
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    jmp .loopx_end
+..@flag84
+..@flag85
+    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    jmp .loopx_end
+..@flag112
+..@flag113
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_1M,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    jmp .loopx_end
+..@flag200
+..@flag204
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    jmp .loopx_end
+..@flag73
+..@flag77
+    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag42
+..@flag170
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag14
+..@flag142
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag67
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag70
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag28
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag152
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag194
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag98
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag56
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag25
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag26
+..@flag31
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag82
+..@flag214
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag88
+..@flag248
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    jmp .loopx_end
+..@flag74
+..@flag107
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag27
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag86
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag216
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag106
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag30
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag210
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag120
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag75
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag29
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag198
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag184
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag99
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag57
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag71
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag156
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag226
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag60
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag195
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag102
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag153
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag58                  ; FuncTable[58]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C              ; NOTE(review): DiffOrNot is defined above this excerpt; the
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C              ; hq3x32.asm version expands the first macro when the two w
+    PIXEL11                ; pixels differ and the second when they match - confirm the
+    PIXEL12_C              ; 16bpp definition agrees.
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag83                  ; FuncTable[83]: tests w2/w6 and w6/w8
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag92                  ; FuncTable[92]: tests w8/w4 and w6/w8
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag202                 ; FuncTable[202]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag78                  ; FuncTable[78]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag154                 ; FuncTable[154]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag114                 ; FuncTable[114]: tests w2/w6 and w6/w8
+    PIXEL00_1M
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag89                  ; FuncTable[89]: tests w8/w4 and w6/w8
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag90                  ; FuncTable[90]: tests all four pairs (w4/w2, w2/w6, w8/w4, w6/w8)
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag55                  ; FuncTable[55] and FuncTable[23] share this handler
+..@flag23                  ; one w2/w6 test picks between two sets of four PIXEL macros
+    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    PIXEL10_1              ; NOTE(review): DiffOrNot is defined above this excerpt; in
+    PIXEL11                ; hq3x32.asm the 10-argument form runs args 3-6 when the pixels
+    PIXEL20_2              ; differ and args 7-10 when they match - confirm for 16bpp.
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag182                 ; FuncTable[182] and FuncTable[150] share this handler
+..@flag150                 ; tests w2/w6
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    jmp .loopx_end
+..@flag213                 ; FuncTable[213] and FuncTable[212] share this handler
+..@flag212                 ; tests w6/w8
+    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    jmp .loopx_end
+..@flag241                 ; FuncTable[241] and FuncTable[240] share this handler
+..@flag240                 ; tests w6/w8
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    jmp .loopx_end
+..@flag236                 ; FuncTable[236] and FuncTable[232] share this handler
+..@flag232                 ; tests w8/w4
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    jmp .loopx_end
+..@flag109                 ; FuncTable[109] and FuncTable[105] share this handler
+..@flag105                 ; tests w8/w4
+    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag171                 ; FuncTable[171] and FuncTable[43] share this handler
+..@flag43                  ; tests w4/w2
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag143                 ; FuncTable[143] and FuncTable[15] share this handler
+..@flag15                  ; tests w4/w2
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag124                 ; FuncTable[124]: tests w8/w4 (three PIXEL macros per outcome)
+    PIXEL00_1M             ; NOTE(review): DiffOrNot is defined above this excerpt; in
+    PIXEL01_1              ; hq3x32.asm the 8-argument form runs args 3-5 when the pixels
+    PIXEL02_1U             ; differ and args 6-8 when they match - confirm for 16bpp.
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag203                 ; FuncTable[203]: tests w4/w2
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag62                  ; FuncTable[62]: tests w2/w6
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag211                 ; FuncTable[211]: tests w6/w8
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag118                 ; FuncTable[118]: tests w2/w6
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag217                 ; FuncTable[217]: tests w6/w8
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag110                 ; FuncTable[110]: tests w8/w4
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag155                 ; FuncTable[155]: tests w4/w2
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag188                 ; FuncTable[188]: no neighbour tests, nine PIXEL macros run unconditionally
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end         ; common tail: step to the next source pixel
+..@flag185                 ; FuncTable[185]: no neighbour tests
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag61                  ; FuncTable[61]: no neighbour tests
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag157                 ; FuncTable[157]: no neighbour tests
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag103                 ; FuncTable[103]: no neighbour tests
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag227                 ; FuncTable[227]: no neighbour tests
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag230                 ; FuncTable[230]: no neighbour tests
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag199                 ; FuncTable[199]: no neighbour tests
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag220                 ; FuncTable[220]: tests w8/w4 and w6/w8
+    PIXEL00_1M             ; NOTE(review): DiffOrNot is defined above this excerpt; in
+    PIXEL01_1              ; hq3x32.asm it runs the first half of its macro list when the
+    PIXEL02_1U             ; two w pixels differ and the second half when they match -
+    PIXEL10_C              ; confirm the 16bpp definition agrees.
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag158                 ; FuncTable[158]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag234                 ; FuncTable[234]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag242                 ; FuncTable[242]: tests w2/w6 and w6/w8
+    PIXEL00_1M
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1L
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag59                  ; FuncTable[59]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag121                 ; FuncTable[121]: tests w8/w4 and w6/w8
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag87                  ; FuncTable[87]: tests w2/w6 and w6/w8
+    PIXEL00_1L
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag79                  ; FuncTable[79]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1R
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag122                 ; FuncTable[122]: tests all four pairs (w4/w2, w2/w6, w8/w4, w6/w8)
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag94                  ; FuncTable[94]: tests all four pairs
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag218                 ; FuncTable[218]: tests all four pairs
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag91                  ; FuncTable[91]: tests all four pairs
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag229                 ; FuncTable[229]: no neighbour tests, nine PIXEL macros run unconditionally
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end         ; common tail: step to the next source pixel
+..@flag167                 ; FuncTable[167]: no neighbour tests
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag173                 ; FuncTable[173]: no neighbour tests
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag181                 ; FuncTable[181]: no neighbour tests
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag186                 ; FuncTable[186]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C              ; NOTE(review): DiffOrNot is defined above this excerpt; the
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C              ; hq3x32.asm version expands the first macro when the two w
+    PIXEL11                ; pixels differ and the second when they match - confirm the
+    PIXEL12_C              ; 16bpp definition agrees.
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag115                 ; FuncTable[115]: tests w2/w6 and w6/w8
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag93                  ; FuncTable[93]: tests w8/w4 and w6/w8
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag206                 ; FuncTable[206]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag205                 ; FuncTable[205] and FuncTable[201] share this handler
+..@flag201                 ; tests w8/w4
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag174                 ; FuncTable[174] and FuncTable[46] share this handler
+..@flag46                  ; tests w4/w2
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag179                 ; FuncTable[179] and FuncTable[147] share this handler
+..@flag147                 ; tests w2/w6
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag117                 ; FuncTable[117] and FuncTable[116] share this handler
+..@flag116                 ; tests w6/w8
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag189                 ; FuncTable[189]: no neighbour tests
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end         ; common tail: step to the next source pixel
+..@flag231                 ; FuncTable[231]: no neighbour tests
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag126                 ; FuncTable[126]: tests w2/w6 and w8/w4
+    PIXEL00_1M             ; NOTE(review): DiffOrNot is defined above this excerpt; in
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL11                ; hq3x32.asm it runs the first half of its macro list when the
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M             ; pixels differ, the second half when they match - confirm.
+    jmp .loopx_end
+..@flag219                 ; FuncTable[219]: tests w4/w2 and w6/w8
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag125                 ; FuncTable[125]: tests w8/w4
+    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL11
+    PIXEL12_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag221                 ; FuncTable[221]: tests w6/w8
+    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    jmp .loopx_end
+..@flag207                 ; FuncTable[207]: tests w4/w2
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag238                 ; FuncTable[238]: tests w8/w4
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL11
+    PIXEL12_1
+    jmp .loopx_end
+..@flag190                 ; FuncTable[190]: tests w2/w6
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1D
+    PIXEL21_1
+    jmp .loopx_end
+..@flag187                 ; FuncTable[187]: tests w4/w2
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag243                 ; FuncTable[243]: tests w6/w8
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    jmp .loopx_end
+..@flag119                 ; FuncTable[119]: tests w2/w6
+    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag237                 ; FuncTable[237] and FuncTable[233] share this handler
+..@flag233                 ; tests w8/w4
+    PIXEL00_1U             ; NOTE(review): DiffOrNot is defined above this excerpt; the
+    PIXEL01_1              ; hq3x32.asm version expands the first macro when the two w
+    PIXEL02_2              ; pixels differ and the second when they match - confirm the
+    PIXEL10_C              ; 16bpp definition agrees.
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag175                 ; FuncTable[175] and FuncTable[47] share this handler
+..@flag47                  ; tests w4/w2
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag183                 ; FuncTable[183] and FuncTable[151] share this handler
+..@flag151                 ; tests w2/w6
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag245                 ; FuncTable[245] and FuncTable[244] share this handler
+..@flag244                 ; tests w6/w8
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag250                 ; FuncTable[250]: tests w8/w4 and w6/w8
+    PIXEL00_1M             ; NOTE(review): DiffOrNot is defined above this excerpt; in
+    PIXEL01_C              ; hq3x32.asm it runs the first half of its macro list when the
+    PIXEL02_1M             ; two w pixels differ and the second half when they match -
+    PIXEL11                ; confirm the 16bpp definition agrees.
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    jmp .loopx_end
+..@flag123                 ; FuncTable[123]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag95                  ; FuncTable[95]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag222                 ; FuncTable[222]: tests w2/w6 and w6/w8
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag252                 ; FuncTable[252]: tests w8/w4 and w6/w8
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag249                 ; FuncTable[249]: tests w8/w4 and w6/w8
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    jmp .loopx_end
+..@flag235                 ; FuncTable[235]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag111                 ; FuncTable[111]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag63                  ; FuncTable[63]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag159                 ; FuncTable[159]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag215                 ; FuncTable[215]: tests w2/w6 and w6/w8
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag246                 ; FuncTable[246]: tests w2/w6 and w6/w8
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag254                 ; FuncTable[254]: tests w2/w6, w8/w4 and w6/w8
+    PIXEL00_1M             ; NOTE(review): DiffOrNot is defined above this excerpt; in
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    PIXEL11                ; hq3x32.asm it runs the first half of its macro list when the
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_2
+    jmp .loopx_end         ; pixels differ, the second half when they match - confirm.
+..@flag253                 ; FuncTable[253]: tests w8/w4 and w6/w8
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag251                 ; FuncTable[251]: tests w4/w2, w8/w4 and w6/w8
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    PIXEL02_1M
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_2,PIXEL21_3
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    jmp .loopx_end
+..@flag239                 ; FuncTable[239]: tests w4/w2 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag127                 ; FuncTable[127]: tests w4/w2, w2/w6 and w8/w4
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_2,PIXEL01_3,PIXEL10_3
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag191                 ; FuncTable[191]: tests w4/w2 and w2/w6
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag223                 ; FuncTable[223]: tests w4/w2, w2/w6 and w6/w8
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_2,PIXEL12_3
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag247                 ; FuncTable[247]: tests w2/w6 and w6/w8
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag255                 ; FuncTable[255]: tests all four pairs (w4/w2, w2/w6, w8/w4, w6/w8)
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+
+..@cross0                  ; FuncTable2[0]: store one 16-bit value into all nine cells of the 3x3 block
+    mov edx,eax            ; NOTE(review): eax is loaded before this excerpt - presumably the centre pixel with a zero upper half
+    shl eax,16
+    or  eax,edx            ; eax = low word replicated into both halves
+    mov [edi],eax          ; row 0, pixels 0-1
+    mov [edi+4],ax         ; row 0, pixel 2
+    mov [edi+ebx],eax      ; row 1 (ebx is used as the byte offset between output rows)
+    mov [edi+ebx+4],ax
+    mov [edi+ebx*2],eax    ; row 2
+    mov [edi+ebx*2+4],ax
+    jmp .loopx_end
+..@cross1                  ; FuncTable2[1]: row 0 gets a blend of eax with w2, rows 1-2 get eax unchanged
+    mov edx,eax            ; edx = colour to blend (NOTE(review): eax is loaded before this excerpt)
+    shl eax,16
+    or  eax,edx            ; eax = low word replicated into both halves
+    mov ecx,[w2]
+    and edx,[zerolowbits]  ; NOTE(review): zerolowbits is defined above this excerpt; presumably
+    and ecx,[zerolowbits]  ; it clears each field's low bit so the shifts cannot leak between fields
+    add ecx,edx
+    shr ecx,1              ; ecx = average of the two masked colours
+    add ecx,0x0821         ; bits 0, 5 and 11 set: +1 per field if the layout is 5:6:5 (rounding bias)
+    and ecx,[zerolowbits]
+    add edx,ecx
+    shr edx,1              ; edx = average of colour and the first average, roughly (3*colour + w2)/4
+    mov [edi],dx           ; row 0: three blended pixels
+    mov [edi+2],dx
+    mov [edi+4],dx
+    mov [edi+ebx],eax      ; rows 1 and 2: unblended colour
+    mov [edi+ebx+4],ax
+    mov [edi+ebx*2],eax
+    mov [edi+ebx*2+4],ax
+    jmp .loopx_end
+..@cross2                  ; FuncTable2[2]: column 0 gets a blend of eax with w4, columns 1-2 get eax unchanged
+    mov edx,eax            ; edx = colour to blend (NOTE(review): eax is loaded before this excerpt)
+    shl eax,16
+    or  eax,edx            ; eax = low word replicated into both halves
+    mov ecx,[w4]
+    and edx,[zerolowbits]  ; NOTE(review): zerolowbits is defined above this excerpt; presumably
+    and ecx,[zerolowbits]  ; it clears each field's low bit so the shifts cannot leak between fields
+    add ecx,edx
+    shr ecx,1              ; ecx = average of the two masked colours
+    add ecx,0x0821         ; bits 0, 5 and 11 set: +1 per field if the layout is 5:6:5 (rounding bias)
+    and ecx,[zerolowbits]
+    add edx,ecx
+    shr edx,1              ; edx = roughly (3*colour + w4)/4
+    mov [edi],dx           ; each row: blended pixel 0 ...
+    mov [edi+2],eax        ; ... then pixels 1-2 unblended (one dword store)
+    mov [edi+ebx],dx
+    mov [edi+ebx+2],eax
+    mov [edi+ebx*2],dx
+    mov [edi+ebx*2+2],eax
+    jmp .loopx_end
+..@cross4                  ; FuncTable2[4]: column 2 gets a blend of eax with w6, columns 0-1 get eax unchanged
+    mov edx,eax            ; edx = colour to blend (NOTE(review): eax is loaded before this excerpt)
+    shl eax,16
+    or  eax,edx            ; eax = low word replicated into both halves
+    mov ecx,[w6]
+    and edx,[zerolowbits]  ; NOTE(review): zerolowbits is defined above this excerpt; presumably
+    and ecx,[zerolowbits]  ; it clears each field's low bit so the shifts cannot leak between fields
+    add ecx,edx
+    shr ecx,1              ; ecx = average of the two masked colours
+    add ecx,0x0821         ; bits 0, 5 and 11 set: +1 per field if the layout is 5:6:5 (rounding bias)
+    and ecx,[zerolowbits]
+    add edx,ecx
+    shr edx,1              ; edx = roughly (3*colour + w6)/4
+    mov [edi],eax          ; each row: pixels 0-1 unblended ...
+    mov [edi+4],dx         ; ... then blended pixel 2
+    mov [edi+ebx],eax
+    mov [edi+ebx+4],dx
+    mov [edi+ebx*2],eax
+    mov [edi+ebx*2+4],dx
+    jmp .loopx_end
+..@cross8                  ; FuncTable2[8]: row 2 gets a blend of eax with w8, rows 0-1 get eax unchanged
+    mov edx,eax            ; edx = colour to blend (NOTE(review): eax is loaded before this excerpt)
+    shl eax,16
+    or  eax,edx            ; eax = low word replicated into both halves
+    mov ecx,[w8]
+    and edx,[zerolowbits]  ; NOTE(review): zerolowbits is defined above this excerpt; presumably
+    and ecx,[zerolowbits]  ; it clears each field's low bit so the shifts cannot leak between fields
+    add ecx,edx
+    shr ecx,1              ; ecx = average of the two masked colours
+    add ecx,0x0821         ; bits 0, 5 and 11 set: +1 per field if the layout is 5:6:5 (rounding bias)
+    and ecx,[zerolowbits]
+    add edx,ecx
+    shr edx,1              ; edx = roughly (3*colour + w8)/4
+    mov [edi],eax          ; rows 0 and 1: unblended colour
+    mov [edi+4],ax
+    mov [edi+ebx],eax
+    mov [edi+ebx+4],ax
+    mov [edi+ebx*2],dx     ; row 2: three blended pixels
+    mov [edi+ebx*2+2],dx
+    mov [edi+ebx*2+4],dx
+    jmp     .loopx_end
+
+.loopx_end                 ; common tail reached from every pattern / cross handler
+    add     esi,2          ; next source pixel (2 bytes)
+    add     edi,6          ; next output position: 3 pixels of 2 bytes
+    dec     dword[xcounter]
+    jle     .xres_2        ; counter <= 0: the remaining columns need edge handling
+    jmp     .loopx         ; otherwise continue with the general case
+.xres_2                    ; when entered from .loopx_end the flags of its dec are still live
+    ; x=Xres-2 - special case
+    jl      .xres_1           ; counter went negative: not this column, try x=Xres-1
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-4]   ; 8 bytes = four 16-bit pixels x-2..x+1 of the previous row
+    movq    mm6,[esi-4]       ; same window, current row (no read past x+1)
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-4]   ; same window, next row
+    psrlq   mm5,16            ; drop pixel x-2: low 48 bits now hold x-1, x, x+1
+    psrlq   mm6,16
+    psrlq   mm7,16
+    movd    eax,mm5
+    movzx   edx,ax
+    mov     [w1],edx          ; w1 = previous row, x-1
+    shr     eax,16
+    mov     [w2],eax          ; w2 = previous row, x
+    psrlq   mm5,32
+    movd    eax,mm5
+    mov     [w3],eax          ; w3 = previous row, x+1
+    movd    eax,mm6
+    movzx   edx,ax
+    mov     [w4],edx          ; w4 = current row, x-1
+    shr     eax,16
+    mov     [w5],eax          ; w5 = current row, x
+    psrlq   mm6,32
+    movd    eax,mm6
+    mov     [w6],eax          ; w6 = current row, x+1
+    movd    eax,mm7
+    movzx   edx,ax
+    mov     [w7],edx          ; w7 = next row, x-1
+    shr     eax,16
+    mov     [w8],eax          ; w8 = next row, x
+    psrlq   mm7,32
+    movd    eax,mm7
+    mov     [w9],eax          ; w9 = next row, x+1
+    jmp     .flags            ; classify the neighbourhood (code above this excerpt)
+.xres_1                    ; last column of the row, or end of row
+    cmp     dword[xcounter],-1
+    jl      .nexty            ; counter below -1: the last column was already done
+    ; x=Xres-1 - special case
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-6]   ; 8 bytes = four 16-bit pixels x-3..x of the previous row
+    movq    mm6,[esi-6]       ; same window, current row (nothing right of x is read)
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-6]   ; same window, next row
+    psrlq   mm5,32            ; keep only pixels x-1 and x
+    psrlq   mm6,32
+    psrlq   mm7,32
+    movd    eax,mm5
+    movzx   edx,ax            ; low word = pixel x-1
+    mov     [w1],edx
+    shr     eax,16
+    mov     [w2],eax
+    mov     [w3],eax          ; right neighbour missing: reuse pixel x
+    movd    eax,mm6
+    movzx   edx,ax            ; low word = pixel x-1
+    mov     [w4],edx
+    shr     eax,16
+    mov     [w5],eax
+    mov     [w6],eax          ; right neighbour missing: reuse pixel x
+    movd    eax,mm7
+    movzx   edx,ax            ; low word = pixel x-1
+    mov     [w7],edx
+    shr     eax,16
+    mov     [w8],eax
+    mov     [w9],eax          ; right neighbour missing: reuse pixel x
+    jmp     .flags            ; classify the neighbourhood (code above this excerpt)
+.nexty                     ; end of a source row; esi is not adjusted here
+    add     edi,ebx        ; skip a further 2*ebx bytes of output. NOTE(review): ebx is set
+    add     edi,ebx        ; before this excerpt - presumably the output pitch; confirm.
+    dec     dword[linesleft]
+    jz      .fin           ; no rows left
+    mov     ebx,[ebp+Xres]
+    shl     ebx,1          ; ebx = Xres*2 = bytes per source row (2 bytes per pixel)
+    cmp     dword[linesleft],1
+    je      .lastline
+    mov     dword[nextline],ebx ; offset from the current row to the row below
+    neg     ebx
+    mov     dword[prevline],ebx ; offset from the current row to the row above
+    jmp     .loopy
+.lastline                  ; only one row left: there is no row below it
+    mov     dword[nextline],0   ; "next row" reads fall on the current row instead
+    neg     ebx
+    mov     dword[prevline],ebx
+    jmp     .loopy
+.fin                       ; function epilogue
+    emms                   ; clear MMX state so later x87 floating-point code works
+    popad                  ; restore general registers (NOTE(review): matching pushad is above this excerpt)
+    mov esp,ebp            ; drop the stack frame
+    pop ebp
+    ret
+
+SECTION .data
+FuncTable                  ; 256 handler addresses: entry N is ..@flagN (indexing code is above this excerpt)
+    dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
+    dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
+    dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
+    dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
+    dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
+    dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
+    dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
+    dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
+    dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
+    dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
+    dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
+    dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
+    dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
+    dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
+    dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
+    dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
+    dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
+    dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
+    dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
+    dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
+    dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
+    dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
+    dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
+    dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
+    dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
+    dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
+    dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
+    dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
+    dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
+    dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
+    dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
+    dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+
+FuncTable2                 ; 16 handler addresses (NOTE(review): presumably indexed by the 4-bit 'cross' value - confirm)
+    dd ..@cross0, ..@cross1, ..@cross2, ..@flag0,   ; entries 0-3; only 0, 1, 2, 4 and 8 have
+    dd ..@cross4, ..@flag0,  ..@flag0,  ..@flag0,   ; a dedicated cross handler, every other
+    dd ..@cross8, ..@flag0,  ..@flag0,  ..@flag0,   ; entry points at ..@flag0
+    dd ..@flag0,  ..@flag0,  ..@flag0,  ..@flag0
+
diff --git a/od-win32/hq3x32.asm b/od-win32/hq3x32.asm
new file mode 100755 (executable)
index 0000000..16923b6
--- /dev/null
@@ -0,0 +1,2563 @@
+;hq3x filter
+;32bpp output
+;----------------------------------------------------------
+;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
+;
+;This program is free software; you can redistribute it and/or
+;modify it under the terms of the GNU General Public License
+;as published by the Free Software Foundation; either
+;version 2 of the License, or (at your option) any later
+;version.
+;
+;This program is distributed in the hope that it will be useful,
+;but WITHOUT ANY WARRANTY; without even the implied warranty of
+;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;GNU General Public License for more details.
+;
+;You should have received a copy of the GNU General Public License
+;along with this program; if not, write to the Free Software
+;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+GLOBAL _hq3x_32            ; entry point exported to the linker
+
+EXTERN _LUT16to32          ; external table (NOTE(review): presumably 16-bit pixel -> 32-bit colour; not used within this excerpt)
+EXTERN _RGBtoYUV           ; external table read by TestDiff, one dword per 16-bit pixel value
+
+SECTION .bss               ; static scratch storage: the routine keeps its state here, so it is not reentrant
+linesleft resd 1           ; source rows still to process (initialised from Yres)
+xcounter  resd 1           ; per-row column counter, starts at Xres-2
+cross     resd 1           ; NOTE(review): not referenced within this excerpt
+nextline  resd 1           ; byte offset from the current source row to the row below
+prevline  resd 1           ; byte offset to the row above (0 while on the first row)
+w1        resd 1           ; w1..w9: 3x3 neighbourhood of 16-bit source pixels, zero-extended
+w2        resd 1           ;   w1 w2 w3 = row above
+w3        resd 1
+w4        resd 1           ;   w4 w5 w6 = current row, w5 is the pixel being scaled
+w5        resd 1
+w6        resd 1
+w7        resd 1           ;   w7 w8 w9 = row below
+w8        resd 1
+w9        resd 1
+c1        resd 1           ; c1..c9: colours read by the Interp/PIXEL macros.
+c2        resd 1           ; NOTE(review): filled outside this excerpt - presumably the
+c3        resd 1           ; 32-bit expansions of w1..w9 via _LUT16to32; confirm.
+c4        resd 1
+c5        resd 1
+c6        resd 1
+c7        resd 1
+c8        resd 1
+c9        resd 1
+
+SECTION .data
+
+reg_blank    dd  0,0                      ; 8 zero bytes: unpack/pack partner for byte<->word conversion
+const7       dd  0x00070007,0x00000007    ; words 7,7,7,0: x7 multiplier for the three low channels
+threshold    dd  0x00300706,0x00000000    ; TestDiff per-byte limits 0x06, 0x07, 0x30 (low byte first); presumably V, U, Y
+
+SECTION .text
+
+%macro TestDiff 2          ; ecx = 0 if pixels [%1] and [%2] count as equal, non-zero otherwise
+    xor     ecx,ecx        ; default answer: equal            (clobbers edx, mm1, mm2, mm5)
+    mov     edx,[%1]
+    cmp     edx,[%2]
+    je      %%fin          ; identical values: skip the table lookups
+    mov     ecx,_RGBtoYUV  ; ecx = base of the external lookup table
+    movd    mm1,[ecx+edx*4] ; table entry for the first pixel
+    movq    mm5,mm1        ; keep a copy for the reverse subtraction
+    mov     edx,[%2]
+    movd    mm2,[ecx+edx*4] ; table entry for the second pixel
+    psubusb mm1,mm2        ; saturating a-b ...
+    psubusb mm2,mm5        ; ... and b-a
+    por     mm1,mm2        ; = per-byte absolute difference
+    psubusb mm1,[threshold] ; bytes within their limit saturate to 0
+    movd    ecx,mm1        ; non-zero iff some byte exceeded its limit
+%%fin:
+%endmacro
+
+%macro DiffOrNot 4         ; if [%1] and [%2] differ (per TestDiff) run %3, otherwise run %4
+   TestDiff %1,%2
+   test ecx,ecx
+   jz   %%same             ; ecx = 0: pixels count as equal
+   %3
+   jmp %%fin
+%%same:
+   %4
+%%fin
+%endmacro
+
+%macro DiffOrNot 6         ; overload: run %3,%4 when the pixels differ, %5,%6 when they match
+   TestDiff %1,%2
+   test ecx,ecx
+   jz   %%same
+   %3
+   %4
+   jmp %%fin
+%%same:
+   %5
+   %6
+%%fin
+%endmacro
+
+%macro DiffOrNot 8         ; overload: run %3-%5 when the pixels differ, %6-%8 when they match
+   TestDiff %1,%2
+   test ecx,ecx
+   jz   %%same
+   %3
+   %4
+   %5
+   jmp %%fin
+%%same:
+   %6
+   %7
+   %8
+%%fin
+%endmacro
+
+%macro DiffOrNot 10        ; overload: run %3-%6 when the pixels differ, %7-%10 when they match
+   TestDiff %1,%2
+   test ecx,ecx
+   jz %%same
+   %3
+   %4
+   %5
+   %6
+   jmp %%fin
+%%same:
+   %7
+   %8
+   %9
+   %10
+%%fin
+%endmacro
+
+%macro Interp1 3           ; %1 = (3*%2 + %3) / 4, done as one 32-bit integer operation (clobbers edx)
+    mov edx,%2
+    shl edx,2              ; 4*%2
+    add edx,%3
+    sub edx,%2             ; 3*%2 + %3
+    shr edx,2              ; NOTE(review): no per-channel masking - presumably relies on the low
+    mov %1,edx             ; bits of every colour channel being zero in the inputs; confirm.
+%endmacro
+
+%macro Interp2 4           ; %1 = (2*%2 + %3 + %4) / 4, done as one 32-bit integer operation (clobbers edx)
+    mov edx,%2
+    shl edx,1              ; 2*%2
+    add edx,%3
+    add edx,%4
+    shr edx,2              ; NOTE(review): no per-channel masking - presumably relies on the low
+    mov %1,edx             ; bits of every colour channel being zero in the inputs; confirm.
+%endmacro
+
+%macro Interp3 2           ; %1 = (7*eax + %2) / 8 per byte channel; eax is an implicit input
+    movd       mm1, eax
+    movd       mm2, %2
+    punpcklbw  mm1, [reg_blank]   ; widen the four bytes to words (reg_blank is zero)
+    punpcklbw  mm2, [reg_blank]
+    pmullw     mm1, [const7]      ; x7 on the three low channels; the fourth word of const7 is 0
+    paddw      mm1, mm2
+    psrlw      mm1, 3             ; divide each word by 8
+    packuswb   mm1, [reg_blank]   ; narrow back to bytes
+    movd       %1, mm1
+%endmacro
+
+%macro Interp4 3           ; %1 = (2*eax + 7*(%2 + %3)) / 16 per byte channel; eax is an implicit input
+    movd       mm1, eax
+    movd       mm2, %2
+    movd       mm3, %3
+    punpcklbw  mm1, [reg_blank]   ; widen the four bytes to words (reg_blank is zero)
+    punpcklbw  mm2, [reg_blank]
+    punpcklbw  mm3, [reg_blank]
+    psllw      mm1, 1             ; 2*eax
+    paddw      mm2, mm3           ; %2 + %3
+    pmullw     mm2, [const7]      ; x7 on the three low channels; the fourth word of const7 is 0
+    paddw      mm1, mm2
+    psrlw      mm1, 4             ; divide each word by 16
+    packuswb   mm1, [reg_blank]   ; narrow back to bytes
+    movd       %1, mm1
+%endmacro
+
+%macro Interp5 3           ; %1 = (%2 + %3) / 2, done as one 32-bit integer operation (clobbers edx)
+    mov edx,%2
+    add edx,%3
+    shr edx,1              ; NOTE(review): no per-channel masking - presumably relies on the low
+    mov %1,edx             ; bit of every colour channel being zero in the inputs; confirm.
+%endmacro
+
+%macro PIXEL00_1M 0        ; row 0 of the 3x3 output block: [edi]=col 0, [edi+4]=col 1, [edi+8]=col 2
+    Interp1 [edi],eax,[c1] ; col 0 = (3*eax + c1)/4; eax is also what PIXEL11 stores unchanged
+%endmacro
+
+%macro PIXEL00_1U 0        ; col 0 = (3*eax + c2)/4
+    Interp1 [edi],eax,[c2]
+%endmacro
+
+%macro PIXEL00_1L 0        ; col 0 = (3*eax + c4)/4
+    Interp1 [edi],eax,[c4]
+%endmacro
+
+%macro PIXEL00_2 0         ; col 0 = (2*eax + c4 + c2)/4
+    Interp2 [edi],eax,[c4],[c2]
+%endmacro
+
+%macro PIXEL00_4 0         ; col 0 = (2*eax + 7*(c4 + c2))/16
+    Interp4 [edi],[c4],[c2]
+%endmacro
+
+%macro PIXEL00_5 0         ; col 0 = (c4 + c2)/2
+    Interp5 [edi],[c4],[c2]
+%endmacro
+
+%macro PIXEL00_C 0         ; col 0 = eax unchanged
+    mov [edi],eax
+%endmacro
+
+%macro PIXEL01_1 0         ; col 1 = (3*eax + c2)/4
+    Interp1 [edi+4],eax,[c2]
+%endmacro
+
+%macro PIXEL01_3 0         ; col 1 = (7*eax + c2)/8
+    Interp3 [edi+4],[c2]
+%endmacro
+
+%macro PIXEL01_6 0         ; col 1 = (3*c2 + eax)/4
+    Interp1 [edi+4],[c2],eax
+%endmacro
+
+%macro PIXEL01_C 0         ; col 1 = eax unchanged
+    mov [edi+4],eax
+%endmacro
+
+%macro PIXEL02_1M 0        ; col 2 = (3*eax + c3)/4
+    Interp1 [edi+8],eax,[c3]
+%endmacro
+
+%macro PIXEL02_1U 0        ; col 2 = (3*eax + c2)/4
+    Interp1 [edi+8],eax,[c2]
+%endmacro
+
+%macro PIXEL02_1R 0        ; col 2 = (3*eax + c6)/4
+    Interp1 [edi+8],eax,[c6]
+%endmacro
+
+%macro PIXEL02_2 0         ; col 2 = (2*eax + c2 + c6)/4
+    Interp2 [edi+8],eax,[c2],[c6]
+%endmacro
+
+%macro PIXEL02_4 0         ; col 2 = (2*eax + 7*(c2 + c6))/16
+    Interp4 [edi+8],[c2],[c6]
+%endmacro
+
+%macro PIXEL02_5 0         ; col 2 = (c2 + c6)/2
+    Interp5 [edi+8],[c2],[c6]
+%endmacro
+
+%macro PIXEL02_C 0         ; col 2 = eax unchanged
+    mov [edi+8],eax
+%endmacro
+
+%macro PIXEL10_1 0         ; row 1 of the 3x3 output block, addressed at edi+ebx (ebx = byte offset between output rows)
+    Interp1 [edi+ebx],eax,[c4] ; col 0 = (3*eax + c4)/4
+%endmacro
+
+%macro PIXEL10_3 0         ; col 0 = (7*eax + c4)/8
+    Interp3 [edi+ebx],[c4]
+%endmacro
+
+%macro PIXEL10_6 0         ; col 0 = (3*c4 + eax)/4
+    Interp1 [edi+ebx],[c4],eax
+%endmacro
+
+%macro PIXEL10_C 0         ; col 0 = eax unchanged
+    mov [edi+ebx],eax
+%endmacro
+
+%macro PIXEL11 0           ; centre output pixel = eax unchanged
+    mov [edi+ebx+4],eax
+%endmacro
+
+%macro PIXEL12_1 0         ; col 2 = (3*eax + c6)/4
+    Interp1 [edi+ebx+8],eax,[c6]
+%endmacro
+
+%macro PIXEL12_3 0         ; col 2 = (7*eax + c6)/8
+    Interp3 [edi+ebx+8],[c6]
+%endmacro
+
+%macro PIXEL12_6 0         ; col 2 = (3*c6 + eax)/4
+    Interp1 [edi+ebx+8],[c6],eax
+%endmacro
+
+%macro PIXEL12_C 0         ; col 2 = eax unchanged
+    mov [edi+ebx+8],eax
+%endmacro
+
+%macro PIXEL20_1M 0              ; output row 2, col 0 (dword at [edi+ebx*2]) <- Interp1 of eax and [c7]
+    Interp1 [edi+ebx*2],eax,[c7] ; ebx is loaded from [ebp+pitch] at .switch before the FuncTable dispatch; eax = _LUT16to32[w5], [cN] = _LUT16to32[wN]
+%endmacro
+
+%macro PIXEL20_1D 0              ; row 2, col 0 <- Interp1 of eax and [c8]
+    Interp1 [edi+ebx*2],eax,[c8]
+%endmacro
+
+%macro PIXEL20_1L 0              ; row 2, col 0 <- Interp1 of eax and [c4]
+    Interp1 [edi+ebx*2],eax,[c4]
+%endmacro
+
+%macro PIXEL20_2 0               ; row 2, col 0 <- Interp2 of eax, [c8] and [c4]
+    Interp2 [edi+ebx*2],eax,[c8],[c4]
+%endmacro
+
+%macro PIXEL20_4 0               ; row 2, col 0 <- Interp4 invoked with [c8] and [c4]
+    Interp4 [edi+ebx*2],[c8],[c4]
+%endmacro
+
+%macro PIXEL20_5 0               ; row 2, col 0 <- Interp5 invoked with [c8] and [c4]
+    Interp5 [edi+ebx*2],[c8],[c4]
+%endmacro
+
+%macro PIXEL20_C 0               ; row 2, col 0 <- eax, stored without interpolation
+    mov [edi+ebx*2],eax
+%endmacro
+
+%macro PIXEL21_1 0               ; output row 2, col 1 (dword at [edi+ebx*2+4]) <- Interp1 of eax and [c8]
+    Interp1 [edi+ebx*2+4],eax,[c8] ; ebx is loaded from [ebp+pitch] at .switch before the FuncTable dispatch; eax = _LUT16to32[w5]
+%endmacro
+
+%macro PIXEL21_3 0               ; row 2, col 1 <- Interp3 invoked with [c8]
+    Interp3 [edi+ebx*2+4],[c8]
+%endmacro
+
+%macro PIXEL21_6 0               ; row 2, col 1 <- Interp1 with the source operands of PIXEL21_1 swapped
+    Interp1 [edi+ebx*2+4],[c8],eax
+%endmacro
+
+%macro PIXEL21_C 0               ; row 2, col 1 <- eax, stored without interpolation
+    mov [edi+ebx*2+4],eax
+%endmacro
+
+%macro PIXEL22_1M 0              ; output row 2, col 2 (dword at [edi+ebx*2+8]) <- Interp1 of eax and [c9]
+    Interp1 [edi+ebx*2+8],eax,[c9] ; ebx is loaded from [ebp+pitch] at .switch before the FuncTable dispatch; eax = _LUT16to32[w5], [cN] = _LUT16to32[wN]
+%endmacro
+
+%macro PIXEL22_1D 0              ; row 2, col 2 <- Interp1 of eax and [c8]
+    Interp1 [edi+ebx*2+8],eax,[c8]
+%endmacro
+
+%macro PIXEL22_1R 0              ; row 2, col 2 <- Interp1 of eax and [c6]
+    Interp1 [edi+ebx*2+8],eax,[c6]
+%endmacro
+
+%macro PIXEL22_2 0               ; row 2, col 2 <- Interp2 of eax, [c6] and [c8]
+    Interp2 [edi+ebx*2+8],eax,[c6],[c8]
+%endmacro
+
+%macro PIXEL22_4 0               ; row 2, col 2 <- Interp4 invoked with [c6] and [c8]
+    Interp4 [edi+ebx*2+8],[c6],[c8]
+%endmacro
+
+%macro PIXEL22_5 0               ; row 2, col 2 <- Interp5 invoked with [c6] and [c8]
+    Interp5 [edi+ebx*2+8],[c6],[c8]
+%endmacro
+
+%macro PIXEL22_C 0               ; row 2, col 2 <- eax, stored without interpolation
+    mov [edi+ebx*2+8],eax
+%endmacro
+
+inbuffer     equ 8               ; ebp-relative offset of an argument of _hq3x_32; [ebp+inbuffer] is loaded into esi at entry
+outbuffer    equ 12              ; [ebp+outbuffer] is loaded into edi at entry
+Xres         equ 16              ; [ebp+Xres]: doubled to form [nextline]; Xres-2 seeds [xcounter] at the start of each .loopy pass
+Yres         equ 20              ; [ebp+Yres] is copied to [linesleft] at entry
+pitch        equ 24              ; [ebp+pitch] is loaded into ebx at .switch, where the PIXEL1x/PIXEL2x macros use it as the output row step
+
+_hq3x_32:
+    push ebp
+    mov ebp,esp
+    pushad
+
+    mov     esi,[ebp+inbuffer]
+    mov     edi,[ebp+outbuffer]
+    mov     edx,[ebp+Yres]
+    mov     [linesleft],edx
+    mov     ebx,[ebp+Xres]
+    shl     ebx,1
+    mov     dword[prevline],0
+    mov     dword[nextline],ebx
+.loopy
+    mov     ecx,[ebp+Xres]
+    sub     ecx,2                 ; x={Xres-2, Xres-1} are special cases.
+    mov     dword[xcounter],ecx
+    ; x=0 - special case
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx]
+    movq    mm6,[esi]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx]
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w1],edx
+    mov     [w2],edx
+    shr     eax,16
+    mov     [w3],eax
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx
+    mov     [w5],edx
+    shr     eax,16
+    mov     [w6],eax
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx
+    mov     [w8],edx
+    shr     eax,16
+    mov     [w9],eax
+    jmp     .flags
+.loopx
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-2]
+    movq    mm6,[esi-2]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-2]
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w1],edx
+    shr     eax,16
+    mov     [w2],eax
+    psrlq   mm5,32
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w3],edx
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx
+    shr     eax,16
+    mov     [w5],eax
+    psrlq   mm6,32
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w6],edx
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx
+    shr     eax,16
+    mov     [w8],eax
+    psrlq   mm7,32
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w9],edx
+.flags
+    mov     ebx,_RGBtoYUV
+    mov     eax,[w5]
+    xor     ecx,ecx
+    movd    mm5,[ebx+eax*4]
+    mov     dword[cross],0
+
+    mov     edx,[w2]
+    cmp     eax,edx
+    je      .noflag2
+    or      dword[cross],1
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag2
+    or      ecx,2
+.noflag2
+    mov     edx,[w4]
+    cmp     eax,edx
+    je      .noflag4
+    or      dword[cross],2
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag4
+    or      ecx,8
+.noflag4
+    mov     edx,[w6]
+    cmp     eax,edx
+    je      .noflag6
+    or      dword[cross],4
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag6
+    or      ecx,16
+.noflag6
+    mov     edx,[w8]
+    cmp     eax,edx
+    je      .noflag8
+    or      dword[cross],8
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag8
+    or      ecx,64
+.noflag8
+    test    ecx,ecx
+    jnz     .testflag1
+    mov     ecx,[cross]
+    mov     ebx,_LUT16to32
+    mov     eax,[ebx+eax*4]
+    jmp     [FuncTable2+ecx*4]
+.testflag1
+    mov     edx,[w1]
+    cmp     eax,edx
+    je      .noflag1
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag1
+    or      ecx,1
+.noflag1
+    mov     edx,[w3]
+    cmp     eax,edx
+    je      .noflag3
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag3
+    or      ecx,4
+.noflag3
+    mov     edx,[w7]
+    cmp     eax,edx
+    je      .noflag7
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag7
+    or      ecx,32
+.noflag7
+    mov     edx,[w9]
+    cmp     eax,edx
+    je      .noflag9
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag9
+    or      ecx,128
+.noflag9
+    mov  ebx,_LUT16to32
+    mov  eax,[ebx+eax*4]
+    mov  edx,[w2]
+    mov  edx,[ebx+edx*4]
+    mov  [c2],edx
+    mov  edx,[w4]
+    mov  edx,[ebx+edx*4]
+    mov  [c4],edx
+    mov  edx,[w6]
+    mov  edx,[ebx+edx*4]
+    mov  [c6],edx
+    mov  edx,[w8]
+    mov  edx,[ebx+edx*4]
+    mov  [c8],edx
+    test ecx,0x005A
+    jz  .switch
+    mov  edx,[w1]
+    mov  edx,[ebx+edx*4]
+    mov  [c1],edx
+    mov  edx,[w3]
+    mov  edx,[ebx+edx*4]
+    mov  [c3],edx
+    mov  edx,[w7]
+    mov  edx,[ebx+edx*4]
+    mov  [c7],edx
+    mov  edx,[w9]
+    mov  edx,[ebx+edx*4]
+    mov  [c9],edx
+.switch
+    mov  ebx,[ebp+pitch]
+    jmp  [FuncTable+ecx*4]
+
+..@flag0
+..@flag1
+..@flag4
+..@flag32
+..@flag128
+..@flag5
+..@flag132
+..@flag160
+..@flag33
+..@flag129
+..@flag36
+..@flag133
+..@flag164
+..@flag161
+..@flag37
+..@flag165
+;    PIXEL00_2
+;    PIXEL01_1
+;    PIXEL02_2
+;    PIXEL10_1
+;    PIXEL11
+;    PIXEL12_1
+;    PIXEL20_2
+;    PIXEL21_1
+;    PIXEL22_2
+
+;   the same, only optimized
+    mov ecx,eax
+    shl ecx,1
+    add ecx,[c2]
+    mov edx,ecx
+    add edx,[c4]
+    shr edx,2
+    mov [edi],edx
+    mov edx,ecx
+    add edx,eax
+    shr edx,2
+    mov [edi+4],edx
+    add ecx,[c6]
+    shr ecx,2
+    mov [edi+8],ecx
+    mov ecx,eax
+    shl ecx,2
+    sub ecx,eax
+    mov edx,ecx
+    add edx,[c4]
+    shr edx,2
+    mov [edi+ebx],edx
+    mov [edi+ebx+4],eax
+    add ecx,[c6]
+    shr ecx,2
+    mov [edi+ebx+8],ecx
+    mov ecx,eax
+    shl ecx,1
+    add ecx,[c8]
+    mov edx,ecx
+    add edx,[c4]
+    shr edx,2
+    mov [edi+ebx*2],edx
+    mov edx,ecx
+    add edx,eax
+    shr edx,2
+    mov [edi+ebx*2+4],edx
+    add ecx,[c6]
+    shr ecx,2
+    mov [edi+ebx*2+8],ecx
+    jmp .loopx_end
+..@flag2
+..@flag34
+..@flag130
+..@flag162
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag16
+..@flag17
+..@flag48
+..@flag49
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag64
+..@flag65
+..@flag68
+..@flag69
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag8
+..@flag12
+..@flag136
+..@flag140
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag3
+..@flag35
+..@flag131
+..@flag163
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag6
+..@flag38
+..@flag134
+..@flag166
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag20
+..@flag21
+..@flag52
+..@flag53
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag144
+..@flag145
+..@flag176
+..@flag177
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag192
+..@flag193
+..@flag196
+..@flag197
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag96
+..@flag97
+..@flag100
+..@flag101
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag40
+..@flag44
+..@flag168
+..@flag172
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag9
+..@flag13
+..@flag137
+..@flag141
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag18
+..@flag50
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag80
+..@flag81
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag72
+..@flag76
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag10
+..@flag138
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag66
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag24
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag7
+..@flag39
+..@flag135
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag148
+..@flag149
+..@flag180
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag224
+..@flag228
+..@flag225
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag41
+..@flag169
+..@flag45
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag22
+..@flag54
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag208
+..@flag209
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag104
+..@flag108
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag11
+..@flag139
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag19
+..@flag51
+    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag146
+..@flag178
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    jmp .loopx_end
+..@flag84
+..@flag85
+    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    jmp .loopx_end
+..@flag112
+..@flag113
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_1M,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    jmp .loopx_end
+..@flag200
+..@flag204
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    jmp .loopx_end
+..@flag73
+..@flag77
+    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag42
+..@flag170
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag14
+..@flag142
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag67
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag70
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag28
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag152
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag194
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag98
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag56
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag25
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag26
+..@flag31
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag82
+..@flag214
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag88
+..@flag248
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    jmp .loopx_end
+..@flag74
+..@flag107
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag27
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag86
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag216
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag106
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag30
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag210
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag120
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag75
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag29
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag198
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag184
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag99
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag57
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag71
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag156
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag226
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag60
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag195
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag102
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag153
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag58
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag83
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag92
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag202
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag78
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag154
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag114
+    PIXEL00_1M
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag89
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag90
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag55
+..@flag23
+    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag182
+..@flag150
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_2
+    PIXEL21_1
+    jmp .loopx_end
+..@flag213
+..@flag212
+    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    jmp .loopx_end
+..@flag241
+..@flag240
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    jmp .loopx_end
+..@flag236
+..@flag232
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    jmp .loopx_end
+..@flag109
+..@flag105
+    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL11
+    PIXEL12_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag171
+..@flag43
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag143
+..@flag15
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag124
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag203
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag62
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag211
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag118
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag217
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag110
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag155
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag188
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag185
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag61
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag157
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag103
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag227
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag230
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag199
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag220
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag158
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag234
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag242
+    PIXEL00_1M
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1L
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag59
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag121
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag87
+    PIXEL00_1L
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag79
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1R
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag122
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag94
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag218
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag91
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag229
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag167
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag173
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag181
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag186
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag115
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag93
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag206
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag205
+..@flag201
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag174
+..@flag46
+    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag179
+..@flag147
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag117
+..@flag116
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    jmp .loopx_end
+..@flag189
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag231
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag126
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag219
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    PIXEL02_1M
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag125
+    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL11
+    PIXEL12_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag221
+    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1M
+    jmp .loopx_end
+..@flag207
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag238
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL11
+    PIXEL12_1
+    jmp .loopx_end
+..@flag190
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    PIXEL00_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1D
+    PIXEL21_1
+    jmp .loopx_end
+..@flag187
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    PIXEL02_1M
+    PIXEL11
+    PIXEL12_C
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag243
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    PIXEL00_1L
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL10_1
+    PIXEL11
+    jmp .loopx_end
+..@flag119
+    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    PIXEL10_1
+    PIXEL11
+    PIXEL20_1L
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag237
+..@flag233
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag175
+..@flag47
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_2
+    jmp .loopx_end
+..@flag183
+..@flag151
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_2
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag245
+..@flag244
+    PIXEL00_2
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag250
+    PIXEL00_1M
+    PIXEL01_C
+    PIXEL02_1M
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    jmp .loopx_end
+..@flag123
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag95
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    PIXEL11
+    PIXEL20_1M
+    PIXEL21_C
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag222
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag252
+    PIXEL00_1M
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag249
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    jmp .loopx_end
+..@flag235
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    PIXEL02_1M
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag111
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag63
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    PIXEL10_C
+    PIXEL11
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag159
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag215
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag246
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag254
+    PIXEL00_1M
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_2
+    jmp .loopx_end
+..@flag253
+    PIXEL00_1U
+    PIXEL01_1
+    PIXEL02_1U
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag251
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    PIXEL02_1M
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_2,PIXEL21_3
+    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    jmp .loopx_end
+..@flag239
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    PIXEL02_1R
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_1
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    PIXEL22_1R
+    jmp .loopx_end
+..@flag127
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_2,PIXEL01_3,PIXEL10_3
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    PIXEL11
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    PIXEL22_1M
+    jmp .loopx_end
+..@flag191
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1D
+    PIXEL21_1
+    PIXEL22_1D
+    jmp .loopx_end
+..@flag223
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_2,PIXEL12_3
+    PIXEL11
+    PIXEL20_1M
+    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    jmp .loopx_end
+..@flag247
+    PIXEL00_1L
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_1
+    PIXEL11
+    PIXEL12_C
+    PIXEL20_1L
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+..@flag255
+    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    PIXEL01_C
+    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    PIXEL10_C
+    PIXEL11
+    PIXEL12_C
+    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    PIXEL21_C
+    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    jmp .loopx_end
+
+..@cross0
+    mov     ebx,[ebp+pitch]
+    mov     [edi],eax
+    mov     [edi+4],eax
+    mov     [edi+8],eax
+    mov     [edi+ebx],eax
+    mov     [edi+ebx+4],eax
+    mov     [edi+ebx+8],eax
+    mov     [edi+ebx*2],eax
+    mov     [edi+ebx*2+4],eax
+    mov     [edi+ebx*2+8],eax
+    jmp     .loopx_end
+..@cross1
+    mov     ecx,[w2] 
+    mov     edx,eax
+    shl     edx,2
+    add     edx,[ebx+ecx*4]
+    sub     edx,eax
+    shr     edx,2
+    mov     ebx,[ebp+pitch]
+    mov     [edi],edx
+    mov     [edi+4],edx
+    mov     [edi+8],edx
+    mov     [edi+ebx],eax
+    mov     [edi+ebx+4],eax
+    mov     [edi+ebx+8],eax
+    mov     [edi+ebx*2],eax
+    mov     [edi+ebx*2+4],eax
+    mov     [edi+ebx*2+8],eax
+    jmp     .loopx_end
+..@cross2
+    mov     ecx,[w4]
+    mov     edx,eax
+    shl     edx,2
+    add     edx,[ebx+ecx*4]
+    sub     edx,eax
+    shr     edx,2
+    mov     ebx,[ebp+pitch]
+    mov     [edi],edx
+    mov     [edi+4],eax
+    mov     [edi+8],eax
+    mov     [edi+ebx],edx
+    mov     [edi+ebx+4],eax
+    mov     [edi+ebx+8],eax
+    mov     [edi+ebx*2],edx
+    mov     [edi+ebx*2+4],eax
+    mov     [edi+ebx*2+8],eax
+    jmp     .loopx_end
+..@cross4
+    mov     ecx,[w6]
+    mov     edx,eax
+    shl     edx,2
+    add     edx,[ebx+ecx*4]
+    sub     edx,eax
+    shr     edx,2
+    mov     ebx,[ebp+pitch]
+    mov     [edi],eax
+    mov     [edi+4],eax
+    mov     [edi+8],edx
+    mov     [edi+ebx],eax
+    mov     [edi+ebx+4],eax
+    mov     [edi+ebx+8],edx
+    mov     [edi+ebx*2],eax
+    mov     [edi+ebx*2+4],eax
+    mov     [edi+ebx*2+8],edx
+    jmp     .loopx_end
+..@cross8
+    mov     ecx,[w8]
+    mov     edx,eax
+    shl     edx,2
+    add     edx,[ebx+ecx*4]
+    sub     edx,eax
+    shr     edx,2
+    mov     ebx,[ebp+pitch]
+    mov     [edi],eax
+    mov     [edi+4],eax
+    mov     [edi+8],eax
+    mov     [edi+ebx],eax
+    mov     [edi+ebx+4],eax
+    mov     [edi+ebx+8],eax
+    mov     [edi+ebx*2],edx
+    mov     [edi+ebx*2+4],edx
+    mov     [edi+ebx*2+8],edx
+    jmp     .loopx_end
+..@crossN
+    mov     edx,[w2]
+    mov     ecx,[ebx+edx*4]
+    mov     [c2],ecx
+    mov     edx,[w4]
+    mov     ecx,[ebx+edx*4]
+    mov     [c4],ecx
+    mov     edx,[w6]
+    mov     ecx,[ebx+edx*4]
+    mov     [c6],ecx
+    mov     edx,[w8]
+    mov     ecx,[ebx+edx*4]
+    mov     [c8],ecx
+    mov     ebx,[ebp+pitch]
+    jmp     ..@flag0
+
+.loopx_end
+    add     esi,2
+    add     edi,12
+    dec     dword[xcounter]
+    jle     .xres_2
+    jmp     .loopx
+.xres_2
+    ; x=Xres-2 - special case
+    jl      .xres_1
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-4]
+    movq    mm6,[esi-4]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-4]
+    psrlq   mm5,16
+    psrlq   mm6,16
+    psrlq   mm7,16
+    movd    eax,mm5
+    movzx   edx,ax
+    mov     [w1],edx
+    shr     eax,16
+    mov     [w2],eax
+    psrlq   mm5,32
+    movd    eax,mm5
+    mov     [w3],eax
+    movd    eax,mm6
+    movzx   edx,ax
+    mov     [w4],edx
+    shr     eax,16
+    mov     [w5],eax
+    psrlq   mm6,32
+    movd    eax,mm6
+    mov     [w6],eax
+    movd    eax,mm7
+    movzx   edx,ax
+    mov     [w7],edx
+    shr     eax,16
+    mov     [w8],eax
+    psrlq   mm7,32
+    movd    eax,mm7
+    mov     [w9],eax
+    jmp     .flags
+.xres_1
+    cmp     dword[xcounter],-1
+    jl      .nexty
+    ; x=Xres-1 - special case
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-6]
+    movq    mm6,[esi-6]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-6]
+    psrlq   mm5,32
+    psrlq   mm6,32
+    psrlq   mm7,32
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w1],edx
+    shr     eax,16
+    mov     [w2],eax
+    mov     [w3],eax
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx
+    shr     eax,16
+    mov     [w5],eax
+    mov     [w6],eax
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx
+    shr     eax,16
+    mov     [w8],eax
+    mov     [w9],eax
+    jmp     .flags
+.nexty
+    add     edi,ebx
+    add     edi,ebx
+    dec     dword[linesleft]
+    jz      .fin
+    mov     ebx,[ebp+Xres]
+    shl     ebx,1
+    cmp     dword[linesleft],1
+    je      .lastline
+    mov     dword[nextline],ebx
+    neg     ebx
+    mov     dword[prevline],ebx
+    jmp     .loopy
+.lastline
+    mov     dword[nextline],0
+    neg     ebx
+    mov     dword[prevline],ebx
+    jmp     .loopy
+.fin
+    emms
+    popad
+    mov esp,ebp
+    pop ebp
+    ret
+
+SECTION .data
+FuncTable   ; 256 dword code addresses; entry N is the label ..@flagN (the dispatch site is not visible in this chunk)
+    dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
+    dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
+    dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
+    dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
+    dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
+    dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
+    dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
+    dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
+    dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
+    dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
+    dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
+    dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
+    dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
+    dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
+    dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
+    dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
+    dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
+    dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
+    dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
+    dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
+    dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
+    dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
+    dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
+    dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
+    dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
+    dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
+    dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
+    dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
+    dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
+    dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
+    dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
+    dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+
+FuncTable2   ; 16 dword code addresses; indices 0, 1, 2, 4 and 8 have their own ..@crossK handler, every other index goes to ..@crossN
+    dd ..@cross0, ..@cross1, ..@cross2, ..@crossN,   ; NOTE(review): this row and the next two end in a trailing comma - confirm the NASM version in use accepts that silently
+    dd ..@cross4, ..@crossN, ..@crossN, ..@crossN,
+    dd ..@cross8, ..@crossN, ..@crossN, ..@crossN,
+    dd ..@crossN, ..@crossN, ..@crossN, ..@crossN
+
diff --git a/od-win32/hq4x16.asm b/od-win32/hq4x16.asm
new file mode 100755 (executable)
index 0000000..cef36aa
--- /dev/null
@@ -0,0 +1,3952 @@
+;hq4x filter
+;16bpp output
+;----------------------------------------------------------
+;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
+;
+;This program is free software; you can redistribute it and/or
+;modify it under the terms of the GNU General Public License
+;as published by the Free Software Foundation; either
+;version 2 of the License, or (at your option) any later
+;version.
+;
+;This program is distributed in the hope that it will be useful,
+;but WITHOUT ANY WARRANTY; without even the implied warranty of
+;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;GNU General Public License for more details.
+;
+;You should have received a copy of the GNU General Public License
+;along with this program; if not, write to the Free Software
+;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+GLOBAL _hq4x_16   ; exports the routine defined at the _hq4x_16 label in this file
+
+EXTERN _LUT16to32   ; external table of dword entries indexed by pixel value (read as [table+pixel*4] in Interp3/6/7/8)
+EXTERN _RGBtoYUV   ; external table of dword entries indexed by pixel value (read as [table+pixel*4] in TestDiff and .flags)
+
+SECTION .bss
+linesleft resd 1   ; initialised from the Yres argument at entry to _hq4x_16
+xcounter  resd 1   ; set to Xres-2 at the top of every row (.loopy)
+cross     resd 1   ; cleared to 0 in .flags before the neighbour comparisons
+nextline  resd 1   ; byte offset added to esi to read the following input row (Xres*2 at entry)
+prevline  resd 1   ; byte offset added to esi to read the preceding input row (0 at entry, so the first row reads itself)
+w1        resd 1   ; w1..w3: zero-extended 16-bit pixels read at the prevline offset (left, centre, right)
+w2        resd 1
+w3        resd 1
+w4        resd 1   ; w4..w6: pixels of the current row; w5 is the centre pixel every PIXELxx_0 macro copies
+w5        resd 1
+w6        resd 1
+w7        resd 1   ; w7..w9: pixels read at the nextline offset (left, centre, right)
+w8        resd 1
+w9        resd 1
+
+SECTION .data
+
+reg_blank    dd  0,0   ; eight zero bytes; memory operand for punpcklbw/packuswb in Interp3/6/7/8
+const3       dd  0x00030003,0x00000003   ; words {3,3,3,0}: pmullw multiplier used by Interp8
+const5       dd  0x00050005,0x00000005   ; words {5,5,5,0}: pmullw multiplier used by Interp6 and Interp8
+const6       dd  0x00060006,0x00000006   ; words {6,6,6,0}: pmullw multiplier used by Interp7
+const7       dd  0x00070007,0x00000007   ; words {7,7,7,0}: pmullw multiplier used by Interp3
+threshold    dd  0x00300706,0x00000000   ; per-byte limits (0x06, 0x07, 0x30, 0) subtracted with saturation in TestDiff; presumably the V/U/Y tolerances - confirm against the _RGBtoYUV layout
+zerolowbits  dd  0xF7DEF7DE   ; mask that clears bits 0, 5 and 11 of each 16-bit half; applied before the add/shr averaging in Interp1/2/5
+
+SECTION .text
+
+%macro AUXADDRESS 0   ; sets ecx = edi + 2*ebx, the base address the PIXEL2x/PIXEL3x macros store through
+    mov     ecx, edi
+    add     ecx, ebx   ; two adds rather than one scaled operand; note they also update the flags
+    add     ecx, ebx
+%endmacro
+
+%macro TestDiff 2   ; compares pixels [%1] and [%2]: leaves edx == 0 if they are equal or every _RGBtoYUV byte differs by no more than its threshold, non-zero otherwise; overwrites edx, mm1, mm2 and mm5
+    mov     edx,[%1]
+    sub     edx,[%2]
+    jz      %%fin   ; identical pixel values: edx is already 0, skip the table lookups
+    mov     edx,[%1]
+    shl     edx,2   ; pixel value * 4 = byte offset of its dword table entry
+    add     edx,_RGBtoYUV
+    movd    mm1,[edx]
+    movq    mm5,mm1   ; keep a copy of the first entry for the reverse subtraction below
+    mov     edx,[%2]
+    shl     edx,2
+    add     edx,_RGBtoYUV
+    movd    mm2,[edx]
+    psubusb mm1,mm2   ; saturating a-b per byte ...
+    psubusb mm2,mm5   ; ... and b-a per byte
+    por     mm1,mm2   ; one of the two is 0 for each byte, so the OR is the per-byte absolute difference
+    psubusb mm1,[threshold]   ; bytes at or below their threshold saturate to 0
+    movd    edx,mm1   ; low dword of the result; non-zero means at least one byte exceeded its threshold
+%%fin:
+%endmacro
+
+%macro DiffOrNot 4   ; runs TestDiff %1,%2, then expands %3 when the pixels differ or %4 when they do not
+   TestDiff %1,%2
+   test edx,edx   ; TestDiff leaves edx == 0 for "same"
+   jz   %%same
+   %3
+   jmp %%fin
+%%same:
+   %4
+%%fin
+%endmacro
+
+%macro DiffOrNot 8   ; eight-parameter overload: %3..%5 are expanded when [%1] and [%2] differ, %6..%8 when they do not
+   TestDiff %1,%2
+   test edx,edx   ; TestDiff leaves edx == 0 for "same"
+   jz   %%same
+   %3
+   %4
+   %5
+   jmp %%fin
+%%same:
+   %6
+   %7
+   %8
+%%fin
+%endmacro
+
+%macro DiffOrNot 10   ; ten-parameter overload: %3..%6 are expanded when [%1] and [%2] differ, %7..%10 when they do not
+   TestDiff %1,%2
+   test edx,edx   ; TestDiff leaves edx == 0 for "same"
+   jz %%same
+   %3
+   %4
+   %5
+   %6
+   jmp %%fin
+%%same:
+   %7
+   %8
+   %9
+   %10
+%%fin
+%endmacro
+
+%macro DiffOrNot 14   ; fourteen-parameter overload: %3..%8 are expanded when [%1] and [%2] differ, %9..%14 when they do not
+   TestDiff %1,%2
+   test edx,edx   ; TestDiff leaves edx == 0 for "same"
+   jz %%same
+   %3
+   %4
+   %5
+   %6
+   %7
+   %8
+   jmp %%fin
+%%same:
+   %9
+   %10
+   %11
+   %12
+   %13
+   %14
+%%fin
+%endmacro
+
+%macro Interp1 3   ; %1 = 16-bit destination, %2 and %3 = source pixels; stores %2 unchanged when the two are equal, otherwise a blend weighted toward %2 (roughly 3:1); overwrites eax and edx
+    mov edx,%2
+    mov eax,%3
+    cmp edx,eax
+    je  %%fin   ; equal inputs: edx already holds the value to store
+    and edx,[zerolowbits]   ; clear bits 0/5/11 of both pixels before the add/shr below
+    and eax,[zerolowbits]
+    add eax,edx
+    shr eax,1   ; eax = average of the two masked pixels
+    add eax,0x0821   ; add 1 at bits 0, 5 and 11 (the bits the mask clears) before masking again
+    and eax,[zerolowbits]
+    add edx,eax
+    shr edx,1   ; edx = average of masked %2 and the first average
+%%fin
+    mov %1,dx
+%endmacro
+
+%macro Interp2 4   ; %1 = 16-bit destination; first averages %3 with %4, then averages %2 with that result (roughly 2:1:1); overwrites eax and edx
+    mov edx,%3
+    mov eax,%4
+    cmp edx,eax
+    je  %%fin1   ; %3 == %4: eax already holds their "average"
+    and edx,[zerolowbits]   ; clear bits 0/5/11 of both pixels before the add/shr below
+    and eax,[zerolowbits]
+    add eax,edx
+    shr eax,1   ; eax = average of masked %3 and %4
+    add eax,0x0821   ; add 1 at bits 0, 5 and 11; masked off again below unless the je to %%fin2 is taken
+%%fin1
+    mov edx,%2
+    cmp edx,eax
+    je  %%fin2   ; %2 equals the intermediate value: store it as is
+    and eax,[zerolowbits]
+    and edx,[zerolowbits]
+    add edx,eax
+    shr edx,1   ; edx = average of masked %2 and the intermediate value
+%%fin2
+    mov %1,dx
+%endmacro
+
+%macro Interp3 3   ; %1 = 16-bit destination; blends the _LUT16to32 entries of %2 and %3 with weights 7:1 per channel; overwrites eax, edx, mm1 and mm2
+    mov        eax, _LUT16to32
+    mov        edx, %2
+    movd       mm1, [eax+edx*4]   ; dword table entry for pixel %2
+    mov        edx, %3
+    movd       mm2, [eax+edx*4]   ; dword table entry for pixel %3
+    punpcklbw  mm1, [reg_blank]   ; widen each byte to a word so the multiply cannot overflow a channel
+    punpcklbw  mm2, [reg_blank]
+    pmullw     mm1, [const7]
+    paddw      mm1, mm2   ; per channel: 7*a + b
+    psrlw      mm1, 5   ; divide by 32: by 8 for the weight sum, and a further 4 (presumably reducing 8-bit channels to 6 bits before repacking)
+    packuswb   mm1, [reg_blank]
+    movd       edx, mm1
+    shl        dl,  2   ; this and the next three shifts squeeze bytes 0..2 of edx into a 5/6/5-bit 16-bit word in dx
+    shr        edx, 1
+    shl        dx,  3
+    shr        edx, 5
+    mov        %1,  dx
+%endmacro
+
+%macro Interp5 3   ; %1 = 16-bit destination; stores the plain average of pixels %2 and %3 (or %2 itself when they are equal); overwrites eax and edx
+    mov edx,%2
+    mov eax,%3
+    cmp edx,eax
+    je  %%fin   ; equal inputs: nothing to average
+    and edx,[zerolowbits]   ; clear bits 0/5/11 of both pixels before the add/shr below
+    and eax,[zerolowbits]
+    add edx,eax
+    shr edx,1
+%%fin
+    mov %1,dx
+%endmacro
+
+%macro Interp6 4   ; %1 = 16-bit destination; blends the _LUT16to32 entries of %2, %3 and %4 with weights 5:2:1 per channel; overwrites eax, edx, mm1, mm2 and mm3
+    mov        eax, _LUT16to32
+    mov        edx, %2
+    movd       mm1, [eax+edx*4]
+    mov        edx, %3
+    movd       mm2, [eax+edx*4]
+    mov        edx, %4
+    movd       mm3, [eax+edx*4]
+    punpcklbw  mm1, [reg_blank]   ; widen each byte to a word before the arithmetic
+    punpcklbw  mm2, [reg_blank]
+    punpcklbw  mm3, [reg_blank]
+    pmullw     mm1, [const5]   ; 5*a
+    psllw      mm2, 1   ; 2*b
+    paddw      mm1, mm3
+    paddw      mm1, mm2   ; per channel: 5*a + 2*b + c
+    psrlw      mm1, 5   ; divide by 32: by 8 for the weight sum, and a further 4 (presumably reducing 8-bit channels to 6 bits before repacking)
+    packuswb   mm1, [reg_blank]
+    movd       edx, mm1
+    shl        dl,  2   ; this and the next three shifts squeeze bytes 0..2 of edx into a 5/6/5-bit 16-bit word in dx
+    shr        edx, 1
+    shl        dx,  3
+    shr        edx, 5
+    mov        %1,  dx
+%endmacro
+
+%macro Interp7 4   ; %1 = 16-bit destination; blends the _LUT16to32 entries of %2, %3 and %4 with weights 6:1:1 per channel; overwrites eax, edx, mm1, mm2 and mm3
+    mov        eax, _LUT16to32
+    mov        edx, %2
+    movd       mm1, [eax+edx*4]
+    mov        edx, %3
+    movd       mm2, [eax+edx*4]
+    mov        edx, %4
+    movd       mm3, [eax+edx*4]
+    punpcklbw  mm1, [reg_blank]   ; widen each byte to a word before the arithmetic
+    punpcklbw  mm2, [reg_blank]
+    punpcklbw  mm3, [reg_blank]
+    pmullw     mm1, [const6]   ; 6*a
+    paddw      mm2, mm3   ; b + c
+    paddw      mm1, mm2   ; per channel: 6*a + b + c
+    psrlw      mm1, 5   ; divide by 32: by 8 for the weight sum, and a further 4 (presumably reducing 8-bit channels to 6 bits before repacking)
+    packuswb   mm1, [reg_blank]
+    movd       edx, mm1
+    shl        dl,  2   ; this and the next three shifts squeeze bytes 0..2 of edx into a 5/6/5-bit 16-bit word in dx
+    shr        edx, 1
+    shl        dx,  3
+    shr        edx, 5
+    mov        %1,  dx
+%endmacro
+
+%macro Interp8 3   ; %1 = 16-bit destination; blends the _LUT16to32 entries of %2 and %3 with weights 5:3 per channel; overwrites eax, edx, mm1 and mm2
+    mov        eax, _LUT16to32
+    mov        edx, %2
+    movd       mm1, [eax+edx*4]
+    mov        edx, %3
+    movd       mm2, [eax+edx*4]
+    punpcklbw  mm1, [reg_blank]   ; widen each byte to a word before the arithmetic
+    punpcklbw  mm2, [reg_blank]
+    pmullw     mm1, [const5]   ; 5*a
+    pmullw     mm2, [const3]   ; 3*b
+    paddw      mm1, mm2
+    psrlw      mm1, 5   ; divide by 32: by 8 for the weight sum, and a further 4 (presumably reducing 8-bit channels to 6 bits before repacking)
+    packuswb   mm1, [reg_blank]
+    movd       edx, mm1
+    shl        dl,  2   ; this and the next three shifts squeeze bytes 0..2 of edx into a 5/6/5-bit 16-bit word in dx
+    shr        edx, 1
+    shl        dx,  3
+    shr        edx, 5
+    mov        %1,  dx
+%endmacro
+
+%macro PIXEL00_0 0   ; PIXEL00_* family: every variant stores one 16-bit pixel to [edi]; _0 copies w5 unblended, the others blend through the InterpN macro named in their body
+    mov eax,[w5]
+    mov [edi],ax
+%endmacro
+
+%macro PIXEL00_11 0
+    Interp1 [edi],[w5],[w4]
+%endmacro
+
+%macro PIXEL00_12 0
+    Interp1 [edi],[w5],[w2]
+%endmacro
+
+%macro PIXEL00_20 0
+    Interp2 [edi],[w5],[w2],[w4]
+%endmacro
+
+%macro PIXEL00_50 0   ; the only variant here that does not involve w5: plain average of w2 and w4
+    Interp5 [edi],[w2],[w4]
+%endmacro
+
+%macro PIXEL00_80 0
+    Interp8 [edi],[w5],[w1]
+%endmacro
+
+%macro PIXEL00_81 0
+    Interp8 [edi],[w5],[w4]
+%endmacro
+
+%macro PIXEL00_82 0
+    Interp8 [edi],[w5],[w2]
+%endmacro
+
+%macro PIXEL01_0 0   ; PIXEL01_* family: every variant stores one 16-bit pixel to [edi+2]; _0 copies w5 unblended, the others blend through the InterpN macro named in their body
+    mov eax,[w5]
+    mov [edi+2],ax
+%endmacro
+
+%macro PIXEL01_10 0
+    Interp1 [edi+2],[w5],[w1]
+%endmacro
+
+%macro PIXEL01_12 0
+    Interp1 [edi+2],[w5],[w2]
+%endmacro
+
+%macro PIXEL01_14 0   ; operands swapped relative to _12, so the blend is weighted toward w2 instead of w5
+    Interp1 [edi+2],[w2],[w5]
+%endmacro
+
+%macro PIXEL01_21 0
+    Interp2 [edi+2],[w2],[w5],[w4]
+%endmacro
+
+%macro PIXEL01_31 0
+    Interp3 [edi+2],[w5],[w4]
+%endmacro
+
+%macro PIXEL01_50 0
+    Interp5 [edi+2],[w2],[w5]
+%endmacro
+
+%macro PIXEL01_60 0
+    Interp6 [edi+2],[w5],[w2],[w4]
+%endmacro
+
+%macro PIXEL01_61 0
+    Interp6 [edi+2],[w5],[w2],[w1]
+%endmacro
+
+%macro PIXEL01_82 0
+    Interp8 [edi+2],[w5],[w2]
+%endmacro
+
+%macro PIXEL01_83 0
+    Interp8 [edi+2],[w2],[w4]
+%endmacro
+
+%macro PIXEL02_0 0   ; PIXEL02_* family: every variant stores one 16-bit pixel to [edi+4]; _0 copies w5 unblended, the others blend through the InterpN macro named in their body
+    mov eax,[w5]
+    mov [edi+4],ax
+%endmacro
+
+%macro PIXEL02_10 0
+    Interp1 [edi+4],[w5],[w3]
+%endmacro
+
+%macro PIXEL02_11 0
+    Interp1 [edi+4],[w5],[w2]
+%endmacro
+
+%macro PIXEL02_13 0   ; operands swapped relative to _11, so the blend is weighted toward w2 instead of w5
+    Interp1 [edi+4],[w2],[w5]
+%endmacro
+
+%macro PIXEL02_21 0
+    Interp2 [edi+4],[w2],[w5],[w6]
+%endmacro
+
+%macro PIXEL02_32 0
+    Interp3 [edi+4],[w5],[w6]
+%endmacro
+
+%macro PIXEL02_50 0
+    Interp5 [edi+4],[w2],[w5]
+%endmacro
+
+%macro PIXEL02_60 0
+    Interp6 [edi+4],[w5],[w2],[w6]
+%endmacro
+
+%macro PIXEL02_61 0
+    Interp6 [edi+4],[w5],[w2],[w3]
+%endmacro
+
+%macro PIXEL02_81 0
+    Interp8 [edi+4],[w5],[w2]
+%endmacro
+
+%macro PIXEL02_83 0
+    Interp8 [edi+4],[w2],[w6]
+%endmacro
+
+%macro PIXEL03_0 0   ; PIXEL03_* family: every variant stores one 16-bit pixel to [edi+6]; _0 copies w5 unblended, the others blend through the InterpN macro named in their body
+    mov eax,[w5]
+    mov [edi+6],ax
+%endmacro
+
+%macro PIXEL03_11 0
+    Interp1 [edi+6],[w5],[w2]
+%endmacro
+
+%macro PIXEL03_12 0
+    Interp1 [edi+6],[w5],[w6]
+%endmacro
+
+%macro PIXEL03_20 0
+    Interp2 [edi+6],[w5],[w2],[w6]
+%endmacro
+
+%macro PIXEL03_50 0   ; the only variant here that does not involve w5: plain average of w2 and w6
+    Interp5 [edi+6],[w2],[w6]
+%endmacro
+
+%macro PIXEL03_80 0
+    Interp8 [edi+6],[w5],[w3]
+%endmacro
+
+%macro PIXEL03_81 0
+    Interp8 [edi+6],[w5],[w2]
+%endmacro
+
+%macro PIXEL03_82 0
+    Interp8 [edi+6],[w5],[w6]
+%endmacro
+
+%macro PIXEL10_0 0   ; PIXEL10_* family: every variant stores one 16-bit pixel to [edi+ebx] (assumes ebx holds the output row stride - confirm at the call sites); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [edi+ebx],ax
+%endmacro
+
+%macro PIXEL10_10 0
+    Interp1 [edi+ebx],[w5],[w1]
+%endmacro
+
+%macro PIXEL10_11 0
+    Interp1 [edi+ebx],[w5],[w4]
+%endmacro
+
+%macro PIXEL10_13 0   ; operands swapped relative to _11, so the blend is weighted toward w4 instead of w5
+    Interp1 [edi+ebx],[w4],[w5]
+%endmacro
+
+%macro PIXEL10_21 0
+    Interp2 [edi+ebx],[w4],[w5],[w2]
+%endmacro
+
+%macro PIXEL10_32 0
+    Interp3 [edi+ebx],[w5],[w2]
+%endmacro
+
+%macro PIXEL10_50 0
+    Interp5 [edi+ebx],[w4],[w5]
+%endmacro
+
+%macro PIXEL10_60 0
+    Interp6 [edi+ebx],[w5],[w4],[w2]
+%endmacro
+
+%macro PIXEL10_61 0
+    Interp6 [edi+ebx],[w5],[w4],[w1]
+%endmacro
+
+%macro PIXEL10_81 0
+    Interp8 [edi+ebx],[w5],[w4]
+%endmacro
+
+%macro PIXEL10_83 0
+    Interp8 [edi+ebx],[w4],[w2]
+%endmacro
+
+%macro PIXEL11_0 0   ; PIXEL11_* family: every variant stores one 16-bit pixel to [edi+ebx+2] (assumes ebx holds the output row stride - confirm at the call sites); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [edi+ebx+2],ax
+%endmacro
+
+%macro PIXEL11_30 0
+    Interp3 [edi+ebx+2],[w5],[w1]
+%endmacro
+
+%macro PIXEL11_31 0
+    Interp3 [edi+ebx+2],[w5],[w4]
+%endmacro
+
+%macro PIXEL11_32 0
+    Interp3 [edi+ebx+2],[w5],[w2]
+%endmacro
+
+%macro PIXEL11_70 0
+    Interp7 [edi+ebx+2],[w5],[w4],[w2]
+%endmacro
+
+%macro PIXEL12_0 0   ; PIXEL12_* family: every variant stores one 16-bit pixel to [edi+ebx+4] (assumes ebx holds the output row stride - confirm at the call sites); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [edi+ebx+4],ax
+%endmacro
+
+%macro PIXEL12_30 0
+    Interp3 [edi+ebx+4],[w5],[w3]
+%endmacro
+
+%macro PIXEL12_31 0
+    Interp3 [edi+ebx+4],[w5],[w2]
+%endmacro
+
+%macro PIXEL12_32 0
+    Interp3 [edi+ebx+4],[w5],[w6]
+%endmacro
+
+%macro PIXEL12_70 0
+    Interp7 [edi+ebx+4],[w5],[w6],[w2]
+%endmacro
+
+%macro PIXEL13_0 0   ; PIXEL13_* family: every variant stores one 16-bit pixel to [edi+ebx+6] (assumes ebx holds the output row stride - confirm at the call sites); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [edi+ebx+6],ax
+%endmacro
+
+%macro PIXEL13_10 0
+    Interp1 [edi+ebx+6],[w5],[w3]
+%endmacro
+
+%macro PIXEL13_12 0
+    Interp1 [edi+ebx+6],[w5],[w6]
+%endmacro
+
+%macro PIXEL13_14 0   ; operands swapped relative to _12, so the blend is weighted toward w6 instead of w5
+    Interp1 [edi+ebx+6],[w6],[w5]
+%endmacro
+
+%macro PIXEL13_21 0
+    Interp2 [edi+ebx+6],[w6],[w5],[w2]
+%endmacro
+
+%macro PIXEL13_31 0
+    Interp3 [edi+ebx+6],[w5],[w2]
+%endmacro
+
+%macro PIXEL13_50 0
+    Interp5 [edi+ebx+6],[w6],[w5]
+%endmacro
+
+%macro PIXEL13_60 0
+    Interp6 [edi+ebx+6],[w5],[w6],[w2]
+%endmacro
+
+%macro PIXEL13_61 0
+    Interp6 [edi+ebx+6],[w5],[w6],[w3]
+%endmacro
+
+%macro PIXEL13_82 0
+    Interp8 [edi+ebx+6],[w5],[w6]
+%endmacro
+
+%macro PIXEL13_83 0
+    Interp8 [edi+ebx+6],[w6],[w2]
+%endmacro
+
+%macro PIXEL20_0 0   ; PIXEL20_* family: every variant stores one 16-bit pixel to [ecx]; ecx must already hold the row base (AUXADDRESS computes edi+2*ebx); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [ecx],ax
+%endmacro
+
+%macro PIXEL20_10 0
+    Interp1 [ecx],[w5],[w7]
+%endmacro
+
+%macro PIXEL20_12 0
+    Interp1 [ecx],[w5],[w4]
+%endmacro
+
+%macro PIXEL20_14 0   ; operands swapped relative to _12, so the blend is weighted toward w4 instead of w5
+    Interp1 [ecx],[w4],[w5]
+%endmacro
+
+%macro PIXEL20_21 0
+    Interp2 [ecx],[w4],[w5],[w8]
+%endmacro
+
+%macro PIXEL20_31 0
+    Interp3 [ecx],[w5],[w8]
+%endmacro
+
+%macro PIXEL20_50 0
+    Interp5 [ecx],[w4],[w5]
+%endmacro
+
+%macro PIXEL20_60 0
+    Interp6 [ecx],[w5],[w4],[w8]
+%endmacro
+
+%macro PIXEL20_61 0
+    Interp6 [ecx],[w5],[w4],[w7]
+%endmacro
+
+%macro PIXEL20_82 0
+    Interp8 [ecx],[w5],[w4]
+%endmacro
+
+%macro PIXEL20_83 0
+    Interp8 [ecx],[w4],[w8]
+%endmacro
+
+%macro PIXEL21_0 0   ; PIXEL21_* family: every variant stores one 16-bit pixel to [ecx+2]; ecx must already hold the row base (AUXADDRESS computes edi+2*ebx); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [ecx+2],ax
+%endmacro
+
+%macro PIXEL21_30 0
+    Interp3 [ecx+2],[w5],[w7]
+%endmacro
+
+%macro PIXEL21_31 0
+    Interp3 [ecx+2],[w5],[w8]
+%endmacro
+
+%macro PIXEL21_32 0
+    Interp3 [ecx+2],[w5],[w4]
+%endmacro
+
+%macro PIXEL21_70 0
+    Interp7 [ecx+2],[w5],[w4],[w8]
+%endmacro
+
+%macro PIXEL22_0 0   ; PIXEL22_* family: every variant stores one 16-bit pixel to [ecx+4]; ecx must already hold the row base (AUXADDRESS computes edi+2*ebx); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [ecx+4],ax
+%endmacro
+
+%macro PIXEL22_30 0
+    Interp3 [ecx+4],[w5],[w9]
+%endmacro
+
+%macro PIXEL22_31 0
+    Interp3 [ecx+4],[w5],[w6]
+%endmacro
+
+%macro PIXEL22_32 0
+    Interp3 [ecx+4],[w5],[w8]
+%endmacro
+
+%macro PIXEL22_70 0
+    Interp7 [ecx+4],[w5],[w6],[w8]
+%endmacro
+
+%macro PIXEL23_0 0   ; PIXEL23_* family: every variant stores one 16-bit pixel to [ecx+6]; ecx must already hold the row base (AUXADDRESS computes edi+2*ebx); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [ecx+6],ax
+%endmacro
+
+%macro PIXEL23_10 0
+    Interp1 [ecx+6],[w5],[w9]
+%endmacro
+
+%macro PIXEL23_11 0
+    Interp1 [ecx+6],[w5],[w6]
+%endmacro
+
+%macro PIXEL23_13 0   ; operands swapped relative to _11, so the blend is weighted toward w6 instead of w5
+    Interp1 [ecx+6],[w6],[w5]
+%endmacro
+
+%macro PIXEL23_21 0
+    Interp2 [ecx+6],[w6],[w5],[w8]
+%endmacro
+
+%macro PIXEL23_32 0
+    Interp3 [ecx+6],[w5],[w8]
+%endmacro
+
+%macro PIXEL23_50 0
+    Interp5 [ecx+6],[w6],[w5]
+%endmacro
+
+%macro PIXEL23_60 0
+    Interp6 [ecx+6],[w5],[w6],[w8]
+%endmacro
+
+%macro PIXEL23_61 0
+    Interp6 [ecx+6],[w5],[w6],[w9]
+%endmacro
+
+%macro PIXEL23_81 0
+    Interp8 [ecx+6],[w5],[w6]
+%endmacro
+
+%macro PIXEL23_83 0
+    Interp8 [ecx+6],[w6],[w8]
+%endmacro
+
+%macro PIXEL30_0 0   ; PIXEL30_* family: every variant stores one 16-bit pixel to [ecx+ebx]; ecx must already hold the row base (AUXADDRESS computes edi+2*ebx); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [ecx+ebx],ax
+%endmacro
+
+%macro PIXEL30_11 0
+    Interp1 [ecx+ebx],[w5],[w8]
+%endmacro
+
+%macro PIXEL30_12 0
+    Interp1 [ecx+ebx],[w5],[w4]
+%endmacro
+
+%macro PIXEL30_20 0
+    Interp2 [ecx+ebx],[w5],[w8],[w4]
+%endmacro
+
+%macro PIXEL30_50 0   ; the only variant here that does not involve w5: plain average of w8 and w4
+    Interp5 [ecx+ebx],[w8],[w4]
+%endmacro
+
+%macro PIXEL30_80 0
+    Interp8 [ecx+ebx],[w5],[w7]
+%endmacro
+
+%macro PIXEL30_81 0
+    Interp8 [ecx+ebx],[w5],[w8]
+%endmacro
+
+%macro PIXEL30_82 0
+    Interp8 [ecx+ebx],[w5],[w4]
+%endmacro
+
+%macro PIXEL31_0 0   ; PIXEL31_* family: every variant stores one 16-bit pixel to [ecx+ebx+2]; ecx must already hold the row base (AUXADDRESS computes edi+2*ebx); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [ecx+ebx+2],ax
+%endmacro
+
+%macro PIXEL31_10 0
+    Interp1 [ecx+ebx+2],[w5],[w7]
+%endmacro
+
+%macro PIXEL31_11 0
+    Interp1 [ecx+ebx+2],[w5],[w8]
+%endmacro
+
+%macro PIXEL31_13 0   ; operands swapped relative to _11, so the blend is weighted toward w8 instead of w5
+    Interp1 [ecx+ebx+2],[w8],[w5]
+%endmacro
+
+%macro PIXEL31_21 0
+    Interp2 [ecx+ebx+2],[w8],[w5],[w4]
+%endmacro
+
+%macro PIXEL31_32 0
+    Interp3 [ecx+ebx+2],[w5],[w4]
+%endmacro
+
+%macro PIXEL31_50 0
+    Interp5 [ecx+ebx+2],[w8],[w5]
+%endmacro
+
+%macro PIXEL31_60 0
+    Interp6 [ecx+ebx+2],[w5],[w8],[w4]
+%endmacro
+
+%macro PIXEL31_61 0
+    Interp6 [ecx+ebx+2],[w5],[w8],[w7]
+%endmacro
+
+%macro PIXEL31_81 0
+    Interp8 [ecx+ebx+2],[w5],[w8]
+%endmacro
+
+%macro PIXEL31_83 0
+    Interp8 [ecx+ebx+2],[w8],[w4]
+%endmacro
+
+%macro PIXEL32_0 0   ; PIXEL32_* family: every variant stores one 16-bit pixel to [ecx+ebx+4]; ecx must already hold the row base (AUXADDRESS computes edi+2*ebx); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [ecx+ebx+4],ax
+%endmacro
+
+%macro PIXEL32_10 0
+    Interp1 [ecx+ebx+4],[w5],[w9]
+%endmacro
+
+%macro PIXEL32_12 0
+    Interp1 [ecx+ebx+4],[w5],[w8]
+%endmacro
+
+%macro PIXEL32_14 0   ; operands swapped relative to _12, so the blend is weighted toward w8 instead of w5
+    Interp1 [ecx+ebx+4],[w8],[w5]
+%endmacro
+
+%macro PIXEL32_21 0
+    Interp2 [ecx+ebx+4],[w8],[w5],[w6]
+%endmacro
+
+%macro PIXEL32_31 0
+    Interp3 [ecx+ebx+4],[w5],[w6]
+%endmacro
+
+%macro PIXEL32_50 0
+    Interp5 [ecx+ebx+4],[w8],[w5]
+%endmacro
+
+%macro PIXEL32_60 0
+    Interp6 [ecx+ebx+4],[w5],[w8],[w6]
+%endmacro
+
+%macro PIXEL32_61 0
+    Interp6 [ecx+ebx+4],[w5],[w8],[w9]
+%endmacro
+
+%macro PIXEL32_82 0
+    Interp8 [ecx+ebx+4],[w5],[w8]
+%endmacro
+
+%macro PIXEL32_83 0
+    Interp8 [ecx+ebx+4],[w8],[w6]
+%endmacro
+
+%macro PIXEL33_0 0   ; PIXEL33_* family: every variant stores one 16-bit pixel to [ecx+ebx+6]; ecx must already hold the row base (AUXADDRESS computes edi+2*ebx); _0 copies w5 unblended
+    mov eax,[w5]
+    mov [ecx+ebx+6],ax
+%endmacro
+
+%macro PIXEL33_11 0
+    Interp1 [ecx+ebx+6],[w5],[w6]
+%endmacro
+
+%macro PIXEL33_12 0
+    Interp1 [ecx+ebx+6],[w5],[w8]
+%endmacro
+
+%macro PIXEL33_20 0
+    Interp2 [ecx+ebx+6],[w5],[w8],[w6]
+%endmacro
+
+%macro PIXEL33_50 0   ; the only variant here that does not involve w5: plain average of w8 and w6
+    Interp5 [ecx+ebx+6],[w8],[w6]
+%endmacro
+
+%macro PIXEL33_80 0
+    Interp8 [ecx+ebx+6],[w5],[w9]
+%endmacro
+
+%macro PIXEL33_81 0
+    Interp8 [ecx+ebx+6],[w5],[w6]
+%endmacro
+
+%macro PIXEL33_82 0
+    Interp8 [ecx+ebx+6],[w5],[w8]
+%endmacro
+
+inbuffer     equ 8                        ; ebp-relative offsets used by _hq4x_16 after its push ebp / mov ebp,esp; [ebp+8] is loaded into esi (source pointer)
+outbuffer    equ 12                       ; [ebp+12] is loaded into edi (destination pointer)
+Xres         equ 16                       ; [ebp+16]: source width in pixels (doubled to get the byte offset of the next row)
+Yres         equ 20                       ; [ebp+20]: copied into [linesleft]
+pitch        equ 24                       ; [ebp+24]: loaded into ebx and added to destination addresses to step between output rows
+
+_hq4x_16:                                 ; entry point; arguments are read from the stack through ebp (offsets inbuffer..pitch)
+    push ebp
+    mov ebp,esp
+    pushad                                ; save all general-purpose registers
+
+    mov     esi,[ebp+inbuffer]            ; esi = source pointer
+    mov     edi,[ebp+outbuffer]           ; edi = destination pointer
+    mov     edx,[ebp+Yres]
+    mov     [linesleft],edx               ; linesleft = Yres
+    mov     ebx,[ebp+Xres]
+    shl     ebx,1                         ; ebx = Xres*2 (source pixels are read as 16-bit words below)
+    mov     dword[prevline],0             ; prevline offset 0: on the first pass [esi+prevline] is the current row itself
+    mov     dword[nextline],ebx           ; nextline offset = Xres*2 bytes
+.loopy                                    ; per-row setup plus the x=0 neighbourhood load (the loop back-edge is outside this chunk)
+    mov     ecx,[ebp+Xres]
+    sub     ecx,2                 ; x={Xres-2, Xres-1} are special cases.
+    mov     dword[xcounter],ecx           ; xcounter = Xres-2
+    ; x=0 - special case
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx]                 ; 8 bytes of the [prevline] row starting at the centre column
+    movq    mm6,[esi]                     ; 8 bytes of the current row starting at the centre pixel
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx]                 ; 8 bytes of the [nextline] row starting at the centre column
+    movd    eax,mm5
+    movzx   edx,ax  ; first 16-bit pixel of the [prevline] row
+    mov     [w1],edx                      ; no x-1 column is read here: w1 gets the same value as w2
+    mov     [w2],edx
+    shr     eax,16
+    mov     [w3],eax                      ; second pixel -> w3
+    movd    eax,mm6
+    movzx   edx,ax  ; first 16-bit pixel of the current row
+    mov     [w4],edx                      ; w4 gets the same value as the centre w5
+    mov     [w5],edx
+    shr     eax,16
+    mov     [w6],eax                      ; second pixel -> w6
+    movd    eax,mm7
+    movzx   edx,ax  ; first 16-bit pixel of the [nextline] row
+    mov     [w7],edx                      ; w7 gets the same value as w8
+    mov     [w8],edx
+    shr     eax,16
+    mov     [w9],eax                      ; second pixel -> w9
+    jmp     .flags                        ; skip the general-case load
+.loopx                                    ; general column: load the 3x3 neighbourhood w1..w9 around the pixel at [esi]
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-2]               ; [prevline] row, starting one pixel (2 bytes) before the centre column
+    movq    mm6,[esi-2]                   ; current row, starting one pixel before the centre
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-2]               ; [nextline] row, starting one pixel before the centre column
+    movd    eax,mm5
+    movzx   edx,ax  ; pixel 0 of the loaded quadword
+    mov     [w1],edx                      ; w1 = [prevline] row, x-1
+    shr     eax,16
+    mov     [w2],eax                      ; w2 = [prevline] row, x
+    psrlq   mm5,32                        ; bring pixels 2 and 3 into the low dword
+    movd    eax,mm5
+    movzx   edx,ax  ; pixel 2 (pixel 3 is loaded but not used)
+    mov     [w3],edx                      ; w3 = [prevline] row, x+1
+    movd    eax,mm6
+    movzx   edx,ax  ; pixel 0 of the loaded quadword
+    mov     [w4],edx                      ; w4 = current row, x-1
+    shr     eax,16
+    mov     [w5],eax                      ; w5 = centre pixel
+    psrlq   mm6,32
+    movd    eax,mm6
+    movzx   edx,ax  ; pixel 2 (pixel 3 is loaded but not used)
+    mov     [w6],edx                      ; w6 = current row, x+1
+    movd    eax,mm7
+    movzx   edx,ax  ; pixel 0 of the loaded quadword
+    mov     [w7],edx                      ; w7 = [nextline] row, x-1
+    shr     eax,16
+    mov     [w8],eax                      ; w8 = [nextline] row, x
+    psrlq   mm7,32
+    movd    eax,mm7
+    movzx   edx,ax  ; pixel 2 (pixel 3 is loaded but not used)
+    mov     [w9],edx                      ; w9 = [nextline] row, x+1
+.flags                                    ; compare the centre pixel with w2, w4, w6, w8; result bits go to ecx and [cross]
+    mov     ebx,_RGBtoYUV                 ; ebx = base of the _RGBtoYUV table (dword entries indexed by pixel value; presumably packed YUV per its name)
+    mov     eax,[w5]                      ; eax = centre pixel, kept for all comparisons below
+    xor     ecx,ecx                       ; ecx = difference pattern, starts at 0
+    movd    mm5,[ebx+eax*4]               ; mm5 = table entry of the centre pixel
+    mov     dword[cross],0                ; cross collects which of w2/w4/w6/w8 are not bit-identical to w5
+
+    mov     edx,[w2]
+    cmp     eax,edx
+    je      .noflag2                      ; identical pixel value: neither cross nor pattern bit is set
+    or      dword[cross],1                ; w2 is not bit-identical to w5
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]               ; table entry of w2
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2                       ; the two saturating subtractions OR-ed give the per-byte absolute difference
+    psubusb mm1,[threshold]               ; a byte stays non-zero only where the difference exceeds the matching [threshold] byte
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag2                      ; low dword is zero: within threshold, pattern bit stays clear
+    or      ecx,2                         ; pattern bit value 2 <-> w2
+.noflag2
+    mov     edx,[w4]                      ; same test for w4
+    cmp     eax,edx
+    je      .noflag4
+    or      dword[cross],2
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag4
+    or      ecx,8                         ; pattern bit value 8 <-> w4
+.noflag4
+    mov     edx,[w6]                      ; same test for w6
+    cmp     eax,edx
+    je      .noflag6
+    or      dword[cross],4
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag6
+    or      ecx,16                        ; pattern bit value 16 <-> w6
+.noflag6
+    mov     edx,[w8]                      ; same test for w8
+    cmp     eax,edx
+    je      .noflag8
+    or      dword[cross],8
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag8
+    or      ecx,64                        ; pattern bit value 64 <-> w8
+.noflag8                                  ; fast path when w2, w4, w6 and w8 are all bit-identical to the centre pixel
+    cmp     dword[cross],0
+    jnz     .testflag1                    ; at least one of them differs bitwise: continue with the diagonal tests
+    mov     ebx,[ebp+pitch]               ; ebx = pitch (the table base is no longer needed on this path)
+    mov     edx,eax
+    shl     eax,16
+    or      eax,edx                       ; eax = centre pixel replicated into both 16-bit halves
+    AUXADDRESS                            ; macro defined outside this chunk; ecx is used as an address right after it - presumably set here, confirm
+    mov     [edi],eax                     ; 8 bytes (four 16-bit pixels) at edi ...
+    mov     [edi+4],eax
+    mov     [edi+ebx],eax                 ; ... 8 bytes at edi+pitch ...
+    mov     [edi+ebx+4],eax
+    mov     [ecx],eax                     ; ... 8 bytes at ecx ...
+    mov     [ecx+4],eax
+    mov     [ecx+ebx],eax                 ; ... and 8 bytes at ecx+pitch: 16 pixels, all the centre colour
+    mov     [ecx+ebx+4],eax
+    jmp     .loopx_end                    ; label is outside this chunk
+.testflag1                                ; compare the centre pixel with w1, w3, w7, w9; eax = w5, ebx = _RGBtoYUV, mm5 = centre entry as set up at .flags
+    mov     edx,[w1]
+    cmp     eax,edx
+    je      .noflag1                      ; identical pixel value: bit stays clear
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]               ; table entry of w1
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2                       ; per-byte absolute difference of the two table entries
+    psubusb mm1,[threshold]               ; non-zero bytes remain only where the difference exceeds [threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag1
+    or      ecx,1                         ; pattern bit value 1 <-> w1
+.noflag1
+    mov     edx,[w3]                      ; same test for w3
+    cmp     eax,edx
+    je      .noflag3
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag3
+    or      ecx,4                         ; pattern bit value 4 <-> w3
+.noflag3
+    mov     edx,[w7]                      ; same test for w7
+    cmp     eax,edx
+    je      .noflag7
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag7
+    or      ecx,32                        ; pattern bit value 32 <-> w7
+.noflag7
+    mov     edx,[w9]                      ; same test for w9
+    cmp     eax,edx
+    je      .noflag9
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag9
+    or      ecx,128                       ; pattern bit value 128 <-> w9
+.noflag9
+    mov  ebx,[ebp+pitch]                  ; ebx = pitch; the PIXELxx macros address output rows with it
+    jmp  [FuncTable+ecx*4]                ; indirect jump through FuncTable (defined outside this chunk), indexed by the 8-bit pattern in ecx
+
+..@flag0
+..@flag1
+..@flag4
+..@flag32
+..@flag128
+..@flag5
+..@flag132
+..@flag160
+..@flag33
+..@flag129
+..@flag36
+..@flag133
+..@flag164
+..@flag161
+..@flag37
+..@flag165
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag2
+..@flag34
+..@flag130
+..@flag162
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag16
+..@flag17
+..@flag48
+..@flag49
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag64
+..@flag65
+..@flag68
+..@flag69
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag8
+..@flag12
+..@flag136
+..@flag140
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag3
+..@flag35
+..@flag131
+..@flag163
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag6
+..@flag38
+..@flag134
+..@flag166
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag20
+..@flag21
+..@flag52
+..@flag53
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag144
+..@flag145
+..@flag176
+..@flag177
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag192
+..@flag193
+..@flag196
+..@flag197
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag96
+..@flag97
+..@flag100
+..@flag101
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag40
+..@flag44
+..@flag168
+..@flag172
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag9
+..@flag13
+..@flag137
+..@flag141
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag18
+..@flag50
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_50,PIXEL03_50,PIXEL12_0,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag80
+..@flag81
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_61
+    PIXEL21_30
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag72
+..@flag76
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_50,PIXEL21_0,PIXEL30_50,PIXEL31_50
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag10
+..@flag138
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_50,PIXEL01_50,PIXEL10_50,PIXEL11_0
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag66
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag24
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag7
+..@flag39
+..@flag135
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag148
+..@flag149
+..@flag180
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag224
+..@flag228
+..@flag225
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag41
+..@flag169
+..@flag45
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag22
+..@flag54
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag208
+..@flag209
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag104
+..@flag108
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag11
+..@flag139
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag19
+..@flag51
+    AUXADDRESS
+    DiffOrNot w2,w6,PIXEL00_81,PIXEL01_31,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL00_12,PIXEL01_14,PIXEL02_83,PIXEL03_50,PIXEL12_70,PIXEL13_21
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag146
+..@flag178
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL23_32,PIXEL33_82,PIXEL02_21,PIXEL03_50,PIXEL12_70,PIXEL13_83,PIXEL23_13,PIXEL33_11
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    jmp .loopx_end
+..@flag84
+..@flag85
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    DiffOrNot w6,w8,PIXEL03_81,PIXEL13_31,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL03_12,PIXEL13_14,PIXEL22_70,PIXEL23_83,PIXEL32_21,PIXEL33_50
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag112
+..@flag113
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL30_82,PIXEL31_32,PIXEL32_10,PIXEL33_80,PIXEL22_70,PIXEL23_21,PIXEL30_11,PIXEL31_13,PIXEL32_83,PIXEL33_50
+    jmp .loopx_end
+..@flag200
+..@flag204
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL32_31,PIXEL33_81,PIXEL20_21,PIXEL21_70,PIXEL30_50,PIXEL31_83,PIXEL32_14,PIXEL33_12
+    PIXEL22_31
+    PIXEL23_81
+    jmp .loopx_end
+..@flag73
+..@flag77
+    AUXADDRESS
+    DiffOrNot w8,w4,PIXEL00_82,PIXEL10_32,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL00_11,PIXEL10_13,PIXEL20_83,PIXEL21_70,PIXEL30_50,PIXEL31_21
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag42
+..@flag170
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL20_31,PIXEL30_81,PIXEL00_50,PIXEL01_21,PIXEL10_83,PIXEL11_70,PIXEL20_14,PIXEL30_12
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag14
+..@flag142
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL02_32,PIXEL03_82,PIXEL10_10,PIXEL11_30,PIXEL00_50,PIXEL01_83,PIXEL02_13,PIXEL03_11,PIXEL10_21,PIXEL11_70
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag67
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag70
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag28
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag152
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag194
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag98
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag56
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag25
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag26
+..@flag31
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag82
+..@flag214
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag88
+..@flag248
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    jmp .loopx_end
+..@flag74
+..@flag107
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag27
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag86
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag216
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag106
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag30
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag210
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag120
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag75
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag29
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag198
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag184
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag99
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag57
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag71
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag156
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag226
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag60
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag195
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag102
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag153
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag58
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag83
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_61
+    PIXEL21_30
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag92
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end
+..@flag202
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag78
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag154
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag114
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_82
+    PIXEL31_32
+    jmp .loopx_end
+..@flag89
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end
+..@flag90
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end
+..@flag55
+..@flag23
+    AUXADDRESS
+    DiffOrNot w2,w6,PIXEL00_81,PIXEL01_31,PIXEL02_0,PIXEL03_0,PIXEL12_0,PIXEL13_0,PIXEL00_12,PIXEL01_14,PIXEL02_83,PIXEL03_50,PIXEL12_70,PIXEL13_21
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag182
+..@flag150
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL12_0,PIXEL13_0,PIXEL23_32,PIXEL33_82,PIXEL02_21,PIXEL03_50,PIXEL12_70,PIXEL13_83,PIXEL23_13,PIXEL33_11
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    jmp .loopx_end
+..@flag213
+..@flag212
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    DiffOrNot w6,w8,PIXEL03_81,PIXEL13_31,PIXEL22_0,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL03_12,PIXEL13_14,PIXEL22_70,PIXEL23_83,PIXEL32_21,PIXEL33_50
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag241
+..@flag240
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_0,PIXEL23_0,PIXEL30_82,PIXEL31_32,PIXEL32_0,PIXEL33_0,PIXEL22_70,PIXEL23_21,PIXEL30_11,PIXEL31_13,PIXEL32_83,PIXEL33_50
+    jmp .loopx_end
+..@flag236
+..@flag232
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL21_0,PIXEL30_0,PIXEL31_0,PIXEL32_31,PIXEL33_81,PIXEL20_21,PIXEL21_70,PIXEL30_50,PIXEL31_83,PIXEL32_14,PIXEL33_12
+    PIXEL22_31
+    PIXEL23_81
+    jmp .loopx_end
+..@flag109
+..@flag105
+    AUXADDRESS
+    DiffOrNot w8,w4,PIXEL00_82,PIXEL10_32,PIXEL20_0,PIXEL21_0,PIXEL30_0,PIXEL31_0,PIXEL00_11,PIXEL10_13,PIXEL20_83,PIXEL21_70,PIXEL30_50,PIXEL31_21
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag171
+..@flag43
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL11_0,PIXEL20_31,PIXEL30_81,PIXEL00_50,PIXEL01_21,PIXEL10_83,PIXEL11_70,PIXEL20_14,PIXEL30_12
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag143
+..@flag15
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL02_32,PIXEL03_82,PIXEL10_0,PIXEL11_0,PIXEL00_50,PIXEL01_83,PIXEL02_13,PIXEL03_11,PIXEL10_21,PIXEL11_70
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag124
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag203
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag62
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag211
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag118
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag217
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag110
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag155
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag188
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag185
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag61
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag157
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag103
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag227
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag230
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag199
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag220
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    jmp .loopx_end
+..@flag158
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag234
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag242
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_82
+    PIXEL31_32
+    jmp .loopx_end
+..@flag59
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL11_0
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag121
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end
+..@flag87
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_0
+    PIXEL20_61
+    PIXEL21_30
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag79
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL11_0
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag122
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end
+..@flag94
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL12_0
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end
+..@flag218
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    jmp .loopx_end
+..@flag91
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL11_0
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end
+..@flag229
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag167
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag173
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag181
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag186
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag115
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_82
+    PIXEL31_32
+    jmp .loopx_end
+..@flag93
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end
+..@flag206
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag205
+..@flag201
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag174
+..@flag46
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag179
+..@flag147
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag117
+..@flag116
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_82
+    PIXEL31_32
+    jmp .loopx_end
+..@flag189
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag231
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag126
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag219
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag125
+    AUXADDRESS
+    DiffOrNot w8,w4,PIXEL00_82,PIXEL10_32,PIXEL20_0,PIXEL21_0,PIXEL30_0,PIXEL31_0,PIXEL00_11,PIXEL10_13,PIXEL20_83,PIXEL21_70,PIXEL30_50,PIXEL31_21
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag221
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    DiffOrNot w6,w8,PIXEL03_81,PIXEL13_31,PIXEL22_0,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL03_12,PIXEL13_14,PIXEL22_70,PIXEL23_83,PIXEL32_21,PIXEL33_50
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag207
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL02_32,PIXEL03_82,PIXEL10_0,PIXEL11_0,PIXEL00_50,PIXEL01_83,PIXEL02_13,PIXEL03_11,PIXEL10_21,PIXEL11_70
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag238
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL21_0,PIXEL30_0,PIXEL31_0,PIXEL32_31,PIXEL33_81,PIXEL20_21,PIXEL21_70,PIXEL30_50,PIXEL31_83,PIXEL32_14,PIXEL33_12
+    PIXEL22_31
+    PIXEL23_81
+    jmp .loopx_end
+..@flag190
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL12_0,PIXEL13_0,PIXEL23_32,PIXEL33_82,PIXEL02_21,PIXEL03_50,PIXEL12_70,PIXEL13_83,PIXEL23_13,PIXEL33_11
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    jmp .loopx_end
+..@flag187
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL11_0,PIXEL20_31,PIXEL30_81,PIXEL00_50,PIXEL01_21,PIXEL10_83,PIXEL11_70,PIXEL20_14,PIXEL30_12
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag243
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_0,PIXEL23_0,PIXEL30_82,PIXEL31_32,PIXEL32_0,PIXEL33_0,PIXEL22_70,PIXEL23_21,PIXEL30_11,PIXEL31_13,PIXEL32_83,PIXEL33_50
+    jmp .loopx_end
+..@flag119
+    AUXADDRESS
+    DiffOrNot w2,w6,PIXEL00_81,PIXEL01_31,PIXEL02_0,PIXEL03_0,PIXEL12_0,PIXEL13_0,PIXEL00_12,PIXEL01_14,PIXEL02_83,PIXEL03_50,PIXEL12_70,PIXEL13_21
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag237
+..@flag233
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_31
+    PIXEL23_81
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag175
+..@flag47
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag183
+..@flag151
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag245
+..@flag244
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end
+..@flag250
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    jmp .loopx_end
+..@flag123
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag95
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag222
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag252
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end
+..@flag249
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    jmp .loopx_end
+..@flag235
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_31
+    PIXEL23_81
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag111
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag63
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag159
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag215
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag246
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end
+..@flag254
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end
+..@flag253
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_0
+    PIXEL23_0
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end
+..@flag251
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    jmp .loopx_end
+..@flag239
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_31
+    PIXEL23_81
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag127
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_0
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag191
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag223
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag247
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end
+..@flag255
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_0
+    PIXEL23_0
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end
+
+.loopx_end                      ; every pattern handler jumps here once it is done: step one source column to the right
+    add     esi,2               ; source pointer += 2 bytes (the edge paths below read the source as 16-bit words)
+    add     edi,8               ; destination pointer += 8 bytes; NOTE(review): presumably 4 output pixels x 2 bytes - the stores live in macros defined earlier in the file
+    dec     dword[xcounter]     ; one column fewer to go; the flags set here are tested by jle below and again by jl at .xres_2
+    jle     .xres_2             ; counter <= 0: only the right-edge columns remain, use the special-case loaders
+    jmp     .loopx              ; counter > 0: back to the regular column loop (defined earlier in the file)
+.xres_2                         ; reached via jle above; labels do not alter flags, so jl below still sees the result of dec [xcounter]
+    ; x=Xres-2 - special case (second-to-last column): build w1..w9 from an 8-byte window covering esi-4..esi+3
+    jl      .xres_1             ; counter went negative: this is not the Xres-2 column, try the last-column path
+    mov     ebx,[prevline]      ; byte offset to the source row above (.nexty stores -(Xres*2) here)
+    movq    mm5,[esi+ebx-4]     ; row above: bytes esi-4 .. esi+3
+    movq    mm6,[esi-4]         ; current row, same window
+    mov     ebx,[nextline]      ; byte offset to the source row below (.lastline stores 0)
+    movq    mm7,[esi+ebx-4]     ; row below, same window
+    psrlq   mm5,16              ; discard the word at esi-4; remaining words are esi-2, esi, esi+2
+    psrlq   mm6,16
+    psrlq   mm7,16
+    movd    eax,mm5             ; eax = word at esi-2 (low half) | word at esi (high half)
+    movzx   edx,ax
+    mov     [w1],edx            ; w1 = above-left
+    shr     eax,16
+    mov     [w2],eax            ; w2 = above
+    psrlq   mm5,32              ; bring the word at esi+2 down to bit 0 (upper bits are zero after the shifts)
+    movd    eax,mm5
+    mov     [w3],eax            ; w3 = above-right
+    movd    eax,mm6
+    movzx   edx,ax
+    mov     [w4],edx            ; w4 = left
+    shr     eax,16
+    mov     [w5],eax            ; w5 = current pixel
+    psrlq   mm6,32
+    movd    eax,mm6
+    mov     [w6],eax            ; w6 = right
+    movd    eax,mm7
+    movzx   edx,ax
+    mov     [w7],edx            ; w7 = below-left
+    shr     eax,16
+    mov     [w8],eax            ; w8 = below
+    psrlq   mm7,32
+    movd    eax,mm7
+    mov     [w9],eax            ; w9 = below-right
+    jmp     .flags              ; continue at .flags (defined earlier in the file); NOTE(review): presumably computes the pattern index and dispatches to a ..@flagN handler - confirm
+.xres_1                         ; reached from .xres_2 when the decremented column counter is negative
+    cmp     dword[xcounter],-1  ; -1 means the last column is still to be done
+    jl      .nexty              ; below -1: the last column was already processed, the row is finished
+    ; x=Xres-1 - special case (last column): there is no pixel to the right, so the centre column is reused for w3/w6/w9
+    mov     ebx,[prevline]      ; byte offset to the source row above
+    movq    mm5,[esi+ebx-6]     ; row above: bytes esi-6 .. esi+1, so the read ends at the current pixel
+    movq    mm6,[esi-6]         ; current row, same window
+    mov     ebx,[nextline]      ; byte offset to the source row below
+    movq    mm7,[esi+ebx-6]     ; row below, same window
+    psrlq   mm5,32              ; keep only the upper dword: words at esi-2 and esi
+    psrlq   mm6,32
+    psrlq   mm7,32
+    movd    eax,mm5             ; eax = word at esi-2 (low half) | word at esi (high half)
+    movzx   edx,ax  
+    mov     [w1],edx            ; w1 = above-left
+    shr     eax,16
+    mov     [w2],eax            ; w2 = above
+    mov     [w3],eax            ; w3 = copy of w2 (right edge replicated)
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx            ; w4 = left
+    shr     eax,16
+    mov     [w5],eax            ; w5 = current pixel
+    mov     [w6],eax            ; w6 = copy of w5 (right edge replicated)
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx            ; w7 = below-left
+    shr     eax,16
+    mov     [w8],eax            ; w8 = below
+    mov     [w9],eax            ; w9 = copy of w8 (right edge replicated)
+    jmp     .flags              ; continue at .flags (defined earlier in the file)
+.nexty                          ; row finished: move the destination to the next band and set up the row offsets for the next source row
+    add     edi,ebx             ; NOTE(review): ebx is presumably the destination pitch at this point (loaded outside the visible code) - confirm
+    add     edi,ebx
+    add     edi,ebx             ; edi += 3*ebx in total
+    dec     dword[linesleft]    ; one source row fewer to process
+    jz      .fin                ; none left: leave the routine
+    mov     ebx,[ebp+Xres]      ; Xres is read from the stack frame
+    shl     ebx,1               ; ebx = Xres*2, used below as the byte distance between source rows
+    cmp     dword[linesleft],1
+    je      .lastline           ; exactly one row left: it has no row below it
+    mov     dword[nextline],ebx ; row below is +Xres*2 bytes away
+    neg     ebx
+    mov     dword[prevline],ebx ; row above is -Xres*2 bytes away
+    jmp     .loopy              ; start the next row (label defined earlier in the file)
+.lastline                       ; final source row
+    mov     dword[nextline],0   ; offset 0: loads of the "row below" re-read the current row
+    neg     ebx
+    mov     dword[prevline],ebx ; row above is -Xres*2 bytes away
+    jmp     .loopy
+.fin                            ; routine exit, reached from .nexty when linesleft hits zero
+    emms                        ; clear the MMX state so later x87 floating-point code works
+    popad                       ; restore the general registers; NOTE(review): presumably pairs with a pushad in the prologue, which is not visible here
+    mov esp,ebp                 ; drop anything left on the stack below the frame pointer
+    pop ebp                     ; restore the caller's frame pointer
+    ret
+
+SECTION .data
+FuncTable                       ; 256 dword code addresses; entry N is the address of label ..@flagN (8 entries per row, rows in ascending order). NOTE(review): presumably indexed by the 8-bit pattern value in .flags - the indexing code is not visible here
+    dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
+    dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
+    dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
+    dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
+    dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
+    dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
+    dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
+    dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
+    dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
+    dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
+    dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
+    dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
+    dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
+    dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
+    dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
+    dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
+    dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
+    dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
+    dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
+    dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
+    dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
+    dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
+    dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
+    dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
+    dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
+    dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
+    dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
+    dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
+    dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
+    dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
+    dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
+    dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+
+
+
diff --git a/od-win32/hq4x32.asm b/od-win32/hq4x32.asm
new file mode 100755 (executable)
index 0000000..0efc188
--- /dev/null
@@ -0,0 +1,3919 @@
+;hq4x filter
+;32bpp output
+;----------------------------------------------------------
+;Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
+;
+;This program is free software; you can redistribute it and/or
+;modify it under the terms of the GNU General Public License
+;as published by the Free Software Foundation; either
+;version 2 of the License, or (at your option) any later
+;version.
+;
+;This program is distributed in the hope that it will be useful,
+;but WITHOUT ANY WARRANTY; without even the implied warranty of
+;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;GNU General Public License for more details.
+;
+;You should have received a copy of the GNU General Public License
+;along with this program; if not, write to the Free Software
+;Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+GLOBAL _hq4x_32
+
+EXTERN _LUT16to32
+EXTERN _RGBtoYUV
+
+SECTION .bss
+linesleft resd 1 ; initialised from the Yres argument on entry to _hq4x_32
+xcounter  resd 1 ; set to Xres-2 at the start of each row (the last two columns are special-cased)
+cross     resd 1 ; bit mask: 1/2/4/8 set when w2/w4/w6/w8 is not exactly equal to w5
+nextline  resd 1 ; byte offset added to esi to read the following input row (starts as Xres*2)
+prevline  resd 1 ; byte offset added to esi to read the preceding input row (starts as 0)
+w1        resd 1 ; w1..w9: zero-extended 16-bit input pixels of the 3x3 neighbourhood
+w2        resd 1 ;   w1 w2 w3  <- row at esi+prevline
+w3        resd 1 ;   w4 w5 w6  <- current row (w5 is the pixel being scaled)
+w4        resd 1 ;   w7 w8 w9  <- row at esi+nextline
+w5        resd 1
+w6        resd 1
+w7        resd 1
+w8        resd 1
+w9        resd 1
+c1        resd 1 ; c1..c9: _LUT16to32 look-ups of w1..w9 (32-bit output colours)
+c2        resd 1
+c3        resd 1
+c4        resd 1
+c5        resd 1 ; NOTE(review): never written in the visible code; the centre colour is kept in eax instead
+c6        resd 1
+c7        resd 1
+c8        resd 1
+c9        resd 1
+
+SECTION .data
+
+reg_blank    dd  0,0 ; 64 bits of zero: second operand of punpcklbw/packuswb in the Interp macros
+const3       dd  0x00030003,0x00000003 ; pmullw factors: 16-bit lanes 0..2 = 3, lane 3 = 0
+const5       dd  0x00050005,0x00000005 ; pmullw factors: 16-bit lanes 0..2 = 5, lane 3 = 0
+const6       dd  0x00060006,0x00000006 ; pmullw factors: 16-bit lanes 0..2 = 6, lane 3 = 0
+const7       dd  0x00070007,0x00000007 ; pmullw factors: 16-bit lanes 0..2 = 7, lane 3 = 0
+threshold    dd  0x00300706,0x00000000 ; per-byte limits (0x06,0x07,0x30,0x00 from low to high) subtracted with saturation from |_RGBtoYUV[a]-_RGBtoYUV[b]|; presumably V,U,Y tolerances - TODO confirm byte order of _RGBtoYUV entries
+
+SECTION .text
+
+%macro AUXADDRESS 0 ; ecx = edi + 2*ebx; the PIXEL2x/PIXEL3x macros address their output through ecx
+    mov     ecx, edi ; edi: base address used by the PIXEL0x/PIXEL1x macros
+    add     ecx, ebx ; ebx: row stride (callers load it from the pitch argument first)
+    add     ecx, ebx ; overwrites ecx, so the pattern index held there must already have been consumed
+%endmacro
+
+%macro TestDiff 2 ; %1,%2: dword variables holding pixel values; result: edx == 0 if "same", non-zero if "different"
+    mov     edx,[%1]
+    sub     edx,[%2] ; identical values: edx is already 0, skip the table look-ups
+    jz      %%fin
+    mov     edx,[%1]
+    shl     edx,2 ; pixel value * 4 = byte offset of its dword entry
+    add     edx,_RGBtoYUV
+    movd    mm1,[edx] ; mm1 = _RGBtoYUV[%1]
+    movq    mm5,mm1 ; keep a copy for the reverse subtraction (clobbers mm5)
+    mov     edx,[%2]
+    shl     edx,2
+    add     edx,_RGBtoYUV
+    movd    mm2,[edx] ; mm2 = _RGBtoYUV[%2]
+    psubusb mm1,mm2 ; saturating a-b per byte
+    psubusb mm2,mm5 ; saturating b-a per byte
+    por     mm1,mm2 ; one of the two is 0 in each byte, so this is |a-b| per byte
+    psubusb mm1,[threshold] ; bytes within their threshold saturate to 0
+    movd    edx,mm1 ; non-zero iff at least one byte exceeded its threshold
+%%fin:
+%endmacro
+
+%macro DiffOrNot 4 ; %1,%2: pixel variables compared with TestDiff; %3: statement run when they differ; %4: statement run when they match
+   TestDiff %1,%2
+   test edx,edx ; TestDiff leaves edx == 0 when the two pixels count as the same
+   jz   %%same
+   %3
+   jmp %%fin
+%%same:
+   %4
+%%fin: ; colon added: a bare label makes NASM emit an orphan-label warning on every expansion
+%endmacro
+
+%macro DiffOrNot 8 ; %1,%2: pixel variables compared with TestDiff; %3-%5: statements run when they differ; %6-%8: statements run when they match
+   TestDiff %1,%2
+   test edx,edx ; TestDiff leaves edx == 0 when the two pixels count as the same
+   jz   %%same
+   %3
+   %4
+   %5
+   jmp %%fin
+%%same:
+   %6
+   %7
+   %8
+%%fin: ; colon added: a bare label makes NASM emit an orphan-label warning on every expansion
+%endmacro
+
+%macro DiffOrNot 10 ; %1,%2: pixel variables compared with TestDiff; %3-%6: statements run when they differ; %7-%10: statements run when they match
+   TestDiff %1,%2
+   test edx,edx ; TestDiff leaves edx == 0 when the two pixels count as the same
+   jz %%same
+   %3
+   %4
+   %5
+   %6
+   jmp %%fin
+%%same:
+   %7
+   %8
+   %9
+   %10
+%%fin: ; colon added: a bare label makes NASM emit an orphan-label warning on every expansion
+%endmacro
+
+%macro DiffOrNot 14 ; %1,%2: pixel variables compared with TestDiff; %3-%8: statements run when they differ; %9-%14: statements run when they match
+   TestDiff %1,%2
+   test edx,edx ; TestDiff leaves edx == 0 when the two pixels count as the same
+   jz %%same
+   %3
+   %4
+   %5
+   %6
+   %7
+   %8
+   jmp %%fin
+%%same:
+   %9
+   %10
+   %11
+   %12
+   %13
+   %14
+%%fin: ; colon added: a bare label makes NASM emit an orphan-label warning on every expansion
+%endmacro
+
+%macro Interp1 3 ; %1 = (3*%2 + %3) >> 2; %1: destination dword, %2/%3: source colours; clobbers edx
+    mov edx,%2
+    shl edx,2 ; 4*%2
+    add edx,%3
+    sub edx,%2 ; 3*%2 + %3
+    shr edx,2 ; plain 32-bit arithmetic, not per channel - NOTE(review): presumably relies on the low bits of every channel being 0 in _LUT16to32 entries; confirm
+    mov %1,edx
+%endmacro
+
+%macro Interp2 4 ; %1 = (2*%2 + %3 + %4) >> 2; %1: destination dword, %2-%4: source colours; clobbers edx
+    mov edx,%2
+    shl edx,1 ; 2*%2
+    add edx,%3
+    add edx,%4
+    shr edx,2 ; plain 32-bit arithmetic, not per channel - NOTE(review): presumably relies on zero low bits per channel; confirm
+    mov %1,edx
+%endmacro
+
+%macro Interp3 2 ; %1 = (7*eax + %2) >> 3 per channel; eax is an implicit input; clobbers mm1, mm2
+    movd       mm1, eax
+    movd       mm2, %2
+    punpcklbw  mm1, [reg_blank] ; widen each byte to a 16-bit lane so the sums cannot overflow
+    punpcklbw  mm2, [reg_blank]
+    pmullw     mm1, [const7] ; lanes 0..2 *7; lane 3 (top byte) *0
+    paddw      mm1, mm2
+    psrlw      mm1, 3 ; divide each lane by 8
+    packuswb   mm1, [reg_blank] ; back to 4 bytes in the low dword
+    movd       %1, mm1
+%endmacro
+
+%macro Interp5 3 ; %1 = (%2 + %3) >> 1; %1: destination dword, %2/%3: source colours; clobbers edx
+    mov edx,%2
+    add edx,%3
+    shr edx,1 ; plain 32-bit arithmetic, not per channel - NOTE(review): presumably relies on zero low bits per channel; confirm
+    mov %1,edx
+%endmacro
+
+%macro Interp6 3 ; %1 = (5*eax + 2*%2 + %3) >> 3 per channel; eax is an implicit input; clobbers mm1-mm3
+    movd       mm1, eax
+    movd       mm2, %2
+    movd       mm3, %3
+    punpcklbw  mm1, [reg_blank] ; widen each byte to a 16-bit lane
+    punpcklbw  mm2, [reg_blank]
+    punpcklbw  mm3, [reg_blank]
+    pmullw     mm1, [const5] ; lanes 0..2 *5; lane 3 (top byte) *0
+    psllw      mm2, 1 ; 2*%2
+    paddw      mm1, mm3
+    paddw      mm1, mm2
+    psrlw      mm1, 3 ; divide each lane by 8
+    packuswb   mm1, [reg_blank] ; back to 4 bytes in the low dword
+    movd       %1, mm1
+%endmacro
+
+%macro Interp7 3 ; %1 = (6*eax + %2 + %3) >> 3 per channel; eax is an implicit input; clobbers mm1-mm3
+    movd       mm1, eax
+    movd       mm2, %2
+    movd       mm3, %3
+    punpcklbw  mm1, [reg_blank] ; widen each byte to a 16-bit lane
+    punpcklbw  mm2, [reg_blank]
+    punpcklbw  mm3, [reg_blank]
+    pmullw     mm1, [const6] ; lanes 0..2 *6; lane 3 (top byte) *0
+    paddw      mm2, mm3 ; %2 + %3
+    paddw      mm1, mm2
+    psrlw      mm1, 3 ; divide each lane by 8
+    packuswb   mm1, [reg_blank] ; back to 4 bytes in the low dword
+    movd       %1, mm1
+%endmacro
+
+%macro Interp8 3 ; %1 = (5*%2 + 3*%3) >> 3 per channel; unlike Interp3/6/7 both colours are explicit; clobbers mm1, mm2
+    movd       mm1, %2
+    movd       mm2, %3
+    punpcklbw  mm1, [reg_blank] ; widen each byte to a 16-bit lane
+    punpcklbw  mm2, [reg_blank]
+    pmullw     mm1, [const5] ; lanes 0..2 *5; lane 3 (top byte) *0
+    pmullw     mm2, [const3] ; lanes 0..2 *3; lane 3 (top byte) *0
+    paddw      mm1, mm2
+    psrlw      mm1, 3 ; divide each lane by 8
+    packuswb   mm1, [reg_blank] ; back to 4 bytes in the low dword
+    movd       %1, mm1
+%endmacro
+
+%macro PIXEL00_0 0 ; PIXEL00_*: write output dword [edi] (row 0, col 0 of the 4x4 block); "c5" below is the centre colour in eax; this variant stores c5
+    mov [edi],eax
+%endmacro
+
+%macro PIXEL00_11 0 ; (3*c5 + c4) >> 2
+    Interp1 [edi],eax,[c4]
+%endmacro
+
+%macro PIXEL00_12 0 ; (3*c5 + c2) >> 2
+    Interp1 [edi],eax,[c2]
+%endmacro
+
+%macro PIXEL00_20 0 ; (2*c5 + c2 + c4) >> 2
+    Interp2 [edi],eax,[c2],[c4]
+%endmacro
+
+%macro PIXEL00_50 0 ; (c2 + c4) >> 1
+    Interp5 [edi],[c2],[c4]
+%endmacro
+
+%macro PIXEL00_80 0 ; (5*c5 + 3*c1) >> 3
+    Interp8 [edi],eax,[c1]
+%endmacro
+
+%macro PIXEL00_81 0 ; (5*c5 + 3*c4) >> 3
+    Interp8 [edi],eax,[c4]
+%endmacro
+
+%macro PIXEL00_82 0 ; (5*c5 + 3*c2) >> 3
+    Interp8 [edi],eax,[c2]
+%endmacro
+
+%macro PIXEL01_0 0 ; PIXEL01_*: write output dword [edi+4] (row 0, col 1 of the 4x4 block); "c5" below is the centre colour in eax; this variant stores c5
+    mov [edi+4],eax
+%endmacro
+
+%macro PIXEL01_10 0 ; (3*c5 + c1) >> 2
+    Interp1 [edi+4],eax,[c1]
+%endmacro
+
+%macro PIXEL01_12 0 ; (3*c5 + c2) >> 2
+    Interp1 [edi+4],eax,[c2]
+%endmacro
+
+%macro PIXEL01_14 0 ; (3*c2 + c5) >> 2
+    Interp1 [edi+4],[c2],eax
+%endmacro
+
+%macro PIXEL01_21 0 ; (2*c2 + c5 + c4) >> 2
+    Interp2 [edi+4],[c2],eax,[c4]
+%endmacro
+
+%macro PIXEL01_31 0 ; (7*c5 + c4) >> 3
+    Interp3 [edi+4],[c4]
+%endmacro
+
+%macro PIXEL01_50 0 ; (c2 + c5) >> 1
+    Interp5 [edi+4],[c2],eax
+%endmacro
+
+%macro PIXEL01_60 0 ; (5*c5 + 2*c2 + c4) >> 3
+    Interp6 [edi+4],[c2],[c4]
+%endmacro
+
+%macro PIXEL01_61 0 ; (5*c5 + 2*c2 + c1) >> 3
+    Interp6 [edi+4],[c2],[c1]
+%endmacro
+
+%macro PIXEL01_82 0 ; (5*c5 + 3*c2) >> 3
+    Interp8 [edi+4],eax,[c2]
+%endmacro
+
+%macro PIXEL01_83 0 ; (5*c2 + 3*c4) >> 3
+    Interp8 [edi+4],[c2],[c4]
+%endmacro
+
+%macro PIXEL02_0 0 ; PIXEL02_*: write output dword [edi+8] (row 0, col 2 of the 4x4 block); "c5" below is the centre colour in eax; this variant stores c5
+    mov [edi+8],eax
+%endmacro
+
+%macro PIXEL02_10 0 ; (3*c5 + c3) >> 2
+    Interp1 [edi+8],eax,[c3]
+%endmacro
+
+%macro PIXEL02_11 0 ; (3*c5 + c2) >> 2
+    Interp1 [edi+8],eax,[c2]
+%endmacro
+
+%macro PIXEL02_13 0 ; (3*c2 + c5) >> 2
+    Interp1 [edi+8],[c2],eax
+%endmacro
+
+%macro PIXEL02_21 0 ; (2*c2 + c5 + c6) >> 2
+    Interp2 [edi+8],[c2],eax,[c6]
+%endmacro
+
+%macro PIXEL02_32 0 ; (7*c5 + c6) >> 3
+    Interp3 [edi+8],[c6]
+%endmacro
+
+%macro PIXEL02_50 0 ; (c2 + c5) >> 1
+    Interp5 [edi+8],[c2],eax
+%endmacro
+
+%macro PIXEL02_60 0 ; (5*c5 + 2*c2 + c6) >> 3
+    Interp6 [edi+8],[c2],[c6]
+%endmacro
+
+%macro PIXEL02_61 0 ; (5*c5 + 2*c2 + c3) >> 3
+    Interp6 [edi+8],[c2],[c3]
+%endmacro
+
+%macro PIXEL02_81 0 ; (5*c5 + 3*c2) >> 3
+    Interp8 [edi+8],eax,[c2]
+%endmacro
+
+%macro PIXEL02_83 0 ; (5*c2 + 3*c6) >> 3
+    Interp8 [edi+8],[c2],[c6]
+%endmacro
+
+%macro PIXEL03_0 0 ; PIXEL03_*: write output dword [edi+12] (row 0, col 3 of the 4x4 block); "c5" below is the centre colour in eax; this variant stores c5
+    mov [edi+12],eax
+%endmacro
+
+%macro PIXEL03_11 0 ; (3*c5 + c2) >> 2
+    Interp1 [edi+12],eax,[c2]
+%endmacro
+
+%macro PIXEL03_12 0 ; (3*c5 + c6) >> 2
+    Interp1 [edi+12],eax,[c6]
+%endmacro
+
+%macro PIXEL03_20 0 ; (2*c5 + c2 + c6) >> 2
+    Interp2 [edi+12],eax,[c2],[c6]
+%endmacro
+
+%macro PIXEL03_50 0 ; (c2 + c6) >> 1
+    Interp5 [edi+12],[c2],[c6]
+%endmacro
+
+%macro PIXEL03_80 0 ; (5*c5 + 3*c3) >> 3
+    Interp8 [edi+12],eax,[c3]
+%endmacro
+
+%macro PIXEL03_81 0 ; (5*c5 + 3*c2) >> 3
+    Interp8 [edi+12],eax,[c2]
+%endmacro
+
+%macro PIXEL03_82 0 ; (5*c5 + 3*c6) >> 3
+    Interp8 [edi+12],eax,[c6]
+%endmacro
+
+%macro PIXEL10_0 0 ; PIXEL10_*: write output dword [edi+ebx] (row 1, col 0; ebx = row stride); "c5" below is the centre colour in eax; this variant stores c5
+    mov [edi+ebx],eax
+%endmacro
+
+%macro PIXEL10_10 0 ; (3*c5 + c1) >> 2
+    Interp1 [edi+ebx],eax,[c1]
+%endmacro
+
+%macro PIXEL10_11 0 ; (3*c5 + c4) >> 2
+    Interp1 [edi+ebx],eax,[c4]
+%endmacro
+
+%macro PIXEL10_13 0 ; (3*c4 + c5) >> 2
+    Interp1 [edi+ebx],[c4],eax
+%endmacro
+
+%macro PIXEL10_21 0 ; (2*c4 + c5 + c2) >> 2
+    Interp2 [edi+ebx],[c4],eax,[c2]
+%endmacro
+
+%macro PIXEL10_32 0 ; (7*c5 + c2) >> 3
+    Interp3 [edi+ebx],[c2]
+%endmacro
+
+%macro PIXEL10_50 0 ; (c4 + c5) >> 1
+    Interp5 [edi+ebx],[c4],eax
+%endmacro
+
+%macro PIXEL10_60 0 ; (5*c5 + 2*c4 + c2) >> 3
+    Interp6 [edi+ebx],[c4],[c2]
+%endmacro
+
+%macro PIXEL10_61 0 ; (5*c5 + 2*c4 + c1) >> 3
+    Interp6 [edi+ebx],[c4],[c1]
+%endmacro
+
+%macro PIXEL10_81 0 ; (5*c5 + 3*c4) >> 3
+    Interp8 [edi+ebx],eax,[c4]
+%endmacro
+
+%macro PIXEL10_83 0 ; (5*c4 + 3*c2) >> 3
+    Interp8 [edi+ebx],[c4],[c2]
+%endmacro
+
+%macro PIXEL11_0 0 ; PIXEL11_*: write output dword [edi+ebx+4] (row 1, col 1; ebx = row stride); "c5" below is the centre colour in eax; this variant stores c5
+    mov [edi+ebx+4],eax
+%endmacro
+
+%macro PIXEL11_30 0 ; (7*c5 + c1) >> 3
+    Interp3 [edi+ebx+4],[c1]
+%endmacro
+
+%macro PIXEL11_31 0 ; (7*c5 + c4) >> 3
+    Interp3 [edi+ebx+4],[c4]
+%endmacro
+
+%macro PIXEL11_32 0 ; (7*c5 + c2) >> 3
+    Interp3 [edi+ebx+4],[c2]
+%endmacro
+
+%macro PIXEL11_70 0 ; (6*c5 + c4 + c2) >> 3
+    Interp7 [edi+ebx+4],[c4],[c2]
+%endmacro
+
+%macro PIXEL12_0 0 ; PIXEL12_*: write output dword [edi+ebx+8] (row 1, col 2; ebx = row stride); "c5" below is the centre colour in eax; this variant stores c5
+    mov [edi+ebx+8],eax
+%endmacro
+
+%macro PIXEL12_30 0 ; (7*c5 + c3) >> 3
+    Interp3 [edi+ebx+8],[c3]
+%endmacro
+
+%macro PIXEL12_31 0 ; (7*c5 + c2) >> 3
+    Interp3 [edi+ebx+8],[c2]
+%endmacro
+
+%macro PIXEL12_32 0 ; (7*c5 + c6) >> 3
+    Interp3 [edi+ebx+8],[c6]
+%endmacro
+
+%macro PIXEL12_70 0 ; (6*c5 + c6 + c2) >> 3
+    Interp7 [edi+ebx+8],[c6],[c2]
+%endmacro
+
+%macro PIXEL13_0 0 ; PIXEL13_*: write output dword [edi+ebx+12] (row 1, col 3; ebx = row stride); "c5" below is the centre colour in eax; this variant stores c5
+    mov [edi+ebx+12],eax
+%endmacro
+
+%macro PIXEL13_10 0 ; (3*c5 + c3) >> 2
+    Interp1 [edi+ebx+12],eax,[c3]
+%endmacro
+
+%macro PIXEL13_12 0 ; (3*c5 + c6) >> 2
+    Interp1 [edi+ebx+12],eax,[c6]
+%endmacro
+
+%macro PIXEL13_14 0 ; (3*c6 + c5) >> 2
+    Interp1 [edi+ebx+12],[c6],eax
+%endmacro
+
+%macro PIXEL13_21 0 ; (2*c6 + c5 + c2) >> 2
+    Interp2 [edi+ebx+12],[c6],eax,[c2]
+%endmacro
+
+%macro PIXEL13_31 0 ; (7*c5 + c2) >> 3
+    Interp3 [edi+ebx+12],[c2]
+%endmacro
+
+%macro PIXEL13_50 0 ; (c6 + c5) >> 1
+    Interp5 [edi+ebx+12],[c6],eax
+%endmacro
+
+%macro PIXEL13_60 0 ; (5*c5 + 2*c6 + c2) >> 3
+    Interp6 [edi+ebx+12],[c6],[c2]
+%endmacro
+
+%macro PIXEL13_61 0 ; (5*c5 + 2*c6 + c3) >> 3
+    Interp6 [edi+ebx+12],[c6],[c3]
+%endmacro
+
+%macro PIXEL13_82 0 ; (5*c5 + 3*c6) >> 3
+    Interp8 [edi+ebx+12],eax,[c6]
+%endmacro
+
+%macro PIXEL13_83 0 ; (5*c6 + 3*c2) >> 3
+    Interp8 [edi+ebx+12],[c6],[c2]
+%endmacro
+
+%macro PIXEL20_0 0 ; PIXEL20_*: write output dword [ecx] (row 2, col 0; needs ecx = edi+2*ebx from AUXADDRESS); "c5" below is the centre colour in eax; this variant stores c5
+    mov [ecx],eax
+%endmacro
+
+%macro PIXEL20_10 0 ; (3*c5 + c7) >> 2
+    Interp1 [ecx],eax,[c7]
+%endmacro
+
+%macro PIXEL20_12 0 ; (3*c5 + c4) >> 2
+    Interp1 [ecx],eax,[c4]
+%endmacro
+
+%macro PIXEL20_14 0 ; (3*c4 + c5) >> 2
+    Interp1 [ecx],[c4],eax
+%endmacro
+
+%macro PIXEL20_21 0 ; (2*c4 + c5 + c8) >> 2
+    Interp2 [ecx],[c4],eax,[c8]
+%endmacro
+
+%macro PIXEL20_31 0 ; (7*c5 + c8) >> 3
+    Interp3 [ecx],[c8]
+%endmacro
+
+%macro PIXEL20_50 0 ; (c4 + c5) >> 1
+    Interp5 [ecx],[c4],eax
+%endmacro
+
+%macro PIXEL20_60 0 ; (5*c5 + 2*c4 + c8) >> 3
+    Interp6 [ecx],[c4],[c8]
+%endmacro
+
+%macro PIXEL20_61 0 ; (5*c5 + 2*c4 + c7) >> 3
+    Interp6 [ecx],[c4],[c7]
+%endmacro
+
+%macro PIXEL20_82 0 ; (5*c5 + 3*c4) >> 3
+    Interp8 [ecx],eax,[c4]
+%endmacro
+
+%macro PIXEL20_83 0 ; (5*c4 + 3*c8) >> 3
+    Interp8 [ecx],[c4],[c8]
+%endmacro
+
+%macro PIXEL21_0 0 ; PIXEL21_*: write output dword [ecx+4] (row 2, col 1; needs ecx = edi+2*ebx from AUXADDRESS); "c5" below is the centre colour in eax; this variant stores c5
+    mov [ecx+4],eax
+%endmacro
+
+%macro PIXEL21_30 0 ; (7*c5 + c7) >> 3
+    Interp3 [ecx+4],[c7]
+%endmacro
+
+%macro PIXEL21_31 0 ; (7*c5 + c8) >> 3
+    Interp3 [ecx+4],[c8]
+%endmacro
+
+%macro PIXEL21_32 0 ; (7*c5 + c4) >> 3
+    Interp3 [ecx+4],[c4]
+%endmacro
+
+%macro PIXEL21_70 0 ; (6*c5 + c4 + c8) >> 3
+    Interp7 [ecx+4],[c4],[c8]
+%endmacro
+
+%macro PIXEL22_0 0 ; PIXEL22_*: write output dword [ecx+8] (row 2, col 2; needs ecx = edi+2*ebx from AUXADDRESS); "c5" below is the centre colour in eax; this variant stores c5
+    mov [ecx+8],eax
+%endmacro
+
+%macro PIXEL22_30 0 ; (7*c5 + c9) >> 3
+    Interp3 [ecx+8],[c9]
+%endmacro
+
+%macro PIXEL22_31 0 ; (7*c5 + c6) >> 3
+    Interp3 [ecx+8],[c6]
+%endmacro
+
+%macro PIXEL22_32 0 ; (7*c5 + c8) >> 3
+    Interp3 [ecx+8],[c8]
+%endmacro
+
+%macro PIXEL22_70 0 ; (6*c5 + c6 + c8) >> 3
+    Interp7 [ecx+8],[c6],[c8]
+%endmacro
+
+%macro PIXEL23_0 0 ; PIXEL23_*: write output dword [ecx+12] (row 2, col 3; needs ecx = edi+2*ebx from AUXADDRESS); "c5" below is the centre colour in eax; this variant stores c5
+    mov [ecx+12],eax
+%endmacro
+
+%macro PIXEL23_10 0 ; (3*c5 + c9) >> 2
+    Interp1 [ecx+12],eax,[c9]
+%endmacro
+
+%macro PIXEL23_11 0 ; (3*c5 + c6) >> 2
+    Interp1 [ecx+12],eax,[c6]
+%endmacro
+
+%macro PIXEL23_13 0 ; (3*c6 + c5) >> 2
+    Interp1 [ecx+12],[c6],eax
+%endmacro
+
+%macro PIXEL23_21 0 ; (2*c6 + c5 + c8) >> 2
+    Interp2 [ecx+12],[c6],eax,[c8]
+%endmacro
+
+%macro PIXEL23_32 0 ; (7*c5 + c8) >> 3
+    Interp3 [ecx+12],[c8]
+%endmacro
+
+%macro PIXEL23_50 0 ; (c6 + c5) >> 1
+    Interp5 [ecx+12],[c6],eax
+%endmacro
+
+%macro PIXEL23_60 0 ; (5*c5 + 2*c6 + c8) >> 3
+    Interp6 [ecx+12],[c6],[c8]
+%endmacro
+
+%macro PIXEL23_61 0 ; (5*c5 + 2*c6 + c9) >> 3
+    Interp6 [ecx+12],[c6],[c9]
+%endmacro
+
+%macro PIXEL23_81 0 ; (5*c5 + 3*c6) >> 3
+    Interp8 [ecx+12],eax,[c6]
+%endmacro
+
+%macro PIXEL23_83 0 ; (5*c6 + 3*c8) >> 3
+    Interp8 [ecx+12],[c6],[c8]
+%endmacro
+
+%macro PIXEL30_0 0 ; PIXEL30_*: write output dword [ecx+ebx] (row 3, col 0; needs ecx = edi+2*ebx from AUXADDRESS); "c5" below is the centre colour in eax; this variant stores c5
+    mov [ecx+ebx],eax
+%endmacro
+
+%macro PIXEL30_11 0 ; (3*c5 + c8) >> 2
+    Interp1 [ecx+ebx],eax,[c8]
+%endmacro
+
+%macro PIXEL30_12 0 ; (3*c5 + c4) >> 2
+    Interp1 [ecx+ebx],eax,[c4]
+%endmacro
+
+%macro PIXEL30_20 0 ; (2*c5 + c8 + c4) >> 2
+    Interp2 [ecx+ebx],eax,[c8],[c4]
+%endmacro
+
+%macro PIXEL30_50 0 ; (c8 + c4) >> 1
+    Interp5 [ecx+ebx],[c8],[c4]
+%endmacro
+
+%macro PIXEL30_80 0 ; (5*c5 + 3*c7) >> 3
+    Interp8 [ecx+ebx],eax,[c7]
+%endmacro
+
+%macro PIXEL30_81 0 ; (5*c5 + 3*c8) >> 3
+    Interp8 [ecx+ebx],eax,[c8]
+%endmacro
+
+%macro PIXEL30_82 0 ; (5*c5 + 3*c4) >> 3
+    Interp8 [ecx+ebx],eax,[c4]
+%endmacro
+
+%macro PIXEL31_0 0 ; PIXEL31_*: write output dword [ecx+ebx+4] (row 3, col 1; needs ecx = edi+2*ebx from AUXADDRESS); "c5" below is the centre colour in eax; this variant stores c5
+    mov [ecx+ebx+4],eax
+%endmacro
+
+%macro PIXEL31_10 0 ; (3*c5 + c7) >> 2
+    Interp1 [ecx+ebx+4],eax,[c7]
+%endmacro
+
+%macro PIXEL31_11 0 ; (3*c5 + c8) >> 2
+    Interp1 [ecx+ebx+4],eax,[c8]
+%endmacro
+
+%macro PIXEL31_13 0 ; (3*c8 + c5) >> 2
+    Interp1 [ecx+ebx+4],[c8],eax
+%endmacro
+
+%macro PIXEL31_21 0 ; (2*c8 + c5 + c4) >> 2
+    Interp2 [ecx+ebx+4],[c8],eax,[c4]
+%endmacro
+
+%macro PIXEL31_32 0 ; (7*c5 + c4) >> 3
+    Interp3 [ecx+ebx+4],[c4]
+%endmacro
+
+%macro PIXEL31_50 0 ; (c8 + c5) >> 1
+    Interp5 [ecx+ebx+4],[c8],eax
+%endmacro
+
+%macro PIXEL31_60 0 ; (5*c5 + 2*c8 + c4) >> 3
+    Interp6 [ecx+ebx+4],[c8],[c4]
+%endmacro
+
+%macro PIXEL31_61 0 ; (5*c5 + 2*c8 + c7) >> 3
+    Interp6 [ecx+ebx+4],[c8],[c7]
+%endmacro
+
+%macro PIXEL31_81 0 ; (5*c5 + 3*c8) >> 3
+    Interp8 [ecx+ebx+4],eax,[c8]
+%endmacro
+
+%macro PIXEL31_83 0 ; (5*c8 + 3*c4) >> 3
+    Interp8 [ecx+ebx+4],[c8],[c4]
+%endmacro
+
+%macro PIXEL32_0 0 ; PIXEL32_*: write output dword [ecx+ebx+8] (row 3, col 2; needs ecx = edi+2*ebx from AUXADDRESS); "c5" below is the centre colour in eax; this variant stores c5
+    mov [ecx+ebx+8],eax
+%endmacro
+
+%macro PIXEL32_10 0 ; (3*c5 + c9) >> 2
+    Interp1 [ecx+ebx+8],eax,[c9]
+%endmacro
+
+%macro PIXEL32_12 0 ; (3*c5 + c8) >> 2
+    Interp1 [ecx+ebx+8],eax,[c8]
+%endmacro
+
+%macro PIXEL32_14 0 ; (3*c8 + c5) >> 2
+    Interp1 [ecx+ebx+8],[c8],eax
+%endmacro
+
+%macro PIXEL32_21 0 ; (2*c8 + c5 + c6) >> 2
+    Interp2 [ecx+ebx+8],[c8],eax,[c6]
+%endmacro
+
+%macro PIXEL32_31 0 ; (7*c5 + c6) >> 3
+    Interp3 [ecx+ebx+8],[c6]
+%endmacro
+
+%macro PIXEL32_50 0 ; (c8 + c5) >> 1
+    Interp5 [ecx+ebx+8],[c8],eax
+%endmacro
+
+%macro PIXEL32_60 0 ; (5*c5 + 2*c8 + c6) >> 3
+    Interp6 [ecx+ebx+8],[c8],[c6]
+%endmacro
+
+%macro PIXEL32_61 0 ; (5*c5 + 2*c8 + c9) >> 3
+    Interp6 [ecx+ebx+8],[c8],[c9]
+%endmacro
+
+%macro PIXEL32_82 0 ; (5*c5 + 3*c8) >> 3
+    Interp8 [ecx+ebx+8],eax,[c8]
+%endmacro
+
+%macro PIXEL32_83 0 ; (5*c8 + 3*c6) >> 3
+    Interp8 [ecx+ebx+8],[c8],[c6]
+%endmacro
+
+%macro PIXEL33_0 0 ; PIXEL33_*: write output dword [ecx+ebx+12] (row 3, col 3; needs ecx = edi+2*ebx from AUXADDRESS); "c5" below is the centre colour in eax; this variant stores c5
+    mov [ecx+ebx+12],eax
+%endmacro
+
+%macro PIXEL33_11 0 ; (3*c5 + c6) >> 2
+    Interp1 [ecx+ebx+12],eax,[c6]
+%endmacro
+
+%macro PIXEL33_12 0 ; (3*c5 + c8) >> 2
+    Interp1 [ecx+ebx+12],eax,[c8]
+%endmacro
+
+%macro PIXEL33_20 0 ; (2*c5 + c8 + c6) >> 2
+    Interp2 [ecx+ebx+12],eax,[c8],[c6]
+%endmacro
+
+%macro PIXEL33_50 0 ; (c8 + c6) >> 1
+    Interp5 [ecx+ebx+12],[c8],[c6]
+%endmacro
+
+%macro PIXEL33_80 0 ; (5*c5 + 3*c9) >> 3
+    Interp8 [ecx+ebx+12],eax,[c9]
+%endmacro
+
+%macro PIXEL33_81 0 ; (5*c5 + 3*c6) >> 3
+    Interp8 [ecx+ebx+12],eax,[c6]
+%endmacro
+
+%macro PIXEL33_82 0 ; (5*c5 + 3*c8) >> 3
+    Interp8 [ecx+ebx+12],eax,[c8]
+%endmacro
+
+inbuffer     equ 8 ; [ebp+8]: first stack argument of _hq4x_32 (loaded into esi); offsets are relative to ebp after push ebp / mov ebp,esp
+outbuffer    equ 12 ; [ebp+12]: second argument, loaded into edi
+Xres         equ 16 ; [ebp+16]: input width in pixels (doubled to get the byte offset of the next input row)
+Yres         equ 20 ; [ebp+20]: number of input rows (copied to linesleft)
+pitch        equ 24 ; [ebp+24]: loaded into ebx and used as the byte stride between output rows
+
+_hq4x_32:
+    push ebp
+    mov ebp,esp
+    pushad
+
+    mov     esi,[ebp+inbuffer]
+    mov     edi,[ebp+outbuffer]
+    mov     edx,[ebp+Yres]
+    mov     [linesleft],edx
+    mov     ebx,[ebp+Xres]
+    shl     ebx,1
+    mov     dword[prevline],0
+    mov     dword[nextline],ebx
+.loopy
+    mov     ecx,[ebp+Xres]
+    sub     ecx,2                 ; x={Xres-2, Xres-1} are special cases.
+    mov     dword[xcounter],ecx
+    ; x=0 - special case
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx]
+    movq    mm6,[esi]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx]
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w1],edx
+    mov     [w2],edx
+    shr     eax,16
+    mov     [w3],eax
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx
+    mov     [w5],edx
+    shr     eax,16
+    mov     [w6],eax
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx
+    mov     [w8],edx
+    shr     eax,16
+    mov     [w9],eax
+    jmp     .flags
+.loopx
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-2]
+    movq    mm6,[esi-2]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-2]
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w1],edx
+    shr     eax,16
+    mov     [w2],eax
+    psrlq   mm5,32
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w3],edx
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx
+    shr     eax,16
+    mov     [w5],eax
+    psrlq   mm6,32
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w6],edx
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx
+    shr     eax,16
+    mov     [w8],eax
+    psrlq   mm7,32
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w9],edx
+.flags
+    mov     ebx,_RGBtoYUV
+    mov     eax,[w5]
+    xor     ecx,ecx
+    movd    mm5,[ebx+eax*4]
+    mov     dword[cross],0
+
+    mov     edx,[w2]
+    cmp     eax,edx
+    je      .noflag2
+    or      dword[cross],1
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag2
+    or      ecx,2
+.noflag2
+    mov     edx,[w4]
+    cmp     eax,edx
+    je      .noflag4
+    or      dword[cross],2
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag4
+    or      ecx,8
+.noflag4
+    mov     edx,[w6]
+    cmp     eax,edx
+    je      .noflag6
+    or      dword[cross],4
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag6
+    or      ecx,16
+.noflag6
+    mov     edx,[w8]
+    cmp     eax,edx
+    je      .noflag8
+    or      dword[cross],8
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag8
+    or      ecx,64
+.noflag8
+    cmp     dword[cross],0
+    jnz     .testflag1
+    mov     ebx,_LUT16to32
+    mov     eax,[ebx+eax*4]
+    mov     ebx,[ebp+pitch]
+    AUXADDRESS
+    mov     [edi],eax
+    mov     [edi+4],eax
+    mov     [edi+8],eax
+    mov     [edi+12],eax
+    mov     [edi+ebx],eax
+    mov     [edi+ebx+4],eax
+    mov     [edi+ebx+8],eax
+    mov     [edi+ebx+12],eax
+    mov     [ecx],eax
+    mov     [ecx+4],eax
+    mov     [ecx+8],eax
+    mov     [ecx+12],eax
+    mov     [ecx+ebx],eax
+    mov     [ecx+ebx+4],eax
+    mov     [ecx+ebx+8],eax
+    mov     [ecx+ebx+12],eax
+    jmp     .loopx_end
+.testflag1
+    mov     edx,[w1]
+    cmp     eax,edx
+    je      .noflag1
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag1
+    or      ecx,1
+.noflag1
+    mov     edx,[w3]
+    cmp     eax,edx
+    je      .noflag3
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag3
+    or      ecx,4
+.noflag3
+    mov     edx,[w7]
+    cmp     eax,edx
+    je      .noflag7
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag7
+    or      ecx,32
+.noflag7
+    mov     edx,[w9]
+    cmp     eax,edx
+    je      .noflag9
+    movq    mm1,mm5
+    movd    mm2,[ebx+edx*4]
+    psubusb mm1,mm2
+    psubusb mm2,mm5
+    por     mm1,mm2
+    psubusb mm1,[threshold]
+    movd    edx,mm1
+    test    edx,edx
+    jz      .noflag9
+    or      ecx,128
+.noflag9
+    mov  ebx,_LUT16to32
+    mov  eax,[ebx+eax*4]
+    mov  edx,[w2]
+    mov  edx,[ebx+edx*4]
+    mov  [c2],edx
+    mov  edx,[w4]
+    mov  edx,[ebx+edx*4]
+    mov  [c4],edx
+    mov  edx,[w6]
+    mov  edx,[ebx+edx*4]
+    mov  [c6],edx
+    mov  edx,[w8]
+    mov  edx,[ebx+edx*4]
+    mov  [c8],edx
+    test ecx,0x005A
+    jz  .switch
+    mov  edx,[w1]
+    mov  edx,[ebx+edx*4]
+    mov  [c1],edx
+    mov  edx,[w3]
+    mov  edx,[ebx+edx*4]
+    mov  [c3],edx
+    mov  edx,[w7]
+    mov  edx,[ebx+edx*4]
+    mov  [c7],edx
+    mov  edx,[w9]
+    mov  edx,[ebx+edx*4]
+    mov  [c9],edx
+.switch
+    mov  ebx,[ebp+pitch]
+    jmp  [FuncTable+ecx*4]
+
+..@flag0
+..@flag1
+..@flag4
+..@flag32
+..@flag128
+..@flag5
+..@flag132
+..@flag160
+..@flag33
+..@flag129
+..@flag36
+..@flag133
+..@flag164
+..@flag161
+..@flag37
+..@flag165
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag2
+..@flag34
+..@flag130
+..@flag162
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag16
+..@flag17
+..@flag48
+..@flag49
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag64
+..@flag65
+..@flag68
+..@flag69
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag8
+..@flag12
+..@flag136
+..@flag140
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag3
+..@flag35
+..@flag131
+..@flag163
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag6
+..@flag38
+..@flag134
+..@flag166
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag20
+..@flag21
+..@flag52
+..@flag53
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag144
+..@flag145
+..@flag176
+..@flag177
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag192
+..@flag193
+..@flag196
+..@flag197
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag96
+..@flag97
+..@flag100
+..@flag101
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag40
+..@flag44
+..@flag168
+..@flag172
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag9
+..@flag13
+..@flag137
+..@flag141
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag18
+..@flag50
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_50,PIXEL03_50,PIXEL12_0,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag80
+..@flag81
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_61
+    PIXEL21_30
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag72
+..@flag76
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_50,PIXEL21_0,PIXEL30_50,PIXEL31_50
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag10
+..@flag138
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_50,PIXEL01_50,PIXEL10_50,PIXEL11_0
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag66
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag24
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag7
+..@flag39
+..@flag135
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag148
+..@flag149
+..@flag180
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end
+..@flag224
+..@flag228
+..@flag225
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end
+..@flag41
+..@flag169
+..@flag45
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag22
+..@flag54
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag208
+..@flag209
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag104
+..@flag108
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag11
+..@flag139
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag19
+..@flag51
+    AUXADDRESS
+    DiffOrNot w2,w6,PIXEL00_81,PIXEL01_31,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL00_12,PIXEL01_14,PIXEL02_83,PIXEL03_50,PIXEL12_70,PIXEL13_21
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end
+..@flag146
+..@flag178
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL23_32,PIXEL33_82,PIXEL02_21,PIXEL03_50,PIXEL12_70,PIXEL13_83,PIXEL23_13,PIXEL33_11
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    jmp .loopx_end
+..@flag84
+..@flag85
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    DiffOrNot w6,w8,PIXEL03_81,PIXEL13_31,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL03_12,PIXEL13_14,PIXEL22_70,PIXEL23_83,PIXEL32_21,PIXEL33_50
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end
+..@flag112
+..@flag113
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL30_82,PIXEL31_32,PIXEL32_10,PIXEL33_80,PIXEL22_70,PIXEL23_21,PIXEL30_11,PIXEL31_13,PIXEL32_83,PIXEL33_50
+    jmp .loopx_end
+..@flag200
+..@flag204
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL32_31,PIXEL33_81,PIXEL20_21,PIXEL21_70,PIXEL30_50,PIXEL31_83,PIXEL32_14,PIXEL33_12
+    PIXEL22_31
+    PIXEL23_81
+    jmp .loopx_end
+..@flag73
+..@flag77
+    AUXADDRESS
+    DiffOrNot w8,w4,PIXEL00_82,PIXEL10_32,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL00_11,PIXEL10_13,PIXEL20_83,PIXEL21_70,PIXEL30_50,PIXEL31_21
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag42
+..@flag170
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL20_31,PIXEL30_81,PIXEL00_50,PIXEL01_21,PIXEL10_83,PIXEL11_70,PIXEL20_14,PIXEL30_12
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag14
+..@flag142
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL02_32,PIXEL03_82,PIXEL10_10,PIXEL11_30,PIXEL00_50,PIXEL01_83,PIXEL02_13,PIXEL03_11,PIXEL10_21,PIXEL11_70
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end
+..@flag67
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end
+..@flag70    ; case 70 = 01000110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag28    ; case 28 = 00011100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag152    ; case 152 = 10011000b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag194    ; case 194 = 11000010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag98    ; case 98 = 01100010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag56    ; case 56 = 00111000b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag25    ; case 25 = 00011001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag26    ; case 26 = 00011010b; no code of its own, shares the body below
+..@flag31    ; case 31 = 00011111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag82    ; case 82 = 01010010b; no code of its own, shares the body below
+..@flag214    ; case 214 = 11010110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag88    ; case 88 = 01011000b; no code of its own, shares the body below
+..@flag248    ; case 248 = 11111000b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL21_0
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    jmp .loopx_end    ; end of this case
+..@flag74    ; case 74 = 01001010b; no code of its own, shares the body below
+..@flag107    ; case 107 = 01101011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag27    ; case 27 = 00011011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag86    ; case 86 = 01010110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag216    ; case 216 = 11011000b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag106    ; case 106 = 01101010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag30    ; case 30 = 00011110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag210    ; case 210 = 11010010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag120    ; case 120 = 01111000b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag75    ; case 75 = 01001011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag29    ; case 29 = 00011101b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag198    ; case 198 = 11000110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag184    ; case 184 = 10111000b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag99    ; case 99 = 01100011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag57    ; case 57 = 00111001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag71    ; case 71 = 01000111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag156    ; case 156 = 10011100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag226    ; case 226 = 11100010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag60    ; case 60 = 00111100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag195    ; case 195 = 11000011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag102    ; case 102 = 01100110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag153    ; case 153 = 10011001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag58    ; case 58 = 00111010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag83    ; case 83 = 01010011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_61
+    PIXEL21_30
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag92    ; case 92 = 01011100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag202    ; case 202 = 11001010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag78    ; case 78 = 01001110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag154    ; case 154 = 10011010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag114    ; case 114 = 01110010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_82
+    PIXEL31_32
+    jmp .loopx_end    ; end of this case
+..@flag89    ; case 89 = 01011001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag90    ; case 90 = 01011010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag55    ; case 55 = 00110111b; no code of its own, shares the body below
+..@flag23    ; case 23 = 00010111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w2,w6,PIXEL00_81,PIXEL01_31,PIXEL02_0,PIXEL03_0,PIXEL12_0,PIXEL13_0,PIXEL00_12,PIXEL01_14,PIXEL02_83,PIXEL03_50,PIXEL12_70,PIXEL13_21    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag182    ; case 182 = 10110110b; no code of its own, shares the body below
+..@flag150    ; case 150 = 10010110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL12_0,PIXEL13_0,PIXEL23_32,PIXEL33_82,PIXEL02_21,PIXEL03_50,PIXEL12_70,PIXEL13_83,PIXEL23_13,PIXEL33_11    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    jmp .loopx_end    ; end of this case
+..@flag213    ; case 213 = 11010101b; no code of its own, shares the body below
+..@flag212    ; case 212 = 11010100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    DiffOrNot w6,w8,PIXEL03_81,PIXEL13_31,PIXEL22_0,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL03_12,PIXEL13_14,PIXEL22_70,PIXEL23_83,PIXEL32_21,PIXEL33_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag241    ; case 241 = 11110001b; no code of its own, shares the body below
+..@flag240    ; case 240 = 11110000b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_0,PIXEL23_0,PIXEL30_82,PIXEL31_32,PIXEL32_0,PIXEL33_0,PIXEL22_70,PIXEL23_21,PIXEL30_11,PIXEL31_13,PIXEL32_83,PIXEL33_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    jmp .loopx_end    ; end of this case
+..@flag236    ; case 236 = 11101100b; no code of its own, shares the body below
+..@flag232    ; case 232 = 11101000b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL21_0,PIXEL30_0,PIXEL31_0,PIXEL32_31,PIXEL33_81,PIXEL20_21,PIXEL21_70,PIXEL30_50,PIXEL31_83,PIXEL32_14,PIXEL33_12    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL22_31
+    PIXEL23_81
+    jmp .loopx_end    ; end of this case
+..@flag109    ; case 109 = 01101101b; no code of its own, shares the body below
+..@flag105    ; case 105 = 01101001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w8,w4,PIXEL00_82,PIXEL10_32,PIXEL20_0,PIXEL21_0,PIXEL30_0,PIXEL31_0,PIXEL00_11,PIXEL10_13,PIXEL20_83,PIXEL21_70,PIXEL30_50,PIXEL31_21    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag171    ; case 171 = 10101011b; no code of its own, shares the body below
+..@flag43    ; case 43 = 00101011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL11_0,PIXEL20_31,PIXEL30_81,PIXEL00_50,PIXEL01_21,PIXEL10_83,PIXEL11_70,PIXEL20_14,PIXEL30_12    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag143    ; case 143 = 10001111b; no code of its own, shares the body below
+..@flag15    ; case 15 = 00001111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL02_32,PIXEL03_82,PIXEL10_0,PIXEL11_0,PIXEL00_50,PIXEL01_83,PIXEL02_13,PIXEL03_11,PIXEL10_21,PIXEL11_70    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag124    ; case 124 = 01111100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag203    ; case 203 = 11001011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag62    ; case 62 = 00111110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag211    ; case 211 = 11010011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag118    ; case 118 = 01110110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag217    ; case 217 = 11011001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag110    ; case 110 = 01101110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag155    ; case 155 = 10011011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag188    ; case 188 = 10111100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag185    ; case 185 = 10111001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag61    ; case 61 = 00111101b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag157    ; case 157 = 10011101b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag103    ; case 103 = 01100111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag227    ; case 227 = 11100011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag230    ; case 230 = 11100110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag199    ; case 199 = 11000111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag220    ; case 220 = 11011100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    jmp .loopx_end    ; end of this case
+..@flag158    ; case 158 = 10011110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag234    ; case 234 = 11101010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_61
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag242    ; case 242 = 11110010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_82
+    PIXEL31_32
+    jmp .loopx_end    ; end of this case
+..@flag59    ; case 59 = 00111011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL11_0
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag121    ; case 121 = 01111001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL21_0
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag87    ; case 87 = 01010111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_0
+    PIXEL20_61
+    PIXEL21_30
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag79    ; case 79 = 01001111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL11_0
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag122    ; case 122 = 01111010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag94    ; case 94 = 01011110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL12_0
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag218    ; case 218 = 11011010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    jmp .loopx_end    ; end of this case
+..@flag91    ; case 91 = 01011011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL11_0
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag229    ; case 229 = 11100101b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag167    ; case 167 = 10100111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag173    ; case 173 = 10101101b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag181    ; case 181 = 10110101b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag186    ; case 186 = 10111010b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag115    ; case 115 = 01110011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    PIXEL30_82
+    PIXEL31_32
+    jmp .loopx_end    ; end of this case
+..@flag93    ; case 93 = 01011101b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag206    ; case 206 = 11001110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag205    ; case 205 = 11001101b; no code of its own, shares the body below
+..@flag201    ; case 201 = 11001001b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    DiffOrNot w8,w4,PIXEL20_10,PIXEL21_30,PIXEL30_80,PIXEL31_10,PIXEL20_12,PIXEL21_0,PIXEL30_20,PIXEL31_11    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag174    ; case 174 = 10101110b; no code of its own, shares the body below
+..@flag46    ; case 46 = 00101110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_80,PIXEL01_10,PIXEL10_10,PIXEL11_30,PIXEL00_20,PIXEL01_12,PIXEL10_11,PIXEL11_0    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end    ; end of this case
+..@flag179    ; case 179 = 10110011b; no code of its own, shares the body below
+..@flag147    ; case 147 = 10010011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    DiffOrNot w2,w6,PIXEL02_10,PIXEL03_80,PIXEL12_30,PIXEL13_10,PIXEL02_11,PIXEL03_20,PIXEL12_0,PIXEL13_12    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag117    ; case 117 = 01110101b; no code of its own, shares the body below
+..@flag116    ; case 116 = 01110100b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_30,PIXEL23_10,PIXEL32_10,PIXEL33_80,PIXEL22_0,PIXEL23_11,PIXEL32_12,PIXEL33_20    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL30_82
+    PIXEL31_32
+    jmp .loopx_end    ; end of this case
+..@flag189    ; case 189 = 10111101b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; end of this case
+..@flag231    ; case 231 = 11100111b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; end of this case
+..@flag126    ; case 126 = 01111110b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; end of this case
+..@flag219    ; case 219 = 11011011b; bit meaning is defined by the dispatch code (not shown here)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50    ; DiffOrNot a,b,list1,list2: presumably list1 if a,b differ, else list2 (macro not shown here)
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; end of this case
+..@flag125    ; FuncTable entry 125: one DiffOrNot test, (w8,w4), covering six PIXEL slots
+    AUXADDRESS
+    DiffOrNot w8,w4,PIXEL00_82,PIXEL10_32,PIXEL20_0,PIXEL21_0,PIXEL30_0,PIXEL31_0,PIXEL00_11,PIXEL10_13,PIXEL20_83,PIXEL21_70,PIXEL30_50,PIXEL31_21
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag221    ; FuncTable entry 221: one DiffOrNot test, (w6,w8), covering six PIXEL slots
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    DiffOrNot w6,w8,PIXEL03_81,PIXEL13_31,PIXEL22_0,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL03_12,PIXEL13_14,PIXEL22_70,PIXEL23_83,PIXEL32_21,PIXEL33_50
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag207    ; FuncTable entry 207: one DiffOrNot test, (w4,w2), covering six PIXEL slots
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL02_32,PIXEL03_82,PIXEL10_0,PIXEL11_0,PIXEL00_50,PIXEL01_83,PIXEL02_13,PIXEL03_11,PIXEL10_21,PIXEL11_70
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_31
+    PIXEL23_81
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag238    ; FuncTable entry 238: one DiffOrNot test, (w8,w4), covering six PIXEL slots
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL21_0,PIXEL30_0,PIXEL31_0,PIXEL32_31,PIXEL33_81,PIXEL20_21,PIXEL21_70,PIXEL30_50,PIXEL31_83,PIXEL32_14,PIXEL33_12
+    PIXEL22_31
+    PIXEL23_81
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag190    ; FuncTable entry 190: one DiffOrNot test, (w2,w6), covering six PIXEL slots
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL12_0,PIXEL13_0,PIXEL23_32,PIXEL33_82,PIXEL02_21,PIXEL03_50,PIXEL12_70,PIXEL13_83,PIXEL23_13,PIXEL33_11
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag187    ; FuncTable entry 187: one DiffOrNot test, (w4,w2), covering six PIXEL slots
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL11_0,PIXEL20_31,PIXEL30_81,PIXEL00_50,PIXEL01_21,PIXEL10_83,PIXEL11_70,PIXEL20_14,PIXEL30_12
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag243    ; FuncTable entry 243: one DiffOrNot test, (w6,w8), covering six PIXEL slots
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_82
+    PIXEL21_32
+    DiffOrNot w6,w8,PIXEL22_0,PIXEL23_0,PIXEL30_82,PIXEL31_32,PIXEL32_0,PIXEL33_0,PIXEL22_70,PIXEL23_21,PIXEL30_11,PIXEL31_13,PIXEL32_83,PIXEL33_50
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag119    ; FuncTable entry 119: one DiffOrNot test, (w2,w6), covering six PIXEL slots
+    AUXADDRESS
+    DiffOrNot w2,w6,PIXEL00_81,PIXEL01_31,PIXEL02_0,PIXEL03_0,PIXEL12_0,PIXEL13_0,PIXEL00_12,PIXEL01_14,PIXEL02_83,PIXEL03_50,PIXEL12_70,PIXEL13_21
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag237    ; FuncTable entries 233 and 237 share this body
+..@flag233    ; one DiffOrNot test: (w8,w4), PIXEL30 only
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_60
+    PIXEL03_20
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_70
+    PIXEL13_60
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_31
+    PIXEL23_81
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag175    ; FuncTable entries 47 and 175 share this body
+..@flag47    ; one DiffOrNot test: (w4,w2), PIXEL00 only
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_70
+    PIXEL23_60
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_60
+    PIXEL33_20
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag183    ; FuncTable entries 151 and 183 share this body
+..@flag151    ; one DiffOrNot test: (w2,w6), PIXEL03 only
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_60
+    PIXEL21_70
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_20
+    PIXEL31_60
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag245    ; FuncTable entries 244 and 245 share this body
+..@flag244    ; one DiffOrNot test: (w6,w8), PIXEL33 only
+    AUXADDRESS
+    PIXEL00_20
+    PIXEL01_60
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_60
+    PIXEL11_70
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag250    ; FuncTable entry 250: DiffOrNot tests on (w8,w4) and (w6,w8)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag123    ; FuncTable entry 123: DiffOrNot tests on (w4,w2) and (w8,w4)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag95    ; FuncTable entry 95: DiffOrNot tests on (w4,w2) and (w2,w6)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_80
+    PIXEL31_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag222    ; FuncTable entry 222: DiffOrNot tests on (w2,w6) and (w6,w8)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag252    ; FuncTable entry 252: DiffOrNot tests on (w8,w4) and (w6,w8)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_61
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_31
+    PIXEL13_31
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag249    ; FuncTable entry 249: DiffOrNot tests on (w6,w8) and (w8,w4)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_61
+    PIXEL03_80
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag235    ; FuncTable entry 235: DiffOrNot tests on (w4,w2) and (w8,w4)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_61
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_31
+    PIXEL23_81
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag111    ; FuncTable entry 111: DiffOrNot tests on (w4,w2) and (w8,w4)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_32
+    PIXEL13_82
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_61
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag63    ; FuncTable entry 63: DiffOrNot tests on (w4,w2) and (w2,w6)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_61
+    PIXEL33_80
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag159    ; FuncTable entry 159: DiffOrNot tests on (w4,w2) and (w2,w6)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_80
+    PIXEL31_61
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag215    ; FuncTable entry 215: DiffOrNot tests on (w2,w6) and (w6,w8)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_61
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag246    ; FuncTable entry 246: DiffOrNot tests on (w2,w6) and (w6,w8)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_61
+    PIXEL11_30
+    PIXEL12_0
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag254    ; FuncTable entry 254: DiffOrNot tests on (w2,w6), (w8,w4) and (w6,w8)
+    AUXADDRESS
+    PIXEL00_80
+    PIXEL01_10
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_10
+    PIXEL11_30
+    PIXEL12_0
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag253    ; FuncTable entry 253: DiffOrNot tests on (w8,w4) and (w6,w8)
+    AUXADDRESS
+    PIXEL00_82
+    PIXEL01_82
+    PIXEL02_81
+    PIXEL03_81
+    PIXEL10_32
+    PIXEL11_32
+    PIXEL12_31
+    PIXEL13_31
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_0
+    PIXEL23_0
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag251    ; FuncTable entry 251: DiffOrNot tests on (w4,w2), (w6,w8) and (w8,w4)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_10
+    PIXEL03_80
+    PIXEL11_0
+    PIXEL12_30
+    PIXEL13_10
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag239    ; FuncTable entry 239: DiffOrNot tests on (w4,w2) and (w8,w4)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_32
+    PIXEL03_82
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_32
+    PIXEL13_82
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_31
+    PIXEL23_81
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_31
+    PIXEL33_81
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag127    ; FuncTable entry 127: DiffOrNot tests on (w4,w2), (w2,w6) and (w8,w4)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    DiffOrNot w2,w6,PIXEL02_0,PIXEL03_0,PIXEL13_0,PIXEL02_50,PIXEL03_50,PIXEL13_50
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_0
+    DiffOrNot w8,w4,PIXEL20_0,PIXEL30_0,PIXEL31_0,PIXEL20_50,PIXEL30_50,PIXEL31_50
+    PIXEL21_0
+    PIXEL22_30
+    PIXEL23_10
+    PIXEL32_10
+    PIXEL33_80
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag191    ; FuncTable entry 191: DiffOrNot tests on (w4,w2) and (w2,w6)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_31
+    PIXEL21_31
+    PIXEL22_32
+    PIXEL23_32
+    PIXEL30_81
+    PIXEL31_81
+    PIXEL32_82
+    PIXEL33_82
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag223    ; FuncTable entry 223: DiffOrNot tests on (w4,w2), (w2,w6) and (w6,w8)
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_0,PIXEL10_0,PIXEL00_50,PIXEL01_50,PIXEL10_50
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_10
+    PIXEL21_30
+    PIXEL22_0
+    DiffOrNot w6,w8,PIXEL23_0,PIXEL32_0,PIXEL33_0,PIXEL23_50,PIXEL32_50,PIXEL33_50
+    PIXEL30_80
+    PIXEL31_10
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag247    ; FuncTable entry 247: DiffOrNot tests on (w2,w6) and (w6,w8)
+    AUXADDRESS
+    PIXEL00_81
+    PIXEL01_31
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_81
+    PIXEL11_31
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_82
+    PIXEL21_32
+    PIXEL22_0
+    PIXEL23_0
+    PIXEL30_82
+    PIXEL31_32
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+..@flag255    ; FuncTable entry 255 (last): DiffOrNot tests on (w4,w2), (w2,w6), (w8,w4), (w6,w8) -- one per corner slot
+    AUXADDRESS
+    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    PIXEL01_0
+    PIXEL02_0
+    DiffOrNot w2,w6,PIXEL03_0,PIXEL03_20
+    PIXEL10_0
+    PIXEL11_0
+    PIXEL12_0
+    PIXEL13_0
+    PIXEL20_0
+    PIXEL21_0
+    PIXEL22_0
+    PIXEL23_0
+    DiffOrNot w8,w4,PIXEL30_0,PIXEL30_20
+    PIXEL31_0
+    PIXEL32_0
+    DiffOrNot w6,w8,PIXEL33_0,PIXEL33_20
+    jmp .loopx_end    ; shared tail: steps esi/edi, counts down xcounter
+
+.loopx_end    ; common exit of every ..@flagN handler: step to the next column or handle the row end
+    add     esi,2    ; NOTE(review): presumably one 16-bit source pixel -- confirm against the loop head
+    add     edi,16    ; NOTE(review): presumably four output pixels of the 4x4 block -- confirm
+    dec     dword[xcounter]
+    jle     .xres_2    ; counter <= 0: right-edge special cases or end of row
+    jmp     .loopx    ; counter > 0: ordinary column, loop head is above this chunk
+.xres_2
+    ; x=Xres-2 - special case
+    jl      .xres_1    ; flags are still those of the dec above; only counter == 0 falls through
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-4]    ; 8-byte load starting 4 bytes before esi, so it ends at esi+4
+    movq    mm6,[esi-4]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-4]
+    psrlq   mm5,16    ; drop the lowest word; the words at esi-2, esi, esi+2 remain
+    psrlq   mm6,16
+    psrlq   mm7,16
+    movd    eax,mm5
+    movzx   edx,ax
+    mov     [w1],edx    ; w1..w3: previous-line words at esi-2, esi, esi+2
+    shr     eax,16
+    mov     [w2],eax
+    psrlq   mm5,32
+    movd    eax,mm5
+    mov     [w3],eax
+    movd    eax,mm6
+    movzx   edx,ax
+    mov     [w4],edx    ; w4..w6: same three columns on the current line
+    shr     eax,16
+    mov     [w5],eax
+    psrlq   mm6,32
+    movd    eax,mm6
+    mov     [w6],eax
+    movd    eax,mm7
+    movzx   edx,ax
+    mov     [w7],edx    ; w7..w9: same three columns on the next line
+    shr     eax,16
+    mov     [w8],eax
+    psrlq   mm7,32
+    movd    eax,mm7
+    mov     [w9],eax
+    jmp     .flags    ; .flags is above this chunk
+.xres_1
+    cmp     dword[xcounter],-1
+    jl      .nexty    ; counter below -1: the row is finished
+    ; x=Xres-1 - special case
+    mov     ebx,[prevline]
+    movq    mm5,[esi+ebx-6]    ; 8-byte load ends exactly at esi+2: nothing beyond the current word is read
+    movq    mm6,[esi-6]
+    mov     ebx,[nextline]
+    movq    mm7,[esi+ebx-6]
+    psrlq   mm5,32    ; keep only the words at esi-2 and esi
+    psrlq   mm6,32
+    psrlq   mm7,32
+    movd    eax,mm5
+    movzx   edx,ax  
+    mov     [w1],edx
+    shr     eax,16
+    mov     [w2],eax
+    mov     [w3],eax    ; no column to the right: w3 repeats w2
+    movd    eax,mm6
+    movzx   edx,ax  
+    mov     [w4],edx
+    shr     eax,16
+    mov     [w5],eax
+    mov     [w6],eax    ; w6 repeats w5
+    movd    eax,mm7
+    movzx   edx,ax  
+    mov     [w7],edx
+    shr     eax,16
+    mov     [w8],eax
+    mov     [w9],eax    ; w9 repeats w8
+    jmp     .flags
+.nexty
+    add     edi,ebx    ; ebx is not loaded on the path into .nexty within this tail;
+    add     edi,ebx    ; NOTE(review): presumably the destination pitch left by earlier code -- confirm.
+    add     edi,ebx    ; Added three times in total.
+    dec     dword[linesleft]
+    jz      .fin
+    mov     ebx,[ebp+Xres]
+    shl     ebx,1    ; ebx = Xres * 2
+    cmp     dword[linesleft],1
+    je      .lastline
+    mov     dword[nextline],ebx    ; nextline = +Xres*2
+    neg     ebx
+    mov     dword[prevline],ebx    ; prevline = -Xres*2
+    jmp     .loopy    ; .loopy is above this chunk
+.lastline
+    mov     dword[nextline],0    ; last line: the "next line" offset is 0, i.e. the current line is reused
+    neg     ebx
+    mov     dword[prevline],ebx
+    jmp     .loopy
+.fin
+    emms    ; leave MMX state before returning
+    popad
+    mov esp,ebp
+    pop ebp
+    ret
+
+SECTION .data
+FuncTable    ; 256 code addresses, ..@flag0 .. ..@flag255 in index order (32 rows of 8); NOTE(review): presumably indexed by the flags value computed in .flags -- confirm
+    dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
+    dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
+    dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
+    dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
+    dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
+    dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
+    dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
+    dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
+    dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
+    dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
+    dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
+    dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
+    dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
+    dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
+    dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
+    dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
+    dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
+    dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
+    dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
+    dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
+    dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
+    dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
+    dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
+    dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
+    dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
+    dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
+    dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
+    dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
+    dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
+    dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
+    dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
+    dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+
+
index f949526afd9b9e4221ee56cfda059c6068d01e41..5dab8ef624409dc379307eecc46ce46ac91733d6 100755 (executable)
@@ -123,7 +123,7 @@ static int testwritewatch (void)
 end:
     if (mem) {
        VirtualFree (mem, TEST_SIZE, MEM_DECOMMIT);
-       VirtualFree (mem, TEST_SIZE, MEM_RELEASE);
+       VirtualFree (mem, 0, MEM_RELEASE); /* with MEM_RELEASE the size argument must be 0; the whole reservation is freed */
     }
     return ret;
 }
@@ -229,7 +229,7 @@ int init_shm (void)
 {
     int i;
     LPVOID blah = NULL;
-    uae_u32 size, totalsize, z3size, natmemsize;
+    uae_u32 size, totalsize, z3size, natmemsize, rtgbarrier, rtgextra;
 
     if (natmem_offset)
        VirtualFree(natmem_offset, 0, MEM_RELEASE);
@@ -238,12 +238,16 @@ int init_shm (void)
 
     z3size = 0;
     size = 0x1000000;
+    rtgextra = 0; /* extra bytes reserved behind the RTG area; assigned inside the allocation loop */
+    rtgbarrier = si.dwPageSize; /* gap placed between the natmem area and the RTG area */
     if (currprefs.cpu_model >= 68020)
        size = 0x10000000;
     if (currprefs.z3fastmem_size) {
        z3size = currprefs.z3fastmem_size + (currprefs.z3fastmem_start -  0x10000000);
        if (currprefs.gfxmem_size)
-           size += 16 * 1024 * 1024;
+           rtgbarrier = 16 * 1024 * 1024; /* Z3 fast RAM plus RTG: 16 MB gap, kept separate from size now */
+    } else {
+       rtgbarrier = 0; /* no Z3 fast RAM: no gap */
     }
 
     totalsize = size + z3size + currprefs.gfxmem_size;
@@ -263,20 +267,24 @@ int init_shm (void)
        shmids[i].name[0] = 0;
     }
     natmemsize = size + z3size;
-    if (!currprefs.gfxmem_size) {
-       natmemsize += si.dwPageSize;
-    } else {
-       xfree (memwatchtable);
-       memwatchtable = 0;
+    xfree (memwatchtable); /* now dropped unconditionally, not only when RTG memory is configured */
+    memwatchtable = 0;
+    if (currprefs.gfxmem_size) {
        if (!memwatchok) {
            write_log ("GetWriteWatch() not supported, using guard pages, performance will be slower.\n");
            memwatchtable = xcalloc (currprefs.gfxmem_size / si.dwPageSize + 1, 1);
        }
     }
-
+restart: /* re-entered after RTG has been switched off because re-reserving the split layout failed */
     for (;;) {
        int change;
-       blah = VirtualAlloc (NULL, natmemsize, MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+       if (currprefs.gfxmem_size) {
+           rtgextra = si.dwPageSize; /* one extra page behind the RTG area */
+       } else {
+           rtgbarrier = 0; /* without RTG memory neither gap nor extra page is reserved */
+           rtgextra = 0;
+       }
+       blah = VirtualAlloc (NULL, natmemsize + rtgbarrier + currprefs.gfxmem_size + rtgextra + 16 * si.dwPageSize, MEM_RESERVE, PAGE_READWRITE); /* one reservation: natmem + gap + RTG + extra + 16 spare pages */
        if (blah)
            break;
        write_log ("NATMEM: %dM area failed to allocate, err=%d\n", natmemsize >> 20, GetLastError ());
@@ -288,10 +296,18 @@ int init_shm (void)
        }
     }
     natmem_offset = blah;
-    if (currprefs.gfxmem_size) {
-       p96mem_size = currprefs.gfxmem_size + si.dwPageSize;
-       p96mem_offset = VirtualAlloc (natmem_offset + size + z3size, p96mem_size,
-           MEM_RESERVE | (memwatchok ? MEM_WRITE_WATCH : 0), PAGE_READWRITE);
+    p96mem_size = currprefs.gfxmem_size;
+    if (p96mem_size) {
+       VirtualFree (natmem_offset, 0, MEM_RELEASE); /* give the single reservation back; it is re-reserved below as two pieces at the same address */
+       if (!VirtualAlloc (natmem_offset, natmemsize + rtgbarrier, MEM_RESERVE, PAGE_READWRITE)) { /* piece 1: natmem plus gap */
+           write_log ("VirtualAlloc() part 2 error %d. RTG disabled.\n", GetLastError ());
+           currprefs.gfxmem_size = changed_prefs.gfxmem_size = 0;
+           rtgbarrier = si.dwPageSize; /* NOTE(review): the restart loop resets rtgbarrier to 0 once gfxmem_size is 0 */
+           rtgextra = 0;
+           goto restart; /* redo the whole reservation without RTG */
+       }
+        p96mem_offset = VirtualAlloc (natmem_offset + natmemsize + rtgbarrier, p96mem_size + rtgextra, /* piece 2: RTG area directly behind the gap */
+           MEM_RESERVE | (memwatchok == 1 ? MEM_WRITE_WATCH : 0), PAGE_READWRITE); /* write-watch only when memwatchok is exactly 1 */
        if (!p96mem_offset) {
            currprefs.gfxmem_size = changed_prefs.gfxmem_size = 0;
            write_log ("NATMEM: failed to allocate special Picasso96 GFX RAM, err=%d\n", GetLastError ());
@@ -416,7 +432,6 @@ void *shmat(int shmid, void *shmaddr, int shmflg)
        if(!strcmp(shmids[shmid].name,"rtarea")) {
            shmaddr=natmem_offset + rtarea_base;
            got = TRUE;
-           size += BARRIER;
        }
        if(!strcmp(shmids[shmid].name,"fast")) {
            shmaddr=natmem_offset + 0x200000;
index aca3baaf9d1e22cd418ffe8da91c9874fc12f318..7b3399f0493ae4ba51c9d1b190d5762a8f181031 100755 (executable)
@@ -48,7 +48,7 @@
 
 #define USEDX 1
 
-static int hwsprite = 0;
+static int hwsprite;
 
 #include "registry.h"
 #include "dxwrap.h"
@@ -59,7 +59,7 @@ static int hwsprite = 0;
 int mman_GetWriteWatch (PVOID lpBaseAddress, SIZE_T dwRegionSize, PVOID *lpAddresses, PULONG_PTR lpdwCount, PULONG lpdwGranularity);
 void mman_ResetWatch (PVOID lpBaseAddress, SIZE_T dwRegionSize);
 
-//#define P96TRACING_ENABLED 1
+#define P96TRACING_ENABLED 0
 
 int p96hack_vpos, p96hack_vpos2, p96refresh_active;
 int have_done_picasso; /* For the JIT compiler */
@@ -74,7 +74,7 @@ int p96hsync_counter, palette_changed;
 #define P96TRACING_LEVEL 1
 #endif
 static void flushpixels(void);
-#ifdef P96TRACING_ENABLED
+#if P96TRACING_ENABLED
 #define P96TRACE(x) do { write_log x; } while(0)
 #define P96TRACE_SPR(x) do { write_log x; } while(0)
 #else
@@ -82,8 +82,6 @@ static void flushpixels(void);
 #define P96TRACE_SPR(x)
 #endif
 
-#define GetBytesPerPixel(x) GetBytesPerPixel2(x,__FILE__,__LINE__)
-
 static void REGPARAM2 gfxmem_lputx (uaecptr, uae_u32) REGPARAM;
 static void REGPARAM2 gfxmem_wputx (uaecptr, uae_u32) REGPARAM;
 static void REGPARAM2 gfxmem_bputx (uaecptr, uae_u32) REGPARAM;
@@ -187,7 +185,7 @@ static void endianswap (uae_u32 *vp, int bpp)
     }
 }
 
-#ifdef P96TRACING_ENABLED
+#if P96TRACING_ENABLED
 /*
 * Debugging dumps
 */
@@ -288,7 +286,7 @@ static void DumpTemplate (struct Template *tmp, unsigned long w, unsigned long h
 
 static void DumpLine( struct Line *line )
 {
-    if(line) {
+    if (line) {
        write_log ("Line->X = %d\n", line->X);
        write_log ("Line->Y = %d\n", line->Y);
        write_log ("Line->Length = %d\n", line->Length);
@@ -322,42 +320,30 @@ static void ShowSupportedResolutions (void)
 #endif
 }
 
-static uae_u8 GetBytesPerPixel2(uae_u32 RGBfmt, char *file, int line)
+static uae_u8 GetBytesPerPixel (uae_u32 RGBfmt) /* bytes per pixel for an RGBFB_* format value; 0 when the value is not listed */
 {
-    static BOOL bFailure = FALSE;
-
-    switch (RGBfmt) {
-    case RGBFB_CLUT:
-       return 1;
-
-    case RGBFB_A8R8G8B8:
-    case RGBFB_A8B8G8R8:
-    case RGBFB_R8G8B8A8:
-    case RGBFB_B8G8R8A8:
-       return 4;
-
-    case RGBFB_B8G8R8:
-    case RGBFB_R8G8B8:
-       return 3;
-
-    case RGBFB_R5G5B5:
-    case RGBFB_R5G6B5:
-    case RGBFB_R5G6B5PC:
-    case RGBFB_R5G5B5PC:
-    case RGBFB_B5G6R5PC:
-    case RGBFB_B5G5R5PC:
-       return 2;
-    default:
-       write_log ("ERROR - GetBytesPerPixel() from %s@%d was unsuccessful with 0x%x?!\n", file, line, RGBfmt);
-       if(!bFailure)
-       {
-           bFailure = TRUE;
-           return GetBytesPerPixel(picasso_vidinfo.rgbformat);
-       }
-       else
-       {
-           abort();
-       }
+    switch (RGBfmt) /* no default label: unlisted values fall out to the final return 0, without the old log/abort path */
+    {
+       case RGBFB_CLUT:
+           return 1; /* 8-bit colour-lookup format */
+
+       case RGBFB_A8R8G8B8:
+       case RGBFB_A8B8G8R8:
+       case RGBFB_R8G8B8A8:
+       case RGBFB_B8G8R8A8:
+           return 4; /* the four 32-bit orderings */
+
+       case RGBFB_B8G8R8:
+       case RGBFB_R8G8B8:
+           return 3; /* packed 24-bit */
+
+       case RGBFB_R5G5B5:
+       case RGBFB_R5G6B5:
+       case RGBFB_R5G6B5PC:
+       case RGBFB_R5G5B5PC:
+       case RGBFB_B5G6R5PC:
+       case RGBFB_B5G5R5PC:
+           return 2; /* all 15/16-bit variants */
     }
     return 0;
 }
@@ -460,7 +446,7 @@ static int CopyTemplateStructureA2U (uaecptr amigamemptr, struct Template *tmpl)
 
 static int CopyLineStructureA2U(uaecptr amigamemptr, struct Line *line)
 {
-    if(valid_address(amigamemptr, sizeof(struct Line))) {
+    if (valid_address(amigamemptr, sizeof(struct Line))) {
        line->X = get_word (amigamemptr + PSSO_Line_X);
        line->Y = get_word (amigamemptr + PSSO_Line_Y);
        line->Length = get_word (amigamemptr + PSSO_Line_Length);
@@ -509,34 +495,32 @@ STATIC_INLINE void do_fillrect_frame_buffer (struct RenderInfo *ri, int X, int Y
                                            int Width, int Height, uae_u32 Pen, int Bpp)
 {
     int cols;
-    uae_u32 *p;
     uae_u8 *src, *dst;
     int lines;
-    int bpr = ri->BytesPerRow / 4;
+    int bpr = ri->BytesPerRow; /* row stride in bytes: rows are stepped via the byte pointer dst, no longer via a uae_u32 pointer */
 
-    /* Do our virtual frame-buffer memory.  First, we do a single line fill by hand */
     dst = src = ri->Memory + X * Bpp + Y * ri->BytesPerRow;
     endianswap (&Pen, Bpp);
-    p = (uae_u32*)src;
     switch (Bpp)
     {
        case 1:
-           for (lines = 0; lines < Height; lines++, p += bpr) {
-               memset (p, Pen, Width);
+           for (lines = 0; lines < Height; lines++, dst += bpr) { /* one memset per row */
+               memset (dst, Pen, Width);
            }
        break;
        case 2:
            Pen |= Pen << 16;
-           for (lines = 0; lines < Height; lines++, p += bpr) {
+           for (lines = 0; lines < Height; lines++, dst += bpr) {
+               uae_u32 *p = (uae_u32*)dst; /* row-local cursor; each 32-bit store writes two 16-bit pixels */
                for (cols = 0; cols < Width / 2; cols++)
-                   p[cols] = Pen;
+                   *p++ = Pen;
                if (Width & 1)
-                   ((uae_u16*)(p + cols))[0] = Pen;
+                   ((uae_u16*)p)[0] = Pen; /* odd width: one trailing 16-bit pixel at the cursor */
            }
         break;
        case 3:
-           for (lines = 0; lines < Height; lines++, p += bpr) {
-               uae_u8 *d = (uae_u8*)p;
+           for (lines = 0; lines < Height; lines++, dst += bpr) {
+               uae_u8 *d = (uae_u8*)dst; /* byte cursor for the 3-bytes-per-pixel stores */
                for (cols = 0; cols < Width; cols++) {
                    *d++ = Pen >> 16;
                    *d++ = Pen >> 8;
@@ -545,11 +529,11 @@ STATIC_INLINE void do_fillrect_frame_buffer (struct RenderInfo *ri, int X, int Y
            }
        break;
        case 4:
-           for (lines = 0; lines < Height; lines++, p += bpr) {
+           for (lines = 0; lines < Height; lines++, dst += bpr) {
+               uae_u32 *p = (uae_u32*)dst; /* one 32-bit store per pixel */
                for (cols = 0; cols < Width; cols++)
-                   p[cols] = Pen;
+                   *p++ = Pen;
            }
-           return;
        break;
     }
 }
@@ -629,6 +613,7 @@ static void mouseupdate (void)
 void picasso_handle_vsync (void)
 {
     static int vsynccnt;
+    static int updatecnt;
     
     mouseupdate ();
     
@@ -637,8 +622,12 @@ void picasso_handle_vsync (void)
        if (vsynccnt < 2)
            return;
        vsynccnt = 0;
-    }    
-    flushpixels ();
+    }
+    updatecnt--; /* countdown to the next flush; updatecnt is a function-local static */
+    if (updatecnt <= 0) {
+       flushpixels ();
+       updatecnt = 1; /* reload value 1: the next call counts down to 0 and flushes again */
+    }
     gfx_unlock_picasso ();
 }
 }
 
@@ -874,7 +863,7 @@ STATIC_INLINE void do_blitrect_frame_buffer (struct RenderInfo *ri, struct
 {
 
     uae_u8 *src, *dst, *tmp, *tmp2, *tmp3;
-    uae_u8 Bpp = GetBytesPerPixel(ri->RGBFormat);
+    uae_u8 Bpp = GetBytesPerPixel (ri->RGBFormat);
     unsigned long total_width = width * Bpp;
     unsigned long linewidth = (total_width + 15) & ~15;
     unsigned long lines;
@@ -885,7 +874,7 @@ STATIC_INLINE void do_blitrect_frame_buffer (struct RenderInfo *ri, struct
        write_log ("WARNING - BlitRect() has mask 0x%x with Bpp %d.\n", mask, Bpp);
     }
 
-    P96TRACE (("(%dx%d)=(%dx%d)=(%dx%d)=%d ", srcx, srcy, dstx, dsty, width, height, opcode));
+    P96TRACE (("(%dx%d)=(%dx%d)=(%dx%d)=%d\n", srcx, srcy, dstx, dsty, width, height, opcode));
     if (mask == 0xFF || Bpp > 1) {
 
        if(opcode == BLIT_SRC) {
@@ -998,42 +987,6 @@ STATIC_INLINE void do_blitrect_frame_buffer (struct RenderInfo *ri, struct
     free (tmp3);
 }
 
-/*
-DrawLine:
-Synopsis: DrawLine(bi, ri, line, Mask, RGBFormat);
-Inputs: a0: struct BoardInfo *bi
-a1: struct RenderInfo *ri
-a2: struct Line *line
-d0.b: Mask
-d7.l: RGBFormat
-
-This function is used to paint a line on the board memory possibly using the blitter. It is called by Draw
-and obeyes the destination RGBFormat as well as ForeGround and BackGround pens and draw modes.
-*/
-#define P96_DRAWLINE
-uae_u32 REGPARAM2 picasso_DrawLine (struct regstruct *regs)
-{
-    uae_u32 result = 1;
-#ifdef P96_DRAWLINE
-    struct Line line;
-    struct RenderInfo ri;
-    uae_u8 Mask = m68k_dreg (regs, 0);
-    RGBFTYPE RGBFormat = m68k_dreg (regs, 7);
-
-    CopyRenderInfoStructureA2U(m68k_areg (regs, 1), &ri);
-    CopyLineStructureA2U(m68k_areg (regs, 2), &line);
-#if defined P96TRACING_ENABLED && P96TRACING_LEVEL > 0
-    DumpLine (&line);
-#endif
-#else
-    P96TRACE(("DrawLine() - not implemented!\n" ));
-#endif
-    write_log("drawline ****************************************************\n");
-    write_log ("%08x %08x %08x, %08x %08x\n", m68k_dreg(regs, 0), m68k_dreg(regs, 1), m68k_dreg(regs, 2), 
-       m68k_areg(regs, 0), m68k_areg(regs, 1));
-    return result;
-}
-
 /*
 SetSprite:
 Synopsis: SetSprite(bi, activate, RGBFormat);
@@ -1103,8 +1056,7 @@ uae_u32 REGPARAM2 picasso_SetSpriteColor (struct regstruct *regs)
     if (idx >= 4)
        return 0;
     cursorrgb[idx] = (red << 16) | (green << 8) | (blue << 0);
-    return 1;
-    P96TRACE_SPR (("SetSpriteColor(%08x,%d:%02X%02X%02X)\n", bi, idx, red, green, blue));
+    P96TRACE_SPR (("SetSpriteColor(%08x,%d:%02X%02X%02X). %x\n", bi, idx, red, green, blue, bi + PSSO_BoardInfo_MousePens));
     return 1;
 }
 
@@ -1151,7 +1103,7 @@ void picasso_putcursor (int sx, int sy, int sw, int sh)
     DWORD ddrval;
     LPDIRECTDRAWSURFACE7 dstsurf = dxdata.secondary;
     
-    if (cursorsurface == NULL || !cursorvisible)
+    if (cursorsurface == NULL || !cursorvisible || !hwsprite)
        return;
 
     if (remcursor_x + cursorwidth < sx)
@@ -1581,7 +1533,7 @@ static struct modeids mi[] =
    -1,-1,0
 };
 
-static int AssignModeID(int dm, int count, int *unkcnt)
+static int AssignModeID (int dm, int count, int *unkcnt)
 {
     int i, w, h;
 
@@ -1627,12 +1579,13 @@ void picasso96_alloc (TrapContext *ctx)
 {
     int i, j, size, cnt;
     int misscnt;
+    uaecptr rt;
     SYSTEM_INFO si;
 
     xfree (newmodes);
     newmodes = NULL;
     picasso96_amem = picasso96_amemend = 0;
-    if (currprefs.gfxmem_size == 0)
+    if (allocated_gfxmem == 0)
        return;
     misscnt = 0;
     cnt = 0;
@@ -1731,6 +1684,14 @@ void picasso96_alloc (TrapContext *ctx)
     gwwbufsize = allocated_gfxmem / gwwpagesize + 1;
     gwwpagemask = gwwpagesize - 1;
     gwwbuf = xmalloc (gwwbufsize * sizeof (void*));
+
+    /* put magic rtarea pointer to end of display ram */
+    put_long (p96ram_start + allocated_gfxmem - 12, 'UAE_'); /* leading marker, 12 bytes before the end; multi-character constant, value is implementation-defined in C */
+    rt = need_uae_boot_rom ();
+    if (rt)
+       rt += 0xff60; /* only a non-zero result gets the 0xff60 offset; 0 is stored as 0 */
+    put_long (p96ram_start + allocated_gfxmem - 8, rt); /* the pointer value, between the two markers */
+    put_long (p96ram_start + allocated_gfxmem - 4, '_UAE'); /* trailing marker in the last 4 bytes */
 }
 
 /****************************************
@@ -1784,48 +1745,6 @@ uae_u32 REGPARAM2 picasso_InitCard (struct regstruct *regs)
     put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxVerResolution + 6, truecolour.height);
     put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxVerResolution + 8, alphacolour.height);
 
-    put_long (AmigaBoardInfo + 0xaa, 1);
-    put_long (AmigaBoardInfo + 0xb2, 0);
-    put_long (AmigaBoardInfo + 0xae, 0);
-    put_long (AmigaBoardInfo + 0xfe, 1);
-    put_long (AmigaBoardInfo + 0x102, 1);
-    put_long (AmigaBoardInfo + 0x106, 1);
-    put_long (AmigaBoardInfo + 0x10a, 1);
-    put_long (AmigaBoardInfo + 0x10e, 1);
-    put_word (AmigaBoardInfo + 0xca, 0xffff);
-    put_word (AmigaBoardInfo + 0xcc, 0xffff);
-    put_word (AmigaBoardInfo + 0xce, 0xffff);
-    put_word (AmigaBoardInfo + 0xd0, 0xffff);
-    put_word (AmigaBoardInfo + 0xd2, 0xffff);
-    put_word (AmigaBoardInfo + 0xd4, 0xffff);
-    put_word (AmigaBoardInfo + 0xd6, 0xffff);
-    put_word (AmigaBoardInfo + 0xd8, 0xffff);
-    put_word (AmigaBoardInfo + 0xda, 0xffff);
-    put_word (AmigaBoardInfo + 0xdc, 0xffff);
-#if 0
-082CEC8E 257c 0000 0001 00aa      MOVE.L #$00000001,(A2, $00aa) == $0800be48
-082CEC96 257c 0000 0000 00b2      MOVE.L #$00000000,(A2, $00b2) == $0800be50
-082CEC9E 257c 0000 0000 00ae      MOVE.L #$00000000,(A2, $00ae) == $0800be4c
-082CECA6 00aa 0000 8002 00ba      OR.L #$00008002,(A2, $00ba) == $0800be58
-082CECCC 7001                     MOVE.L #$00000001,D0
-082CECCE 2540 00fe                MOVE.L D0,(A2, $00fe) == $0800be9c
-082CECD2 2540 0102                MOVE.L D0,(A2, $0102) == $0800bea0
-082CECD6 2540 0106                MOVE.L D0,(A2, $0106) == $0800bea4
-082CECDA 2540 010a                MOVE.L D0,(A2, $010a) == $0800bea8
-082CECDE 2540 010e                MOVE.L D0,(A2, $010e) == $0800beac
-082CECE2 303c ffff                MOVE.W #$ffff,D0
-082CECE6 3540 00ca                MOVE.W D0,(A2, $00ca) == $0800be68
-082CECEA 3540 00cc                MOVE.W D0,(A2, $00cc) == $0800be6a
-082CECEE 3540 00ce                MOVE.W D0,(A2, $00ce) == $0800be6c
-082CECF2 3540 00d0                MOVE.W D0,(A2, $00d0) == $0800be6e
-082CECF6 3540 00d2                MOVE.W D0,(A2, $00d2) == $0800be70
-082CECFA 3540 00d4                MOVE.W D0,(A2, $00d4) == $0800be72
-082CECFE 3540 00d6                MOVE.W D0,(A2, $00d6) == $0800be74
-082CED02 3540 00d8                MOVE.W D0,(A2, $00d8) == $0800be76
-082CED06 3540 00da                MOVE.W D0,(A2, $00da) == $0800be78
-082CED0A 3540 00dc                MOVE.W D0,(A2, $00dc) == $0800be7a
-#endif
-
     i = 0;
     unkcnt = 0;
     while (newmodes[i].depth >= 0) {
@@ -1859,7 +1778,7 @@ uae_u32 REGPARAM2 picasso_InitCard (struct regstruct *regs)
 
        LibResolutionStructureCount++;
        CopyLibResolutionStructureU2A (&res, amem);
-#if defined P96TRACING_ENABLED && P96TRACING_LEVEL > 1
+#if P96TRACING_ENABLED && P96TRACING_LEVEL > 1
        DumpLibResolutionStructure(amem);
 #endif
        AmigaListAddTail (AmigaBoardInfo + PSSO_BoardInfo_ResolutionsList, amem);
@@ -1898,9 +1817,7 @@ uae_u32 REGPARAM2 picasso_SetSwitch (struct regstruct *regs)
            picasso96_state.Width, picasso96_state.Height, picasso96_state.BytesPerPixel * 8,
            picasso_vidinfo.width, picasso_vidinfo.height, picasso_vidinfo.pixbytes * 8);
     write_log ("SetSwitch() - %s\n", flag ? p96text : "amiga");
-
     /* Put old switch-state in D0 */
-
     return !flag;
 }
 
@@ -2061,11 +1978,10 @@ static void picasso_SetPanningInit(void)
 {
     picasso96_state.XYOffset = picasso96_state.Address + (picasso96_state.XOffset * picasso96_state.BytesPerPixel)
        + (picasso96_state.YOffset * picasso96_state.BytesPerRow);
-    if((picasso96_state.VirtualWidth > picasso96_state.Width) || (picasso96_state.VirtualHeight > picasso96_state.Height))
+    if(picasso96_state.VirtualWidth > picasso96_state.Width || picasso96_state.VirtualHeight > picasso96_state.Height)
        picasso96_state.BigAssBitmap = 1;
     else
        picasso96_state.BigAssBitmap = 0;
-    picasso96_state.BytesPerRow = picasso96_state.VirtualWidth * picasso96_state.BytesPerPixel;
 }
 
 uae_u32 REGPARAM2 picasso_SetPanning (struct regstruct *regs)
@@ -2094,6 +2010,7 @@ uae_u32 REGPARAM2 picasso_SetPanning (struct regstruct *regs)
     picasso96_state.VirtualHeight = bme_height;
     picasso96_state.RGBFormat = m68k_dreg (regs, 7);
     picasso96_state.BytesPerPixel = GetBytesPerPixel (picasso96_state.RGBFormat);
+    picasso96_state.BytesPerRow = picasso96_state.VirtualWidth * picasso96_state.BytesPerPixel;
     picasso_SetPanningInit();
 
     palette_changed = 1;
@@ -2305,7 +2222,7 @@ STATIC_INLINE int BlitRectHelper (void)
     uae_u8 mask = blitrectdata.mask;
     BLIT_OPCODE opcode = blitrectdata.opcode;
 
-    uae_u8 Bpp = GetBytesPerPixel(ri->RGBFormat);
+    uae_u8 Bpp = GetBytesPerPixel (ri->RGBFormat);
     unsigned long total_width = width * Bpp;
     unsigned long linewidth = (total_width + 15) & ~15;
 
@@ -2354,7 +2271,7 @@ STATIC_INLINE int BlitRect (uaecptr ri, uaecptr dstri,
     blitrectdata.mask = mask;
     blitrectdata.opcode = opcode;
 
-    return BlitRectHelper();
+    return BlitRectHelper ();
 }
 
 /***********************************************************
@@ -2385,7 +2302,6 @@ uae_u32 REGPARAM2 picasso_BlitRect (struct regstruct *regs)
 
     P96TRACE(("BlitRect(%d, %d, %d, %d, %d, %d, 0x%x)\n", srcx, srcy, dstx, dsty, width, height, Mask));
     result = BlitRect (renderinfo, (uaecptr)NULL, srcx, srcy, dstx, dsty, width, height, Mask, BLIT_SRC);
-
     return result;
 }
 
@@ -2423,14 +2339,12 @@ uae_u32 REGPARAM2 picasso_BlitRectNoMaskComplete (struct regstruct *regs)
 
     P96TRACE(("BlitRectNoMaskComplete() op 0x%02x, %08x:(%4d,%4d) --> %08x:(%4d,%4d), wh(%4d,%4d)\n",
        OpCode, get_long (srcri + PSSO_RenderInfo_Memory), srcx, srcy, get_long (dstri + PSSO_RenderInfo_Memory), dstx, dsty, width, height));
-
-    result = BlitRect(srcri, dstri, srcx, srcy, dstx, dsty, width, height, 0xFF, OpCode);
-
+    result = BlitRect (srcri, dstri, srcx, srcy, dstx, dsty, width, height, 0xFF, OpCode);
     return result;
 }
 
 /* NOTE: fgpen MUST be in host byte order */
-STATIC_INLINE void PixelWrite(uae_u8 *mem, int bits, uae_u32 fgpen, int Bpp, uae_u32 mask)
+STATIC_INLINE void PixelWrite (uae_u8 *mem, int bits, uae_u32 fgpen, int Bpp, uae_u32 mask)
 {
     switch (Bpp)
     {
@@ -2520,9 +2434,9 @@ uae_u32 REGPARAM2 picasso_BlitPattern (struct regstruct *regs)
            P96TRACE(("BlitPattern() xy(%d,%d), wh(%d,%d) draw 0x%x, off(%d,%d), ph %d\n",
            X, Y, W, H, pattern.DrawMode, pattern.XOffset, pattern.YOffset, 1 << pattern.Size));
 
-#ifdef P96TRACING_ENABLED
+#if P96TRACING_ENABLED
            DumpPattern(&pattern);
-    #endif
+#endif
            ysize_mask = (1 << pattern.Size) - 1;
            xshift = pattern.XOffset & 15;
 
@@ -2687,7 +2601,7 @@ uae_u32 REGPARAM2 picasso_BlitTemplate (struct regstruct *regs)
 
            bitoffset = tmp.XOffset % 8;
 
-#if defined(P96TRACING_ENABLED) && (P96TRACING_LEVEL > 0)
+#if P96TRACING_ENABLED && P96TRACING_LEVEL > 0
            DumpTemplate(&tmp, W, H);
 #endif
 
@@ -2836,8 +2750,10 @@ void picasso_handle_hsync (void)
        p96hsync--;
     }
     if (p96hsync <= 0) {
-       if (uae_boot_rom)
-           rtarea[get_long (rtarea_base + 36) + 12 - 1]++;
+       if (uae_boot_rom) {
+           int off = get_long (rtarea_base + 36) + 12 - 1;
+           rtarea[off]++;
+       }
        p96hsync = p96syncrate;
     }
 }
@@ -2856,7 +2772,7 @@ void init_hz_p96 (void)
 }
 
 /* NOTE: Watch for those planeptrs of 0x00000000 and 0xFFFFFFFF for all zero / all one bitmaps !!!! */
-static void PlanarToChunky(struct RenderInfo *ri, struct BitMap *bm,
+static void PlanarToChunky (struct RenderInfo *ri, struct BitMap *bm,
                           unsigned long srcx, unsigned long srcy,
                           unsigned long dstx, unsigned long dsty,
                           unsigned long width, unsigned long height,
@@ -2876,7 +2792,7 @@ static void PlanarToChunky(struct RenderInfo *ri, struct BitMap *bm,
     for (j = 0; j < Depth; j++) {
        uae_u8 *p = bm->Planes[j];
        if (p != &all_zeros_bitmap && p != &all_ones_bitmap)
-           p += srcx/8 + srcy*bm->BytesPerRow;
+           p += srcx / 8 + srcy * bm->BytesPerRow;
        PLANAR[j] = p;
        if ((mask & (1 << j)) == 0)
            PLANAR[j] = &all_zeros_bitmap;
@@ -2974,14 +2890,14 @@ uae_u32 REGPARAM2 picasso_BlitPlanar2Chunky (struct regstruct *regs)
 }
 
 /* NOTE: Watch for those planeptrs of 0x00000000 and 0xFFFFFFFF for all zero / all one bitmaps !!!! */
-static void PlanarToDirect(struct RenderInfo *ri, struct BitMap *bm,
+static void PlanarToDirect (struct RenderInfo *ri, struct BitMap *bm,
                           unsigned long srcx, unsigned long srcy,
                           unsigned long dstx, unsigned long dsty,
                           unsigned long width, unsigned long height, uae_u8 mask,
                           struct ColorIndexMapping *cim)
 {
     int j;
-    int bpp = GetBytesPerPixel(ri->RGBFormat);
+    int bpp = GetBytesPerPixel (ri->RGBFormat);
     uae_u8 *PLANAR[8];
     uae_u8 *image = ri->Memory + dstx * bpp + dsty * ri->BytesPerRow;
     int Depth = bm->Depth;
@@ -3133,10 +3049,14 @@ static void flushpixels (void)
     if (!picasso_vidinfo.extra_mem || !gwwbuf)
        return;
 
+    if (palette_changed)
+       palette_changed = -1;
+
     for (;;) {
 
        gwwcnt = 0;
-       if (palette_changed) {
+       if (palette_changed < 0) {
+           palette_changed = 1;
            if (picasso_palette ()) {
                reloadcursor = 1;
                setspriteimage (cursorbi);
@@ -3144,7 +3064,6 @@ static void flushpixels (void)
            gwwcnt = allocated_gfxmem / gwwpagesize;
            for (i = 0; i < gwwcnt; i++)
                gwwbuf[i] = src + i * gwwpagesize;
-           palette_changed = 0;
        } else {
            ULONG ps;
            gwwcnt = gwwbufsize;
@@ -3156,7 +3075,7 @@ static void flushpixels (void)
            break;
 
        if (dst == NULL) {
-           if(DirectDraw_IsLocked() == FALSE) {
+           if (DirectDraw_IsLocked() == FALSE) {
                if (!lock)
                    dst = gfx_lock_picasso ();
                lock = 1;
@@ -3285,12 +3204,15 @@ static void flushpixels (void)
            }
 
        }
+       break;
     }
 
     if(lock)
        gfx_unlock_picasso ();
-    if (dst && gwwcnt)
+    if (dst && gwwcnt) {
        mman_ResetWatch (src, allocated_gfxmem);
+        palette_changed = 0;
+    }
     if (maxy >= 0)
        DX_Invalidate (0, miny, picasso96_state.Width, maxy - miny + 1);
 }
@@ -3363,7 +3285,7 @@ static uae_u8 *REGPARAM2 gfxmem_xlate (uaecptr addr)
 addrbank gfxmem_bankx = {
     gfxmem_lgetx, gfxmem_wgetx, gfxmem_bgetx,
     gfxmem_lputx, gfxmem_wputx, gfxmem_bputx,
-    gfxmem_xlate, gfxmem_check, NULL, "RTG RAM (Direct)",
+    gfxmem_xlate, gfxmem_check, NULL, "RTG RAM",
     dummy_lgeti, dummy_wgeti, ABFLAG_RAM
 };
 
index 1d97592e1ff870c00a281791fd9d8f2bdfddb186..b4152303ad2f267725da0c976a3231b36622fb10 100755 (executable)
@@ -361,27 +361,158 @@ struct Line {
 #define PSSO_BoardInfo_MouseRendered               PSSO_BoardInfo_MouseChunky + 4
 #define PSSO_BoardInfo_MouseSaveBuffer             PSSO_BoardInfo_MouseRendered + 4
 
-struct BoardInfo {
-    uae_u8 *RegisterBase, *MemoryBase, *MemoryIOBase;
-    uae_u32 MemorySize;
-    char *BoardName, VBIName[32];
-
-    uae_u16 MoniSwitch;
-    uae_u16 BitsPerCannon;
-    uae_u32 Flags;
-    uae_u16 SoftSpriteFlags;
-    uae_u16 ChipFlags; /* private, chip specific, not touched by RTG */
-    uae_u32 CardFlags; /* private, card specific, not touched by RTG */
-
-    uae_u16 BoardNum;
-    uae_s16 RGBFormats;
-
-    uae_u16 MaxHorValue[MAXMODES];
-    uae_u16 MaxVerValue[MAXMODES];
-    uae_u16 MaxHorResolution[MAXMODES];
-    uae_u16 MaxVerResolution[MAXMODES];
-    uae_u32 MaxMemorySize, MaxChunkSize;
+#if 0
+struct BoardInfo{
+       UBYTE                   *RegisterBase, *MemoryBase, *MemoryIOBase;
+       ULONG                   MemorySize;
+       char                    *BoardName,VBIName[32];
+       struct CardBase         *CardBase;
+       struct ChipBase         *ChipBase;
+       struct ExecBase         *ExecBase;
+       struct Library          *UtilBase;
+       struct Interrupt        HardInterrupt;
+       struct Interrupt        SoftInterrupt;
+       struct SignalSemaphore  BoardLock;
+       struct MinList          ResolutionsList;
+       BTYPE                   BoardType;
+       PCTYPE                  PaletteChipType;
+       GCTYPE                  GraphicsControllerType;
+       UWORD                   MoniSwitch;
+       UWORD                   BitsPerCannon;
+       ULONG                   Flags;
+       UWORD                   SoftSpriteFlags;
+       UWORD                   ChipFlags;      // private, chip specific, not touched by RTG
+       ULONG                   CardFlags;      // private, card specific, not touched by RTG
+       UWORD                   BoardNum;
+       UWORD                   RGBFormats;
+       UWORD                   MaxHorValue[MAXMODES];
+       UWORD                   MaxVerValue[MAXMODES];
+       UWORD                   MaxHorResolution[MAXMODES];
+       UWORD                   MaxVerResolution[MAXMODES];
+       ULONG                   MaxMemorySize, MaxChunkSize;
+       ULONG                   __obsolete;
+       ULONG                   PixelClockCount[MAXMODES];
+
+       APTR __asm              (*AllocCardMem)(register __a0 struct BoardInfo *bi, register __d0 ULONG size, register __d1 BOOL force, register __d2 BOOL system);
+       BOOL __asm              (*FreeCardMem)(register __a0 struct BoardInfo *bi, register __a1 APTR membase);
+
+       BOOL __asm              (*SetSwitch)(register __a0 struct BoardInfo *, register __d0 BOOL);
+
+       void __asm              (*SetColorArray)(register __a0 struct BoardInfo *, register __d0 UWORD, register __d1 UWORD);
+
+       void __asm              (*SetDAC)(register __a0 struct BoardInfo *, register __d7 RGBFTYPE);
+       void __asm              (*SetGC)(register __a0 struct BoardInfo *, register __a1 struct ModeInfo *, register __d0 BOOL);
+       void __asm              (*SetPanning)(register __a0 struct BoardInfo *, register __a1 UBYTE *, register __d0 UWORD, register __d1 WORD, register __d2 WORD, register __d7 RGBFTYPE);
+       UWORD __asm             (*CalculateBytesPerRow)(register __a0 struct BoardInfo *, register __d0 UWORD, register __d7 RGBFTYPE);
+       APTR __asm              (*CalculateMemory)(register __a0 struct BoardInfo *, register __a1 APTR, register __d7 RGBFTYPE);
+       ULONG __asm             (*GetCompatibleFormats)(register __a0 struct BoardInfo *, register __d7 RGBFTYPE);
+       BOOL __asm              (*SetDisplay)(register __a0 struct BoardInfo *, register __d0 BOOL);
+
+       LONG __asm              (*ResolvePixelClock)(register __a0 struct BoardInfo *, register __a1 struct ModeInfo *, register __d0 ULONG, register __d7 RGBFTYPE);
+       ULONG   __asm           (*GetPixelClock)(register __a0 struct BoardInfo *bi, register __a1 struct ModeInfo *mi, register __d0 Index, register __d7 RGBFormat);
+       void __asm              (*SetClock)(register __a0 struct BoardInfo *);
+
+       void __asm              (*SetMemoryMode)(register __a0 struct BoardInfo *, register __d7 RGBFTYPE);
+       void __asm              (*SetWriteMask)(register __a0 struct BoardInfo *, register __d0 UBYTE);
+       void __asm              (*SetClearMask)(register __a0 struct BoardInfo *, register __d0 UBYTE);
+       void __asm              (*SetReadPlane)(register __a0 struct BoardInfo *, register __d0 UBYTE);
+
+       void __asm              (*WaitVerticalSync)(register __a0 struct BoardInfo *, register __d0 BOOL);
+       BOOL __asm              (*SetInterrupt)(register __a0 struct BoardInfo *, register __d0 BOOL);
+
+       void __asm              (*WaitBlitter)(register __a0 struct BoardInfo *);
+
+       void __asm              (*ScrollPlanar)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 UWORD, register __d1 UWORD, register __d2 UWORD, register __d3 UWORD, register __d4 UWORD, register __d5 UWORD, register __d6 UBYTE);
+       void __asm              (*ScrollPlanarDefault)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 UWORD, register __d1 UWORD, register __d2 UWORD, register __d3 UWORD, register __d4 UWORD, register __d5 UWORD, register __d6 UBYTE);
+       void __asm              (*UpdatePlanar)(register __a0 struct BoardInfo *, register __a1 struct BitMap *, register __a2 struct RenderInfo *, register __d0 SHORT, register __d1 SHORT, register __d2 SHORT, register __d3 SHORT, register __d4 UBYTE);
+       void __asm              (*UpdatePlanarDefault)(register __a0 struct BoardInfo *, register __a1 struct BitMap *, register __a2 struct RenderInfo *, register __d0 SHORT, register __d1 SHORT, register __d2 SHORT, register __d3 SHORT, register __d4 UBYTE);
+       void __asm              (*BlitPlanar2Chunky)(register __a0 struct BoardInfo *, register __a1 struct BitMap *, register __a2 struct RenderInfo *, register __d0 SHORT, register __d1 SHORT, register __d2 SHORT, register __d3 SHORT, register __d4 SHORT, register __d5 SHORT, register __d6 UBYTE, register __d7 UBYTE);
+       void __asm              (*BlitPlanar2ChunkyDefault)(register __a0 struct BoardInfo *, register __a1 struct BitMap *, register __a2 struct RenderInfo *, register __d0 SHORT, register __d1 SHORT, register __d2 SHORT, register __d3 SHORT, register __d4 SHORT, register __d5 SHORT, register __d6 UBYTE, register __d7 UBYTE);
+
+       void __asm              (*FillRect)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 ULONG, register __d5 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*FillRectDefault)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 ULONG, register __d5 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*InvertRect)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*InvertRectDefault)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitRect)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 WORD, register __d5 WORD, register __d6 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitRectDefault)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 WORD, register __d5 WORD, register __d6 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitTemplate)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __a2 struct Template *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitTemplateDefault)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __a2 struct Template *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitPattern)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __a2 struct Pattern *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitPatternDefault)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __a2 struct Pattern *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*DrawLine)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*DrawLineDefault)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitRectNoMaskComplete)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __a2 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 WORD, register __d5 WORD, register __d6 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitRectNoMaskCompleteDefault)(register __a0 struct BoardInfo *, register __a1 struct RenderInfo *, register __a2 struct RenderInfo *, register __d0 WORD, register __d1 WORD, register __d2 WORD, register __d3 WORD, register __d4 WORD, register __d5 WORD, register __d6 UBYTE, register __d7 RGBFTYPE);
+       void __asm              (*BlitPlanar2Direct)(register __a0 struct BoardInfo *, register __a1 struct BitMap *, register __a2 struct RenderInfo *, register __a3 struct ColorIndexMapping *, register __d0 SHORT, register __d1 SHORT, register __d2 SHORT, register __d3 SHORT, register __d4 SHORT, register __d5 SHORT, register __d6 UBYTE, register __d7 UBYTE);
+       void __asm              (*BlitPlanar2DirectDefault)(register __a0 struct BoardInfo *, register __a1 struct BitMap *, register __a2 struct RenderInfo *, register __a3 struct ColorIndexMapping *, register __d0 SHORT, register __d1 SHORT, register __d2 SHORT, register __d3 SHORT, register __d4 SHORT, register __d5 SHORT, register __d6 UBYTE, register __d7 UBYTE);
+       void __asm              (*Reserved0)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved0Default)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved1)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved1Default)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved2)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved2Default)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved3)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved3Default)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved4)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved4Default)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved5)(register __a0 struct BoardInfo *);
+       void __asm              (*Reserved5Default)(register __a0 struct BoardInfo *);
+       void __asm              (*SetDPMSLevel)(register __a0 struct BoardInfo *, register __d0 ULONG DPMSLevel);
+       void __asm              (*ResetChip)(register __a0 struct BoardInfo *);
+       ULONG __asm                                                     (*GetFeatureAttrs)(register __a0 struct BoardInfo *bi, register __a1 APTR FeatureData, register __d0 ULONG Type, register __a2 struct TagItem *Tags);
+
+       struct BitMap * __asm   (*AllocBitMap)(register __a0 struct BoardInfo *, register __d0 ULONG, register __d1 ULONG, register __a1 struct TagItem *);
+       BOOL __asm              (*FreeBitMap)(register __a0 struct BoardInfo *, register __a1 struct BitMap *, register __a2 struct TagItem *);
+       ULONG __asm             (*GetBitMapAttr)(register __a0 struct BoardInfo *, register __a1 struct BitMap *, register __d0 ULONG);
+
+       BOOL __asm              (*SetSprite)(register __a0 struct BoardInfo *, register __d0 BOOL, register __d7 RGBFTYPE);
+       void __asm              (*SetSpritePosition)(register __a0 struct BoardInfo *, register __d0 WORD, register __d1 WORD, register __d7 RGBFTYPE);
+       void __asm              (*SetSpriteImage)(register __a0 struct BoardInfo *, register __d7 RGBFTYPE);
+       void __asm              (*SetSpriteColor)(register __a0 struct BoardInfo *, register __d0 UBYTE, register __d1 UBYTE, register __d2 UBYTE, register __d3 UBYTE, register __d7 RGBFTYPE);
+
+       APTR __asm              (*CreateFeature)(register __a0 struct BoardInfo *bi, register __d0 ULONG Type, register __a1 struct TagItem *Tags);
+       ULONG __asm             (*SetFeatureAttrs)(register __a0 struct BoardInfo *bi, register __a1 APTR FeatureData, register __d0 ULONG Type, register __a2 struct TagItem *Tags);
+       BOOL __asm              (*DeleteFeature)(register __a0 struct BoardInfo *bi, register __a1 APTR FeatureData, register __d0 ULONG Type);
+       struct MinList          SpecialFeatures;
+
+       struct ModeInfo         *ModeInfo;      /* Chip Settings Stuff */
+       RGBFTYPE                RGBFormat;
+       WORD                    XOffset, YOffset;
+       UBYTE                   Depth;
+       UBYTE                   ClearMask;
+       BOOL                    Border;
+       ULONG                   Mask;
+       struct CLUTEntry        CLUT[256];
+
+       struct ViewPort         *ViewPort;      /* ViewPort Stuff */
+       struct BitMap           *VisibleBitMap;
+       struct BitMapExtra      *BitMapExtra;
+       struct MinList          BitMapList;
+       struct MinList          MemList;
+
+       WORD                    MouseX, MouseY; /* Sprite Stuff */
+       UBYTE                   MouseWidth, MouseHeight;
+       UBYTE                   MouseXOffset, MouseYOffset;
+       UWORD                   *MouseImage;
+       UBYTE                   MousePens[4];
+       struct Rectangle        MouseRect;
+       UBYTE                   *MouseChunky;
+       UWORD                   *MouseRendered;
+       UBYTE                   *MouseSaveBuffer;
+
+       ULONG                   ChipData[16];   /* for chip driver needs */
+       ULONG                   CardData[16];   /* for card driver needs */
+       
+       APTR                    MemorySpaceBase; /* the base address of the board memory address space */
+       ULONG                   MemorySpaceSize; /* size of that area */
+
+       APTR                    DoubleBufferList; /* chain of dbinfos being notified on vblanks */
+       
+       struct timeval          SyncTime;       /* system time when screen was set up, used for pseudo vblanks */
+       ULONG                   SyncPeriod;     /* length of one frame in micros */
+       struct MsgPort          SoftVBlankPort; /* MsgPort for software emulation of board interrupt */
 };
+#endif
 
 /* BoardInfo flags */
 /*  0-15: hardware flags */
index ebdbbe88f8b68549a582eb5b170ed8e2c7d22d49..fb60e3d5340a4455d0968d3cadf5145ce52d9dbf 100755 (executable)
 #define IDC_PORT1_JOYS                  1027
 #define IDC_SCREENMODE_RTG              1027
 #define IDC_MBMEM1                      1028
-#define IDC_P96MODE                     1029
 #define IDC_SLOWMEM                     1030
 #define IDC_MBMEM2                      1031
 #define IDC_PARALLEL                    1033
 #define IDC_COMPATIBLE                  1214
 #define IDC_TRUST0                      1215
 #define IDC_TRUST1                      1216
-#define IDC_TRUST2                      1217
 #define IDC_CACHE                       1218
 #define IDC_CYCLEEXACT                  1219
 #define IDC_CS_CPU_TEXT2                1219
 #define IDC_DF3TYPE                     1598
 #define IDC_SOUNDSPEEDTEXT              1599
 #define IDC_SOUNDSPEEDCAPTION           1600
-#define IDC_NOOVERLAY                   1601
+#define IDC_RTGMATCHDEPTH               1601
 #define IDC_ROMFILE2TEXT                1602
 #define IDC_ROMTEXT                     1603
 #define IDC_KEYTEXT                     1604
 #define IDC_CS_KSMIRROR_E0              1716
 #define IDC_STRINGBOXEDIT               1716
 #define IDC_CS_CD32CD                   1717
-#define IDC_STRINGBOX_TEXT              1717
 #define IDC_CS_CD32C2P                  1718
 #define IDC_CS_CD32NVRAM                1719
 #define IDC_CS_CDTVCD                   1720
 #define ID_DBG_COPYLB                   40041
 #define ID_DBG_TOGGLEBP                 40042
 #define ID_DBG_DELETEBPS                40043
+#define ID_DBG_SETTOPC                  40044
 
 // Next default values for new objects
 // 
 #ifndef APSTUDIO_READONLY_SYMBOLS
 #define _APS_NO_MFC                     1
 #define _APS_3D_CONTROLS                     1
-#define _APS_NEXT_RESOURCE_VALUE        336
-#define _APS_NEXT_COMMAND_VALUE         40044
+#define _APS_NEXT_RESOURCE_VALUE        335
+#define _APS_NEXT_COMMAND_VALUE         40045
 #define _APS_NEXT_CONTROL_VALUE         1785
 #define _APS_NEXT_SYMED_VALUE           101
 #endif
index 31e8d74685878ead9d2481bc6e41e12d570945a0..2251cfb847bdf1e1103cbe5cf0c18be560f3c3bf 100755 (executable)
 #define IDC_COMPATIBLE                  1214
 #define IDC_TRUST0                      1215
 #define IDC_TRUST1                      1216
-#define IDC_TRUST2                      1217
 #define IDC_CACHE                       1218
 #define IDC_CYCLEEXACT                  1219
 #define IDC_CS_CPU_TEXT2                1219
index 2578df4bc219f2e3a037132fd60e442f3e30f1bf..327f9d8c7032808aace8b17cc8219b0b4420626c 100755 (executable)
@@ -185,7 +185,6 @@ BEGIN
     CONTROL         "No flags",IDC_NOFLAGS,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,168,155,62,10\r
     CONTROL         "Direct",IDC_TRUST0,"Button",BS_AUTORADIOBUTTON | WS_GROUP | WS_TABSTOP,235,141,52,10\r
     CONTROL         "Indirect",IDC_TRUST1,"Button",BS_AUTORADIOBUTTON | WS_TABSTOP,235,155,52,10\r
-    CONTROL         "After RTG",IDC_TRUST2,"Button",BS_AUTORADIOBUTTON | WS_TABSTOP,235,169,52,10\r
     CONTROL         "More compatible [] More compatible but slower FPU emulation.",IDC_COMPATIBLE_FPU,\r
                     "Button",BS_AUTOCHECKBOX | BS_LEFT | WS_GROUP | WS_TABSTOP,9,210,73,10\r
     GROUPBOX        "FPU",IDC_STATIC,6,146,81,80,BS_LEFT\r
@@ -402,7 +401,8 @@ BEGIN
     CONTROL         "Don't show taskbar button",IDC_NOTASKBARBUTTON,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,23,66,117,10\r
     CONTROL         "bsdsocket.library emulation",IDC_SOCKETS,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,161,15,120,10\r
     CONTROL         "Use CTRL-F11 to quit",IDC_CTRLF11,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,161,27,120,10\r
-    CONTROL         "RTG color depth matching [] Do not use color space conversion if possible.",IDC_RTGMATCHDEPTH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,161,40,120,10\r
+    CONTROL         "RTG color depth matching [] Do not use color space conversion if possible.",IDC_RTGMATCHDEPTH,\r
+                    "Button",BS_AUTOCHECKBOX | WS_TABSTOP,161,40,120,10\r
     CONTROL         "Synchronize clock",IDC_CLOCKSYNC,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,161,53,115,10\r
     GROUPBOX        "Keyboard LEDs",IDC_STATIC,7,140,85,94\r
     COMBOBOX        IDC_KBLED1,22,154,56,65,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP\r
@@ -688,7 +688,7 @@ BEGIN
     COMBOBOX        IDC_FILTERVZMULT,67,63,27,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
     CONTROL         "Autoscale",IDC_FILTERAUTORES,"Button",BS_AUTOCHECKBOX | BS_LEFT | WS_GROUP | WS_TABSTOP,9,168,63,10\r
     COMBOBOX        IDC_FILTERXTRA,105,130,138,150,CBS_DROPDOWNLIST | CBS_HASSTRINGS | WS_VSCROLL | WS_TABSTOP\r
-    CONTROL         "Full screen filter",IDC_FILTERUPSCALE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,9,153,87,10\r
+    CONTROL         "Full screen filter",IDC_FILTERUPSCALE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP | NOT WS_VISIBLE,9,153,87,10\r
 END\r
 \r
 IDD_HARDDRIVE DIALOGEX 0, 0, 380, 76\r
@@ -846,7 +846,7 @@ BEGIN
 END\r
 \r
 IDD_DEBUGGER DIALOGEX 0, 0, 454, 368\r
-STYLE DS_LOCALEDIT | DS_SETFONT | DS_MODALFRAME | DS_3DLOOK | WS_POPUP | WS_OVERLAPPEDWINDOW
+STYLE DS_LOCALEDIT | DS_SETFONT | DS_MODALFRAME | DS_3DLOOK | WS_MINIMIZEBOX | WS_MAXIMIZEBOX | WS_POPUP | WS_CAPTION | WS_SYSMENU | WS_THICKFRAME\r
 EXSTYLE WS_EX_CONTROLPARENT\r
 CAPTION "WinUAE Debugger"\r
 FONT 8, "Courier New", 0, 0, 0x0\r
@@ -875,7 +875,7 @@ BEGIN
     LISTBOX         IDC_DBG_MCUSTOM,372,79,81,138,LBS_OWNERDRAWFIXED | LBS_HASSTRINGS | LBS_NOINTEGRALHEIGHT\r
     LISTBOX         IDC_DBG_MISC,1,79,370,262,LBS_OWNERDRAWFIXED | LBS_HASSTRINGS | LBS_NOINTEGRALHEIGHT | LBS_DISABLENOSCROLL | WS_VSCROLL\r
     LISTBOX         IDC_DBG_CUSTOM,1,79,370,262,LBS_OWNERDRAWFIXED | LBS_HASSTRINGS | LBS_NOINTEGRALHEIGHT | LBS_DISABLENOSCROLL | WS_VSCROLL\r
-    CONTROL         "Auto set",IDC_DBG_AUTOSET,"Button",BS_AUTOCHECKBOX | NOT WS_TABSTOP,84,79,50,12\r
+    CONTROL         "Auto set",IDC_DBG_AUTOSET,"Button",BS_AUTOCHECKBOX,84,79,50,12\r
     LISTBOX         IDC_DBG_DASM2,1,79,370,87,LBS_OWNERDRAWFIXED | LBS_HASSTRINGS | LBS_NOINTEGRALHEIGHT\r
     LISTBOX         IDC_DBG_MEM2,1,167,370,87,LBS_OWNERDRAWFIXED | LBS_HASSTRINGS | LBS_NOINTEGRALHEIGHT\r
 END\r
@@ -996,50 +996,51 @@ BEGIN
     BEGIN\r
         POPUP "Copy"\r
         BEGIN\r
-            MENUITEM "Copy line"                 ID_DBG_COPYLBLINE\r
-            MENUITEM "Copy all"                  ID_DBG_COPYLB\r
+            MENUITEM "Copy line",                   ID_DBG_COPYLBLINE\r
+            MENUITEM "Copy all",                    ID_DBG_COPYLB\r
         END\r
     END\r
     POPUP "Memory"\r
     BEGIN\r
         POPUP "Copy"\r
         BEGIN\r
-            MENUITEM "Copy line"                 ID_DBG_COPYLBLINE\r
-            MENUITEM "Copy all"                  ID_DBG_COPYLB\r
+            MENUITEM "Copy line",                   ID_DBG_COPYLBLINE\r
+            MENUITEM "Copy all",                    ID_DBG_COPYLB\r
         END\r
         POPUP "Set top address"\r
         BEGIN\r
-            MENUITEM "Set to A0",                ID_DBG_SETTOA0\r
-            MENUITEM "Set to A1",                ID_DBG_SETTOA1\r
-            MENUITEM "Set to A2",                ID_DBG_SETTOA2\r
-            MENUITEM "Set to A3",                ID_DBG_SETTOA3\r
-            MENUITEM "Set to A4",                ID_DBG_SETTOA4\r
-            MENUITEM "Set to A5",                ID_DBG_SETTOA5\r
-            MENUITEM "Set to A6",                ID_DBG_SETTOA6\r
-            MENUITEM "Set to A7",                ID_DBG_SETTOA7\r
-            MENUITEM "Enter address",            ID_DBG_ENTERADDR\r
+            MENUITEM "Set to A0",                   ID_DBG_SETTOA0\r
+            MENUITEM "Set to A1",                   ID_DBG_SETTOA1\r
+            MENUITEM "Set to A2",                   ID_DBG_SETTOA2\r
+            MENUITEM "Set to A3",                   ID_DBG_SETTOA3\r
+            MENUITEM "Set to A4",                   ID_DBG_SETTOA4\r
+            MENUITEM "Set to A5",                   ID_DBG_SETTOA5\r
+            MENUITEM "Set to A6",                   ID_DBG_SETTOA6\r
+            MENUITEM "Set to A7",                   ID_DBG_SETTOA7\r
+            MENUITEM "Enter address",               ID_DBG_ENTERADDR\r
         END\r
     END\r
     POPUP "Disassembly"\r
     BEGIN\r
         POPUP "Copy"\r
         BEGIN\r
-            MENUITEM "Copy line"                 ID_DBG_COPYLBLINE\r
-            MENUITEM "Copy all"                  ID_DBG_COPYLB\r
+            MENUITEM "Copy line",                   ID_DBG_COPYLBLINE\r
+            MENUITEM "Copy all",                    ID_DBG_COPYLB\r
         END\r
         POPUP "Breakpoints"\r
         BEGIN\r
-            MENUITEM "Toggle breakpoint"         ID_DBG_TOGGLEBP\r
-            MENUITEM "Clear all breakpoints"     ID_DBG_DELETEBPS\r
+            MENUITEM "Toggle breakpoint",           ID_DBG_TOGGLEBP\r
+            MENUITEM "Clear all breakpoints",       ID_DBG_DELETEBPS\r
         END\r
         POPUP "Set top address"\r
         BEGIN\r
-            MENUITEM "Set to PC"                 ID_DBG_SETTOPC\r
-            MENUITEM "Enter address",            ID_DBG_ENTERADDR\r
+            MENUITEM "Set to PC",                   ID_DBG_SETTOPC\r
+            MENUITEM "Enter address",               ID_DBG_ENTERADDR\r
         END\r
     END\r
 END\r
 \r
+\r
 /////////////////////////////////////////////////////////////////////////////\r
 //\r
 // WAVE\r
index 5f95f501af32c3cb0dd246f810b78831df7c99f7..07ada736192c191a11506f1cf3a7f9ca89ada880 100755 (executable)
@@ -231,12 +231,12 @@ void SERDAT (uae_u16 w)
 #endif
 
     if (seriallog)
-       console_out("%c", dochar (w));
+       console_out_f ("%c", dochar (w));
 
     if (serper == 372) {
        extern int enforcermode;
        if (enforcermode & 2) {
-           console_out ("%c", dochar (w));
+           console_out_f ("%c", dochar (w));
            if (w == 266)
                console_out("\n");
        }
index 6eb9286160bda17a6e856a5ee63753edc8da5894..560e5e471f78baaa3197118208777cfc509bc485 100755 (executable)
@@ -15,9 +15,9 @@
 #define GETBDM(x) (((x) - ((x / 10000) * 10000)) / 100)
 #define GETBDD(x) ((x) % 100)
 
-#define WINUAEBETA 6
+#define WINUAEBETA 7
 #define WINUAEPUBLICBETA 1
-#define WINUAEDATE MAKEBD(2008, 3, 4)
+#define WINUAEDATE MAKEBD(2008, 3, 9)
 #define WINUAEEXTRA ""
 #define WINUAEREV ""
 
index 150598647ee008faff8dca741300e5210072ba16..780decf4712d4bb4d62cbba3f15292e83613cfea 100755 (executable)
@@ -12,6 +12,7 @@
 #include "win32gfx.h"
 #include "gfxfilter.h"
 #include "dxwrap.h"
+#include "statusline.h"
 
 struct uae_filter uaefilters[] =
 {
@@ -23,7 +24,7 @@ struct uae_filter uaefilters[] =
 
     { UAE_FILTER_SCALE2X, 0, "Scale2X", "scale2x", 0, 0, UAE_FILTER_MODE_16_16 | UAE_FILTER_MODE_32_32, 0, 0 },
 
-    { UAE_FILTER_HQ, 0, "hq2x", "hqx", 0, 0, UAE_FILTER_MODE_16_16 | UAE_FILTER_MODE_16_32, 0, 0 },
+    { UAE_FILTER_HQ, 0, "hq2x", "hqx", 0, 0, UAE_FILTER_MODE_16_16 | UAE_FILTER_MODE_16_32, UAE_FILTER_MODE_16_16 | UAE_FILTER_MODE_16_32, UAE_FILTER_MODE_16_16 | UAE_FILTER_MODE_16_32 },
 
     { UAE_FILTER_SUPEREAGLE, 0, "SuperEagle", "supereagle", 0, 0, UAE_FILTER_MODE_16_16, 0, 0 },
 
@@ -38,9 +39,35 @@ struct uae_filter uaefilters[] =
 
 
 static int dst_width, dst_height, amiga_width, amiga_height, amiga_depth, dst_depth, scale;
+static int temp_width, temp_height;
 uae_u8 *bufmem_ptr;
 static LPDIRECTDRAWSURFACE7 tempsurf;
 static uae_u8 *tempsurf2, *tempsurf3;
+static uae_u32 rc[256], gc[256], bc[256];
+
+void draw_status_line_single (uae_u8 *buf, int bpp, int y, int totalwidth, uae_u32 *rc, uae_u32 *gc, uae_u32 *bc);
+static void statusline (void)
+{
+    DDSURFACEDESC2 desc;
+    RECT sr, dr;
+    int y;
+
+    if (!currprefs.leds_on_screen || !tempsurf)
+       return;
+    SetRect (&sr, 0, 0, dst_width, TD_TOTAL_HEIGHT);
+    SetRect (&dr, 0, dst_height - TD_TOTAL_HEIGHT, dst_width, dst_height);
+    DirectDraw_BlitRect (tempsurf, &sr, NULL, &dr);
+    if (locksurface (tempsurf, &desc)) {
+       int yy = 0;
+       for (y = dst_height - TD_TOTAL_HEIGHT; y < dst_height; y++) {
+           uae_u8 *buf = (uae_u8*)desc.lpSurface + yy * desc.lPitch;
+           draw_status_line_single (buf, dst_depth / 8, yy, dst_width, rc, gc, bc);
+           yy++;
+       }
+       unlocksurface (tempsurf);
+       DirectDraw_BlitRect (NULL, &dr, tempsurf, &sr);
+    }
+}
 
 void S2X_configure (int rb, int gb, int bb, int rs, int gs, int bs)
 {
@@ -52,6 +79,8 @@ void S2X_configure (int rb, int gb, int bb, int rs, int gs, int bs)
 
 void S2X_free (void)
 {
+    if (currprefs.leds_on_screen == STATUSLINE_TARGET)
+       changed_prefs.leds_on_screen = currprefs.leds_on_screen = STATUSLINE_BUILTIN;
 
     freesurface (tempsurf);
     tempsurf = 0;
@@ -65,6 +94,15 @@ void S2X_init (int dw, int dh, int aw, int ah, int mult, int ad, int dd)
 {
     int flags = 0;
 
+    if (currprefs.leds_on_screen == STATUSLINE_BUILTIN)
+       changed_prefs.leds_on_screen = currprefs.leds_on_screen = STATUSLINE_TARGET;
+
+    if (dd == 32)
+       alloc_colors_rgb (8, 8, 8, 16, 8, 0, 0, 0, 0, 0, rc, gc, bc);
+    else
+       alloc_colors_rgb (5, 6, 5, 11, 5, 0, 0, 0, 0, 0, rc, gc, bc);
+
+
     if (!currprefs.gfx_filter || !usedfilter) {
        usedfilter = &uaefilters[0];
        mult = 1;
@@ -84,138 +122,56 @@ void S2X_init (int dw, int dh, int aw, int ah, int mult, int ad, int dd)
     amiga_depth = ad;
     scale = mult;
 
-    tempsurf = allocsurface (dst_width, dst_height);
+    temp_width = dst_width * 3;
+    temp_height = dst_height * 3;
+    tempsurf = allocsurface (temp_width, temp_height);
     if (!tempsurf)
        write_log ("DDRAW: failed to create temp surface\n");
 
     if (usedfilter->type == UAE_FILTER_HQ) {
        int w = amiga_width > dst_width ? amiga_width : dst_width;
        int h = amiga_height > dst_height ? amiga_height : dst_height;
-       tempsurf2 = xmalloc (w * h * (amiga_depth / 8));
-       tempsurf3 = xmalloc (w * h *(dst_depth / 8) * 4);
+       tempsurf2 = xmalloc (w * h * (amiga_depth / 8) * ((scale + 1) / 2));
+       tempsurf3 = xmalloc (w * h *(dst_depth / 8) * 4 * scale);
     }
 }
 
 void S2X_render (void)
 {
-    int aw = amiga_width, ah = amiga_height, v, pitch;
+    int aw, ah, aws, ahs, pitch;
     uae_u8 *dptr, *enddptr, *sptr, *endsptr;
-    int ok = 0, temp_needed = 0;
+    int ok = 0;
     RECT sr, dr;
-    HRESULT ddrval;
-    LPDIRECTDRAWSURFACE7 dds;
     DDSURFACEDESC2 desc;
-    int dst_width2 = dst_width;
-    int dst_height2 = dst_height;
-    int dst_width3 = dst_width;
-    int dst_height3 = dst_height;
-
-    sptr = gfxvidinfo.bufmem;
-    v = (dst_width - amiga_width * scale) / 2;
-    sptr -= v * (amiga_depth / 8);
-    v = (dst_height - amiga_height * scale) / 2;
-    sptr -= v * gfxvidinfo.rowbytes;
-    endsptr = gfxvidinfo.bufmemend;
-
-    v = currprefs.gfx_filter ? currprefs.gfx_filter_horiz_offset : 0;
-    sptr += (int)(-v * amiga_width / 1000.0) * (amiga_depth / 8);
 
-    v = currprefs.gfx_filter ? currprefs.gfx_filter_vert_offset : 0;
-    sptr += (int)(-v * amiga_height / 1000.0) * gfxvidinfo.rowbytes;
+    aw = amiga_width;
+    ah = amiga_height;
+    aws = aw * scale;
+    ahs = ah * scale;
 
     if (ah < 16)
        return;
     if (aw < 16)
        return;
+    if (tempsurf == NULL)
+       return;
 
-    if (currprefs.gfx_filter && (currprefs.gfx_filter_horiz_zoom || currprefs.gfx_filter_vert_zoom ||
-           currprefs.gfx_filter_horiz_zoom_mult != 1000 ||
-           currprefs.gfx_filter_vert_zoom_mult != 1000 || currprefs.gfx_filter_upscale)) {
-
-       double hz = currprefs.gfx_filter_horiz_zoom / 1000.0 * currprefs.gfx_filter_horiz_zoom_mult;
-       double vz = currprefs.gfx_filter_vert_zoom / 1000.0 * currprefs.gfx_filter_vert_zoom_mult;
-
-        sr.left = (dst_width - amiga_width * scale) / 2;
-        sr.top = (dst_height - amiga_height * scale) / 2;
-       sr.right = amiga_width * scale + sr.left;
-       sr.bottom = amiga_height * scale + sr.top;
-
-       dr.left = dr.top = 0;
-       dr.right = dst_width;
-       dr.bottom = dst_height;
-
-        hz *= amiga_width / 2000.0;
-       if (hz > 0) {
-           dr.left += hz;
-           dr.right -= hz;
-       } else {
-           sr.left -= hz;
-           sr.right += hz;
-       }
-        vz *= amiga_height / 2000.0;
-       if (vz > 0) {
-           dr.top += vz;
-           dr.bottom -= vz;
-       } else {
-           sr.top -= vz;
-           sr.bottom += vz;
-       }
-
-
-       if (sr.left >= sr.right) {
-           sr.left = dst_width / 2 - 1;
-           sr.right = dst_width / 2 + 1;
-       }
-       if (sr.left < 0) {
-           dr.left = -sr.left;
-           sr.left = 0;
-       }
-       if (sr.right - sr.left > dst_width) {
-           dr.right = dst_width - (sr.right - dst_width);
-           sr.right = sr.left + dst_width;
-       }
-       if (sr.top >= sr.bottom) {
-           sr.top = dst_height / 2 - 1;
-           sr.bottom = dst_height / 2 + 1;
-       }
-       if (sr.top < 0) {
-           dr.top = -sr.top;
-           sr.top = 0;
-       }
-       if (sr.bottom - sr.top > dst_height) {
-           dr.bottom = dst_height - (sr.bottom - dst_height);
-           sr.bottom = sr.top + dst_height;
-       }
-
-       if (tempsurf && sr.left != 0 || sr.top != 0 || sr.right != dst_width || sr.bottom != dst_height ||
-           dr.top != 0 || dr.right != dst_width || dr.left != 0 || dr.bottom != dst_height || currprefs.gfx_filter_upscale) {
-               dds = tempsurf;
-               temp_needed = 1;
-       }
-    }
-
+    sr.left = -(aw - amiga_width) / 2;
+    sr.top = -(ah - amiga_height) / 2;
+    sptr = gfxvidinfo.bufmem + sr.left * gfxvidinfo.pixbytes + sr.top * gfxvidinfo.rowbytes;
+    endsptr = gfxvidinfo.bufmemend;
     bufmem_ptr = sptr;
 
-    if (temp_needed) {
-       desc.dwSize = sizeof (desc);
-       while (FAILED(ddrval = IDirectDrawSurface7_Lock (dds, NULL, &desc, DDLOCK_SURFACEMEMORYPTR | DDLOCK_WAIT, NULL))) {
-           if (ddrval == DDERR_SURFACELOST) {
-               ddrval = IDirectDrawSurface7_Restore (dds);
-               if (FAILED(ddrval))
-                   return;
-           } else if (ddrval != DDERR_SURFACEBUSY) {
-               return;
-           }
-       }
-       dptr = (uae_u8*)desc.lpSurface;
-       pitch = desc.lPitch;
-    } else {
-       if (!DirectDraw_SurfaceLock ())
-           return;
-       dptr = DirectDraw_GetSurfacePointer ();
-       pitch = DirectDraw_GetSurfacePitch ();
-    }
-    enddptr = dptr + pitch * dst_height;
+    if (!locksurface (tempsurf, &desc))
+       return;
+    dptr = (uae_u8*)desc.lpSurface;
+    pitch = desc.lPitch;
+    enddptr = dptr + pitch * temp_height;
+
+    dr.left = sr.left + (temp_width - aws) /2;
+    dr.top = sr.top + (temp_height - ahs) / 2;
+    dptr += dr.left * (dst_depth / 8);
+    dptr += dr.top * pitch;
 
     if (!dptr) /* weird things can happen */
        goto end;
@@ -232,7 +188,7 @@ void S2X_render (void)
 
     } else if (usedfilter->type == UAE_FILTER_HQ) { /* 32/2X+3X+4X */
 
-       if (tempsurf2 && scale == 2) {
+       if (tempsurf2 && scale >= 2 && scale <= 4) {
            /* Aaaaaaaarghhhghgh.. */
            uae_u8 *sptr2 = tempsurf3;
            uae_u8 *dptr2 = tempsurf2;
@@ -244,10 +200,20 @@ void S2X_render (void)
                sptr += gfxvidinfo.rowbytes;
            }
            if (amiga_depth == 16 && dst_depth == 32) {
-               hq2x_32 (tempsurf2, tempsurf3, aw, ah, aw * scale * 4);
+               if (scale == 2)
+                   hq2x_32 (tempsurf2, tempsurf3, aw, ah, aws * 4);
+               else if (scale == 3)
+                   hq3x_32 (tempsurf2, tempsurf3, aw, ah, aws * 4);
+               else if (scale == 4)
+                   hq4x_32 (tempsurf2, tempsurf3, aw, ah, aws * 4);
                ok = 1;
            } else if (amiga_depth == 16 && dst_depth == 16) {
-               hq2x_16 (tempsurf2, tempsurf3, aw, ah, aw * scale * 2);
+               if (scale == 2)
+                   hq2x_16 (tempsurf2, tempsurf3, aw, ah, aws * 2);
+               else if (scale == 3)
+                   hq3x_16 (tempsurf2, tempsurf3, aw, ah, aws * 2);
+               else if (scale == 4)
+                   hq4x_16 (tempsurf2, tempsurf3, aw, ah, aws * 2);
                ok = 1;
            }
            for (i = 0; i < ah * scale; i++) {
@@ -256,10 +222,6 @@ void S2X_render (void)
                sptr2 += w;
                dptr += pitch;
            }
-           while (i < dst_height && dptr < enddptr) {
-               memset (dptr, 0, dst_width * dst_depth / 8);
-               dptr += pitch;
-           }
        }
 
     } else if (usedfilter->type == UAE_FILTER_SUPEREAGLE) { /* 16/2X */
@@ -297,11 +259,8 @@ void S2X_render (void)
 
        if (amiga_depth == dst_depth) {
            int y;
-           for (y = 0; y < dst_height; y++) {
-               if (sptr < endsptr && sptr >= gfxvidinfo.bufmem)
-                   memcpy (dptr, sptr, dst_width * dst_depth / 8);
-               else
-                   memset (dptr, 0, dst_width * dst_depth / 8);
+           for (y = 0; y < ah; y++) {
+               memcpy (dptr, sptr, aw * dst_depth / 8);
                sptr += gfxvidinfo.rowbytes;
                dptr += pitch;
            }
@@ -316,11 +275,51 @@ void S2X_render (void)
     }
 
 end:
-    if (temp_needed) {
-       IDirectDrawSurface7_Unlock (dds, NULL);
-       DirectDraw_BlitRect (NULL, &dr, tempsurf, &sr);
-    } else {
-       DirectDraw_SurfaceUnlock ();
+    unlocksurface (tempsurf);
+
+    {
+       int xs, ys;
+       int xmult, ymult;
+       int v;
+
+       SetRect (&sr, 0, 0, dst_width, dst_height);
+
+       dr.left -= (dst_width - aws) / 2;
+       dr.top -= (dst_height - ahs) / 2;
+       dr.right = dr.left + dst_width;
+       dr.bottom = dr.top + dst_height;
+
+       v = currprefs.gfx_filter ? currprefs.gfx_filter_horiz_offset : 0;
+       OffsetRect (&dr, (int)(-v * aws / 1000.0), 0);
+       v = currprefs.gfx_filter ? currprefs.gfx_filter_vert_offset : 0;
+       OffsetRect (&dr, 0, (int)(-v * ahs / 1000.0));
+
+       xmult = currprefs.gfx_filter_horiz_zoom_mult;
+       if (xmult <= 0)
+           xmult = aws * 1000 / dst_width;
+       else
+           xmult = xmult + xmult * currprefs.gfx_filter_horiz_zoom / 2000;
+
+       ymult = currprefs.gfx_filter_vert_zoom_mult;
+       if (ymult <= 0)
+           ymult = ahs * 1000 / dst_height;
+       else
+           ymult = ymult + ymult * currprefs.gfx_filter_vert_zoom / 2000;
+
+       xs = dst_width - dst_width * xmult / 1000;
+
+       dr.left += xs / 2;
+       dr.right -= xs / 2;
+
+       ys = dst_height - dst_height * ymult / 1000;
+       dr.top += ys / 2;
+       dr.bottom -= ys / 2;
+
+       if (dr.left >= 0 && dr.top >= 0 && dr.right < temp_width && dr.bottom < temp_height) {
+           if (dr.left < dr.right && dr.top < dr.bottom)
+               DirectDraw_BlitRect (NULL, &sr, tempsurf, &dr);
+       }
+       statusline ();
     }
 }
 
@@ -332,7 +331,7 @@ void S2X_refresh (void)
     if (!DirectDraw_SurfaceLock ())
        return;
     dptr = DirectDraw_GetSurfacePointer ();
-    pitch = DirectDraw_GetSurfacePitch();
+    pitch = DirectDraw_GetSurfacePitch ();
     for (y = 0; y < dst_height; y++)
        memset (dptr + y * pitch, 0, dst_width * dst_depth / 8);
     DirectDraw_SurfaceUnlock ();
index 517b78792a9f79117b91d8a7ed0cabf61f147df1..69a7098a26ab5fb062b7cfffcdcbdbb4c00998b6 100755 (executable)
@@ -169,16 +169,6 @@ uae_u16 picasso96_pixel_format = RGBFF_CHUNKY;
 static char scrlinebuf[4096 * 4]; /* this is too large, but let's rather play on the safe side here */
 
 
-static COLORREF BuildColorRef(uae_u32 color)
-{
-    COLORREF result;
-    if(DirectDraw_GetCurrentDepth () <= 8)
-       result = color;
-    else
-       result = color;
-    return result;
-}
-
 void centerdstrect (RECT *dr, RECT *sr)
 {
     if(!(currentmode->flags & (DM_DX_FULLSCREEN | DM_W_FULLSCREEN)))
@@ -203,16 +193,11 @@ void centerdstrect (RECT *dr, RECT *sr)
 }
 
 
-int DX_Fill (int dstx, int dsty, int width, int height, uae_u32 color)
+void DX_Fill (int dstx, int dsty, int width, int height, uae_u32 color)
 {
     int result = 0;
     RECT dstrect;
     RECT srcrect;
-    DDBLTFX ddbltfx;
-
-    memset (&ddbltfx, 0, sizeof (ddbltfx));
-    ddbltfx.dwFillColor = BuildColorRef (color);
-    ddbltfx.dwSize = sizeof (ddbltfx);
 
     /* Set up our source rectangle.  This NEVER needs to be adjusted for windowed display, since the
      * source is ALWAYS in an offscreen buffer, or we're in full-screen mode. */
@@ -221,34 +206,15 @@ int DX_Fill (int dstx, int dsty, int width, int height, uae_u32 color)
     /* Set up our destination rectangle, and adjust for blit to windowed display (if necessary ) */
     SetRect (&dstrect, dstx, dsty, dstx + width, dsty + height);
     centerdstrect (&dstrect, &srcrect);
-#if 0
-    /* Render our fill to the visible (primary) surface */
-    hr = DirectDraw_Blt(primary_surface, &dstrect, invalid_surface, NULL, DDBLT_WAIT | DDBLT_COLORFILL, &ddbltfx);
-    if(SUCCEEDED(hr)) {
-       result = 1;
-       if(DirectDraw_GetLockableType() == secondary_surface) {
-           /* We've colour-filled the visible, but still need to colour-fill the offscreen */
-           hr = DirectDraw_Blt(secondary_surface, &srcrect, invalid_surface, NULL, DDBLT_WAIT | DDBLT_COLORFILL, &ddbltfx);
-           if (FAILED(hr)) {
-               write_log ("DX_Fill2(%dx%d %d%d): %s\n", dstx, dsty, width, height, DXError(hr));
-               result = 0;
-           }
-
-       }
-    } else {
-       write_log ("DX_Fill(%dx%d %d%d): %s\n", dstx, dsty, width, height, DXError(hr));
-    }
-#endif
-    return result;
+    DirectDraw_Fill (&dstrect, color);
 }
 
-int DX_Blit (int x, int y, int w, int h)
+void DX_Blit (int x, int y, int w, int h)
 {
     RECT r;
 
     SetRect (&r, x, y, x + w, y + h);
     DirectDraw_BlitToPrimary (&r);
-    return 1;
 }
 
 static int rgbformat_bits (RGBFTYPE t)
@@ -1771,7 +1737,8 @@ static BOOL doInit (void)
                    } else {
                        int j = 0, i = currprefs.gfx_filter_filtermode;
                        while (i >= 0) {
-                           while (!usedfilter->x[j]) j++;
+                           while (!usedfilter->x[j])
+                               j++;
                            if(i-- > 0)
                                j++;
                        }
index 9cf449295c606e91a6fafc3ce9c45ad6bc399c66..04e04173de7e4fa8891df3b9b2664ebb9438db77 100755 (executable)
@@ -34,8 +34,8 @@ extern void close_windows (void);
 extern void updatewinfsmode (struct uae_prefs *p);
 extern int is3dmode (void);
 
-int DX_Fill (int dstx, int dsty, int width, int height, uae_u32 color);
-int DX_Blit (int x, int y, int w, int h);
+void DX_Fill (int dstx, int dsty, int width, int height, uae_u32 color);
+void DX_Blit (int x, int y, int w, int h);
 void centerdstrect (RECT *, RECT *);
 
 
index fd650b632c9d2dd4ea8793abf4264890aafad2e2..b60280dff8f60bf420809f80bbd965fe417d65ee 100755 (executable)
@@ -287,15 +287,35 @@ static void writefavoritepaths (int num, char **values, char **paths)
 
 
 static int askinputcustom(HWND hDlg, char *custom, int maxlen);
-static void addfavoritepath (HWND hDlg, const char *path, int num, char **values, char **paths)
+static void addfavoritepath (HWND hDlg, int num, char **values, char **paths)
 {
     char name[MAX_DPATH];
     if (num >= MAXFAVORITES)
        return;
-    strcpy (name, path);
+    if (!stored_path[0])
+        GetModuleFileName(NULL, stored_path, MAX_DPATH);
+    while (stored_path[0]) {
+       DWORD v = GetFileAttributes (stored_path);
+       char *s;
+       if (v == INVALID_FILE_ATTRIBUTES)
+           break;
+       if (v & FILE_ATTRIBUTE_DIRECTORY)
+           break;
+       s = strrchr (stored_path, '\\');
+       if (!s)
+           s = strrchr (stored_path, '/');
+       if (!s) {
+           stored_path[0] = 0;
+           break;
+       }
+       s[0] = 0;
+    }
+    if (!askinputcustom (hDlg, stored_path, sizeof stored_path))
+       return;
+    strcpy (name, stored_path);
     if (askinputcustom (hDlg, name, sizeof name)) {
        values[num] = my_strdup (name);
-       paths[num] = my_strdup (path);
+       paths[num] = my_strdup (stored_path);
        num++;
        writefavoritepaths (num, values, paths);
     }
@@ -329,33 +349,13 @@ static void addeditmenu (HMENU menu, char **items)
     mii.fState = MFS_ENABLED;
     InsertMenuItem (menu, -1, TRUE, &mii);
 
-    if (!stored_path[0])
-        GetModuleFileName(NULL, stored_path, MAX_DPATH);
-    while (stored_path[0]) {
-       DWORD v = GetFileAttributes (stored_path);
-       char *s;
-       if (v == INVALID_FILE_ATTRIBUTES)
-           break;
-       if (v & FILE_ATTRIBUTE_DIRECTORY)
-           break;
-       s = strrchr (stored_path, '\\');
-       if (!s)
-           s = strrchr (stored_path, '/');
-       if (!s) {
-           stored_path[0] = 0;
-           break;
-       }
-       s[0] = 0;
-    }
     mii.fMask = MIIM_STRING | MIIM_ID;
     mii.fType = MFT_STRING;
     mii.fState = MFS_ENABLED;
-    sprintf (newpath, "Add '%s'", stored_path);
-    mii.dwTypeData = newpath;
+    mii.dwTypeData = "Add New";
     mii.cch = strlen (mii.dwTypeData);
     mii.wID = 1000;
-    if (stored_path[0])
-       InsertMenuItem (emenu, -1, TRUE, &mii);
+    InsertMenuItem (emenu, -1, TRUE, &mii);
     i = 0;
     while (items[i]) {
        mii.fMask = MIIM_STRING | MIIM_ID;
@@ -472,8 +472,7 @@ static char *favoritepopup (HWND hwnd)
        if (ret <= idx)
            break;
        if (ret == 1000) {
-           if (stored_path[0])
-               addfavoritepath (hwnd, stored_path, idx, values, paths);
+           addfavoritepath (hwnd, idx, values, paths);
        } else if (ret > 1000) {
            removefavoritepath (ret - 1001, idx, values, paths);
        }
@@ -2311,6 +2310,7 @@ void InitializeListView (HWND hDlg)
            struct uaedev_config_info *uci = &workprefs.mountconfig[i];
            int nosize = 0, type;
            struct mountedinfo mi;
+           char *rootdir = uci->rootdir;
 
            type = get_filesys_unitconfig (&workprefs, i, &mi);
            if (type < 0) {
@@ -2322,6 +2322,8 @@ void InitializeListView (HWND hDlg)
                strcpy (size_str, "n/a");
            else if (mi.size >= 1024 * 1024 * 1024)
                sprintf (size_str, "%.1fG", ((double)(uae_u32)(mi.size / (1024 * 1024))) / 1024.0);
+           else if (mi.size < 10 * 1024 * 1024)
+               sprintf (size_str, "%dK", mi.size / 1024);
            else
                sprintf (size_str, "%.1fM", ((double)(uae_u32)(mi.size / (1024))) / 1024.0);
 
@@ -2350,6 +2352,8 @@ void InitializeListView (HWND hDlg)
                strcpy (devname_str, "n/a");
                strcpy (volname_str, "n/a");
                strcpy (bootpri_str, "n/a");
+               if (!memcmp (rootdir, "HD_", 3))
+                   rootdir += 3;
            } else {
                strcpy (blocksize_str, "n/a");
                strcpy (devname_str, uci->devname);
@@ -2386,8 +2390,8 @@ void InitializeListView (HWND hDlg)
                    listview_column_width[2] = width;
 
                listview_column_width [3] = 150;
-               ListView_SetItemText(list, result, 3, uci->rootdir);
-               width = ListView_GetStringWidth(list, uci->rootdir) + 10;
+               ListView_SetItemText(list, result, 3, rootdir);
+               width = ListView_GetStringWidth(list, rootdir) + 10;
                if(width > listview_column_width[3])
                    listview_column_width[3] = width;
 
@@ -5615,7 +5619,7 @@ static INT_PTR CALLBACK MiscDlgProc2 (HWND hDlg, UINT msg, WPARAM wParam, LPARAM
 
 static int cpu_ids[]   = { IDC_CPU0, IDC_CPU1, IDC_CPU2, IDC_CPU3, IDC_CPU4, IDC_CPU5 };
 static int fpu_ids[]   = { IDC_FPU0, IDC_FPU1, IDC_FPU2, IDC_FPU3 };
-static int trust_ids[] = { IDC_TRUST0, IDC_TRUST1, IDC_TRUST1, IDC_TRUST2 };
+static int trust_ids[] = { IDC_TRUST0, IDC_TRUST1, IDC_TRUST1, IDC_TRUST1 };
 
 static void enable_for_cpudlg (HWND hDlg)
 {
@@ -5652,7 +5656,6 @@ static void enable_for_cpudlg (HWND hDlg)
 
     ew (hDlg, IDC_TRUST0, enable2);
     ew (hDlg, IDC_TRUST1, enable2);
-    ew (hDlg, IDC_TRUST2, enable2);
     ew (hDlg, IDC_HARDFLUSH, enable2);
     ew (hDlg, IDC_CONSTJUMP, enable2);
     ew (hDlg, IDC_JITFPU, enable2);
@@ -5740,7 +5743,7 @@ static void values_to_cpudlg (HWND hDlg)
        workprefs.comptrustnaddr= 0;
     }
 
-    CheckRadioButton(hDlg, IDC_TRUST0, IDC_TRUST2, trust_ids[workprefs.comptrustbyte]);
+    CheckRadioButton(hDlg, IDC_TRUST0, IDC_TRUST1, trust_ids[workprefs.comptrustbyte]);
 
     SendDlgItemMessage(hDlg, IDC_CACHE, TBM_SETPOS, TRUE, workprefs.cachesize / 1024);
     sprintf(cache, "%d MB", workprefs.cachesize / 1024 );
@@ -8957,8 +8960,8 @@ static void makefilter(char *s, int x, int flags)
        strcat (s, " (32bit)");
 }
 
-static char *filtermultnames[] = { "1x", "2x", "4x", "6x", "8x", NULL };
-static int filtermults[] = { 1000, 500, 250, 167, 125 };
+static char *filtermultnames[] = { "FS", "1/2x", "1x", "2x", "4x", "6x", "8x", NULL };
+static int filtermults[] = { 0, 2000, 1000, 500, 250, 167, 125 };
 struct filterxtra {
     char *label;
     int *varw, *varc;
index 0bda46d0b5e33c36b1294a74768c67c4d3b7f835..72494257189c9e6c5aca5699a9be594f5460b735 100755 (executable)
                                RelativePath="..\hardfile_win32.c"
                                >
                        </File>
+                       <File
+                               RelativePath="..\hq3x16.obj"
+                               >
+                       </File>
+                       <File
+                               RelativePath="..\hq3x32.obj"
+                               >
+                       </File>
+                       <File
+                               RelativePath="..\hq4x16.obj"
+                               >
+                       </File>
+                       <File
+                               RelativePath="..\hq4x32.obj"
+                               >
+                       </File>
                        <File
                                RelativePath="..\ioport.c"
                                >
index eb0187f7393b69c8302d198a0acb6a8deaf2f16e..6e6893822b99fd7627c088a8ee937235e671e74e 100755 (executable)
@@ -1,4 +1,39 @@
 
+Beta 7:
+
+NOTE: _DO_ use filters (except OGL/D3D). Vsync still broken.
+
+- Gayle and Gary CIA select lines work slightly differently, Gayle
+  does not select any CIAs if both CIA select bits are zero (address
+  bits 12 and 13). Gary enables both. Gary-style selection was
+  emulated long time ago, Gayle-style emulated now. (I think 'select
+  both CIAs' was only used by some games accidentally and perhaps
+  also used by some copy protections)
+- car color flickering in hires mode super skid marks (ancient bug)
+- real PCMCIA SRAM card detection really works now
+- uaegfx.card updated, now works even if "UAE boot rom" is not in
+  "standard" 0xf00000 location (=CDTV ROM enabled) or disabled.
+  Previous version simply crashed.
+- uae boot rom backup location moved from 0xe70000 to 0xef0000
+  (0xe70000 can conflict with KS mirror)
+- jit memory allocation update, should prevent random RTG memory
+  allocation failures
+- Picasso96 emulation updates and tweaks
+- CPU panel "After RTG" radiobutton removed, not needed anymore
+- big filter rewrite (NOTE: OpenGL/Direct3D not yet updated)
+  Settings need to be readjusted
+  + works more intuitively now
+  + scaling does not affect centering anymore.
+  + position setting (0,0) = centered.
+  + 1/2x scaling factor added
+  + "FS" scaling factor added = full screen scaling (obsoletes short
+  lived "full screen filter" setting)
+  + user scaling factor (FS,1/2x,1x,2x..) never includes the filter's
+  possible internal scaling
+  + hq3x and hq4x added
+  + onscreen leds are now rendered after filter calculations
+  + (some kind of "keep aspect ratio" option needed?)
+
 Beta 6:
 
 NOTE: Do not use filters, do not use vsync.
index 45899e4968209e14f2b17d6023e50a07bea26a2f..df1e04ee2c03f95a16b29767e1444cacf0538058 100755 (executable)
@@ -144,7 +144,7 @@ void close_console(void)
     consoleopen = 0;
 }
 
-static void writeconsole(char *buffer)
+static void writeconsole(const char *buffer)
 {
     DWORD temp;
     if (!consoleopen)
@@ -155,7 +155,7 @@ static void writeconsole(char *buffer)
        WriteConsole(stdoutput, buffer, strlen(buffer), &temp,0);
 }
 
-void console_out (const char *format,...)
+void console_out_f (const char *format,...)
 {
     va_list parms;
     char buffer[WRITE_LOG_BUF_SIZE];
@@ -163,8 +163,13 @@ void console_out (const char *format,...)
     va_start (parms, format);
     _vsnprintf (buffer, WRITE_LOG_BUF_SIZE-1, format, parms);
     va_end (parms);
-    openconsole();
-    writeconsole(buffer);
+    openconsole ();
+    writeconsole (buffer);
+}
+void console_out (const char *txt)
+{
+    openconsole ();
+    writeconsole (txt);
 }
 
 int console_get (char *out, int maxlen)
diff --git a/sana2.c b/sana2.c
index 6b7ca1c44292667346a3b763b441f2ad94f4b1b1..18b6456a1e5f4182fe041a059d84b05cd0d74d9b 100755 (executable)
--- a/sana2.c
+++ b/sana2.c
@@ -321,7 +321,6 @@ static int openfail (uaecptr ioreq, int error)
     return (uae_u32)-1;
 }
 
-/* AARGHHH!! */
 static uae_u32 REGPARAM2 uaenet_int_handler (TrapContext *ctx);
 static int irq_init;
 static int initint (TrapContext *ctx)
index 5e5f1f13bd286655c397cf6f2565ba2b7fae0cc1..ab4b282f44da55da36410c99acdef2c96514eb48 100755 (executable)
@@ -869,7 +869,7 @@ void savestate_listrewind (void)
            break;
        p = st->cpu + 17 * 4;
        pc = restore_u32_func (&p);
-       console_out ("%d: PC=%08X %c\n", cnt, pc, regs.pc == pc ? '*' : ' ');
+       console_out_f ("%d: PC=%08X %c\n", cnt, pc, regs.pc == pc ? '*' : ' ');
        cnt++;
        i--;
        if (i < 0)
index 403b031f21684502c19ba53e97867b6f33b20f03..fd21b6c429d4bf3a584746c4f2c9c76926b9224b 100755 (executable)
--- a/uaelib.c
+++ b/uaelib.c
@@ -429,7 +429,6 @@ static uae_u32 REGPARAM2 uaelib_demux (TrapContext *context)
      case 37: return picasso_SetSpritePosition (&context->regs);
      case 38: return picasso_SetSpriteImage (&context->regs);
      case 39: return picasso_SetSpriteColor (&context->regs);
-     case 40: return picasso_DrawLine (&context->regs);
 #endif
      case 68: return emulib_Minimize ();
      case 69: return emulib_ExecuteNativeCode (&context->regs);