--- /dev/null
+#include <stdlib.h>
+#include <stddef.h>
+#include <math.h>
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "pci.h"
+#include "thread.h"
+#include "timer.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_blitter.h"
+#include "vid_voodoo_display.h"
+#include "vid_voodoo_dither.h"
+#include "vid_voodoo_fb.h"
+#include "vid_voodoo_fifo.h"
+#include "vid_voodoo_reg.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+#include "vid_voodoo_texture.h"
+
+
+/*Colour expansion lookup tables, indexed by the packed source value and
+  filled in by voodoo_card_init(). The 8-bit formats have 0x100 entries, the
+  16-bit formats 0x10000.*/
+rgba8_t rgb332[0x100], ai44[0x100], rgb565[0x10000], argb1555[0x10000], argb4444[0x10000], ai88[0x10000];
+
+
+/*Global counter; not referenced in the part of the file visible here.
+  NOTE(review): presumably a triangle count used elsewhere - confirm.*/
+int tris = 0;
+
+/*timer_read() value captured by the previous voodoo_add_status_info() call;
+  used there to compute the time elapsed between status refreshes.*/
+static uint64_t status_time = 0;
+
+
+/*Recompute the state derived from fbiInit1/2/6, lfbMode and fbzMode: the
+  front/back/aux buffer offsets, the LFB read and write targets, the draw
+  target and the row widths. Returns without changes for Banshee and later.*/
+void voodoo_recalc(voodoo_t *voodoo)
+{
+    const uint32_t buffer_size = ((voodoo->fbiInit2 >> 11) & 511) * 4096;
+    int blocks;
+
+    if (voodoo->type >= VOODOO_BANSHEE)
+        return;
+
+    voodoo->params.front_offset = voodoo->disp_buffer * buffer_size;
+    voodoo->back_offset = voodoo->draw_buffer * buffer_size;
+
+    /*Triple buffering places the aux buffer after three colour buffers
+      rather than two, and moves the cutoff up accordingly*/
+    if (TRIPLE_BUFFER)
+    {
+        voodoo->buffer_cutoff = buffer_size * 4;
+        voodoo->params.aux_offset = buffer_size * 3;
+    }
+    else
+    {
+        voodoo->buffer_cutoff = buffer_size * 3;
+        voodoo->params.aux_offset = buffer_size * 2;
+    }
+
+    /*LFB write target*/
+    switch (voodoo->lfbMode & LFB_WRITE_MASK)
+    {
+        case LFB_WRITE_BACK:
+        voodoo->fb_write_offset = voodoo->back_offset;
+        voodoo->fb_write_buffer = voodoo->draw_buffer;
+        break;
+
+        case LFB_WRITE_FRONT:
+        voodoo->fb_write_offset = voodoo->params.front_offset;
+        voodoo->fb_write_buffer = voodoo->disp_buffer;
+        break;
+
+        default:
+        /*BreakNeck sets invalid LFB write buffer select; fall back to the
+          front buffer offset and leave fb_write_buffer as it was*/
+        voodoo->fb_write_offset = voodoo->params.front_offset;
+        break;
+    }
+
+    /*LFB read source*/
+    switch (voodoo->lfbMode & LFB_READ_MASK)
+    {
+        case LFB_READ_AUX:
+        voodoo->fb_read_offset = voodoo->params.aux_offset;
+        break;
+        case LFB_READ_BACK:
+        voodoo->fb_read_offset = voodoo->back_offset;
+        break;
+        case LFB_READ_FRONT:
+        voodoo->fb_read_offset = voodoo->params.front_offset;
+        break;
+
+        default:
+        fatal("voodoo_recalc : unknown lfb source\n");
+    }
+
+    /*Rendering target*/
+    switch (voodoo->params.fbzMode & FBZ_DRAW_MASK)
+    {
+        case FBZ_DRAW_BACK:
+        voodoo->params.draw_offset = voodoo->back_offset;
+        voodoo->fb_draw_buffer = voodoo->draw_buffer;
+        break;
+        case FBZ_DRAW_FRONT:
+        voodoo->params.draw_offset = voodoo->params.front_offset;
+        voodoo->fb_draw_buffer = voodoo->disp_buffer;
+        break;
+
+        default:
+        fatal("voodoo_recalc : unknown draw buffer\n");
+    }
+
+    /*Block width: fbiInit1 bits 4-7 doubled, plus 1 if fbiInit6 bit 30 is
+      set, plus 32 if fbiInit1 bit 24 is set*/
+    blocks = ((voodoo->fbiInit1 >> 4) & 15) * 2;
+    if (voodoo->fbiInit6 & (1 << 30))
+        blocks += 1;
+    if (voodoo->fbiInit1 & (1 << 24))
+        blocks += 32;
+    voodoo->block_width = blocks;
+
+    /*The colour and aux buffers share the same row width*/
+    voodoo->row_width = voodoo->block_width * 32 * 2;
+    voodoo->aux_row_width = voodoo->row_width;
+    voodoo->params.row_width = voodoo->row_width;
+    voodoo->params.aux_row_width = voodoo->aux_row_width;
+}
+
+
+/*16-bit read handler for the card's memory window. Only the framebuffer
+  region (address bits 23:22 == 01) is readable; all other reads return
+  0xffff. The read cost is charged to the CPU cycle counter either way.*/
+static uint16_t voodoo_readw(uint32_t addr, void *p)
+{
+    voodoo_t *voodoo = (voodoo_t *)p;
+
+    addr &= 0xffffff;
+    cycles -= voodoo->read_time;
+
+    if ((addr & 0xc00000) != 0x400000)
+        return 0xffff;
+
+    /*Framebuffer. In SLI mode bit 0 of the line number (address bits 11+)
+      selects which card of the set services the read*/
+    if (SLI_ENABLED)
+        voodoo = voodoo->set->voodoos[((addr >> 11) & 0x3ff) & 1];
+
+    /*Drain the FIFO and wait for rendering to finish before reading*/
+    voodoo->flush = 1;
+    while (!FIFO_EMPTY)
+    {
+        voodoo_wake_fifo_thread_now(voodoo);
+        thread_wait_event(voodoo->fifo_not_full_event, 1);
+    }
+    voodoo_wait_for_render_thread_idle(voodoo);
+    voodoo->flush = 0;
+
+    return voodoo_fb_readw(addr, voodoo);
+}
+
+
+/*32-bit read handler for the card's memory window.
+
+  addr bit 23 set  : texture space - nothing is read back, returns all-ones
+  addr bit 22 set  : framebuffer - FIFO is drained and rendering finished
+                     before voodoo_fb_readl() is called
+  otherwise        : register read, selected by addr & 0x3fc
+
+  Returns the value read; unknown registers are logged and return 0xffffffff.
+  The read cost (read_time) is always charged to the CPU cycle counter.*/
+static uint32_t voodoo_readl(uint32_t addr, void *p)
+{
+    voodoo_t *voodoo = (voodoo_t *)p;
+    /*Initialised so that texture-space reads, which have no handler, return
+      a defined value (matching the 0xffff voodoo_readw() gives for them)
+      rather than an uninitialised one*/
+    uint32_t temp = 0xffffffff;
+    int fifo_size;
+    voodoo->rd_count++;
+    addr &= 0xffffff;
+
+    cycles -= voodoo->read_time;
+
+    if (addr & 0x800000) /*Texture*/
+    {
+    }
+    else if (addr & 0x400000) /*Framebuffer*/
+    {
+        if (SLI_ENABLED)
+        {
+            /*Odd lines are serviced by the second card, even by the first*/
+            voodoo_set_t *set = voodoo->set;
+            int y = (addr >> 11) & 0x3ff;
+
+            if (y & 1)
+                voodoo = set->voodoos[1];
+            else
+                voodoo = set->voodoos[0];
+        }
+
+        voodoo->flush = 1;
+        while (!FIFO_EMPTY)
+        {
+            voodoo_wake_fifo_thread_now(voodoo);
+            thread_wait_event(voodoo->fifo_not_full_event, 1);
+        }
+        voodoo_wait_for_render_thread_idle(voodoo);
+        voodoo->flush = 0;
+
+        temp = voodoo_fb_readl(addr, voodoo);
+    }
+    else switch (addr & 0x3fc)
+    {
+        case SST_status:
+        {
+            int fifo_entries = FIFO_ENTRIES;
+            int swap_count = voodoo->swap_count;
+            int written = voodoo->cmd_written + voodoo->cmd_written_fifo;
+            int busy = (written - voodoo->cmd_read) || (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr);
+
+            if (SLI_ENABLED && voodoo->type != VOODOO_2)
+            {
+                /*Report the worse of the two cards: larger swap count,
+                  fuller FIFO, busy if either is busy*/
+                voodoo_t *voodoo_other = (voodoo == voodoo->set->voodoos[0]) ? voodoo->set->voodoos[1] : voodoo->set->voodoos[0];
+                int other_written = voodoo_other->cmd_written + voodoo_other->cmd_written_fifo;
+
+                if (voodoo_other->swap_count > swap_count)
+                    swap_count = voodoo_other->swap_count;
+                if ((voodoo_other->fifo_write_idx - voodoo_other->fifo_read_idx) > fifo_entries)
+                    fifo_entries = voodoo_other->fifo_write_idx - voodoo_other->fifo_read_idx;
+                if ((other_written - voodoo_other->cmd_read) ||
+                    (voodoo_other->cmdfifo_depth_rd != voodoo_other->cmdfifo_depth_wr))
+                    busy = 1;
+                if (!voodoo_other->voodoo_busy)
+                    voodoo_wake_fifo_thread(voodoo_other);
+            }
+
+            /*Free FIFO space goes in bits 12+, and saturated at 0x3f in
+              bits 0-5*/
+            fifo_size = 0xffff - fifo_entries;
+            temp = fifo_size << 12;
+            if (fifo_size < 0x40)
+                temp |= fifo_size;
+            else
+                temp |= 0x3f;
+            /*Pending swap count in bits 28-30, saturated at 7*/
+            if (swap_count < 7)
+                temp |= (swap_count << 28);
+            else
+                temp |= (7 << 28);
+            if (!voodoo->v_retrace)
+                temp |= 0x40;
+
+            if (busy)
+                temp |= 0x380; /*Busy*/
+
+            if (!voodoo->voodoo_busy)
+                voodoo_wake_fifo_thread(voodoo);
+        }
+        break;
+
+        /*Rendering state registers: flush first so queued writes have been
+          applied before the value is read back*/
+        case SST_fbzColorPath:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.fbzColorPath;
+        break;
+        case SST_fogMode:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.fogMode;
+        break;
+        case SST_alphaMode:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.alphaMode;
+        break;
+        case SST_fbzMode:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.fbzMode;
+        break;
+        case SST_lfbMode:
+        voodoo_flush(voodoo);
+        temp = voodoo->lfbMode;
+        break;
+        case SST_clipLeftRight:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.clipRight | (voodoo->params.clipLeft << 16);
+        break;
+        case SST_clipLowYHighY:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.clipHighY | (voodoo->params.clipLowY << 16);
+        break;
+
+        case SST_stipple:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.stipple;
+        break;
+        case SST_color0:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.color0;
+        break;
+        case SST_color1:
+        voodoo_flush(voodoo);
+        temp = voodoo->params.color1;
+        break;
+
+        /*Statistics counters are reported as 24-bit values*/
+        case SST_fbiPixelsIn:
+        temp = voodoo->fbiPixelsIn & 0xffffff;
+        break;
+        case SST_fbiChromaFail:
+        temp = voodoo->fbiChromaFail & 0xffffff;
+        break;
+        case SST_fbiZFuncFail:
+        temp = voodoo->fbiZFuncFail & 0xffffff;
+        break;
+        case SST_fbiAFuncFail:
+        temp = voodoo->fbiAFuncFail & 0xffffff;
+        break;
+        case SST_fbiPixelsOut:
+        temp = voodoo->fbiPixelsOut & 0xffffff;
+        break;
+
+        case SST_fbiInit4:
+        temp = voodoo->fbiInit4;
+        break;
+        case SST_fbiInit0:
+        temp = voodoo->fbiInit0;
+        break;
+        case SST_fbiInit1:
+        temp = voodoo->fbiInit1;
+        break;
+        case SST_fbiInit2:
+        /*With initEnable bit 2 set this register returns the result of the
+          last DAC read issued through SST_dacData instead*/
+        if (voodoo->initEnable & 0x04)
+            temp = voodoo->dac_readdata;
+        else
+            temp = voodoo->fbiInit2;
+        break;
+        case SST_fbiInit3:
+        temp = voodoo->fbiInit3 | (1 << 10) | (2 << 8);
+        break;
+
+        case SST_vRetrace:
+        temp = voodoo->line & 0x1fff;
+        break;
+        case SST_hvRetrace:
+        {
+            /*Estimate the horizontal position from the time remaining until
+              the next line timer expiry*/
+            uint32_t line_time = (uint32_t)(voodoo->line_time >> 32);
+            uint32_t diff = (timer_get_ts_int(&voodoo->timer) > (tsc & 0xffffffff)) ? (timer_get_ts_int(&voodoo->timer) - (tsc & 0xffffffff)) : 0;
+            uint32_t pre_div = diff * voodoo->h_total;
+            /*line_time is 0 when the upper half of voodoo->line_time is 0;
+              avoid dividing by zero in that case*/
+            uint32_t post_div = line_time ? (pre_div / line_time) : 0;
+            uint32_t h_pos = (voodoo->h_total - 1) - post_div;
+
+            if (h_pos >= voodoo->h_total)
+                h_pos = 0;
+
+            temp = voodoo->line & 0x1fff;
+            temp |= (h_pos << 16);
+        }
+        break;
+
+        case SST_fbiInit5:
+        temp = voodoo->fbiInit5 & ~0x1ff;
+        break;
+        case SST_fbiInit6:
+        temp = voodoo->fbiInit6;
+        break;
+        case SST_fbiInit7:
+        temp = voodoo->fbiInit7 & ~0xff;
+        break;
+
+        case SST_cmdFifoBaseAddr:
+        /*Base and end are reported in 4 KB pages*/
+        temp = voodoo->cmdfifo_base >> 12;
+        temp |= (voodoo->cmdfifo_end >> 12) << 16;
+        break;
+
+        case SST_cmdFifoRdPtr:
+        temp = voodoo->cmdfifo_rp;
+        break;
+        case SST_cmdFifoAMin:
+        temp = voodoo->cmdfifo_amin;
+        break;
+        case SST_cmdFifoAMax:
+        temp = voodoo->cmdfifo_amax;
+        break;
+        case SST_cmdFifoDepth:
+        temp = voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd;
+        break;
+
+        default:
+        pclog("voodoo_readl : bad addr %08X\n", addr);
+        temp = 0xffffffff;
+        break;
+    }
+
+    return temp;
+}
+
+/*16-bit write handler for the card's memory window. Writes to the
+  framebuffer region (address bits 23:22 == 01) are queued to the FIFO;
+  writes anywhere else are counted and charged but otherwise ignored.*/
+static void voodoo_writew(uint32_t addr, uint16_t val, void *p)
+{
+    voodoo_t *voodoo = (voodoo_t *)p;
+    const uint32_t offset = addr & 0xffffff;
+
+    voodoo->wr_count++;
+    cycles -= voodoo->write_time;
+
+    if ((offset & 0xc00000) != 0x400000)
+        return;
+
+    /*Framebuffer*/
+    voodoo_queue_command(voodoo, offset | FIFO_WRITEW_FB, val);
+}
+
+/*32-bit write handler for the card's memory window.
+
+  addr bit 23 set : texture write, queued to the FIFO
+  addr bit 22 set : framebuffer write, queued to the FIFO
+  addr bit 21 set and CMDFIFO enabled in fbiInit7 : written straight into the
+                    command FIFO area of framebuffer memory
+  otherwise       : register write selected by addr & 0x3fc. Init, video
+                    timing, CLUT, DAC and CMDFIFO registers are handled
+                    here; everything else is queued to the FIFO (or just
+                    logged when CMDFIFO mode is enabled).
+
+  A write to the address 4 bytes after the previous one is charged
+  burst_time, any other write write_time.*/
+static void voodoo_writel(uint32_t addr, uint32_t val, void *p)
+{
+    voodoo_t *voodoo = (voodoo_t *)p;
+
+    voodoo->wr_count++;
+
+    addr &= 0xffffff;
+
+    if (addr == voodoo->last_write_addr+4)
+        cycles -= voodoo->burst_time;
+    else
+        cycles -= voodoo->write_time;
+    voodoo->last_write_addr = addr;
+
+    if (addr & 0x800000) /*Texture*/
+    {
+        voodoo->tex_count++;
+        voodoo_queue_command(voodoo, addr | FIFO_WRITEL_TEX, val);
+    }
+    else if (addr & 0x400000) /*Framebuffer*/
+    {
+        voodoo_queue_command(voodoo, addr | FIFO_WRITEL_FB, val);
+    }
+    else if ((addr & 0x200000) && (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE))
+    {
+//      pclog("Write CMDFIFO %08x(%08x) %08x %08x\n", addr, voodoo->cmdfifo_base + (addr & 0x3fffc), val, (voodoo->cmdfifo_base + (addr & 0x3fffc)) & voodoo->fb_mask);
+        *(uint32_t *)&voodoo->fb_mem[(voodoo->cmdfifo_base + (addr & 0x3fffc)) & voodoo->fb_mask] = val;
+        voodoo->cmdfifo_depth_wr++;
+        /*Only wake the FIFO thread while the backlog is still small*/
+        if ((voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd) < 20)
+            voodoo_wake_fifo_thread(voodoo);
+    }
+    else switch (addr & 0x3fc)
+    {
+        case SST_intrCtrl:
+        fatal("intrCtrl write %08x\n", val);
+        break;
+
+        case SST_userIntrCMD:
+        fatal("userIntrCMD write %08x\n", val);
+        break;
+
+        case SST_swapbufferCMD:
+        /*NOTE(review): unlike the other commands below, cmd_written and
+          swap_count are bumped before the CMDFIFO check - confirm that this
+          is intended*/
+        voodoo->cmd_written++;
+        thread_lock_mutex(voodoo->swap_mutex);
+        voodoo->swap_count++;
+        thread_unlock_mutex(voodoo->swap_mutex);
+        if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE)
+            return;
+        voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val);
+        if (!voodoo->voodoo_busy)
+            voodoo_wake_fifo_threads(voodoo->set, voodoo);
+        break;
+
+        /*Direct command registers: ignored in CMDFIFO mode, otherwise
+          counted, queued, and the FIFO threads woken if the card is idle*/
+        case SST_triangleCMD:
+        case SST_ftriangleCMD:
+        case SST_fastfillCMD:
+        case SST_nopCMD:
+        if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE)
+            return;
+        voodoo->cmd_written++;
+        voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val);
+        if (!voodoo->voodoo_busy)
+            voodoo_wake_fifo_threads(voodoo->set, voodoo);
+        break;
+
+        /*fbiInit registers are only writable while initEnable bit 0 is set*/
+        case SST_fbiInit4:
+        if (voodoo->initEnable & 0x01)
+        {
+            voodoo->fbiInit4 = val;
+            voodoo->read_time = pci_nonburst_time + pci_burst_time * ((voodoo->fbiInit4 & 1) ? 2 : 1);
+//          pclog("fbiInit4 write %08x - read_time=%i\n", val, voodoo->read_time);
+        }
+        break;
+        case SST_backPorch:
+        voodoo->backPorch = val;
+        break;
+        case SST_videoDimensions:
+        voodoo->videoDimensions = val;
+        voodoo->h_disp = (val & 0xfff) + 1;
+        voodoo->v_disp = (val >> 16) & 0xfff;
+        break;
+        case SST_fbiInit0:
+        if (voodoo->initEnable & 0x01)
+        {
+            voodoo->fbiInit0 = val;
+            /*Bit 0 drives the SVGA override; with two cards it is
+              overridden if either card has the bit set*/
+            if (voodoo->set->nr_cards == 2)
+                svga_set_override(voodoo->svga, (voodoo->set->voodoos[0]->fbiInit0 | voodoo->set->voodoos[1]->fbiInit0) & 1);
+            else
+                svga_set_override(voodoo->svga, val & 1);
+            if (val & FBIINIT0_GRAPHICS_RESET)
+            {
+                /*Reset display/draw buffer selection. This may not actually
+                  happen here on a real Voodoo*/
+                voodoo->disp_buffer = 0;
+                voodoo->draw_buffer = 1;
+                voodoo_recalc(voodoo);
+                voodoo->front_offset = voodoo->params.front_offset;
+            }
+        }
+        break;
+        case SST_fbiInit1:
+        if (voodoo->initEnable & 0x01)
+        {
+            /*Video reset being released: restart line and swap counters*/
+            if ((voodoo->fbiInit1 & FBIINIT1_VIDEO_RESET) && !(val & FBIINIT1_VIDEO_RESET))
+            {
+                voodoo->line = 0;
+                thread_lock_mutex(voodoo->swap_mutex);
+                voodoo->swap_count = 0;
+                thread_unlock_mutex(voodoo->swap_mutex);
+                voodoo->retrace_count = 0;
+            }
+            /*Bits 0 and 2 keep their current value*/
+            voodoo->fbiInit1 = (val & ~5) | (voodoo->fbiInit1 & 5);
+            voodoo->write_time = pci_nonburst_time + pci_burst_time * ((voodoo->fbiInit1 & 2) ? 1 : 0);
+            voodoo->burst_time = pci_burst_time * ((voodoo->fbiInit1 & 2) ? 2 : 1);
+//          pclog("fbiInit1 write %08x - write_time=%i burst_time=%i\n", val, voodoo->write_time, voodoo->burst_time);
+        }
+        break;
+        case SST_fbiInit2:
+        if (voodoo->initEnable & 0x01)
+        {
+            voodoo->fbiInit2 = val;
+            voodoo_recalc(voodoo);
+        }
+        break;
+        case SST_fbiInit3:
+        if (voodoo->initEnable & 0x01)
+            voodoo->fbiInit3 = val;
+        break;
+
+        case SST_hSync:
+        voodoo->hSync = val;
+        voodoo->h_total = (val & 0xffff) + (val >> 16);
+        voodoo_pixelclock_update(voodoo);
+        break;
+        case SST_vSync:
+        voodoo->vSync = val;
+        voodoo->v_total = (val & 0xffff) + (val >> 16);
+        break;
+
+        case SST_clutData:
+        {
+            /*Bits 24-29 select the entry, bits 0-23 hold R, G, B*/
+            const int clut_idx = (val >> 24) & 0x3f;
+
+            voodoo->clutData[clut_idx].b = val & 0xff;
+            voodoo->clutData[clut_idx].g = (val >> 8) & 0xff;
+            voodoo->clutData[clut_idx].r = (val >> 16) & 0xff;
+            /*Bit 29 forces the entry to white*/
+            if (val & 0x20000000)
+            {
+                voodoo->clutData[clut_idx].b = 255;
+                voodoo->clutData[clut_idx].g = 255;
+                voodoo->clutData[clut_idx].r = 255;
+            }
+            voodoo->clutData_dirty = 1;
+        }
+        break;
+
+        case SST_dacData:
+        /*Bits 8-10 select the DAC register, bit 11 selects read/write,
+          bits 0-7 carry the data byte for writes*/
+        voodoo->dac_reg = (val >> 8) & 7;
+        voodoo->dac_readdata = 0xff;
+        if (val & 0x800)
+        {
+            /*Read: the result is latched in dac_readdata and fetched by the
+              guest through SST_fbiInit2*/
+//          pclog("  dacData read %i %02X\n", voodoo->dac_reg, voodoo->dac_data[7]);
+            if (voodoo->dac_reg == 5)
+            {
+                /*Fixed responses keyed on register 7; anything else reads
+                  back as 0xff*/
+                switch (voodoo->dac_data[7])
+                {
+                    case 0x01: voodoo->dac_readdata = 0x55; break;
+                    case 0x07: voodoo->dac_readdata = 0x71; break;
+                    case 0x0b: voodoo->dac_readdata = 0x79; break;
+                }
+            }
+            else
+            {
+                /*Return the selected register. This used to index with
+                  dac_readdata, which had just been set to 0xff, so every
+                  read returned register 7*/
+                voodoo->dac_readdata = voodoo->dac_data[voodoo->dac_reg];
+            }
+        }
+        else
+        {
+            if (voodoo->dac_reg == 5)
+            {
+                /*PLL registers are 16 bits wide and written a byte at a
+                  time, low byte first; dac_data[4] holds the PLL index and
+                  advances after the high byte. Only the data byte of val is
+                  merged, so the register-select bits cannot leak into the
+                  PLL value*/
+                const int pll_idx = voodoo->dac_data[4] & 0xf;
+
+                if (!voodoo->dac_reg_ff)
+                    voodoo->dac_pll_regs[pll_idx] = (voodoo->dac_pll_regs[pll_idx] & 0xff00) | (val & 0xff);
+                else
+                    voodoo->dac_pll_regs[pll_idx] = (voodoo->dac_pll_regs[pll_idx] & 0xff) | ((val & 0xff) << 8);
+//              pclog("Write PLL reg %x %04x\n", voodoo->dac_data[4] & 0xf, voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf]);
+                voodoo->dac_reg_ff = !voodoo->dac_reg_ff;
+                if (!voodoo->dac_reg_ff)
+                    voodoo->dac_data[4]++;
+            }
+            else
+            {
+                voodoo->dac_data[voodoo->dac_reg] = val & 0xff;
+                voodoo->dac_reg_ff = 0;
+            }
+            voodoo_pixelclock_update(voodoo);
+        }
+        break;
+
+        case SST_scrFilter:
+        if (voodoo->initEnable & 0x01)
+        {
+            voodoo->scrfilterEnabled = 1;
+            voodoo->scrfilterThreshold = val; /* update the threshold values and generate a new lookup table if necessary */
+
+            if (val < 1)
+                voodoo->scrfilterEnabled = 0;
+            voodoo_threshold_check(voodoo);
+            pclog("Voodoo Filter: %06x\n", val);
+        }
+        break;
+
+        case SST_fbiInit5:
+        /*Bits in mask 0x41e6 keep their current value*/
+        if (voodoo->initEnable & 0x01)
+            voodoo->fbiInit5 = (val & ~0x41e6) | (voodoo->fbiInit5 & 0x41e6);
+        break;
+        case SST_fbiInit6:
+        if (voodoo->initEnable & 0x01)
+            voodoo->fbiInit6 = val;
+        break;
+        case SST_fbiInit7:
+        if (voodoo->initEnable & 0x01)
+        {
+            voodoo->fbiInit7 = val;
+            voodoo->cmdfifo_enabled = val & 0x100;
+        }
+        break;
+
+        case SST_cmdFifoBaseAddr:
+        /*Base in bits 0-9, end in bits 16-25, both in 4 KB pages*/
+        voodoo->cmdfifo_base = (val & 0x3ff) << 12;
+        voodoo->cmdfifo_end = ((val >> 16) & 0x3ff) << 12;
+//      pclog("CMDFIFO base=%08x end=%08x\n", voodoo->cmdfifo_base, voodoo->cmdfifo_end);
+        break;
+
+        case SST_cmdFifoRdPtr:
+        voodoo->cmdfifo_rp = val;
+        break;
+        case SST_cmdFifoAMin:
+        voodoo->cmdfifo_amin = val;
+        break;
+        case SST_cmdFifoAMax:
+        voodoo->cmdfifo_amax = val;
+        break;
+        case SST_cmdFifoDepth:
+        voodoo->cmdfifo_depth_rd = 0;
+        voodoo->cmdfifo_depth_wr = val & 0xffff;
+        break;
+
+        default:
+        if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE)
+        {
+            pclog("Unknown register write in CMDFIFO mode %08x %08x\n", addr, val);
+        }
+        else
+        {
+            voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val);
+        }
+        break;
+    }
+}
+
+/*16-bit read through the snoop mapping: always serviced by the first card
+  of the set.*/
+static uint16_t voodoo_snoop_readw(uint32_t addr, void *p)
+{
+    voodoo_t *primary = ((voodoo_set_t *)p)->voodoos[0];
+
+    return voodoo_readw(addr, primary);
+}
+/*32-bit read through the snoop mapping: always serviced by the first card
+  of the set.*/
+static uint32_t voodoo_snoop_readl(uint32_t addr, void *p)
+{
+    voodoo_t *primary = ((voodoo_set_t *)p)->voodoos[0];
+
+    return voodoo_readl(addr, primary);
+}
+
+/*16-bit write through the snoop mapping: forwarded to both cards of the
+  set, first card first.*/
+static void voodoo_snoop_writew(uint32_t addr, uint16_t val, void *p)
+{
+    voodoo_set_t *set = (voodoo_set_t *)p;
+    int card;
+
+    for (card = 0; card < 2; card++)
+        voodoo_writew(addr, val, set->voodoos[card]);
+}
+/*32-bit write through the snoop mapping: forwarded to both cards of the
+  set, first card first.*/
+static void voodoo_snoop_writel(uint32_t addr, uint32_t val, void *p)
+{
+    voodoo_set_t *set = (voodoo_set_t *)p;
+    int card;
+
+    for (card = 0; card < 2; card++)
+        voodoo_writel(addr, val, set->voodoos[card]);
+}
+
+/*Re-evaluate the memory mappings of every card in the set after a change to
+  PCI enable, memBaseAddr or initEnable. Each mapping is 16 MB. With two
+  cards the shared snoop mapping replaces the primary's own mapping when the
+  primary is a Voodoo 2 and the secondary has initEnable bit 23 set, or
+  replaces both when the two cards are placed at the same address.*/
+static void voodoo_recalcmapping(voodoo_set_t *set)
+{
+    voodoo_t *pri = set->voodoos[0];
+    voodoo_t *sec;
+
+    if (set->nr_cards != 2)
+    {
+        /*Single card: map or unmap its own window*/
+        if (pri->pci_enable && pri->memBaseAddr)
+        {
+            pclog("voodoo_recalcmapping : memBaseAddr %08X\n", pri->memBaseAddr);
+            mem_mapping_set_addr(&pri->mapping, pri->memBaseAddr, 0x01000000);
+        }
+        else
+        {
+            pclog("voodoo_recalcmapping : disabled\n");
+            mem_mapping_disable(&pri->mapping);
+        }
+        return;
+    }
+
+    sec = set->voodoos[1];
+
+    /*Primary card*/
+    if (!(pri->pci_enable && pri->memBaseAddr))
+    {
+        pclog("voodoo_recalcmapping (pri) : disabled\n");
+        mem_mapping_disable(&pri->mapping);
+    }
+    else if (pri->type == VOODOO_2 && (sec->initEnable & (1 << 23)))
+    {
+        pclog("voodoo_recalcmapping (pri) with snoop : memBaseAddr %08X\n", pri->memBaseAddr);
+        mem_mapping_disable(&pri->mapping);
+        mem_mapping_set_addr(&set->snoop_mapping, pri->memBaseAddr, 0x01000000);
+    }
+    else if (sec->pci_enable && (pri->memBaseAddr == sec->memBaseAddr))
+    {
+        /*Both cards at the same address: the snoop mapping handles
+          everything, so the secondary is not mapped separately*/
+        pclog("voodoo_recalcmapping (pri) (sec) same addr : memBaseAddr %08X\n", pri->memBaseAddr);
+        mem_mapping_disable(&pri->mapping);
+        mem_mapping_disable(&sec->mapping);
+        mem_mapping_set_addr(&set->snoop_mapping, pri->memBaseAddr, 0x01000000);
+        return;
+    }
+    else
+    {
+        pclog("voodoo_recalcmapping (pri) : memBaseAddr %08X\n", pri->memBaseAddr);
+        mem_mapping_disable(&set->snoop_mapping);
+        mem_mapping_set_addr(&pri->mapping, pri->memBaseAddr, 0x01000000);
+    }
+
+    /*Secondary card*/
+    if (sec->pci_enable && sec->memBaseAddr)
+    {
+        pclog("voodoo_recalcmapping (sec) : memBaseAddr %08X\n", sec->memBaseAddr);
+        mem_mapping_set_addr(&sec->mapping, sec->memBaseAddr, 0x01000000);
+    }
+    else
+    {
+        pclog("voodoo_recalcmapping (sec) : disabled\n");
+        mem_mapping_disable(&sec->mapping);
+    }
+}
+
+/*PCI configuration space read for function 0 of the card; other functions
+  and unlisted registers read as 0.*/
+uint8_t voodoo_pci_read(int func, int addr, void *p)
+{
+    voodoo_t *voodoo = (voodoo_t *)p;
+    uint8_t ret = 0;
+
+    if (func)
+        return ret;
+
+    switch (addr)
+    {
+        /*Vendor ID 0x121a (3dfx)*/
+        case 0x00: ret = 0x1a; break;
+        case 0x01: ret = 0x12; break;
+
+        /*Device ID: 0x0002 for Voodoo 2, 0x0001 for SST-1 (Voodoo Graphics)*/
+        case 0x02: ret = (voodoo->type == VOODOO_2) ? 0x02 : 0x01; break;
+        case 0x03: ret = 0x00; break;
+
+        /*Command: bit 1 = respond to memory accesses*/
+        case 0x04: ret = voodoo->pci_enable ? 0x02 : 0x00; break;
+
+        case 0x08: ret = 2; break; /*Revision ID*/
+        case 0x09: ret = 0; break; /*Programming interface*/
+        case 0x0a: ret = 0; break;
+        case 0x0b: ret = 0x04; break;
+
+        /*memBaseAddr: only the top byte is implemented*/
+        case 0x10: ret = 0x00; break;
+        case 0x11: ret = 0x00; break;
+        case 0x12: ret = 0x00; break;
+        case 0x13: ret = voodoo->memBaseAddr >> 24; break;
+
+        /*initEnable, one byte per register*/
+        case 0x40:
+        ret = voodoo->initEnable & 0xff;
+        break;
+        case 0x41:
+        /*Low nibble only; a Voodoo 2 reports 5 in the high nibble*/
+        ret = (voodoo->initEnable >> 8) & 0x0f;
+        if (voodoo->type == VOODOO_2)
+            ret |= 0x50;
+        break;
+        case 0x42:
+        ret = (voodoo->initEnable >> 16) & 0xff;
+        break;
+        case 0x43:
+        ret = (voodoo->initEnable >> 24) & 0xff;
+        break;
+    }
+
+    return ret;
+}
+
+/*PCI configuration space write for function 0 of the card; writes to other
+  functions and to unlisted registers are ignored.
+
+  0x04      : bit 1 enables the memory window
+  0x13      : top byte of memBaseAddr
+  0x40-0x43 : the four bytes of initEnable
+
+  The memory mappings are re-evaluated after writes to 0x04, 0x13, 0x42 and
+  0x43.*/
+void voodoo_pci_write(int func, int addr, uint8_t val, void *p)
+{
+    voodoo_t *voodoo = (voodoo_t *)p;
+    /*Widen to an unsigned type before shifting: val is promoted to int, so
+      shifting a value >= 0x80 left by 24 would shift into the sign bit,
+      which is undefined behaviour*/
+    const uint32_t byte = val;
+
+    if (func)
+        return;
+
+//  pclog("Voodoo PCI write %04X %02X PC=%08x\n", addr, val, cpu_state.pc);
+
+    switch (addr)
+    {
+        case 0x04:
+        voodoo->pci_enable = val & 2;
+        voodoo_recalcmapping(voodoo->set);
+        break;
+
+        case 0x13:
+        voodoo->memBaseAddr = byte << 24;
+        voodoo_recalcmapping(voodoo->set);
+        break;
+
+        case 0x40:
+        voodoo->initEnable = (voodoo->initEnable & ~0x000000ff) | byte;
+        break;
+        case 0x41:
+        voodoo->initEnable = (voodoo->initEnable & ~0x0000ff00) | (byte << 8);
+        break;
+        case 0x42:
+        voodoo->initEnable = (voodoo->initEnable & ~0x00ff0000) | (byte << 16);
+        voodoo_recalcmapping(voodoo->set);
+        break;
+        case 0x43:
+        voodoo->initEnable = (voodoo->initEnable & ~0xff000000) | (byte << 24);
+        voodoo_recalcmapping(voodoo->set);
+        break;
+    }
+}
+
+
+/*Append src to the NUL-terminated string held in dst without ever writing
+  past dst[dst_size - 1]. The text is truncated if it does not fit; nothing
+  is written if dst holds no terminator within dst_size bytes.*/
+static void voodoo_status_append(char *dst, size_t dst_size, const char *src)
+{
+    const char *end;
+    size_t used;
+
+    if (!dst_size)
+        return;
+    end = memchr(dst, '\0', dst_size);
+    if (!end)
+        return;
+    used = (size_t)(end - dst);
+    snprintf(dst + used, dst_size - used, "%s", src);
+}
+
+/*Append one "CPU usage" line for a render thread to dst: busy_time as a
+  percentage of timer_freq and of the elapsed status interval.*/
+static void voodoo_status_append_cpu(char *dst, size_t dst_size, double busy_time, uint64_t status_diff)
+{
+    char line[256];
+
+    snprintf(line, sizeof(line), "%f%% CPU (%f%% real)\n",
+             (busy_time * 100.0) / timer_freq, (busy_time * 100.0) / status_diff);
+    voodoo_status_append(dst, dst_size, line);
+}
+
+/*Append a block of performance statistics (fill rates, triangle and frame
+  counts, per-thread CPU usage) for the card set to the status string s,
+  then reset the counters for the next interval.
+
+  s       : NUL-terminated string to append to
+  max_len : treated as the total capacity of s in bytes - the previous code
+            passed it to strncat() as the number of characters to append,
+            which could write past the end of s.
+            NOTE(review): confirm against the caller that it is the size
+            of s.
+  p       : the voodoo_set_t for the card set*/
+static void voodoo_add_status_info(char *s, int max_len, void *p)
+{
+    voodoo_set_t *voodoo_set = (voodoo_set_t *)p;
+    voodoo_t *voodoo = voodoo_set->voodoos[0];
+    voodoo_t *voodoo_slave = voodoo_set->voodoos[1];
+    char temps[512];
+    int pixel_count_current[4];
+    int pixel_count_total;
+    int texel_count_current[4];
+    int texel_count_total;
+    int render_time[4];
+    uint64_t new_time = timer_read();
+    uint64_t status_diff = new_time - status_time;
+    int c;
+
+    status_time = new_time;
+
+    /*Used as a divisor below*/
+    if (!status_diff)
+        status_diff = 1;
+
+    for (c = 0; c < 4; c++)
+    {
+        pixel_count_current[c] = voodoo->pixel_count[c];
+        texel_count_current[c] = voodoo->texel_count[c];
+        render_time[c] = voodoo->render_time[c];
+    }
+    if (voodoo_set->nr_cards == 2)
+    {
+        /*Two cards: sum the pixel/texel counts, average the render times*/
+        for (c = 0; c < 4; c++)
+        {
+            pixel_count_current[c] += voodoo_slave->pixel_count[c];
+            texel_count_current[c] += voodoo_slave->texel_count[c];
+            render_time[c] = (render_time[c] + voodoo_slave->render_time[c]) / 2;
+        }
+    }
+    /*Counts since the previous call*/
+    pixel_count_total = (pixel_count_current[0] + pixel_count_current[1] + pixel_count_current[2] + pixel_count_current[3]) -
+                        (voodoo->pixel_count_old[0] + voodoo->pixel_count_old[1] + voodoo->pixel_count_old[2] + voodoo->pixel_count_old[3]);
+    texel_count_total = (texel_count_current[0] + texel_count_current[1] + texel_count_current[2] + texel_count_current[3]) -
+                        (voodoo->texel_count_old[0] + voodoo->texel_count_old[1] + voodoo->texel_count_old[2] + voodoo->texel_count_old[3]);
+    /*snprintf rather than sprintf: %f has no upper bound on its width*/
+    snprintf(temps, sizeof(temps), "%f Mpixels/sec (%f)\n%f Mtexels/sec (%f)\n%f ktris/sec\n%f%% CPU (%f%% real)\n%d frames/sec (%i)\n%f%% CPU (%f%% real)\n"/*%d reads/sec\n%d write/sec\n%d tex/sec\n*/,
+        (double)pixel_count_total/1000000.0,
+        ((double)pixel_count_total/1000000.0) / ((double)render_time[0] / status_diff),
+        (double)texel_count_total/1000000.0,
+        ((double)texel_count_total/1000000.0) / ((double)render_time[0] / status_diff),
+        (double)voodoo->tri_count/1000.0, ((double)voodoo->time * 100.0) / timer_freq, ((double)voodoo->time * 100.0) / status_diff, voodoo->frame_count, voodoo_recomp,
+        ((double)voodoo->render_time[0] * 100.0) / timer_freq, ((double)voodoo->render_time[0] * 100.0) / status_diff);
+
+    /*One extra line per additional render thread on the first card*/
+    if (voodoo->render_threads >= 2)
+        voodoo_status_append_cpu(temps, sizeof(temps), (double)voodoo->render_time[1], status_diff);
+    if (voodoo->render_threads == 4)
+    {
+        voodoo_status_append_cpu(temps, sizeof(temps), (double)voodoo->render_time[2], status_diff);
+        voodoo_status_append_cpu(temps, sizeof(temps), (double)voodoo->render_time[3], status_diff);
+    }
+    /*...followed by the same for every render thread of the second card*/
+    if (voodoo_set->nr_cards == 2)
+    {
+        voodoo_status_append_cpu(temps, sizeof(temps), (double)voodoo_slave->render_time[0], status_diff);
+
+        if (voodoo_slave->render_threads >= 2)
+            voodoo_status_append_cpu(temps, sizeof(temps), (double)voodoo_slave->render_time[1], status_diff);
+        if (voodoo_slave->render_threads == 4)
+        {
+            voodoo_status_append_cpu(temps, sizeof(temps), (double)voodoo_slave->render_time[2], status_diff);
+            voodoo_status_append_cpu(temps, sizeof(temps), (double)voodoo_slave->render_time[3], status_diff);
+        }
+    }
+    if (max_len > 0)
+        voodoo_status_append(s, (size_t)max_len, temps);
+
+    /*Remember the current totals and reset the per-interval counters*/
+    for (c = 0; c < 4; c++)
+    {
+        voodoo->pixel_count_old[c] = pixel_count_current[c];
+        voodoo->texel_count_old[c] = texel_count_current[c];
+        voodoo->render_time[c] = 0;
+    }
+    voodoo->tri_count = voodoo->frame_count = 0;
+    voodoo->rd_count = voodoo->wr_count = voodoo->tex_count = 0;
+    voodoo->time = 0;
+    if (voodoo_set->nr_cards == 2)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            voodoo_slave->pixel_count_old[c] = pixel_count_current[c];
+            voodoo_slave->texel_count_old[c] = texel_count_current[c];
+            voodoo_slave->render_time[c] = 0;
+        }
+        voodoo_slave->tri_count = voodoo_slave->frame_count = 0;
+        voodoo_slave->rd_count = voodoo_slave->wr_count = voodoo_slave->tex_count = 0;
+        voodoo_slave->time = 0;
+    }
+    voodoo_recomp = 0;
+}
+
+/*Refresh the pixel clock and the PCI read/write/burst access costs of every
+  card in the set; the costs depend on pci_nonburst_time, pci_burst_time and
+  on bits of each card's fbiInit1 / fbiInit4.*/
+static void voodoo_speed_changed(void *p)
+{
+    voodoo_set_t *voodoo_set = (voodoo_set_t *)p;
+    const int cards = (voodoo_set->nr_cards == 2) ? 2 : 1;
+    int i;
+
+    for (i = 0; i < cards; i++)
+    {
+        voodoo_t *card = voodoo_set->voodoos[i];
+        const int read_bursts = (card->fbiInit4 & 1) ? 2 : 1;
+        const int write_bursts = (card->fbiInit1 & 2) ? 1 : 0;
+        const int burst_scale = (card->fbiInit1 & 2) ? 2 : 1;
+
+        voodoo_pixelclock_update(card);
+        card->read_time = pci_nonburst_time + pci_burst_time * read_bursts;
+        card->write_time = pci_nonburst_time + pci_burst_time * write_bursts;
+        card->burst_time = pci_burst_time * burst_scale;
+    }
+}
+
+/*Allocate and initialise one Voodoo card: read the device configuration,
+  register the PCI and memory-mapping handlers, allocate framebuffer,
+  texture and texture-cache memory, create the events, threads, mutex and
+  timers, and fill in the global colour lookup tables.
+
+  Returns the new voodoo_t. Any allocation failure is reported through
+  fatal() instead of being dereferenced.*/
+void *voodoo_card_init(void)
+{
+    /*Bytes per texture cache entry: 32-bit texels for a 256x256 texture
+      (counted twice) plus levels from 128x128 down to 2x2*/
+    const size_t tex_cache_bytes = (256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4;
+    int c;
+    voodoo_t *voodoo = calloc(1, sizeof(voodoo_t));
+
+    if (!voodoo)
+        fatal("voodoo_card_init : out of memory\n");
+
+    voodoo->bilinear_enabled = device_get_config_int("bilinear");
+    voodoo->scrfilter = device_get_config_int("dacfilter");
+    voodoo->texture_size = device_get_config_int("texture_memory");
+    voodoo->texture_mask = (voodoo->texture_size << 20) - 1;
+    voodoo->fb_size = device_get_config_int("framebuffer_memory");
+    voodoo->fb_mask = (voodoo->fb_size << 20) - 1;
+    voodoo->render_threads = device_get_config_int("render_threads");
+    voodoo->odd_even_mask = voodoo->render_threads - 1;
+#ifndef NO_CODEGEN
+    voodoo->use_recompiler = device_get_config_int("recompiler");
+#endif
+    voodoo->type = device_get_config_int("type");
+    switch (voodoo->type)
+    {
+        case VOODOO_1:
+        voodoo->dual_tmus = 0;
+        break;
+        case VOODOO_SB50:
+        voodoo->dual_tmus = 1;
+        break;
+        case VOODOO_2:
+        voodoo->dual_tmus = 1;
+        break;
+    }
+
+    if (voodoo->type == VOODOO_2) /*generate filter lookup tables*/
+        voodoo_generate_filter_v2(voodoo);
+    else
+        voodoo_generate_filter_v1(voodoo);
+
+    pci_add(voodoo_pci_read, voodoo_pci_write, voodoo);
+
+    mem_mapping_add(&voodoo->mapping, 0, 0, NULL, voodoo_readw, voodoo_readl, NULL, voodoo_writew, voodoo_writel, NULL, MEM_MAPPING_EXTERNAL, voodoo);
+
+    /*NOTE(review): fb_mask follows the configured framebuffer size while
+      the allocation is fixed at 4 MB - this assumes the configuration never
+      asks for more than 4; confirm*/
+    voodoo->fb_mem = malloc(4 * 1024 * 1024);
+    voodoo->tex_mem[0] = malloc(voodoo->texture_size * 1024 * 1024);
+    if (voodoo->dual_tmus)
+        voodoo->tex_mem[1] = malloc(voodoo->texture_size * 1024 * 1024);
+    if (!voodoo->fb_mem || !voodoo->tex_mem[0] || (voodoo->dual_tmus && !voodoo->tex_mem[1]))
+        fatal("voodoo_card_init : out of memory\n");
+    /*tex_mem[1] stays NULL on single-TMU cards, and so does its alias*/
+    voodoo->tex_mem_w[0] = (uint16_t *)voodoo->tex_mem[0];
+    voodoo->tex_mem_w[1] = (uint16_t *)voodoo->tex_mem[1];
+
+    for (c = 0; c < TEX_CACHE_MAX; c++)
+    {
+        voodoo->texture_cache[0][c].data = malloc(tex_cache_bytes);
+        if (!voodoo->texture_cache[0][c].data)
+            fatal("voodoo_card_init : out of memory\n");
+        voodoo->texture_cache[0][c].base = -1; /*invalid*/
+        voodoo->texture_cache[0][c].refcount = 0;
+        if (voodoo->dual_tmus)
+        {
+            voodoo->texture_cache[1][c].data = malloc(tex_cache_bytes);
+            if (!voodoo->texture_cache[1][c].data)
+                fatal("voodoo_card_init : out of memory\n");
+            voodoo->texture_cache[1][c].base = -1; /*invalid*/
+            voodoo->texture_cache[1][c].refcount = 0;
+        }
+    }
+
+    timer_add(&voodoo->timer, voodoo_callback, voodoo, 1);
+
+    voodoo->svga = svga_get_pri();
+    voodoo->fbiInit0 = 0;
+
+    /*Events are created for all four possible render threads, whatever the
+      configured thread count; they exist before any thread is started*/
+    voodoo->wake_fifo_thread = thread_create_event();
+    for (c = 0; c < 4; c++)
+        voodoo->wake_render_thread[c] = thread_create_event();
+    voodoo->wake_main_thread = thread_create_event();
+    voodoo->fifo_not_full_event = thread_create_event();
+    for (c = 0; c < 4; c++)
+        voodoo->render_not_full_event[c] = thread_create_event();
+
+    voodoo->fifo_thread = thread_create(voodoo_fifo_thread, voodoo);
+    voodoo->render_thread[0] = thread_create(voodoo_render_thread_1, voodoo);
+    if (voodoo->render_threads >= 2)
+        voodoo->render_thread[1] = thread_create(voodoo_render_thread_2, voodoo);
+    if (voodoo->render_threads == 4)
+    {
+        voodoo->render_thread[2] = thread_create(voodoo_render_thread_3, voodoo);
+        voodoo->render_thread[3] = thread_create(voodoo_render_thread_4, voodoo);
+    }
+    /*NOTE(review): the mutex is created after the threads have started -
+      presumably they cannot touch it before being woken; confirm*/
+    voodoo->swap_mutex = thread_create_mutex();
+    timer_add(&voodoo->wake_timer, voodoo_wake_timer, (void *)voodoo, 0);
+
+    /*8-bit formats. Each channel is expanded to 8 bits by replicating its
+      top bits into the low bits*/
+    for (c = 0; c < 0x100; c++)
+    {
+        rgb332[c].r = c & 0xe0;
+        rgb332[c].g = (c << 3) & 0xe0;
+        rgb332[c].b = (c << 6) & 0xc0;
+        rgb332[c].r = rgb332[c].r | (rgb332[c].r >> 3) | (rgb332[c].r >> 6);
+        rgb332[c].g = rgb332[c].g | (rgb332[c].g >> 3) | (rgb332[c].g >> 6);
+        rgb332[c].b = rgb332[c].b | (rgb332[c].b >> 2);
+        rgb332[c].b = rgb332[c].b | (rgb332[c].b >> 4);
+        rgb332[c].a = 0xff;
+
+        /*Alpha in the high nibble, intensity in the low nibble*/
+        ai44[c].a = (c & 0xf0) | ((c & 0xf0) >> 4);
+        ai44[c].r = (c & 0x0f) | ((c & 0x0f) << 4);
+        ai44[c].g = ai44[c].b = ai44[c].r;
+    }
+
+    /*16-bit formats*/
+    for (c = 0; c < 0x10000; c++)
+    {
+        rgb565[c].r = (c >> 8) & 0xf8;
+        rgb565[c].g = (c >> 3) & 0xfc;
+        rgb565[c].b = (c << 3) & 0xf8;
+        rgb565[c].r |= (rgb565[c].r >> 5);
+        rgb565[c].g |= (rgb565[c].g >> 6);
+        rgb565[c].b |= (rgb565[c].b >> 5);
+        rgb565[c].a = 0xff;
+
+        argb1555[c].r = (c >> 7) & 0xf8;
+        argb1555[c].g = (c >> 2) & 0xf8;
+        argb1555[c].b = (c << 3) & 0xf8;
+        argb1555[c].r |= (argb1555[c].r >> 5);
+        argb1555[c].g |= (argb1555[c].g >> 5);
+        argb1555[c].b |= (argb1555[c].b >> 5);
+        argb1555[c].a = (c & 0x8000) ? 0xff : 0;
+
+        argb4444[c].a = (c >> 8) & 0xf0;
+        argb4444[c].r = (c >> 4) & 0xf0;
+        argb4444[c].g = c & 0xf0;
+        argb4444[c].b = (c << 4) & 0xf0;
+        argb4444[c].a |= (argb4444[c].a >> 4);
+        argb4444[c].r |= (argb4444[c].r >> 4);
+        argb4444[c].g |= (argb4444[c].g >> 4);
+        argb4444[c].b |= (argb4444[c].b >> 4);
+
+        /*Alpha in the high byte, intensity in the low byte*/
+        ai88[c].a = (c >> 8);
+        ai88[c].r = c & 0xff;
+        ai88[c].g = c & 0xff;
+        ai88[c].b = c & 0xff;
+    }
+#ifndef NO_CODEGEN
+    voodoo_codegen_init(voodoo);
+#endif
+
+    voodoo->disp_buffer = 0;
+    voodoo->draw_buffer = 1;
+
+    return voodoo;
+}
+
+/*Allocate and initialise one voodoo_t for a combined 2D/3D card.
+  Reads the device configuration, allocates the texture cache, creates the
+  FIFO/render worker threads together with their events and mutex, fills the
+  global texture format expansion tables and returns the new card.
+  type is stored in voodoo->type; VOODOO_3 additionally enables the second TMU.*/
+void *voodoo_2d3d_card_init(int type)
+{
+        /*Size in bytes of one texture cache entry*/
+        const size_t tex_cache_bytes = (256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4;
+        voodoo_t *voodoo = malloc(sizeof(voodoo_t));
+        int nr_tmus;
+        int i, tmu;
+
+        memset(voodoo, 0, sizeof(voodoo_t));
+
+        /*User configuration*/
+        voodoo->bilinear_enabled = device_get_config_int("bilinear");
+        voodoo->scrfilter = device_get_config_int("dacfilter");
+        voodoo->render_threads = device_get_config_int("render_threads");
+        voodoo->odd_even_mask = voodoo->render_threads - 1;
+#ifndef NO_CODEGEN
+        voodoo->use_recompiler = device_get_config_int("recompiler");
+#endif
+        voodoo->type = type;
+        voodoo->dual_tmus = (type == VOODOO_3);
+        nr_tmus = voodoo->dual_tmus ? 2 : 1;
+
+        /*generate filter lookup tables*/
+        voodoo_generate_filter_v2(voodoo);
+
+        /*Texture cache : one set of entries per TMU in use, all marked invalid*/
+        for (i = 0; i < TEX_CACHE_MAX; i++)
+        {
+                for (tmu = 0; tmu < nr_tmus; tmu++)
+                {
+                        voodoo->texture_cache[tmu][i].data = malloc(tex_cache_bytes);
+                        voodoo->texture_cache[tmu][i].base = -1; /*invalid*/
+                        voodoo->texture_cache[tmu][i].refcount = 0;
+                }
+        }
+
+        timer_add(&voodoo->timer, voodoo_callback, voodoo, 1);
+
+        voodoo->fbiInit0 = 0;
+
+        /*Events are always created for all four render thread slots,
+          regardless of how many render threads are configured*/
+        voodoo->wake_fifo_thread = thread_create_event();
+        for (i = 0; i < 4; i++)
+                voodoo->wake_render_thread[i] = thread_create_event();
+        voodoo->wake_main_thread = thread_create_event();
+        voodoo->fifo_not_full_event = thread_create_event();
+        for (i = 0; i < 4; i++)
+                voodoo->render_not_full_event[i] = thread_create_event();
+
+        /*Worker threads : one FIFO thread plus 1, 2 or 4 render threads*/
+        voodoo->fifo_thread = thread_create(voodoo_fifo_thread, voodoo);
+        voodoo->render_thread[0] = thread_create(voodoo_render_thread_1, voodoo);
+        if (voodoo->render_threads >= 2)
+                voodoo->render_thread[1] = thread_create(voodoo_render_thread_2, voodoo);
+        if (voodoo->render_threads == 4)
+        {
+                voodoo->render_thread[2] = thread_create(voodoo_render_thread_3, voodoo);
+                voodoo->render_thread[3] = thread_create(voodoo_render_thread_4, voodoo);
+        }
+        voodoo->swap_mutex = thread_create_mutex();
+        timer_add(&voodoo->wake_timer, voodoo_wake_timer, (void *)voodoo, 0);
+
+        /*8-bit formats -> 8 bits per channel, replicating the top bits
+          into the low bits*/
+        for (i = 0; i < 0x100; i++)
+        {
+                rgba8_t *p332 = &rgb332[i];
+                rgba8_t *p44 = &ai44[i];
+
+                p332->r = i & 0xe0;
+                p332->g = (i << 3) & 0xe0;
+                p332->b = (i << 6) & 0xc0;
+                p332->r |= (p332->r >> 3) | (p332->r >> 6);
+                p332->g |= (p332->g >> 3) | (p332->g >> 6);
+                p332->b |= (p332->b >> 2);
+                p332->b |= (p332->b >> 4);
+                p332->a = 0xff;
+
+                p44->a = (i & 0xf0) | ((i & 0xf0) >> 4);
+                p44->r = (i & 0x0f) | ((i & 0x0f) << 4);
+                p44->g = p44->b = p44->r;
+        }
+
+        /*16-bit formats -> 8 bits per channel*/
+        for (i = 0; i < 0x10000; i++)
+        {
+                rgba8_t *p565 = &rgb565[i];
+                rgba8_t *p1555 = &argb1555[i];
+                rgba8_t *p4444 = &argb4444[i];
+                rgba8_t *p88 = &ai88[i];
+
+                p565->r = (i >> 8) & 0xf8;
+                p565->g = (i >> 3) & 0xfc;
+                p565->b = (i << 3) & 0xf8;
+                p565->r |= (p565->r >> 5);
+                p565->g |= (p565->g >> 6);
+                p565->b |= (p565->b >> 5);
+                p565->a = 0xff;
+
+                p1555->r = (i >> 7) & 0xf8;
+                p1555->g = (i >> 2) & 0xf8;
+                p1555->b = (i << 3) & 0xf8;
+                p1555->r |= (p1555->r >> 5);
+                p1555->g |= (p1555->g >> 5);
+                p1555->b |= (p1555->b >> 5);
+                p1555->a = (i & 0x8000) ? 0xff : 0;
+
+                p4444->a = (i >> 8) & 0xf0;
+                p4444->r = (i >> 4) & 0xf0;
+                p4444->g = i & 0xf0;
+                p4444->b = (i << 4) & 0xf0;
+                p4444->a |= (p4444->a >> 4);
+                p4444->r |= (p4444->r >> 4);
+                p4444->g |= (p4444->g >> 4);
+                p4444->b |= (p4444->b >> 4);
+
+                p88->a = (i >> 8);
+                p88->r = i & 0xff;
+                p88->g = i & 0xff;
+                p88->b = i & 0xff;
+        }
+#ifndef NO_CODEGEN
+        voodoo_codegen_init(voodoo);
+#endif
+
+        /*Start by displaying buffer 0 and drawing to buffer 1*/
+        voodoo->disp_buffer = 0;
+        voodoo->draw_buffer = 1;
+
+        return voodoo;
+}
+
+/*Device init callback : builds a voodoo_set_t holding one card, or two
+  when the "sli" option is set, programs the SLI init bits and tmuConfig
+  for the configured "type", and registers the snoop memory mapping.
+  Returns the new voodoo_set_t.*/
+void *voodoo_init()
+{
+        voodoo_set_t *voodoo_set = malloc(sizeof(voodoo_set_t));
+        uint32_t tmuConfig = 1; /*used as-is for any type not listed below*/
+        int type;
+        int sli;
+        int card;
+
+        memset(voodoo_set, 0, sizeof(voodoo_set_t));
+
+        type = device_get_config_int("type");
+
+        voodoo_set->nr_cards = device_get_config_int("sli") ? 2 : 1;
+        sli = (voodoo_set->nr_cards == 2);
+
+        for (card = 0; card < voodoo_set->nr_cards; card++)
+        {
+                voodoo_set->voodoos[card] = voodoo_card_init();
+                voodoo_set->voodoos[card]->set = voodoo_set;
+        }
+
+        if (sli)
+        {
+                /*Voodoo 2 flags the pairing in fbiInit5, the other types in fbiInit1*/
+                if (type == VOODOO_2)
+                {
+                        voodoo_set->voodoos[0]->fbiInit5 |= FBIINIT5_MULTI_CVG;
+                        voodoo_set->voodoos[1]->fbiInit5 |= FBIINIT5_MULTI_CVG;
+                }
+                else
+                {
+                        voodoo_set->voodoos[0]->fbiInit1 |= FBIINIT1_MULTI_SST;
+                        voodoo_set->voodoos[1]->fbiInit1 |= FBIINIT1_MULTI_SST;
+                }
+        }
+
+        if (type == VOODOO_1)
+                tmuConfig = sli ? (1 | (3 << 3)) : 1;
+        else if (type == VOODOO_SB50)
+                tmuConfig = sli ? (1 | (3 << 3) | (3 << 6) | (2 << 9)) : (1 | (3 << 6));
+        else if (type == VOODOO_2)
+                tmuConfig = 1 | (3 << 6);
+
+        for (card = 0; card < voodoo_set->nr_cards; card++)
+                voodoo_set->voodoos[card]->tmuConfig = tmuConfig;
+
+        mem_mapping_add(&voodoo_set->snoop_mapping, 0, 0, NULL, voodoo_snoop_readw, voodoo_snoop_readl, NULL, voodoo_snoop_writew, voodoo_snoop_writel, NULL, MEM_MAPPING_EXTERNAL, voodoo_set);
+
+        return voodoo_set;
+}
+
+/*Tear down one card : stop its worker threads, release the thread events,
+  the texture cache, the recompiler state and (for pre-Banshee types) the
+  framebuffer and texture memory, then free the voodoo_t itself.
+  Non-release builds first dump texture memory to texram.dmp / texram2.dmp.*/
+void voodoo_card_close(voodoo_t *voodoo)
+{
+#ifndef RELEASE_BUILD
+        FILE *f;
+#endif
+        int c;
+
+#ifndef RELEASE_BUILD
+        if (voodoo->tex_mem[0])
+        {
+                /*The dump is only a debugging aid, so it is skipped quietly
+                  if the file cannot be opened*/
+                f = romfopen("texram.dmp", "wb");
+                if (f)
+                {
+                        fwrite(voodoo->tex_mem[0], voodoo->texture_size*1024*1024, 1, f);
+                        fclose(f);
+                }
+                if (voodoo->dual_tmus)
+                {
+                        f = romfopen("texram2.dmp", "wb");
+                        if (f)
+                        {
+                                fwrite(voodoo->tex_mem[1], voodoo->texture_size*1024*1024, 1, f);
+                                fclose(f);
+                        }
+                }
+        }
+#endif
+
+        /*Only the render threads that were actually started are killed*/
+        thread_kill(voodoo->fifo_thread);
+        thread_kill(voodoo->render_thread[0]);
+        if (voodoo->render_threads >= 2)
+                thread_kill(voodoo->render_thread[1]);
+        if (voodoo->render_threads == 4)
+        {
+                thread_kill(voodoo->render_thread[2]);
+                thread_kill(voodoo->render_thread[3]);
+        }
+
+        thread_destroy_event(voodoo->fifo_not_full_event);
+        thread_destroy_event(voodoo->wake_main_thread);
+        thread_destroy_event(voodoo->wake_fifo_thread);
+        /*Card init creates events for all four render thread slots whatever
+          the configured thread count, so all four have to be released*/
+        for (c = 0; c < 4; c++)
+        {
+                if (voodoo->wake_render_thread[c])
+                        thread_destroy_event(voodoo->wake_render_thread[c]);
+                if (voodoo->render_not_full_event[c])
+                        thread_destroy_event(voodoo->render_not_full_event[c]);
+        }
+        /*NOTE(review): swap_mutex is created by card init but is not released
+          here - confirm the thread API's mutex destroy call and add it*/
+
+        for (c = 0; c < TEX_CACHE_MAX; c++)
+        {
+                if (voodoo->dual_tmus)
+                        free(voodoo->texture_cache[1][c].data);
+                free(voodoo->texture_cache[0][c].data);
+        }
+#ifndef NO_CODEGEN
+        voodoo_codegen_close(voodoo);
+#endif
+        /*Framebuffer and texture memory are only freed here for
+          pre-Banshee card types*/
+        if (voodoo->type < VOODOO_BANSHEE && voodoo->fb_mem)
+        {
+                free(voodoo->fb_mem);
+                if (voodoo->dual_tmus)
+                        free(voodoo->tex_mem[1]);
+                free(voodoo->tex_mem[0]);
+        }
+        free(voodoo);
+}
+
+/*Device close callback : closes the card(s) of the set, second card
+  first, then frees the set. p is the voodoo_set_t.*/
+void voodoo_close(void *p)
+{
+        voodoo_set_t *set = (voodoo_set_t *)p;
+        int card;
+
+        for (card = (set->nr_cards == 2) ? 1 : 0; card >= 0; card--)
+                voodoo_card_close(set->voodoos[card]);
+
+        free(set);
+}
+
+/*User-configurable options for the Voodoo device. The option names are the
+  keys read back with device_get_config_int() by the init code. Each
+  selection list is terminated by an entry with an empty description, and
+  the table itself by an entry with type -1.*/
+static device_config_t voodoo_config[] =
+{
+ {
+ /*Card model*/
+ .name = "type",
+ .description = "Voodoo type",
+ .type = CONFIG_SELECTION,
+ .selection =
+ {
+ {
+ .description = "Voodoo Graphics",
+ .value = VOODOO_1
+ },
+ {
+ .description = "Obsidian SB50 + Amethyst (2 TMUs)",
+ .value = VOODOO_SB50
+ },
+ {
+ .description = "Voodoo 2",
+ .value = VOODOO_2
+ },
+ {
+ .description = ""
+ }
+ },
+ .default_int = 0
+ },
+ {
+ /*Framebuffer memory size; values are in MB*/
+ .name = "framebuffer_memory",
+ .description = "Framebuffer memory size",
+ .type = CONFIG_SELECTION,
+ .selection =
+ {
+ {
+ .description = "2 MB",
+ .value = 2
+ },
+ {
+ .description = "4 MB",
+ .value = 4
+ },
+ {
+ .description = ""
+ }
+ },
+ .default_int = 2
+ },
+ {
+ /*Texture memory size; values are in MB*/
+ .name = "texture_memory",
+ .description = "Texture memory size",
+ .type = CONFIG_SELECTION,
+ .selection =
+ {
+ {
+ .description = "2 MB",
+ .value = 2
+ },
+ {
+ .description = "4 MB",
+ .value = 4
+ },
+ {
+ .description = ""
+ }
+ },
+ .default_int = 2
+ },
+ {
+ /*Copied to voodoo->bilinear_enabled at card init*/
+ .name = "bilinear",
+ .description = "Bilinear filtering",
+ .type = CONFIG_BINARY,
+ .default_int = 1
+ },
+ {
+ /*Copied to voodoo->scrfilter at card init*/
+ .name = "dacfilter",
+ .description = "Screen Filter",
+ .type = CONFIG_BINARY,
+ .default_int = 0
+ },
+ {
+ /*Number of render threads; card init starts 1, 2 or 4 of them*/
+ .name = "render_threads",
+ .description = "Render threads",
+ .type = CONFIG_SELECTION,
+ .selection =
+ {
+ {
+ .description = "1",
+ .value = 1
+ },
+ {
+ .description = "2",
+ .value = 2
+ },
+ {
+ .description = "4",
+ .value = 4
+ },
+ {
+ .description = ""
+ }
+ },
+ .default_int = 2
+ },
+ {
+ /*When set, voodoo_init() creates two cards instead of one*/
+ .name = "sli",
+ .description = "SLI",
+ .type = CONFIG_BINARY,
+ .default_int = 0
+ },
+#ifndef NO_CODEGEN
+ {
+ /*Copied to voodoo->use_recompiler at card init*/
+ .name = "recompiler",
+ .description = "Recompiler",
+ .type = CONFIG_BINARY,
+ .default_int = 1
+ },
+#endif
+ {
+ /*End of table*/
+ .type = -1
+ }
+};
+
+/*Device descriptor for the Voodoo add-in card : display name, bus flags,
+  the init/close callbacks defined above, the speed-changed and status-info
+  callbacks, and the configuration table. The two NULL entries are device_t
+  callbacks this device does not provide (NOTE(review): confirm which slots
+  they are against the device_t declaration).*/
+device_t voodoo_device =
+{
+ "3DFX Voodoo Graphics",
+ DEVICE_PCI,
+ voodoo_init,
+ voodoo_close,
+ NULL,
+ voodoo_speed_changed,
+ NULL,
+ voodoo_add_status_info,
+ voodoo_config
+};
--- /dev/null
+extern device_t voodoo_device;
--- /dev/null
+#include <stdlib.h>
+#include "ibm.h"
+#include "device.h"
+#include "io.h"
+#include "mem.h"
+#include "pci.h"
+#include "rom.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_ddc.h"
+#include "vid_svga.h"
+#include "vid_svga_render.h"
+#include "vid_voodoo_banshee.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_display.h"
+#include "vid_voodoo_fifo.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+#include "x86.h"
+
+#ifdef CLAMP
+#undef CLAMP
+#endif
+
+/*256x256 byte lookup tables, in separate red/blue ("rb") and green ("g")
+  variants. Not referenced in this part of the file - presumably the screen
+  filter tables for the two filter types (TODO confirm against the code that
+  fills them).*/
+static uint8_t vb_filter_v1_rb[256][256];
+static uint8_t vb_filter_v1_g [256][256];
+
+static uint8_t vb_filter_bx_rb[256][256];
+static uint8_t vb_filter_bx_g [256][256];
+
+/*Card variants handled by this file (presumably the values stored in
+  banshee_t.type - confirm against the init code)*/
+enum
+{
+ TYPE_BANSHEE = 0,
+ TYPE_V3_2000,
+ TYPE_V3_3000
+};
+
+/*State of one Banshee / Voodoo 3 card : the embedded SVGA core, PCI
+  configuration, shadow copies of the extended I/O registers and the
+  attached 3D core.*/
+typedef struct banshee_t
+{
+ svga_t svga; /*VGA/SVGA core; svga->p points back at this structure*/
+
+ rom_t bios_rom;
+
+ uint8_t pci_regs[256]; /*PCI configuration space*/
+
+ uint32_t memBaseAddr0; /*Base of the memory-mapped register areas*/
+ uint32_t memBaseAddr1; /*Base of the linear framebuffer*/
+ uint32_t ioBaseAddr;
+
+ /*Init registers, stored as last written*/
+ uint32_t agpInit0;
+ uint32_t dramInit0, dramInit1;
+ uint32_t lfbMemoryConfig;
+ uint32_t miscInit0, miscInit1;
+ uint32_t pciInit0;
+ uint32_t vgaInit0, vgaInit1;
+
+ uint32_t command_2d;
+ uint32_t srcBaseAddr_2d;
+
+ /*PLL control; pllCtrl0 supplies the video clock in banshee_recalctimings()*/
+ uint32_t pllCtrl0, pllCtrl1, pllCtrl2;
+
+ uint32_t dacMode;
+ int dacAddr; /*Palette index (0-0x1ff) used by writes to DAC_dacData*/
+
+ /*Video registers, stored as last written*/
+ uint32_t vidDesktopOverlayStride;
+ uint32_t vidDesktopStartAddr;
+ uint32_t vidProcCfg;
+ uint32_t vidScreenSize;
+ uint32_t vidSerialParallelPort;
+
+ int overlay_pix_fmt; /*Overlay pixel format field extracted from vidProcCfg*/
+
+ /*Hardware cursor registers*/
+ uint32_t hwCurPatAddr, hwCurLoc, hwCurC0, hwCurC1;
+
+ uint32_t intrCtrl;
+
+ uint32_t overlay_buffer[2][4096];
+
+ mem_mapping_t linear_mapping; /*Linear framebuffer at memBaseAddr1*/
+
+ mem_mapping_t reg_mapping_low; /*0000000-07fffff*/
+ mem_mapping_t reg_mapping_high; /*0c00000-1ffffff - Windows 2000 puts the BIOS ROM in between these two areas*/
+
+ voodoo_t *voodoo; /*3D core state*/
+
+ /*State used by the tiled desktop renderer*/
+ uint32_t desktop_addr; /*Base address of the desktop*/
+ int desktop_y; /*Current desktop line; advanced once per rendered line*/
+ uint32_t desktop_stride_tiled; /*Bytes per row of tiles, set in banshee_recalctimings()*/
+
+ int type;
+} banshee_t;
+
+/*Offsets of the extended I/O registers. The banshee_ext_* handlers select
+  a register with (addr & 0xff). Offsets 0xb0-0xdf are not listed here;
+  the handlers forward them to the VGA ports at 0x3b0-0x3df.*/
+enum
+{
+ /*Initialisation registers*/
+ Init_status = 0x00,
+ Init_pciInit0 = 0x04,
+ Init_lfbMemoryConfig = 0x0c,
+ Init_miscInit0 = 0x10,
+ Init_miscInit1 = 0x14,
+ Init_dramInit0 = 0x18,
+ Init_dramInit1 = 0x1c,
+ Init_agpInit0 = 0x20,
+ Init_vgaInit0 = 0x28,
+ Init_vgaInit1 = 0x2c,
+ Init_2dCommand = 0x30,
+ Init_2dSrcBaseAddr = 0x34,
+ Init_strapInfo = 0x38,
+
+ /*PLL registers*/
+ PLL_pllCtrl0 = 0x40,
+ PLL_pllCtrl1 = 0x44,
+ PLL_pllCtrl2 = 0x48,
+
+ /*DAC registers*/
+ DAC_dacMode = 0x4c,
+ DAC_dacAddr = 0x50,
+ DAC_dacData = 0x54,
+
+ /*Video registers (note the first two are not listed in offset order)*/
+ Video_vidProcCfg = 0x5c,
+ Video_maxRgbDelta = 0x58,
+ Video_hwCurPatAddr = 0x60,
+ Video_hwCurLoc = 0x64,
+ Video_hwCurC0 = 0x68,
+ Video_hwCurC1 = 0x6c,
+ Video_vidSerialParallelPort = 0x78,
+ Video_vidScreenSize = 0x98,
+ Video_vidOverlayStartCoords = 0x9c,
+ Video_vidOverlayEndScreenCoords = 0xa0,
+ Video_vidOverlayDudx = 0xa4,
+ Video_vidOverlayDudxOffsetSrcWidth = 0xa8,
+ Video_vidOverlayDvdy = 0xac,
+ Video_vidOverlayDvdyOffset = 0xe0,
+ Video_vidDesktopStartAddr = 0xe4,
+ Video_vidDesktopOverlayStride = 0xe8
+};
+
+/*Register offsets whose names refer to command FIFO 0. Not referenced in
+  this part of the file - presumably decoded by the memory-mapped register
+  handlers (TODO confirm).*/
+enum
+{
+ cmdBaseAddr0 = 0x20,
+ cmdBaseSize0 = 0x24,
+ cmdBump0 = 0x28,
+ cmdRdPtrL0 = 0x2c,
+ cmdRdPtrH0 = 0x30,
+ cmdAMin0 = 0x34,
+ cmdAMax0 = 0x3c,
+ cmdFifoDepth0 = 0x44,
+ cmdHoleCnt0 = 0x48
+};
+
+/*vgaInit0 : when set, banshee_recalctimings() takes the desktop format and
+  layout from the video registers instead of the standard VGA state*/
+#define VGAINIT0_EXTENDED_SHIFT_OUT (1 << 12)
+
+/*vidProcCfg bits and fields*/
+#define VIDPROCCFG_CURSOR_MODE (1 << 1)
+#define VIDPROCCFG_HALF_MODE (1 << 4)
+#define VIDPROCCFG_OVERLAY_ENABLE (1 << 8)
+#define VIDPROCCFG_OVERLAY_CLUT_BYPASS (1 << 11)
+#define VIDPROCCFG_OVERLAY_CLUT_SEL (1 << 13)
+#define VIDPROCCFG_H_SCALE_ENABLE (1 << 14)
+#define VIDPROCCFG_V_SCALE_ENABLE (1 << 15)
+#define VIDPROCCFG_FILTER_MODE_MASK (3 << 16)
+#define VIDPROCCFG_FILTER_MODE_POINT (0 << 16)
+#define VIDPROCCFG_FILTER_MODE_DITHER_2X2 (1 << 16)
+#define VIDPROCCFG_FILTER_MODE_DITHER_4X4 (2 << 16)
+#define VIDPROCCFG_FILTER_MODE_BILINEAR (3 << 16)
+/*The two PIX_FORMAT accessors below read vidProcCfg through a variable
+  named 'banshee', which must be in scope wherever they are used*/
+#define VIDPROCCFG_DESKTOP_PIX_FORMAT ((banshee->vidProcCfg >> 18) & 7)
+#define VIDPROCCFG_OVERLAY_PIX_FORMAT ((banshee->vidProcCfg >> 21) & 7)
+#define VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT (21)
+#define VIDPROCCFG_OVERLAY_PIX_FORMAT_MASK (7 << VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT)
+#define VIDPROCCFG_DESKTOP_TILE (1 << 24)
+#define VIDPROCCFG_OVERLAY_TILE (1 << 25)
+#define VIDPROCCFG_2X_MODE (1 << 26)
+#define VIDPROCCFG_HWCURSOR_ENA (1 << 27)
+
+/*Values of the overlay pixel format field*/
+#define OVERLAY_FMT_565 (1)
+#define OVERLAY_FMT_YUYV422 (5)
+#define OVERLAY_FMT_UYVY422 (6)
+#define OVERLAY_FMT_565_DITHER (7)
+
+/*vidOverlayStartCoords : X in bits 0-11, Y in bits 12-23*/
+#define OVERLAY_START_X_MASK (0xfff)
+#define OVERLAY_START_Y_SHIFT (12)
+#define OVERLAY_START_Y_MASK (0xfff << OVERLAY_START_Y_SHIFT)
+
+/*vidOverlayEndScreenCoords : X in bits 0-11, Y in bits 12-23*/
+#define OVERLAY_END_X_MASK (0xfff)
+#define OVERLAY_END_Y_SHIFT (12)
+#define OVERLAY_END_Y_MASK (0xfff << OVERLAY_END_Y_SHIFT)
+
+/*vidOverlayDudxOffsetSrcWidth : source width field*/
+#define OVERLAY_SRC_WIDTH_SHIFT (19)
+#define OVERLAY_SRC_WIDTH_MASK (0x1fff << OVERLAY_SRC_WIDTH_SHIFT)
+
+/*vidDesktopOverlayStride : overlay stride field in the upper half*/
+#define VID_STRIDE_OVERLAY_SHIFT (16)
+#define VID_STRIDE_OVERLAY_MASK (0x7fff << VID_STRIDE_OVERLAY_SHIFT)
+
+/*Valid bits of vidOverlayDudx / vidOverlayDvdy*/
+#define VID_DUDX_MASK (0xffffff)
+#define VID_DVDY_MASK (0xffffff)
+
+/*Values of the desktop pixel format field handled by banshee_recalctimings()*/
+#define PIX_FORMAT_8 0
+#define PIX_FORMAT_RGB565 1
+#define PIX_FORMAT_RGB24 2
+#define PIX_FORMAT_RGB32 3
+
+/*vidSerialParallelPort : DDC and I2C clock/data lines (_W written, _R read back)*/
+#define VIDSERIAL_DDC_DCK_W (1 << 19)
+#define VIDSERIAL_DDC_DDA_W (1 << 20)
+#define VIDSERIAL_DDC_DCK_R (1 << 21)
+#define VIDSERIAL_DDC_DDA_R (1 << 22)
+#define VIDSERIAL_I2C_SCK_W (1 << 24)
+#define VIDSERIAL_I2C_SDA_W (1 << 25)
+#define VIDSERIAL_I2C_SCK_R (1 << 26)
+#define VIDSERIAL_I2C_SDA_R (1 << 27)
+
+static uint32_t banshee_status(banshee_t *banshee);
+
+/*Write to a VGA I/O port. The CRTC index/data ports are handled here;
+  every other write, and CRTC data writes once processed, is passed on to
+  svga_out().*/
+static void banshee_out(uint16_t addr, uint8_t val, void *p)
+{
+        banshee_t *banshee = (banshee_t *)p;
+        svga_t *svga = &banshee->svga;
+        uint16_t port_block = addr & 0xfff0;
+
+        /*Mono/colour remap : with miscout bit 0 clear, swap 3Bx and 3Dx*/
+        if ((port_block == 0x3d0 || port_block == 0x3b0) && !(svga->miscout & 1))
+                addr ^= 0x60;
+
+        if (addr == 0x3D4)
+        {
+                /*CRTC index - not forwarded to svga_out()*/
+                svga->crtcreg = val & 0x3f;
+                return;
+        }
+
+        if (addr == 0x3D5)
+        {
+                int write_protect = svga->crtc[0x11] & 0x80;
+                uint8_t previous;
+
+                /*With CRTC 0x11 bit 7 set, registers 0-6 are read-only and
+                  only bit 4 of register 7 can be changed*/
+                if (write_protect && svga->crtcreg < 7)
+                        return;
+                if (write_protect && svga->crtcreg == 7)
+                        val = (svga->crtc[7] & ~0x10) | (val & 0x10);
+
+                previous = svga->crtc[svga->crtcreg];
+                svga->crtc[svga->crtcreg] = val;
+
+                /*Registers 0xe-0x10 do not trigger a timing recalculation*/
+                if (previous != val && (svga->crtcreg < 0xe || svga->crtcreg > 0x10))
+                {
+                        svga->fullchange = changeframecount;
+                        svga_recalctimings(svga);
+                }
+        }
+
+        svga_out(addr, val, svga);
+}
+
+/*Read from a VGA I/O port. Port 3C2 and the CRTC index/data ports are
+  answered here; everything else comes from svga_in().*/
+static uint8_t banshee_in(uint16_t addr, void *p)
+{
+        banshee_t *banshee = (banshee_t *)p;
+        svga_t *svga = &banshee->svga;
+        uint16_t port_block = addr & 0xfff0;
+
+        /*Mono/colour remap : with miscout bit 0 clear, swap 3Bx and 3Dx*/
+        if ((port_block == 0x3d0 || port_block == 0x3b0) && !(svga->miscout & 1))
+                addr ^= 0x60;
+
+        if (addr == 0x3c2)
+        {
+                /*Bit 4 is returned set while palette entry 0 is dark
+                  (component sum below 0x40)*/
+                int level = svga->vgapal[0].r + svga->vgapal[0].g + svga->vgapal[0].b;
+
+                return (level >= 0x40) ? 0 : 0x10;
+        }
+        if (addr == 0x3D4)
+                return svga->crtcreg;
+        if (addr == 0x3D5)
+                return svga->crtc[svga->crtcreg];
+
+        return svga_in(addr, svga);
+}
+
+/*Re-establish the memory mappings : the banked VGA window selected by GDC
+  register 6, the linear framebuffer at memBaseAddr1 and the two register
+  areas based at memBaseAddr0. All four mappings are disabled while memory
+  access is turned off in the PCI command register.*/
+static void banshee_updatemapping(banshee_t *banshee)
+{
+        /*Banked window for each value of GDC register 6 bits 2-3*/
+        static const struct
+        {
+                uint32_t base;
+                uint32_t size;
+                uint32_t mask;
+        } banked_windows[4] =
+        {
+                {0xa0000, 0x20000, 0xffff}, /*128k at A0000*/
+                {0xa0000, 0x10000, 0xffff}, /*64k at A0000*/
+                {0xb0000, 0x08000, 0x7fff}, /*32k at B0000*/
+                {0xb8000, 0x08000, 0x7fff}  /*32k at B8000*/
+        };
+        svga_t *svga = &banshee->svga;
+        int bank;
+
+        if (!(banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM))
+        {
+                mem_mapping_disable(&svga->mapping);
+                mem_mapping_disable(&banshee->linear_mapping);
+                mem_mapping_disable(&banshee->reg_mapping_low);
+                mem_mapping_disable(&banshee->reg_mapping_high);
+                return;
+        }
+
+        /*Banked framebuffer*/
+        bank = svga->gdcreg[6] & 0xc;
+        pclog("Update mapping - bank %02X ", bank);
+        mem_mapping_set_addr(&svga->mapping, banked_windows[bank >> 2].base, banked_windows[bank >> 2].size);
+        svga->banked_mask = banked_windows[bank >> 2].mask;
+
+        /*32 MB linear framebuffer*/
+        pclog("Linear framebuffer %08X ", banshee->memBaseAddr1);
+        mem_mapping_set_addr(&banshee->linear_mapping, banshee->memBaseAddr1, 32 << 20);
+
+        /*Registers : 8 MB at the base, and 20 MB from offset 0xc00000*/
+        pclog("registers %08X\n", banshee->memBaseAddr0);
+        mem_mapping_set_addr(&banshee->reg_mapping_low, banshee->memBaseAddr0, 8 << 20);
+        mem_mapping_set_addr(&banshee->reg_mapping_high, banshee->memBaseAddr0 + 0xc00000, 20 << 20);
+}
+
+/*Render one display line of a tiled 16bpp desktop into buffer32.
+  As addressed here a tile is 128 bytes (64 pixels) wide and 32 lines high,
+  and each row of tiles occupies desktop_stride_tiled bytes. Tiles whose
+  changedvram entry is clear are skipped unless a full redraw is pending.
+  Advances banshee->desktop_y by one line.*/
+static void banshee_render_16bpp_tiled(svga_t *svga)
+{
+        banshee_t *banshee = (banshee_t *)svga->p;
+        uint32_t *dest = &((uint32_t *)buffer32->line[svga->displine])[32];
+        int line = banshee->desktop_y;
+        int any_drawn = 0;
+        uint32_t addr;
+        int x;
+
+        /*In half mode every desktop line is used for two display lines*/
+        if (banshee->vidProcCfg & VIDPROCCFG_HALF_MODE)
+                line >>= 1;
+
+        /*Line within the tile, then the row of tiles*/
+        addr = banshee->desktop_addr + (line & 31) * 128 + ((line >> 5) * banshee->desktop_stride_tiled);
+
+        for (x = 0; x <= svga->hdisp; x += 64)
+        {
+                /*The cursor and overlay are drawn over the desktop, so while
+                  either is active every tile is treated as changed*/
+                if (svga->hwcursor_on || svga->overlay_on)
+                        svga->changedvram[addr >> 12] = 2;
+
+                if (!svga->changedvram[addr >> 12] && !svga->fullchange)
+                        dest += 64;
+                else
+                {
+                        uint16_t *src = (uint16_t *)&svga->vram[addr & svga->vram_display_mask];
+                        uint32_t *tile_end = dest + 64;
+
+                        while (dest < tile_end)
+                                *dest++ = video_16to32[*src++];
+
+                        any_drawn = 1;
+                }
+
+                /*Same line of the next tile to the right*/
+                addr += 128*32;
+        }
+
+        if (any_drawn)
+        {
+                if (svga->firstline_draw == 2000)
+                        svga->firstline_draw = svga->displine;
+                svga->lastline_draw = svga->displine;
+        }
+
+        banshee->desktop_y++;
+}
+
+/*Apply the Banshee extensions to the SVGA timing state in svga :
+  - the extra high-order timing bits held in CRTC registers 0x1a and 0x1b
+  - with extended shift out enabled in vgaInit0, the desktop pixel format,
+    stride, start address, 2x mode and overlay window taken from the video
+    registers
+  - the pixel clock derived from pllCtrl0 when miscout selects clock 3.*/
+static void banshee_recalctimings(svga_t *svga)
+{
+        banshee_t *banshee = (banshee_t *)svga->p;
+        voodoo_t *voodoo = banshee->voodoo;
+
+/*7 R/W Horizontal Retrace End bit 5. -
+  6 R/W Horizontal Retrace Start bit 8 0x4
+  5 R/W Horizontal Blank End bit 6. -
+  4 R/W Horizontal Blank Start bit 8. 0x3
+  3 R/W Reserved. -
+  2 R/W Horizontal Display Enable End bit 8. 0x1
+  1 R/W Reserved. -
+  0 R/W Horizontal Total bit 8. 0x0*/
+        if (svga->crtc[0x1a] & 0x01) svga->htotal += 0x100;
+        if (svga->crtc[0x1a] & 0x04) svga->hdisp += 0x100;
+/*6 R/W Vertical Retrace Start bit 10 0x10
+  5 R/W Reserved. -
+  4 R/W Vertical Blank Start bit 10. 0x15
+  3 R/W Reserved. -
+  2 R/W Vertical Display Enable End bit 10 0x12
+  1 R/W Reserved. -
+  0 R/W Vertical Total bit 10. 0x6*/
+        if (svga->crtc[0x1b] & 0x01) svga->vtotal += 0x400;
+        if (svga->crtc[0x1b] & 0x04) svga->dispend += 0x400;
+        if (svga->crtc[0x1b] & 0x10) svga->vblankstart += 0x400;
+        if (svga->crtc[0x1b] & 0x40) svga->vsyncstart += 0x400;
+
+        if (banshee->vgaInit0 & VGAINIT0_EXTENDED_SHIFT_OUT)
+        {
+                /*Desktop renderer is chosen by the pixel format field of vidProcCfg*/
+                switch (VIDPROCCFG_DESKTOP_PIX_FORMAT)
+                {
+                        case PIX_FORMAT_8:
+                        svga->render = svga_render_8bpp_highres;
+                        svga->bpp = 8;
+                        break;
+                        case PIX_FORMAT_RGB565:
+                        svga->render = (banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE) ? banshee_render_16bpp_tiled : svga_render_16bpp_highres;
+                        svga->bpp = 16;
+                        break;
+                        case PIX_FORMAT_RGB24:
+                        svga->render = svga_render_24bpp_highres;
+                        svga->bpp = 24;
+                        break;
+                        case PIX_FORMAT_RGB32:
+                        svga->render = svga_render_32bpp_highres;
+                        svga->bpp = 32;
+                        break;
+
+#ifndef RELEASE_BUILD
+                        default:
+                        /*The format field lives in vidProcCfg, so that is the
+                          register reported*/
+                        fatal("Unknown pixel format %08x\n", banshee->vidProcCfg);
+#endif
+                }
+                svga->rowcount = 0;
+                /*Line doubling is only used for half mode on a linear desktop;
+                  the tiled renderer handles half mode itself*/
+                if (!(banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE) && (banshee->vidProcCfg & VIDPROCCFG_HALF_MODE))
+                        svga->linedbl = 1;
+                else
+                        svga->linedbl = 0;
+                /*The desktop stride field is scaled by 128 for a tiled desktop*/
+                if (banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE)
+                        svga->rowoffset = ((banshee->vidDesktopOverlayStride & 0x3fff) * 128) >> 3;
+                else
+                        svga->rowoffset = (banshee->vidDesktopOverlayStride & 0x3fff) >> 3;
+                svga->ma_latch = banshee->vidDesktopStartAddr >> 2;
+                banshee->desktop_stride_tiled = (banshee->vidDesktopOverlayStride & 0x3fff) * 128 * 32;
+
+                svga->char_width = 8;
+                svga->split = 99999;
+
+                if (banshee->vidProcCfg & VIDPROCCFG_2X_MODE)
+                {
+                        svga->hdisp *= 2;
+                        svga->htotal *= 2;
+                }
+
+                /*Overlay window, from the coordinates latched in voodoo->overlay*/
+                svga->overlay.ena = banshee->vidProcCfg & VIDPROCCFG_OVERLAY_ENABLE;
+
+                svga->overlay.x = voodoo->overlay.start_x;
+                svga->overlay.y = voodoo->overlay.start_y;
+                svga->overlay.xsize = voodoo->overlay.size_x;
+                svga->overlay.ysize = voodoo->overlay.size_y;
+                svga->overlay.pitch = (banshee->vidDesktopOverlayStride & VID_STRIDE_OVERLAY_MASK) >> VID_STRIDE_OVERLAY_SHIFT;
+                if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE)
+                        svga->overlay.pitch *= 128*32;
+                /*An empty or inverted window disables the overlay*/
+                if (svga->overlay.xsize <= 0 || svga->overlay.ysize <= 0)
+                        svga->overlay.ena = 0;
+                if (svga->overlay.ena)
+                {
+                        if (!voodoo->overlay.start_x && !voodoo->overlay.start_y &&
+                            svga->hdisp == voodoo->overlay.size_x && svga->dispend == voodoo->overlay.size_y)
+                        {
+                                /*Overlay is full screen, so don't bother rendering the desktop
+                                  behind it*/
+                                svga->render = svga_render_null;
+                                svga->bpp = 0;
+                        }
+                }
+
+                svga->video_res_override = 1;
+                svga->video_res_x = svga->hdisp;
+                svga->video_res_y = svga->dispend;
+                svga->video_bpp = svga->bpp;
+        }
+        else
+        {
+                /*Normal shift out*/
+                svga->bpp = 8;
+                svga->video_res_override = 0;
+        }
+
+        if (((svga->miscout >> 2) & 3) == 3)
+        {
+                /*Clock select 3 : video clock comes from the PLL,
+                  freq = 14.318184 MHz * (n + 2) / ((m + 2) * 2^k)*/
+                int k = banshee->pllCtrl0 & 3;
+                int m = (banshee->pllCtrl0 >> 2) & 0x3f;
+                int n = (banshee->pllCtrl0 >> 8) & 0xff;
+                double freq = (((double)n + 2) / (((double)m + 2) * (double)(1 << k))) * 14318184.0;
+
+                svga->clock = (cpuclock * (float)(1ull << 32)) / freq;
+        }
+}
+
+/*Byte write to the extended I/O register block. Offsets 0xb0-0xdf are
+  forwarded to the VGA ports 0x3b0-0x3df; any other offset is only logged.*/
+static void banshee_ext_out(uint16_t addr, uint8_t val, void *p)
+{
+        int reg = addr & 0xff;
+
+        if (reg >= 0xb0 && reg <= 0xdf)
+                banshee_out(reg + 0x300, val, p);
+        else
+                pclog("bad banshee_ext_out: addr=%04x val=%02x\n", addr, val);
+}
+/*32-bit write to the extended I/O register block. The register is
+  selected by the low 8 bits of addr. Most registers are simply stored;
+  those that affect timing, display layout, the palette, the hardware
+  cursor or the overlay also update the derived svga/voodoo state.
+  Writes to offsets not listed are ignored.*/
+static void banshee_ext_outl(uint16_t addr, uint32_t val, void *p)
+{
+        banshee_t *banshee = (banshee_t *)p;
+        voodoo_t *voodoo = banshee->voodoo;
+        svga_t *svga = &banshee->svga;
+
+        switch (addr & 0xff)
+        {
+                case Init_pciInit0:
+                banshee->pciInit0 = val;
+                /*Bits 8 and 9 select the read and write bus timings*/
+                voodoo->read_time = pci_nonburst_time + pci_burst_time * ((val & 0x100) ? 2 : 1);
+                voodoo->burst_time = pci_burst_time * ((val & 0x200) ? 1 : 0);
+                voodoo->write_time = pci_nonburst_time + voodoo->burst_time;
+                break;
+
+                case Init_lfbMemoryConfig:
+                banshee->lfbMemoryConfig = val;
+                /*Tile layout of the linear framebuffer*/
+                voodoo->tile_base = (val & 0x1fff) << 12;
+                voodoo->tile_stride = 1024 << ((val >> 13) & 7);
+                voodoo->tile_stride_shift = 10 + ((val >> 13) & 7);
+                voodoo->tile_x = ((val >> 16) & 0x7f) * 128;
+                voodoo->tile_x_real = ((val >> 16) & 0x7f) * 128*32;
+                break;
+
+                case Init_miscInit0:
+                banshee->miscInit0 = val;
+                break;
+                case Init_miscInit1:
+                banshee->miscInit1 = val;
+                break;
+                case Init_dramInit0:
+                banshee->dramInit0 = val;
+                break;
+                case Init_dramInit1:
+                banshee->dramInit1 = val;
+                break;
+                case Init_agpInit0:
+                banshee->agpInit0 = val;
+                break;
+
+                case Init_2dCommand:
+                banshee->command_2d = val;
+                break;
+                case Init_2dSrcBaseAddr:
+                banshee->srcBaseAddr_2d = val;
+                break;
+                case Init_vgaInit0:
+                banshee->vgaInit0 = val;
+                break;
+                case Init_vgaInit1:
+                banshee->vgaInit1 = val;
+                /*Two 10-bit bank numbers, in units of 32 kB*/
+                svga->write_bank = (val & 0x3ff) << 15;
+                svga->read_bank = ((val >> 10) & 0x3ff) << 15;
+                break;
+
+                case PLL_pllCtrl0:
+                banshee->pllCtrl0 = val;
+                break;
+                case PLL_pllCtrl1:
+                banshee->pllCtrl1 = val;
+                break;
+                case PLL_pllCtrl2:
+                banshee->pllCtrl2 = val;
+                break;
+
+                case DAC_dacMode:
+                banshee->dacMode = val;
+                break;
+                case DAC_dacAddr:
+                banshee->dacAddr = val & 0x1ff;
+                break;
+                case DAC_dacData:
+                /*Palette entry selected by the last dacAddr write*/
+                svga->pallook[banshee->dacAddr] = val & 0xffffff;
+                svga->fullchange = changeframecount;
+                break;
+
+                case Video_vidProcCfg:
+                banshee->vidProcCfg = val;
+                banshee->overlay_pix_fmt = (val & VIDPROCCFG_OVERLAY_PIX_FORMAT_MASK) >> VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT;
+                svga->hwcursor.ena = val & VIDPROCCFG_HWCURSOR_ENA;
+                svga->fullchange = changeframecount;
+                svga_recalctimings(svga);
+                break;
+
+                case Video_maxRgbDelta:
+                /*A non-zero threshold enables the screen filter*/
+                voodoo->scrfilterThreshold = val;
+                if (val > 0x00)
+                        voodoo->scrfilterEnabled = 1;
+                else
+                        voodoo->scrfilterEnabled = 0;
+                voodoo_threshold_check(voodoo);
+                pclog("Banshee Filter: %06x\n", val);
+                break;
+
+                case Video_hwCurPatAddr:
+                banshee->hwCurPatAddr = val;
+                /*Skip the pattern rows that are clipped off the top of the screen*/
+                svga->hwcursor.addr = (val & 0xfffff0) + (svga->hwcursor.yoff * 16);
+                break;
+                case Video_hwCurLoc:
+                banshee->hwCurLoc = val;
+                svga->hwcursor.x = (val & 0x7ff) - 32;
+                svga->hwcursor.y = ((val >> 16) & 0x7ff) - 64;
+                /*A cursor partly above the screen is drawn from line 0,
+                  starting yoff rows into the pattern*/
+                if (svga->hwcursor.y < 0)
+                {
+                        svga->hwcursor.yoff = -svga->hwcursor.y;
+                        svga->hwcursor.y = 0;
+                }
+                else
+                        svga->hwcursor.yoff = 0;
+                svga->hwcursor.addr = (banshee->hwCurPatAddr & 0xfffff0) + (svga->hwcursor.yoff * 16);
+                svga->hwcursor.xsize = 64;
+                svga->hwcursor.ysize = 64;
+                break;
+                case Video_hwCurC0:
+                banshee->hwCurC0 = val;
+                break;
+                case Video_hwCurC1:
+                banshee->hwCurC1 = val;
+                break;
+
+                case Video_vidSerialParallelPort:
+                banshee->vidSerialParallelPort = val;
+                /*Pass the DDC clock and data line levels on to the DDC emulation*/
+                ddc_i2c_change((val & VIDSERIAL_DDC_DCK_W) ? 1 : 0, (val & VIDSERIAL_DDC_DDA_W) ? 1 : 0);
+                break;
+
+                case Video_vidScreenSize:
+                banshee->vidScreenSize = val;
+                voodoo->h_disp = (val & 0xfff) + 1;
+                voodoo->v_disp = (val >> 12) & 0xfff;
+                break;
+                case Video_vidOverlayStartCoords:
+                voodoo->overlay.vidOverlayStartCoords = val;
+                voodoo->overlay.start_x = val & OVERLAY_START_X_MASK;
+                voodoo->overlay.start_y = (val & OVERLAY_START_Y_MASK) >> OVERLAY_START_Y_SHIFT;
+                /*Same size calculation as for vidOverlayEndScreenCoords below,
+                  so the result does not depend on which register is written last*/
+                voodoo->overlay.size_x = (voodoo->overlay.end_x - voodoo->overlay.start_x) + 1;
+                voodoo->overlay.size_y = (voodoo->overlay.end_y - voodoo->overlay.start_y) + 1;
+                svga_recalctimings(svga);
+                break;
+                case Video_vidOverlayEndScreenCoords:
+                voodoo->overlay.vidOverlayEndScreenCoords = val;
+                voodoo->overlay.end_x = val & OVERLAY_END_X_MASK;
+                voodoo->overlay.end_y = (val & OVERLAY_END_Y_MASK) >> OVERLAY_END_Y_SHIFT;
+                /*+1 treats the end coordinates as inclusive*/
+                voodoo->overlay.size_x = (voodoo->overlay.end_x - voodoo->overlay.start_x) + 1;
+                voodoo->overlay.size_y = (voodoo->overlay.end_y - voodoo->overlay.start_y) + 1;
+                svga_recalctimings(svga);
+                break;
+                case Video_vidOverlayDudx:
+                voodoo->overlay.vidOverlayDudx = val & VID_DUDX_MASK;
+                break;
+                case Video_vidOverlayDudxOffsetSrcWidth:
+                voodoo->overlay.vidOverlayDudxOffsetSrcWidth = val;
+                voodoo->overlay.overlay_bytes = (val & OVERLAY_SRC_WIDTH_MASK) >> OVERLAY_SRC_WIDTH_SHIFT;
+                break;
+                case Video_vidOverlayDvdy:
+                voodoo->overlay.vidOverlayDvdy = val & VID_DVDY_MASK;
+                break;
+                case Video_vidOverlayDvdyOffset:
+                voodoo->overlay.vidOverlayDvdyOffset = val;
+                break;
+
+                case Video_vidDesktopStartAddr:
+                banshee->vidDesktopStartAddr = val & 0xffffff;
+                svga->fullchange = changeframecount;
+                svga_recalctimings(svga);
+                break;
+                case Video_vidDesktopOverlayStride:
+                banshee->vidDesktopOverlayStride = val;
+                svga->fullchange = changeframecount;
+                svga_recalctimings(svga);
+                break;
+        }
+}
+
+/*Byte read from the extended I/O register block. Offsets 0-3 return the
+  corresponding byte of the status register, offsets 0xb0-0xdf are
+  forwarded to the VGA ports 0x3b0-0x3df; any other offset is logged and
+  reads as 0xff.*/
+static uint8_t banshee_ext_in(uint16_t addr, void *p)
+{
+        banshee_t *banshee = (banshee_t *)p;
+        int reg = addr & 0xff;
+        uint8_t ret = 0xff;
+
+        if (reg >= Init_status && reg < Init_status + 4)
+                ret = (banshee_status(banshee) >> ((addr & 3) * 8)) & 0xff;
+        else if (reg >= 0xb0 && reg <= 0xdf)
+                ret = banshee_in(reg + 0x300, p);
+        else
+                pclog("bad banshee_ext_in: addr=%04x\n", addr);
+
+        return ret;
+}
+
+/*Build the value of the status register :
+    bits 0-4   free FIFO entries, clamped to 0x1f
+    bit 5      set when the free entry count is non-zero
+    bit 6      set while bit 3 of svga->cgastat is clear
+    bits 7-10  set while any part of the 3D core is busy
+    bit 11     set while the command FIFO read and write depths differ
+    bits 28-30 pending swap count, clamped to 7
+  Reading the status also wakes the FIFO thread when voodoo_busy is clear.*/
+static uint32_t banshee_status(banshee_t *banshee)
+{
+        voodoo_t *voodoo = banshee->voodoo; /*FIFO_ENTRIES expects this name*/
+        svga_t *svga = &banshee->svga;
+        int free_entries = 0xffff - FIFO_ENTRIES;
+        int pending_swaps = voodoo->swap_count;
+        int written = voodoo->cmd_written + voodoo->cmd_written_fifo;
+        int busy = (written - voodoo->cmd_read) || (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) ||
+                   voodoo->render_voodoo_busy[0] || voodoo->render_voodoo_busy[1] ||
+                   voodoo->render_voodoo_busy[2] || voodoo->render_voodoo_busy[3] ||
+                   voodoo->voodoo_busy;
+        uint32_t ret;
+
+        ret = (free_entries < 0x20) ? free_entries : 0x1f;
+        if (free_entries)
+                ret |= 0x20;
+
+        ret |= ((pending_swaps < 7) ? pending_swaps : 7) << 28;
+
+        if (!(svga->cgastat & 8))
+                ret |= 0x40;
+
+        if (busy)
+                ret |= 0x780; /*Busy*/
+
+        if (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr)
+                ret |= (1 << 11);
+
+        if (!voodoo->voodoo_busy)
+                voodoo_wake_fifo_thread(voodoo);
+
+        return ret;
+}
+
+/*32-bit read from the extended register block.
+  Charges voodoo->read_time to the CPU, then returns the register selected by
+  the low 8 bits of addr. Offsets without a handler read back as 0xffffffff.*/
+static uint32_t banshee_ext_inl(uint16_t addr, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+    svga_t *svga = &banshee->svga;
+    uint32_t ret;
+
+    cycles -= voodoo->read_time;
+
+    switch (addr & 0xff)
+    {
+        /*Init registers*/
+        case Init_status:
+            return banshee_status(banshee);
+        case Init_pciInit0:
+            return banshee->pciInit0;
+        case Init_lfbMemoryConfig:
+            return banshee->lfbMemoryConfig;
+        case Init_miscInit0:
+            return banshee->miscInit0;
+        case Init_miscInit1:
+            return banshee->miscInit1;
+        case Init_dramInit0:
+            return banshee->dramInit0;
+        case Init_dramInit1:
+            return banshee->dramInit1;
+        case Init_agpInit0:
+            return banshee->agpInit0;
+        case Init_vgaInit0:
+            return banshee->vgaInit0;
+        case Init_vgaInit1:
+            return banshee->vgaInit1;
+        case Init_2dCommand:
+            return banshee->command_2d;
+        case Init_2dSrcBaseAddr:
+            return banshee->srcBaseAddr_2d;
+        case Init_strapInfo:
+            return 0x00000040; /*8 MB SGRAM, PCI, IRQ enabled, 32kB BIOS*/
+
+        /*PLL registers*/
+        case PLL_pllCtrl0:
+            return banshee->pllCtrl0;
+        case PLL_pllCtrl1:
+            return banshee->pllCtrl1;
+        case PLL_pllCtrl2:
+            return banshee->pllCtrl2;
+
+        /*DAC registers*/
+        case DAC_dacMode:
+            return banshee->dacMode;
+        case DAC_dacAddr:
+            return banshee->dacAddr;
+        case DAC_dacData:
+            /*Palette entry currently selected by dacAddr*/
+            return svga->pallook[banshee->dacAddr];
+
+        /*Video registers*/
+        case Video_vidProcCfg:
+            return banshee->vidProcCfg;
+        case Video_hwCurPatAddr:
+            return banshee->hwCurPatAddr;
+        case Video_hwCurLoc:
+            return banshee->hwCurLoc;
+        case Video_hwCurC0:
+            return banshee->hwCurC0;
+        case Video_hwCurC1:
+            return banshee->hwCurC1;
+
+        case Video_vidSerialParallelPort:
+            /*The DDC read-back bits are rebuilt from the DDC bus state, but only
+              while the matching write bit is set*/
+            ret = banshee->vidSerialParallelPort & ~(VIDSERIAL_DDC_DCK_R | VIDSERIAL_DDC_DDA_R);
+            if ((banshee->vidSerialParallelPort & VIDSERIAL_DDC_DCK_W) && ddc_read_clock())
+                ret |= VIDSERIAL_DDC_DCK_R;
+            if ((banshee->vidSerialParallelPort & VIDSERIAL_DDC_DDA_W) && ddc_read_data())
+                ret |= VIDSERIAL_DDC_DDA_R;
+            /*The I2C read-back bits simply mirror the I2C write bits*/
+            ret = ret & ~(VIDSERIAL_I2C_SCK_R | VIDSERIAL_I2C_SDA_R);
+            if (banshee->vidSerialParallelPort & VIDSERIAL_I2C_SCK_W)
+                ret |= VIDSERIAL_I2C_SCK_R;
+            if (banshee->vidSerialParallelPort & VIDSERIAL_I2C_SDA_W)
+                ret |= VIDSERIAL_I2C_SDA_R;
+            return ret;
+
+        case Video_vidScreenSize:
+            return banshee->vidScreenSize;
+        case Video_vidOverlayStartCoords:
+            return voodoo->overlay.vidOverlayStartCoords;
+        case Video_vidOverlayEndScreenCoords:
+            return voodoo->overlay.vidOverlayEndScreenCoords;
+        case Video_vidOverlayDudx:
+            return voodoo->overlay.vidOverlayDudx;
+        case Video_vidOverlayDudxOffsetSrcWidth:
+            return voodoo->overlay.vidOverlayDudxOffsetSrcWidth;
+        case Video_vidOverlayDvdy:
+            return voodoo->overlay.vidOverlayDvdy;
+        case Video_vidOverlayDvdyOffset:
+            return voodoo->overlay.vidOverlayDvdyOffset;
+
+        case Video_vidDesktopStartAddr:
+            return banshee->vidDesktopStartAddr;
+        case Video_vidDesktopOverlayStride:
+            return banshee->vidDesktopOverlayStride;
+
+        default:
+            break;
+    }
+
+    /*Unhandled register*/
+    return 0xffffffff;
+}
+
+
+static uint32_t banshee_reg_readl(uint32_t addr, void *p);
+
+/*Byte read from the memory-mapped register window: performs the aligned
+  32-bit read and returns the byte selected by the low two address bits.*/
+static uint8_t banshee_reg_read(uint32_t addr, void *p)
+{
+    uint32_t dword = banshee_reg_readl(addr & ~3, p);
+    int shift = 8*(addr & 3);
+
+    return dword >> shift;
+}
+
+/*16-bit read from the memory-mapped register window: performs the aligned
+  32-bit read and returns the low or high half according to address bit 1.*/
+static uint16_t banshee_reg_readw(uint32_t addr, void *p)
+{
+    uint32_t dword = banshee_reg_readl(addr & ~3, p);
+    int shift = 8*(addr & 2);
+
+    return dword >> shift;
+}
+
+/*Read one of the CMDFIFO registers, selected by addr & 0x1fc.
+  Registers without read-back support return 0xffffffff; outside of
+  RELEASE_BUILD an unknown offset is reported through fatal().*/
+static uint32_t banshee_cmd_read(banshee_t *banshee, uint32_t addr)
+{
+    voodoo_t *voodoo = banshee->voodoo;
+
+    switch (addr & 0x1fc)
+    {
+        case cmdBaseAddr0:
+            /*Base is kept as a byte address, the register holds it in 4 kB pages*/
+            return voodoo->cmdfifo_base >> 12;
+
+        case cmdRdPtrL0:
+            return voodoo->cmdfifo_rp;
+
+        case cmdFifoDepth0:
+            /*Words written but not yet consumed*/
+            return voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd;
+
+        case 0x108:
+            break;
+
+#ifndef RELEASE_BUILD
+        default:
+            fatal("Unknown banshee_cmd_read %08x\n", addr);
+#endif
+    }
+
+    return 0xffffffff;
+}
+
+/*32-bit read from the memory-mapped register window.
+  Charges voodoo->read_time, then decodes addr & 0x1f00000:
+    0x0000000            I/O remap (banshee_ext_inl) or, with bit 19 set, CMDFIFO registers
+    0x0100000            2D registers (the FIFO is flushed first)
+    0x0200000-0x0500000  3D registers
+  Anything not handled reads back as 0xffffffff.*/
+static uint32_t banshee_reg_readl(uint32_t addr, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+
+    cycles -= voodoo->read_time;
+
+    switch (addr & 0x1f00000)
+    {
+        case 0x0000000: /*IO remap*/
+            if (addr & 0x80000)
+                return banshee_cmd_read(banshee, addr);
+            return banshee_ext_inl(addr & 0xff, banshee);
+
+        case 0x0100000: /*2D registers*/
+            voodoo_flush(voodoo);
+            switch (addr & 0x1fc)
+            {
+                case 0x08:
+                    return voodoo->banshee_blt.clip0Min;
+                case 0x0c:
+                    return voodoo->banshee_blt.clip0Max;
+                case 0x10:
+                    return voodoo->banshee_blt.dstBaseAddr;
+                case 0x14:
+                    return voodoo->banshee_blt.dstFormat;
+                case 0x34:
+                    return voodoo->banshee_blt.srcBaseAddr;
+                case 0x38:
+                    return voodoo->banshee_blt.commandExtra;
+                case 0x5c:
+                    return voodoo->banshee_blt.srcXY;
+                case 0x60:
+                    return voodoo->banshee_blt.colorBack;
+                case 0x64:
+                    return voodoo->banshee_blt.colorFore;
+                case 0x68:
+                    return voodoo->banshee_blt.dstSize;
+                case 0x6c:
+                    return voodoo->banshee_blt.dstXY;
+                case 0x70:
+                    return voodoo->banshee_blt.command;
+                default:
+                    pclog("banshee_reg_readl: addr=%08x\n", addr);
+                    return 0xffffffff;
+            }
+            break;
+
+        case 0x0200000: case 0x0300000: case 0x0400000: case 0x0500000: /*3D registers*/
+            switch (addr & 0x3fc)
+            {
+                case SST_status:
+                    return banshee_status(banshee);
+
+                case SST_intrCtrl:
+                    return banshee->intrCtrl & 0x0030003f;
+
+                /*Rendering state is read back only after queued work has been flushed*/
+                case SST_fbzColorPath:
+                    voodoo_flush(voodoo);
+                    return voodoo->params.fbzColorPath;
+                case SST_fogMode:
+                    voodoo_flush(voodoo);
+                    return voodoo->params.fogMode;
+                case SST_alphaMode:
+                    voodoo_flush(voodoo);
+                    return voodoo->params.alphaMode;
+                case SST_fbzMode:
+                    voodoo_flush(voodoo);
+                    return voodoo->params.fbzMode;
+                case SST_lfbMode:
+                    voodoo_flush(voodoo);
+                    return voodoo->lfbMode;
+
+                /*Clip registers pack the upper bound in bits 0-15 and the lower bound in bits 16+*/
+                case SST_clipLeftRight:
+                    return voodoo->params.clipRight | (voodoo->params.clipLeft << 16);
+                case SST_clipLowYHighY:
+                    return voodoo->params.clipHighY | (voodoo->params.clipLowY << 16);
+                case SST_clipLeftRight1:
+                    return voodoo->params.clipRight1 | (voodoo->params.clipLeft1 << 16);
+                case SST_clipTopBottom1:
+                    return voodoo->params.clipHighY1 | (voodoo->params.clipLowY1 << 16);
+
+                case SST_stipple:
+                    voodoo_flush(voodoo);
+                    return voodoo->params.stipple;
+                case SST_color0:
+                    voodoo_flush(voodoo);
+                    return voodoo->params.color0;
+                case SST_color1:
+                    voodoo_flush(voodoo);
+                    return voodoo->params.color1;
+
+                /*Statistics counters read back as 24-bit values*/
+                case SST_fbiPixelsIn:
+                    return voodoo->fbiPixelsIn & 0xffffff;
+                case SST_fbiChromaFail:
+                    return voodoo->fbiChromaFail & 0xffffff;
+                case SST_fbiZFuncFail:
+                    return voodoo->fbiZFuncFail & 0xffffff;
+                case SST_fbiAFuncFail:
+                    return voodoo->fbiAFuncFail & 0xffffff;
+                case SST_fbiPixelsOut:
+                    return voodoo->fbiPixelsOut & 0xffffff;
+
+                default:
+                    pclog("banshee_reg_readl: 3D addr=%08x\n", addr);
+                    return 0xffffffff;
+            }
+            break;
+    }
+
+    return 0xffffffff;
+}
+
+/*Byte writes to the memory-mapped register window are ignored.*/
+static void banshee_reg_write(uint32_t addr, uint8_t val, void *p)
+{
+    (void)addr;
+    (void)val;
+    (void)p;
+}
+
+/*16-bit write to the memory-mapped register window. Only the 3D LFB
+  aperture is handled; writes elsewhere are dropped after the write time
+  has been charged.*/
+static void banshee_reg_writew(uint32_t addr, uint16_t val, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+
+    cycles -= voodoo->write_time;
+
+    /*3D LFB: (addr & 0x1f00000) lies in 0x1000000-0x1f00000 exactly when bit 24 is set*/
+    if (addr & 0x1000000)
+        voodoo_queue_command(voodoo, (addr & 0xffffff) | FIFO_WRITEW_FB, val);
+}
+
+/*Write one of the CMDFIFO registers, selected by addr & 0x1fc.
+  cmdfifo_end is recomputed whenever the base or the size changes: it is the
+  base plus ((size & 0xff) + 1) pages of 4 kB. Unknown offsets are logged.*/
+static void banshee_cmd_write(banshee_t *banshee, uint32_t addr, uint32_t val)
+{
+    voodoo_t *voodoo = banshee->voodoo;
+    const uint32_t reg = addr & 0x1fc;
+
+    if (reg == cmdBaseAddr0)
+    {
+        voodoo->cmdfifo_base = (val & 0xfff) << 12;
+        voodoo->cmdfifo_end = voodoo->cmdfifo_base + (((voodoo->cmdfifo_size & 0xff) + 1) << 12);
+    }
+    else if (reg == cmdBaseSize0)
+    {
+        voodoo->cmdfifo_size = val;
+        voodoo->cmdfifo_end = voodoo->cmdfifo_base + (((voodoo->cmdfifo_size & 0xff) + 1) << 12);
+        voodoo->cmdfifo_enabled = val & 0x100;
+        if (!voodoo->cmdfifo_enabled)
+            voodoo->cmdfifo_in_sub = 0; /*Not sure exactly when this should be reset*/
+    }
+    else if (reg == cmdRdPtrL0)
+    {
+        voodoo->cmdfifo_rp = val;
+    }
+    else if (reg == cmdAMin0)
+    {
+        voodoo->cmdfifo_amin = val;
+    }
+    else if (reg == cmdAMax0)
+    {
+        voodoo->cmdfifo_amax = val;
+    }
+    else if (reg == cmdFifoDepth0)
+    {
+        /*Depth is tracked as write count minus read count, so reset both*/
+        voodoo->cmdfifo_depth_rd = 0;
+        voodoo->cmdfifo_depth_wr = val & 0xffff;
+    }
+    else
+    {
+        pclog("Unknown banshee_cmd_write: addr=%08x val=%08x\n", addr, val);
+    }
+}
+
+/*32-bit write to the memory-mapped register window.
+  A write to the address directly following the previous one is charged
+  burst_time, any other write_time. The write is then routed by
+  addr & 0x1f00000 to the I/O remap / CMDFIFO registers, or queued for the
+  FIFO thread as a 2D register, 3D register, texture or LFB write.*/
+static void banshee_reg_writel(uint32_t addr, uint32_t val, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+
+    cycles -= (addr == voodoo->last_write_addr+4) ? voodoo->burst_time : voodoo->write_time;
+    voodoo->last_write_addr = addr;
+
+    switch (addr & 0x1f00000)
+    {
+        case 0x0000000: /*IO remap*/
+            if (addr & 0x80000)
+                banshee_cmd_write(banshee, addr, val);
+            else
+                banshee_ext_outl(addr & 0xff, val, banshee);
+            break;
+
+        case 0x0100000: /*2D registers*/
+            voodoo_queue_command(voodoo, (addr & 0x1fc) | FIFO_WRITEL_2DREG, val);
+            break;
+
+        case 0x0200000: case 0x0300000: case 0x0400000: case 0x0500000: /*3D registers*/
+            switch (addr & 0x3fc)
+            {
+                case SST_intrCtrl:
+                    banshee->intrCtrl = val & 0x0030003f;
+                    break;
+
+                case SST_userIntrCMD:
+#ifndef RELEASE_BUILD
+                    fatal("userIntrCMD write %08x\n", val);
+#endif
+                    break;
+
+                /*Commands: counted in cmd_written, queued, and the FIFO threads
+                  are woken if the chip is currently idle*/
+                case SST_swapbufferCMD:
+                case SST_triangleCMD:
+                case SST_ftriangleCMD:
+                case SST_fastfillCMD:
+                case SST_nopCMD:
+                    voodoo->cmd_written++;
+                    voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val);
+                    if (!voodoo->voodoo_busy)
+                        voodoo_wake_fifo_threads(voodoo->set, voodoo);
+                    break;
+
+                case SST_swapPending:
+                    /*swap_count is updated under swap_mutex*/
+                    thread_lock_mutex(voodoo->swap_mutex);
+                    voodoo->swap_count++;
+                    thread_unlock_mutex(voodoo->swap_mutex);
+                    break;
+
+                default:
+                    voodoo_queue_command(voodoo, (addr & 0x3ffffc) | FIFO_WRITEL_REG, val);
+                    break;
+            }
+            break;
+
+        case 0x0600000: case 0x0700000: /*Texture download*/
+            voodoo->tex_count++;
+            voodoo_queue_command(voodoo, (addr & 0x1ffffc) | FIFO_WRITEL_TEX, val);
+            break;
+
+        case 0x1000000: case 0x1100000: case 0x1200000: case 0x1300000: /*3D LFB*/
+        case 0x1400000: case 0x1500000: case 0x1600000: case 0x1700000:
+        case 0x1800000: case 0x1900000: case 0x1a00000: case 0x1b00000:
+        case 0x1c00000: case 0x1d00000: case 0x1e00000: case 0x1f00000:
+            voodoo_queue_command(voodoo, (addr & 0xfffffc) | FIFO_WRITEL_FB, val);
+            break;
+    }
+}
+
+/*Byte read from the linear framebuffer aperture.
+  Addresses at or above voodoo->tile_base are remapped from a linear
+  (row of tile_stride bytes) layout into 128-byte wide, 32-row tiles.
+  Reads at or beyond svga->vram_max return 0xff.*/
+static uint8_t banshee_read_linear(uint32_t addr, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+    svga_t *svga = &banshee->svga;
+
+    cycles -= voodoo->read_time;
+    cycles_lost += voodoo->read_time;
+
+    addr &= svga->decode_mask;
+    if (addr >= voodoo->tile_base)
+    {
+        uint32_t tile_off = addr - voodoo->tile_base;
+        int x = tile_off & (voodoo->tile_stride-1);
+        int y = tile_off >> voodoo->tile_stride_shift;
+
+        /*byte in tile row + tile column + row in tile + tile row*/
+        addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real;
+    }
+
+    if (addr < svga->vram_max)
+    {
+        egareads++;
+        cycles -= video_timing_read_b;
+        cycles_lost += video_timing_read_b;
+
+        return svga->vram[addr & svga->vram_mask];
+    }
+
+    return 0xff;
+}
+
+/*16-bit read from the linear framebuffer aperture.
+  Odd addresses are split into two byte reads (low byte first). Addresses at
+  or above voodoo->tile_base are remapped from a linear layout into 128-byte
+  wide, 32-row tiles. Reads at or beyond svga->vram_max return 0xffff, which
+  matches what the split path yields from two out-of-range byte reads.*/
+static uint16_t banshee_read_linear_w(uint32_t addr, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+    svga_t *svga = &banshee->svga;
+
+    if (addr & 1)
+    {
+        /*Sequence the two byte reads explicitly*/
+        uint16_t lo = banshee_read_linear(addr, p);
+        uint16_t hi = banshee_read_linear(addr+1, p);
+
+        return lo | (hi << 8);
+    }
+
+    cycles -= voodoo->read_time;
+    cycles_lost += voodoo->read_time;
+
+    addr &= svga->decode_mask;
+    if (addr >= voodoo->tile_base)
+    {
+        int x, y;
+
+        addr -= voodoo->tile_base;
+        x = addr & (voodoo->tile_stride-1);
+        y = addr >> voodoo->tile_stride_shift;
+
+        /*byte in tile row + tile column + row in tile + tile row*/
+        addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real;
+    }
+    if (addr >= svga->vram_max)
+        return 0xffff; /*All bits set, not just the low byte*/
+
+    egareads++;
+    cycles -= video_timing_read_w;
+    cycles_lost += video_timing_read_w;
+
+    return *(uint16_t *)&svga->vram[addr & svga->vram_mask];
+}
+
+/*32-bit read from the linear framebuffer aperture.
+  Addresses that are not 4-byte aligned are split into two 16-bit reads (low
+  half first). Addresses at or above voodoo->tile_base are remapped from a
+  linear layout into 128-byte wide, 32-row tiles. Reads at or beyond
+  svga->vram_max return 0xffffffff.*/
+static uint32_t banshee_read_linear_l(uint32_t addr, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+    svga_t *svga = &banshee->svga;
+
+    if (addr & 3)
+    {
+        /*Widen before shifting: a uint16_t promotes to int, and shifting a
+          value >= 0x8000 left by 16 would overflow it*/
+        uint32_t lo = banshee_read_linear_w(addr, p);
+        uint32_t hi = banshee_read_linear_w(addr+2, p);
+
+        return lo | (hi << 16);
+    }
+
+    cycles -= voodoo->read_time;
+    cycles_lost += voodoo->read_time;
+
+    addr &= svga->decode_mask;
+    if (addr >= voodoo->tile_base)
+    {
+        int x, y;
+
+        addr -= voodoo->tile_base;
+        x = addr & (voodoo->tile_stride-1);
+        y = addr >> voodoo->tile_stride_shift;
+
+        /*byte in tile row + tile column + row in tile + tile row*/
+        addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real;
+    }
+    if (addr >= svga->vram_max)
+        return 0xffffffff; /*All bits set, not just the low byte*/
+
+    egareads++;
+    cycles -= video_timing_read_l;
+    cycles_lost += video_timing_read_l;
+
+    return *(uint32_t *)&svga->vram[addr & svga->vram_mask];
+}
+
+/*Byte write to the linear framebuffer aperture.
+  Addresses at or above voodoo->tile_base are remapped from a linear layout
+  into 128-byte wide, 32-row tiles. Writes at or beyond svga->vram_max are
+  dropped; otherwise the 4 kB page is marked changed and the byte stored.*/
+static void banshee_write_linear(uint32_t addr, uint8_t val, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+    svga_t *svga = &banshee->svga;
+
+    cycles -= voodoo->write_time;
+    cycles_lost += voodoo->write_time;
+
+    addr &= svga->decode_mask;
+    if (addr >= voodoo->tile_base)
+    {
+        uint32_t tile_off = addr - voodoo->tile_base;
+        int x = tile_off & (voodoo->tile_stride-1);
+        int y = tile_off >> voodoo->tile_stride_shift;
+
+        /*byte in tile row + tile column + row in tile + tile row*/
+        addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real;
+    }
+
+    if (addr < svga->vram_max)
+    {
+        egawrites++;
+
+        cycles -= video_timing_write_b;
+        cycles_lost += video_timing_write_b;
+
+        svga->changedvram[addr >> 12] = changeframecount;
+        svga->vram[addr & svga->vram_mask] = val;
+    }
+}
+
+/*16-bit write to the linear framebuffer aperture.
+  Odd addresses are split into two byte writes (low byte first). Addresses at
+  or above voodoo->tile_base are remapped from a linear layout into 128-byte
+  wide, 32-row tiles. Writes at or beyond svga->vram_max are dropped.*/
+static void banshee_write_linear_w(uint32_t addr, uint16_t val, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+    svga_t *svga = &banshee->svga;
+
+    if (addr & 1)
+    {
+        banshee_write_linear(addr, val, p);
+        banshee_write_linear(addr + 1, val >> 8, p);
+        return;
+    }
+
+    cycles -= voodoo->write_time;
+    cycles_lost += voodoo->write_time;
+
+    addr &= svga->decode_mask;
+    if (addr >= voodoo->tile_base)
+    {
+        uint32_t tile_off = addr - voodoo->tile_base;
+        int x = tile_off & (voodoo->tile_stride-1);
+        int y = tile_off >> voodoo->tile_stride_shift;
+
+        /*byte in tile row + tile column + row in tile + tile row*/
+        addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real;
+    }
+
+    if (addr < svga->vram_max)
+    {
+        egawrites++;
+
+        cycles -= video_timing_write_w;
+        cycles_lost += video_timing_write_w;
+
+        svga->changedvram[addr >> 12] = changeframecount;
+        *(uint16_t *)&svga->vram[addr & svga->vram_mask] = val;
+    }
+}
+
+/*32-bit write to the linear framebuffer aperture.
+  Unaligned addresses are split into two 16-bit writes. A write to the address
+  directly following the previous one is charged burst_time, any other
+  write_time. Addresses at or above voodoo->tile_base are remapped into
+  128-byte wide, 32-row tiles, and writes at or beyond svga->vram_max are
+  dropped.
+  When the CMDFIFO is enabled and the (remapped) address falls inside
+  [cmdfifo_base, cmdfifo_end), the write also updates the CMDFIFO tracking
+  state: in-order writes bump cmdfifo_depth_wr immediately, out-of-order
+  writes open a run of "holes" that must be filled before the depth advances.*/
+static void banshee_write_linear_l(uint32_t addr, uint32_t val, void *p)
+{
+    banshee_t *banshee = (banshee_t *)p;
+    voodoo_t *voodoo = banshee->voodoo;
+    svga_t *svga = &banshee->svga;
+    int cost;
+
+    if (addr & 3)
+    {
+        banshee_write_linear_w(addr, val, p);
+        banshee_write_linear_w(addr + 2, val >> 16, p);
+        return;
+    }
+
+    cost = (addr == voodoo->last_write_addr+4) ? voodoo->burst_time : voodoo->write_time;
+    cycles -= cost;
+    cycles_lost += cost;
+    voodoo->last_write_addr = addr;
+
+    addr &= svga->decode_mask;
+    if (addr >= voodoo->tile_base)
+    {
+        uint32_t tile_off = addr - voodoo->tile_base;
+        int x = tile_off & (voodoo->tile_stride-1);
+        int y = tile_off >> voodoo->tile_stride_shift;
+
+        /*byte in tile row + tile column + row in tile + tile row*/
+        addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real;
+    }
+
+    if (addr >= svga->vram_max)
+        return;
+
+    egawrites += 4;
+
+    cycles -= video_timing_write_l;
+    cycles_lost += video_timing_write_l;
+
+    svga->changedvram[addr >> 12] = changeframecount;
+    *(uint32_t *)&svga->vram[addr & svga->vram_mask] = val;
+
+    /*Everything below only concerns writes that land inside the CMDFIFO*/
+    if (!voodoo->cmdfifo_enabled || addr < voodoo->cmdfifo_base || addr >= voodoo->cmdfifo_end)
+        return;
+
+    if (addr == voodoo->cmdfifo_base && !voodoo->cmdfifo_holecount)
+    {
+        /*Reset pointers*/
+        voodoo->cmdfifo_amin = voodoo->cmdfifo_base;
+        voodoo->cmdfifo_amax = voodoo->cmdfifo_base;
+        voodoo->cmdfifo_depth_wr++;
+        voodoo_wake_fifo_thread(voodoo);
+    }
+    else if (voodoo->cmdfifo_holecount)
+    {
+        /*Any write while holes are outstanding counts as filling one of them*/
+        voodoo->cmdfifo_holecount--;
+        if (!voodoo->cmdfifo_holecount)
+        {
+            /*Filled in holes, resume normal operation*/
+            voodoo->cmdfifo_depth_wr += ((voodoo->cmdfifo_amax - voodoo->cmdfifo_amin) >> 2);
+            voodoo->cmdfifo_amin = voodoo->cmdfifo_amax;
+            voodoo_wake_fifo_thread(voodoo);
+        }
+    }
+    else if (addr == voodoo->cmdfifo_amax+4)
+    {
+        /*In-order write*/
+        voodoo->cmdfifo_amin = addr;
+        voodoo->cmdfifo_amax = addr;
+        voodoo->cmdfifo_depth_wr++;
+        voodoo_wake_fifo_thread(voodoo);
+    }
+    else
+    {
+        /*Out-of-order write*/
+        if (addr < voodoo->cmdfifo_amin)
+        {
+            /*Reset back to start. Note that write is still out of order!*/
+            voodoo->cmdfifo_amin = voodoo->cmdfifo_base-4;
+        }
+        voodoo->cmdfifo_amax = addr;
+        /*Words between amin and amax (exclusive) still have to be written*/
+        voodoo->cmdfifo_holecount = ((voodoo->cmdfifo_amax - voodoo->cmdfifo_amin) >> 2) - 1;
+    }
+}
+
+/*Draw one scanline of the 64-pixel wide hardware cursor into buffer32.
+  Each line consumes 16 bytes at svga->hwcursor_latch.addr: 8 bytes of plane 0
+  followed by 8 bytes of plane 1, most significant bit leftmost.
+    X11 mode (VIDPROCCFG_CURSOR_MODE set): plane 0 bit set -> pixel drawn in
+      hwCurC1 or hwCurC0 according to plane 1; clear -> pixel left alone.
+    Windows mode: plane 0 bit clear -> pixel drawn in hwCurC1 or hwCurC0
+      according to plane 1; plane 0 set and plane 1 set -> pixel inverted;
+      both... plane 0 set and plane 1 clear -> pixel left alone.
+  Groups of 8 pixels whose start position is not greater than 32-8 are skipped.*/
+void banshee_hwcursor_draw(svga_t *svga, int displine)
+{
+    banshee_t *banshee = (banshee_t *)svga->p;
+    uint32_t colour0 = banshee->hwCurC0;
+    uint32_t colour1 = banshee->hwCurC1;
+    uint8_t plane0[8], plane1[8];
+    int x11_mode;
+    int group, bit;
+    int dest_x;
+
+    /*Take a private copy of this line's pattern, then step the latch to the next line*/
+    for (group = 0; group < 8; group++)
+        plane0[group] = svga->vram[svga->hwcursor_latch.addr + group];
+    for (group = 0; group < 8; group++)
+        plane1[group] = svga->vram[svga->hwcursor_latch.addr + group + 8];
+    svga->hwcursor_latch.addr += 16;
+
+    dest_x = svga->hwcursor_latch.x;
+    x11_mode = (banshee->vidProcCfg & VIDPROCCFG_CURSOR_MODE) != 0;
+
+    for (group = 0; group < 8; group++, dest_x += 8)
+    {
+        uint32_t *line;
+
+        if (dest_x <= (32-8))
+            continue;
+
+        line = (uint32_t *)buffer32->line[displine];
+
+        for (bit = 0; bit < 8; bit++)
+        {
+            /*Test bit 7 of the pattern byte after shifting it left 'bit' places*/
+            int p0 = (plane0[group] << bit) & 0x80;
+            int p1 = (plane1[group] << bit) & 0x80;
+
+            if (x11_mode)
+            {
+                if (p0)
+                    line[dest_x + bit] = p1 ? colour1 : colour0;
+            }
+            else if (!p0)
+                line[dest_x + bit] = p1 ? colour1 : colour0;
+            else if (p1)
+                line[dest_x + bit] ^= 0xffffff;
+        }
+    }
+}
+
+/*Saturate the integer lvalue x to the range 0..0xff in place.
+  x is evaluated more than once, so it must have no side effects.*/
+#define CLAMP(x) do \
+    { \
+        if ((x) < 0) \
+            x = 0; \
+        else if ((x) > 0xff) \
+            x = 0xff; \
+    } \
+    while (0)
+
+/*Decode voodoo->overlay.overlay_bytes bytes of 16-bit 5:6:5 pixels from src
+  into 32-bit pixels in buf. Uses voodoo, banshee, clut and src from the
+  expansion site; src is left pointing past the consumed bytes.
+  With VIDPROCCFG_OVERLAY_CLUT_BYPASS set the three fields are shifted straight
+  into bits 3-7, 10-15 and 19-23; otherwise each field indexes clut and
+  contributes one byte lane of the looked-up entry.*/
+#define DECODE_RGB565(buf) \
+    do \
+    { \
+        int in_off; \
+        int out_pos = 0; \
+        \
+        for (in_off = 0; in_off < voodoo->overlay.overlay_bytes; in_off += 2) \
+        { \
+            uint16_t px565 = *(uint16_t *)src; \
+            int lo5 = px565 & 0x1f; \
+            int mid6 = (px565 >> 5) & 0x3f; \
+            int hi5 = px565 >> 11; \
+            \
+            if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_CLUT_BYPASS) \
+                buf[out_pos++] = (lo5 << 3) | (mid6 << 10) | (hi5 << 19); \
+            else \
+                buf[out_pos++] = (clut[lo5 << 3] & 0x0000ff) | \
+                                 (clut[mid6 << 2] & 0x00ff00) | \
+                                 (clut[hi5 << 3] & 0xff0000); \
+            src += 2; \
+        } \
+    } while (0)
+
+/*Tiled variant of the 5:6:5 overlay decode: pixels are fetched from svga->vram
+  rather than through src. Byte offset n of the line lives at
+  base + (n & 127) + (n >> 7)*128*32, i.e. every 128 bytes the fetch jumps one
+  128x32-byte tile ahead. The base is src_addr2 when decoding into
+  banshee->overlay_buffer[1] and src_addr otherwise. Uses voodoo, banshee,
+  svga, clut, src_addr and src_addr2 from the expansion site.*/
+#define DECODE_RGB565_TILED(buf) \
+    do \
+    { \
+        int in_off; \
+        int out_pos = 0; \
+        uint32_t line_base = (buf == banshee->overlay_buffer[1]) ? src_addr2 : src_addr; \
+        \
+        for (in_off = 0; in_off < voodoo->overlay.overlay_bytes; in_off += 2) \
+        { \
+            uint16_t px565 = *(uint16_t *)&svga->vram[(line_base + (in_off & 127) + (in_off >> 7)*128*32) & svga->vram_mask]; \
+            int lo5 = px565 & 0x1f; \
+            int mid6 = (px565 >> 5) & 0x3f; \
+            int hi5 = px565 >> 11; \
+            \
+            if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_CLUT_BYPASS) \
+                buf[out_pos++] = (lo5 << 3) | (mid6 << 10) | (hi5 << 19); \
+            else \
+                buf[out_pos++] = (clut[lo5 << 3] & 0x0000ff) | \
+                                 (clut[mid6 << 2] & 0x00ff00) | \
+                                 (clut[hi5 << 3] & 0xff0000); \
+        } \
+    } while (0)
+
+/*Decode voodoo->overlay.overlay_bytes bytes of packed 4:2:2 video from src
+  into 32-bit pixels in buf, two pixels per 4 source bytes. Byte order within
+  a group is: luma 0, first chroma sample, luma 1, second chroma sample.
+  Chroma samples are re-centred around zero by subtracting 0x80, and each
+  output component is saturated to 0..255 with CLAMP. Uses voodoo and src from
+  the expansion site; src is left pointing past the consumed bytes.*/
+#define DECODE_YUYV422(buf) \
+    do \
+    { \
+        int in_off; \
+        int out_pos = 0; \
+        \
+        for (in_off = 0; in_off < voodoo->overlay.overlay_bytes; in_off += 4) \
+        { \
+            uint8_t luma[2]; \
+            int8_t Cr, Cb; \
+            int dR, dG, dB; \
+            int which; \
+            \
+            luma[0] = src[0]; \
+            Cr = src[1] - 0x80; \
+            luma[1] = src[2]; \
+            Cb = src[3] - 0x80; \
+            src += 4; \
+            \
+            /*Chroma contributions are shared by both pixels of the pair*/ \
+            dR = (359*Cr) >> 8; \
+            dG = (88*Cb + 183*Cr) >> 8; \
+            dB = (453*Cb) >> 8; \
+            \
+            for (which = 0; which < 2; which++) \
+            { \
+                int r = luma[which] + dR; \
+                int g = luma[which] - dG; \
+                int b = luma[which] + dB; \
+                \
+                CLAMP(r); \
+                CLAMP(g); \
+                CLAMP(b); \
+                buf[out_pos++] = r | (g << 8) | (b << 16); \
+            } \
+        } \
+    } while (0)
+
+/*Decode voodoo->overlay.overlay_bytes bytes of packed 4:2:2 video from src
+  into 32-bit pixels in buf, two pixels per 4 source bytes. Byte order within
+  a group is: first chroma sample, luma 0, second chroma sample, luma 1.
+  Chroma samples are re-centred around zero by subtracting 0x80, and each
+  output component is saturated to 0..255 with CLAMP. Uses voodoo and src from
+  the expansion site; src is left pointing past the consumed bytes.*/
+#define DECODE_UYUV422(buf) \
+    do \
+    { \
+        int in_off; \
+        int out_pos = 0; \
+        \
+        for (in_off = 0; in_off < voodoo->overlay.overlay_bytes; in_off += 4) \
+        { \
+            uint8_t luma[2]; \
+            int8_t Cr, Cb; \
+            int dR, dG, dB; \
+            int which; \
+            \
+            Cr = src[0] - 0x80; \
+            luma[0] = src[1]; \
+            Cb = src[2] - 0x80; \
+            luma[1] = src[3]; \
+            src += 4; \
+            \
+            /*Chroma contributions are shared by both pixels of the pair*/ \
+            dR = (359*Cr) >> 8; \
+            dG = (88*Cb + 183*Cr) >> 8; \
+            dB = (453*Cb) >> 8; \
+            \
+            for (which = 0; which < 2; which++) \
+            { \
+                int r = luma[which] + dR; \
+                int g = luma[which] - dG; \
+                int b = luma[which] + dB; \
+                \
+                CLAMP(r); \
+                CLAMP(g); \
+                CLAMP(b); \
+                buf[out_pos++] = r | (g << 8) | (b << 16); \
+            } \
+        } \
+    } while (0)
+
+
+/*Decode one overlay source line into buf using the decoder that matches
+  banshee->overlay_pix_fmt. The two 5:6:5 formats share a decoder and pick the
+  tiled fetch when VIDPROCCFG_OVERLAY_TILE is set. Format 0 and any format not
+  listed leave buf untouched.*/
+#define OVERLAY_SAMPLE(buf) \
+    do \
+    { \
+        if (banshee->overlay_pix_fmt == OVERLAY_FMT_YUYV422) \
+        { \
+            DECODE_YUYV422(buf); \
+        } \
+        else if (banshee->overlay_pix_fmt == OVERLAY_FMT_UYVY422) \
+        { \
+            DECODE_UYUV422(buf); \
+        } \
+        else if (banshee->overlay_pix_fmt == OVERLAY_FMT_565 || \
+                 banshee->overlay_pix_fmt == OVERLAY_FMT_565_DITHER) \
+        { \
+            if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) \
+                DECODE_RGB565_TILED(buf); \
+            else \
+                DECODE_RGB565(buf); \
+        } \
+    } while (0)
+
+/*Generate both screen-filter lookup tables, plus voodoo->purpleline.
+  voodoo : device whose purpleline table is filled in
+  fcr    : filter threshold used for the red/blue tables
+  fcg    : filter threshold used for the green tables
+  Fills vb_filter_bx_rb / vb_filter_bx_g (box prefilter) and
+  vb_filter_v1_rb / vb_filter_v1_g (4x1 and 2x2 filter). Every table is indexed
+  [g][h] with both indices running over 0..255, and every entry is clamped to
+  0..255 before being stored.*/
+void voodoo_generate_vb_filters(voodoo_t *voodoo, int fcr, int fcg)
+{
+    int g, h;
+    float hack = 1.0f;
+
+    /*pre-clamping scale; a no-op while hack is 1.0*/
+    fcr *= hack;
+    fcg *= hack;
+
+    /* box prefilter */
+    for (g = 0; g < 256; g++) /*pixel 1 - our target pixel we want to bleed into*/
+    {
+        float lined;
+
+        for (h = 0; h < 256; h++) /*pixel 2 - our main pixel*/
+        {
+            float difference = (float)(g - h);
+            float thiscol = g;
+            float thiscolg = g;
+
+            /*Only the magnitude of the difference is used in this pass*/
+            if (difference < 0)
+                difference *= -1;
+
+            if (h > g)
+            {
+                int ugh = g - h;
+
+                if (thiscol > g+fcr)
+                    thiscol = g+fcr;
+                if (thiscolg > g+fcg)
+                    thiscolg = g+fcg;
+
+                if (thiscol > g+difference)
+                    thiscol = g+difference;
+                if (thiscolg > g+difference)
+                    thiscolg = g+difference;
+
+                /*hmm this might not be working out..*/
+                if (ugh < fcr)
+                    thiscol = h;
+                if (ugh < fcg)
+                    thiscolg = h;
+            }
+
+            /*Pixels further apart than the threshold are left unfiltered*/
+            if (difference > fcr)
+                thiscol = g;
+            if (difference > fcg)
+                thiscolg = g;
+
+            /*clamp*/
+            if (thiscol < 0)
+                thiscol = 0;
+            if (thiscolg < 0)
+                thiscolg = 0;
+
+            if (thiscol > 255)
+                thiscol = 255;
+            if (thiscolg > 255)
+                thiscolg = 255;
+
+            vb_filter_bx_rb[g][h] = (thiscol);
+            vb_filter_bx_g [g][h] = (thiscolg);
+        }
+
+        /*purpleline: entries 0 and 2 are the level raised by 4 (capped at 255),
+          entry 1 is the level itself*/
+        lined = g + 4;
+        if (lined > 255)
+            lined = 255;
+        voodoo->purpleline[g][0] = lined;
+        voodoo->purpleline[g][2] = lined;
+
+        lined = g + 0;
+        if (lined > 255)
+            lined = 255;
+        voodoo->purpleline[g][1] = lined;
+    }
+
+    /* 4x1 and 2x2 filter */
+    for (g = 0; g < 256; g++) /*pixel 1*/
+    {
+        for (h = 0; h < 256; h++) /*pixel 2*/
+        {
+            float difference = (float)(h - g);
+            float diffg = difference;
+            float thiscol = g;
+            float thiscolg = g;
+
+            /*Limit the signed difference to +/- the threshold*/
+            if (difference > fcr)
+                difference = fcr;
+            if (difference < -fcr)
+                difference = -fcr;
+
+            if (diffg > fcg)
+                diffg = fcg;
+            if (diffg < -fcg)
+                diffg = -fcg;
+
+            /*Move half-way towards the other pixel unless the difference sits
+              exactly at the upper limit.
+              NOTE(review): the previous form of each test also OR-ed in
+              (-difference > -fcr), which is the same comparison negated on
+              both sides; if a lower-bound test was intended this needs
+              revisiting*/
+            if (difference < fcr)
+                thiscol = g + (difference / 2);
+            if (diffg < fcg)
+                thiscolg = g + (diffg / 2);
+
+            if (thiscol < 0)
+                thiscol = 0;
+            if (thiscol > 255)
+                thiscol = 255;
+
+            if (thiscolg < 0)
+                thiscolg = 0;
+            if (thiscolg > 255)
+                thiscolg = 255;
+
+            vb_filter_v1_rb[g][h] = thiscol;
+            vb_filter_v1_g [g][h] = thiscolg;
+        }
+    }
+}
+
+
+static void banshee_overlay_draw(svga_t *svga, int displine)
+{
+ banshee_t *banshee = (banshee_t *)svga->p;
+ voodoo_t *voodoo = banshee->voodoo;
+ uint32_t *p;
+ int x;
+ int y = voodoo->overlay.src_y >> 20;
+ uint32_t src_addr = svga->overlay_latch.addr + ((banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) ?
+ ((y & 31) * 128 + (y >> 5) * svga->overlay_latch.pitch) :
+ y * svga->overlay_latch.pitch);
+ uint32_t src_addr2 = svga->overlay_latch.addr + ((banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) ?
+ (((y + 1) & 31) * 128 + ((y + 1) >> 5) * svga->overlay_latch.pitch) :
+ (y + 1) * svga->overlay_latch.pitch);
+ uint8_t *src = &svga->vram[src_addr & svga->vram_mask];
+ uint32_t src_x = 0;
+ unsigned int y_coeff = (voodoo->overlay.src_y & 0xfffff) >> 4;
+ int skip_filtering;
+ uint32_t *clut = &svga->pallook[(banshee->vidProcCfg & VIDPROCCFG_OVERLAY_CLUT_SEL) ? 256 : 0];
+
+ if (svga->render == svga_render_null &&
+ !svga->changedvram[src_addr >> 12] && !svga->changedvram[src_addr2 >> 12] &&
+ !svga->fullchange &&
+ ((voodoo->overlay.src_y >> 20) < 2048 && !voodoo->dirty_line[voodoo->overlay.src_y >> 20]) &&
+ !(banshee->vidProcCfg & VIDPROCCFG_V_SCALE_ENABLE))
+ {
+ voodoo->overlay.src_y += (1 << 20);
+ return;
+ }
+
+ if ((voodoo->overlay.src_y >> 20) < 2048)
+ voodoo->dirty_line[voodoo->overlay.src_y >> 20] = 0;
+// pclog("displine=%i addr=%08x %08x %08x %08x\n", displine, svga->overlay_latch.addr, src_addr, voodoo->overlay.vidOverlayDvdy, *(uint32_t *)src);
+// if (src_addr >= 0x800000)
+// fatal("overlay out of range!\n");
+ p = &((uint32_t *)buffer32->line[displine])[svga->overlay_latch.x + 32];
+
+ if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled)
+ skip_filtering = ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) != VIDPROCCFG_FILTER_MODE_BILINEAR &&
+ !(banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) && !(banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_DITHER_4X4) &&
+ !(banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_DITHER_2X2));
+ else
+ skip_filtering = ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) != VIDPROCCFG_FILTER_MODE_BILINEAR &&
+ !(banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE));
+
+ if (skip_filtering)
+ {
+ /*No scaling or filtering required, just write straight to output buffer*/
+ OVERLAY_SAMPLE(p);
+ }
+ else
+ {
+ OVERLAY_SAMPLE(banshee->overlay_buffer[0]);
+
+ switch (banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK)
+ {
+ case VIDPROCCFG_FILTER_MODE_BILINEAR:
+ src = &svga->vram[src_addr2 & svga->vram_mask];
+ OVERLAY_SAMPLE(banshee->overlay_buffer[1]);
+ if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE)
+ {
+ for (x = 0; x < svga->overlay_latch.xsize; x++)
+ {
+ unsigned int x_coeff = (src_x & 0xfffff) >> 4;
+ unsigned int coeffs[4] = {
+ ((0x10000 - x_coeff) * (0x10000 - y_coeff)) >> 16,
+ ( x_coeff * (0x10000 - y_coeff)) >> 16,
+ ((0x10000 - x_coeff) * y_coeff) >> 16,
+ ( x_coeff * y_coeff) >> 16
+ };
+ uint32_t samp0 = banshee->overlay_buffer[0][src_x >> 20];
+ uint32_t samp1 = banshee->overlay_buffer[0][(src_x >> 20) + 1];
+ uint32_t samp2 = banshee->overlay_buffer[1][src_x >> 20];
+ uint32_t samp3 = banshee->overlay_buffer[1][(src_x >> 20) + 1];
+ int r = (((samp0 >> 16) & 0xff) * coeffs[0] +
+ ((samp1 >> 16) & 0xff) * coeffs[1] +
+ ((samp2 >> 16) & 0xff) * coeffs[2] +
+ ((samp3 >> 16) & 0xff) * coeffs[3]) >> 16;
+ int g = (((samp0 >> 8) & 0xff) * coeffs[0] +
+ ((samp1 >> 8) & 0xff) * coeffs[1] +
+ ((samp2 >> 8) & 0xff) * coeffs[2] +
+ ((samp3 >> 8) & 0xff) * coeffs[3]) >> 16;
+ int b = ((samp0 & 0xff) * coeffs[0] +
+ (samp1 & 0xff) * coeffs[1] +
+ (samp2 & 0xff) * coeffs[2] +
+ (samp3 & 0xff) * coeffs[3]) >> 16;
+ p[x] = (r << 16) | (g << 8) | b;
+
+ src_x += voodoo->overlay.vidOverlayDudx;
+ }
+ }
+ else
+ {
+ for (x = 0; x < svga->overlay_latch.xsize; x++)
+ {
+ uint32_t samp0 = banshee->overlay_buffer[0][src_x >> 20];
+ uint32_t samp1 = banshee->overlay_buffer[1][src_x >> 20];
+ int r = (((samp0 >> 16) & 0xff) * (0x10000 - y_coeff) +
+ ((samp1 >> 16) & 0xff) * y_coeff) >> 16;
+ int g = (((samp0 >> 8) & 0xff) * (0x10000 - y_coeff) +
+ ((samp1 >> 8) & 0xff) * y_coeff) >> 16;
+ int b = ((samp0 & 0xff) * (0x10000 - y_coeff) +
+ (samp1 & 0xff) * y_coeff) >> 16;
+ p[x] = (r << 16) | (g << 8) | b;
+ }
+ }
+ break;
+
+ case VIDPROCCFG_FILTER_MODE_DITHER_4X4:
+ if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled)
+ {
+ uint8_t fil[(svga->overlay_latch.xsize) * 3];
+ uint8_t fil3[(svga->overlay_latch.xsize) * 3];
+
+ if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) /* leilei HACK - don't know of real 4x1 hscaled behavior yet, double for now */
+ {
+ for (x=0; x<svga->overlay_latch.xsize;x++)
+ {
+ fil[x*3] = ((banshee->overlay_buffer[0][src_x >> 20]));
+ fil[x*3+1] = ((banshee->overlay_buffer[0][src_x >> 20] >> 8));
+ fil[x*3+2] = ((banshee->overlay_buffer[0][src_x >> 20] >> 16));
+ fil3[x*3+0] = fil[x*3+0];
+ fil3[x*3+1] = fil[x*3+1];
+ fil3[x*3+2] = fil[x*3+2];
+ src_x += voodoo->overlay.vidOverlayDudx;
+ }
+ }
+ else
+ {
+ for (x=0; x<svga->overlay_latch.xsize;x++)
+ {
+ fil[x*3] = ((banshee->overlay_buffer[0][x]));
+ fil[x*3+1] = ((banshee->overlay_buffer[0][x] >> 8));
+ fil[x*3+2] = ((banshee->overlay_buffer[0][x] >> 16));
+ fil3[x*3+0] = fil[x*3+0];
+ fil3[x*3+1] = fil[x*3+1];
+ fil3[x*3+2] = fil[x*3+2];
+ }
+ }
+ if (y % 2 == 0)
+ {
+ for (x=0; x<svga->overlay_latch.xsize;x++)
+ {
+ fil[x*3] = banshee->voodoo->purpleline[fil[x*3+0]][0];
+ fil[x*3+1] = banshee->voodoo->purpleline[fil[x*3+1]][1];
+ fil[x*3+2] = banshee->voodoo->purpleline[fil[x*3+2]][2];
+ }
+ }
+
+ for (x=1; x<svga->overlay_latch.xsize;x++)
+ {
+ fil3[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil[(x-1) *3]];
+ fil3[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil[(x-1) *3+1]];
+ fil3[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil[(x-1) *3+2]];
+ }
+ for (x=1; x<svga->overlay_latch.xsize;x++)
+ {
+ fil[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil3[(x-1) *3]];
+ fil[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil3[(x-1) *3+1]];
+ fil[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil3[(x-1) *3+2]];
+ }
+ for (x=1; x<svga->overlay_latch.xsize;x++)
+ {
+ fil3[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil[(x-1) *3]];
+ fil3[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil[(x-1) *3+1]];
+ fil3[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil[(x-1) *3+2]];
+ }
+ for (x=0; x<svga->overlay_latch.xsize;x++)
+ {
+ fil[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil3[(x+1) *3]];
+ fil[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil3[(x+1) *3+1]];
+ fil[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil3[(x+1) *3+2]];
+ p[x] = (fil[x*3+2] << 16) | (fil[x*3+1] << 8) | fil[x*3];
+ }
+ }
+ else /* filter disabled by emulator option */
+ {
+ if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE)
+ {
+ for (x = 0; x < svga->overlay_latch.xsize; x++)
+ {
+ p[x] = banshee->overlay_buffer[0][src_x >> 20];
+ src_x += voodoo->overlay.vidOverlayDudx;
+ }
+ }
+ else
+ {
+ for (x = 0; x < svga->overlay_latch.xsize; x++)
+ p[x] = banshee->overlay_buffer[0][x];
+ }
+ }
+ break;
+
+ case VIDPROCCFG_FILTER_MODE_DITHER_2X2:
+ if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled)
+ {
+ uint8_t fil[(svga->overlay_latch.xsize) * 3];
+ uint8_t soak[(svga->overlay_latch.xsize) * 3];
+ uint8_t soak2[(svga->overlay_latch.xsize) * 3];
+
+ uint8_t samp1[(svga->overlay_latch.xsize) * 3];
+ uint8_t samp2[(svga->overlay_latch.xsize) * 3];
+ uint8_t samp3[(svga->overlay_latch.xsize) * 3];
+ uint8_t samp4[(svga->overlay_latch.xsize) * 3];
+
+ src = &svga->vram[src_addr2 & svga->vram_mask];
+ OVERLAY_SAMPLE(banshee->overlay_buffer[1]);
+ for (x=0; x<svga->overlay_latch.xsize;x++)
+ {
+ samp1[x*3] = ((banshee->overlay_buffer[0][x]));
+ samp1[x*3+1] = ((banshee->overlay_buffer[0][x] >> 8));
+ samp1[x*3+2] = ((banshee->overlay_buffer[0][x] >> 16));
+
+ samp2[x*3+0] = ((banshee->overlay_buffer[0][x+1]));
+ samp2[x*3+1] = ((banshee->overlay_buffer[0][x+1] >> 8));
+ samp2[x*3+2] = ((banshee->overlay_buffer[0][x+1] >> 16));
+
+ samp3[x*3+0] = ((banshee->overlay_buffer[1][x]));
+ samp3[x*3+1] = ((banshee->overlay_buffer[1][x] >> 8));
+ samp3[x*3+2] = ((banshee->overlay_buffer[1][x] >> 16));
+
+ samp4[x*3+0] = ((banshee->overlay_buffer[1][x+1]));
+ samp4[x*3+1] = ((banshee->overlay_buffer[1][x+1] >> 8));
+ samp4[x*3+2] = ((banshee->overlay_buffer[1][x+1] >> 16));
+
+ /* sample two lines */
+
+ soak[x*3+0] = vb_filter_bx_rb [samp1[x*3+0]] [samp2[x*3+0]];
+ soak[x*3+1] = vb_filter_bx_g [samp1[x*3+1]] [samp2[x*3+1]];
+ soak[x*3+2] = vb_filter_bx_rb [samp1[x*3+2]] [samp2[x*3+2]];
+
+ soak2[x*3+0] = vb_filter_bx_rb[samp3[x*3+0]] [samp4[x*3+0]];
+ soak2[x*3+1] = vb_filter_bx_g [samp3[x*3+1]] [samp4[x*3+1]];
+ soak2[x*3+2] = vb_filter_bx_rb[samp3[x*3+2]] [samp4[x*3+2]];
+
+ /* then pour it on the rest */
+
+ fil[x*3+0] = vb_filter_v1_rb[soak[x*3+0]] [soak2[x*3+0]];
+ fil[x*3+1] = vb_filter_v1_g [soak[x*3+1]] [soak2[x*3+1]];
+ fil[x*3+2] = vb_filter_v1_rb[soak[x*3+2]] [soak2[x*3+2]];
+ }
+
+ if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) /* 2x2 on a scaled low res */
+ {
+ for (x=0; x<svga->overlay_latch.xsize;x++)
+ {
+ p[x] = (fil[(src_x >> 20)*3+2] << 16) | (fil[(src_x >> 20)*3+1] << 8) | fil[(src_x >> 20)*3];
+ src_x += voodoo->overlay.vidOverlayDudx;
+ }
+ }
+ else
+ {
+ for (x=0; x<svga->overlay_latch.xsize;x++)
+ {
+ p[x] = (fil[x*3+2] << 16) | (fil[x*3+1] << 8) | fil[x*3];
+ }
+ }
+ }
+ else /* filter disabled by emulator option */
+ {
+ if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE)
+ {
+ for (x = 0; x < svga->overlay_latch.xsize; x++)
+ {
+ p[x] = banshee->overlay_buffer[0][src_x >> 20];
+
+ src_x += voodoo->overlay.vidOverlayDudx;
+ }
+ }
+ else
+ {
+ for (x = 0; x < svga->overlay_latch.xsize; x++)
+ p[x] = banshee->overlay_buffer[0][x];
+ }
+ }
+ break;
+
+ case VIDPROCCFG_FILTER_MODE_POINT:
+ default:
+ if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE)
+ {
+ for (x = 0; x < svga->overlay_latch.xsize; x++)
+ {
+ p[x] = banshee->overlay_buffer[0][src_x >> 20];
+
+ src_x += voodoo->overlay.vidOverlayDudx;
+ }
+ }
+ else
+ {
+ for (x = 0; x < svga->overlay_latch.xsize; x++)
+ p[x] = banshee->overlay_buffer[0][x];
+ }
+ break;
+ }
+ }
+
+ if (banshee->vidProcCfg & VIDPROCCFG_V_SCALE_ENABLE)
+ voodoo->overlay.src_y += voodoo->overlay.vidOverlayDvdy;
+ else
+ voodoo->overlay.src_y += (1 << 20);
+}
+
+/*
+ * Re-point the SVGA overlay (both the live and the latched address) at the
+ * buffer named by voodoo->leftOverlayBuf, masked with 0xfffffff, and then
+ * fill voodoo->dirty_line with 1 bytes.
+ *
+ * p    - the banshee_t instance, passed as an opaque pointer
+ * addr - not consulted by this function
+ */
+void banshee_set_overlay_addr(void *p, uint32_t addr)
+{
+        banshee_t *card = (banshee_t *)p;
+        voodoo_t *v3d = card->voodoo;
+        uint32_t overlay_base = v3d->leftOverlayBuf & 0xfffffff;
+
+        card->svga.overlay.addr = overlay_base;
+        card->svga.overlay_latch.addr = overlay_base;
+
+        memset(v3d->dirty_line, 1, sizeof(v3d->dirty_line));
+}
+
+/*
+ * Per-vsync hook installed as svga->vsync_callback.
+ *
+ * Counts the retrace and, under swap_mutex, decides whether a pending buffer
+ * swap is due (retrace_count has exceeded swap_interval).  When it is, the
+ * swap is completed after the mutex has been released: the dirty-line map is
+ * filled, the overlay address is refreshed, the FIFO thread is woken and the
+ * frame counter is bumped.  In every case the overlay and desktop scan
+ * positions are rewound for the next frame.
+ */
+static void banshee_vsync_callback(svga_t *svga)
+{
+        banshee_t *card = (banshee_t *)svga->p;
+        voodoo_t *v3d = card->voodoo;
+        int swap_due;
+
+        v3d->retrace_count++;
+
+        thread_lock_mutex(v3d->swap_mutex);
+        swap_due = v3d->swap_pending && (v3d->retrace_count > v3d->swap_interval);
+        if (swap_due)
+        {
+                if (v3d->swap_count > 0)
+                        v3d->swap_count--;
+                v3d->swap_pending = 0;
+        }
+        thread_unlock_mutex(v3d->swap_mutex);
+
+        if (swap_due)
+        {
+                memset(v3d->dirty_line, 1, sizeof(v3d->dirty_line));
+                v3d->retrace_count = 0;
+                banshee_set_overlay_addr(card, v3d->swap_offset);
+                thread_set_event(v3d->wake_fifo_thread);
+                v3d->frame_count++;
+        }
+
+        /*Restart overlay and desktop fetch from the top of the frame*/
+        v3d->overlay.src_y = 0;
+        card->desktop_addr = card->vidDesktopStartAddr;
+        card->desktop_y = 0;
+}
+
+/*
+ * PCI configuration space read handler.
+ *
+ * func - PCI function number; anything other than 0 reads back 0xff
+ * addr - byte offset into configuration space
+ * p    - the banshee_t instance
+ *
+ * Offsets not listed in the switch read back as 0.
+ */
+static uint8_t banshee_pci_read(int func, int addr, void *p)
+{
+        banshee_t *card = (banshee_t *)p;
+        int is_banshee = (card->type == TYPE_BANSHEE);
+
+        if (func != 0)
+                return 0xff;
+
+        switch (addr)
+        {
+                /*3DFX*/
+                case 0x00: return 0x1a;
+                case 0x01: return 0x12;
+
+                /*Differs between Banshee and the other card types*/
+                case 0x02: return is_banshee ? 0x03 : 0x05;
+                case 0x03: return 0x00;
+
+                case 0x04: return card->pci_regs[0x04] & 0x27;
+
+                case 0x07: return card->pci_regs[0x07] & 0x36;
+
+                /*Revision ID*/
+                case 0x08: return is_banshee ? 3 : 1;
+                /*Programming interface*/
+                case 0x09: return 0;
+
+                /*Supports VGA interface*/
+                case 0x0a: return 0x00;
+                case 0x0b: return 0x03;
+
+                case 0x0d: return card->pci_regs[0x0d] & 0xf8;
+
+                /*memBaseAddr0 - only the top byte is readable*/
+                case 0x10: case 0x11: case 0x12:
+                return 0x00;
+                case 0x13: return card->memBaseAddr0 >> 24;
+
+                /*memBaseAddr1 - only the top byte is readable*/
+                case 0x14: case 0x15: case 0x16:
+                return 0x00;
+                case 0x17: return card->memBaseAddr1 >> 24;
+
+                /*ioBaseAddr*/
+                case 0x18: return 0x01;
+                case 0x19: return card->ioBaseAddr >> 8;
+                case 0x1a: case 0x1b:
+                return 0x00;
+
+                /*Subsystem vendor ID*/
+                case 0x2c: case 0x2d: case 0x2e: case 0x2f:
+                return card->pci_regs[addr];
+
+                /*BIOS ROM address*/
+                case 0x30: return card->pci_regs[0x30] & 0x01;
+                case 0x31: return 0x00;
+                case 0x32: case 0x33:
+                return card->pci_regs[addr];
+
+                case 0x3c: return card->pci_regs[0x3c];
+
+                /*INTA*/
+                case 0x3d: return 0x01;
+
+                case 0x3e: return 0x04;
+                case 0x3f: return 0xff;
+        }
+
+        return 0;
+}
+
+/*
+ * PCI configuration space write handler.
+ *
+ * func - PCI function number; writes to any function other than 0 are ignored
+ * addr - byte offset into configuration space
+ * val  - byte being written
+ * p    - the banshee_t instance
+ *
+ * Offsets not listed in the switch are ignored.
+ *
+ * The base address and ROM address bytes are widened to uint32_t before being
+ * shifted into bits 24-31; shifting the int-promoted byte could move a 1 into
+ * the sign bit, which is undefined behaviour.
+ */
+static void banshee_pci_write(int func, int addr, uint8_t val, void *p)
+{
+        banshee_t *banshee = (banshee_t *)p;
+
+        if (func)
+                return;
+
+        switch (addr)
+        {
+                /*Read-only registers*/
+                case 0x00: case 0x01: case 0x02: case 0x03:
+                case 0x08: case 0x09: case 0x0a: case 0x0b:
+                case 0x3d: case 0x3e: case 0x3f:
+                return;
+
+                case PCI_REG_COMMAND:
+                if (val & PCI_COMMAND_IO)
+                {
+                        /*Remove before adding so the handlers are never registered twice*/
+                        io_removehandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee);
+                        if (banshee->ioBaseAddr)
+                                io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee);
+
+                        io_sethandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee);
+                        if (banshee->ioBaseAddr)
+                                io_sethandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee);
+                }
+                else
+                {
+                        io_removehandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee);
+                        io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee);
+                }
+                banshee->pci_regs[PCI_REG_COMMAND] = val & 0x27;
+                banshee_updatemapping(banshee);
+                return;
+
+                case 0x07:
+                banshee->pci_regs[0x07] = val & 0x3e;
+                return;
+
+                case 0x0d:
+                banshee->pci_regs[0x0d] = val & 0xf8;
+                return;
+
+                case 0x13:
+                banshee->memBaseAddr0 = (uint32_t)(val & 0xfe) << 24;
+                banshee_updatemapping(banshee);
+                return;
+
+                case 0x17:
+                banshee->memBaseAddr1 = (uint32_t)(val & 0xfe) << 24;
+                banshee_updatemapping(banshee);
+                return;
+
+                case 0x19:
+                /*Move the extended I/O window: drop the old range, then map the new one if I/O is enabled*/
+                if (banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_IO)
+                        io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee);
+                banshee->ioBaseAddr = val << 8;
+                if ((banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_IO) && banshee->ioBaseAddr)
+                        io_sethandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee);
+                pclog("Banshee ioBaseAddr=%08x\n", banshee->ioBaseAddr);
+                return;
+
+                case 0x30: case 0x32: case 0x33:
+                banshee->pci_regs[addr] = val;
+                if (banshee->pci_regs[0x30] & 0x01)
+                {
+                        /*ROM base comes from bytes 0x32 (bits 16-23) and 0x33 (bits 24-31)*/
+                        uint32_t rom_addr = ((uint32_t)banshee->pci_regs[0x32] << 16) |
+                                            ((uint32_t)banshee->pci_regs[0x33] << 24);
+
+                        pclog("Banshee bios_rom enabled at %08x\n", rom_addr);
+                        mem_mapping_set_addr(&banshee->bios_rom.mapping, rom_addr, 0x10000);
+                        mem_mapping_enable(&banshee->bios_rom.mapping);
+                }
+                else
+                {
+                        pclog("Banshee bios_rom disabled\n");
+                        mem_mapping_disable(&banshee->bios_rom.mapping);
+                }
+                return;
+
+                case 0x3c:
+                banshee->pci_regs[0x3c] = val;
+                return;
+
+                default:
+                return;
+        }
+}
+
+/*
+ * User-configurable options for the SGRAM board: memory size (8 or 16 MB),
+ * bilinear filtering, screen filter, render thread count and, unless
+ * NO_CODEGEN is defined, the recompiler switch.  The list ends with an entry
+ * whose type is -1; each selection list ends with an empty description.
+ */
+static device_config_t banshee_sgram_config[] =
+{
+        {
+                .name = "memory", .description = "Memory size", .type = CONFIG_SELECTION,
+                .selection =
+                {
+                        { .description = "8 MB", .value = 8 },
+                        { .description = "16 MB", .value = 16 },
+                        { .description = "" }
+                },
+                .default_int = 16
+        },
+        { .name = "bilinear", .description = "Bilinear filtering", .type = CONFIG_BINARY, .default_int = 1 },
+        { .name = "dacfilter", .description = "Screen Filter", .type = CONFIG_BINARY, .default_int = 0 },
+        {
+                .name = "render_threads", .description = "Render threads", .type = CONFIG_SELECTION,
+                .selection =
+                {
+                        { .description = "1", .value = 1 },
+                        { .description = "2", .value = 2 },
+                        { .description = "4", .value = 4 },
+                        { .description = "" }
+                },
+                .default_int = 2
+        },
+#ifndef NO_CODEGEN
+        { .name = "recompiler", .description = "Recompiler", .type = CONFIG_BINARY, .default_int = 1 },
+#endif
+        { .type = -1 }
+};
+
+/*
+ * User-configurable options for the SDRAM boards.  There is no "memory"
+ * entry here: bilinear filtering, screen filter, render thread count and,
+ * unless NO_CODEGEN is defined, the recompiler switch.  The list ends with an
+ * entry whose type is -1.
+ */
+static device_config_t banshee_sdram_config[] =
+{
+        { .name = "bilinear", .description = "Bilinear filtering", .type = CONFIG_BINARY, .default_int = 1 },
+        { .name = "dacfilter", .description = "Screen Filter", .type = CONFIG_BINARY, .default_int = 0 },
+        {
+                .name = "render_threads", .description = "Render threads", .type = CONFIG_SELECTION,
+                .selection =
+                {
+                        { .description = "1", .value = 1 },
+                        { .description = "2", .value = 2 },
+                        { .description = "4", .value = 4 },
+                        { .description = "" }
+                },
+                .default_int = 2
+        },
+#ifndef NO_CODEGEN
+        { .name = "recompiler", .description = "Recompiler", .type = CONFIG_BINARY, .default_int = 1 },
+#endif
+        { .type = -1 }
+};
+
+/*
+ * Shared constructor for every Banshee / Voodoo 3 variant.
+ *
+ * fn          - BIOS ROM file name handed to rom_init()
+ * has_sgram   - non-zero: memory size comes from the "memory" config option;
+ *               zero: 16 MB is used and the SDRAM bit is set in dramInit1
+ * type        - TYPE_* board identity, selects the subsystem ID bytes
+ * voodoo_type - VOODOO_* value forwarded to voodoo_2d3d_card_init()
+ *
+ * Returns the new banshee_t as an opaque pointer, or NULL if the instance
+ * could not be allocated (previously a failed allocation was written through).
+ */
+static void *banshee_init_common(char *fn, int has_sgram, int type, int voodoo_type)
+{
+        int mem_size;
+        /*calloc gives the zero-filled state the rest of the setup relies on*/
+        banshee_t *banshee = calloc(1, sizeof(*banshee));
+
+        if (!banshee)
+                return NULL;
+
+        banshee->type = type;
+
+        /*The ROM mapping stays disabled until PCI register 0x30 enables it*/
+        rom_init(&banshee->bios_rom, fn, 0xc0000, 0x10000, 0xffff, 0, MEM_MAPPING_EXTERNAL);
+        mem_mapping_disable(&banshee->bios_rom.mapping);
+
+        if (has_sgram)
+                mem_size = device_get_config_int("memory");
+        else
+                mem_size = 16; /*SDRAM Banshee only supports 16 MB*/
+
+        svga_init(&banshee->svga, banshee, mem_size << 20,
+                  banshee_recalctimings,
+                  banshee_in, banshee_out,
+                  banshee_hwcursor_draw,
+                  banshee_overlay_draw);
+        banshee->svga.vsync_callback = banshee_vsync_callback;
+
+        /*All three mappings are created with base 0 and size 0*/
+        mem_mapping_add(&banshee->linear_mapping, 0, 0, banshee_read_linear,
+                        banshee_read_linear_w,
+                        banshee_read_linear_l,
+                        banshee_write_linear,
+                        banshee_write_linear_w,
+                        banshee_write_linear_l,
+                        NULL,
+                        MEM_MAPPING_EXTERNAL,
+                        &banshee->svga);
+        mem_mapping_add(&banshee->reg_mapping_low, 0, 0, banshee_reg_read,
+                        banshee_reg_readw,
+                        banshee_reg_readl,
+                        banshee_reg_write,
+                        banshee_reg_writew,
+                        banshee_reg_writel,
+                        NULL,
+                        MEM_MAPPING_EXTERNAL,
+                        banshee);
+        mem_mapping_add(&banshee->reg_mapping_high, 0, 0, banshee_reg_read,
+                        banshee_reg_readw,
+                        banshee_reg_readl,
+                        banshee_reg_write,
+                        banshee_reg_writew,
+                        banshee_reg_writel,
+                        NULL,
+                        MEM_MAPPING_EXTERNAL,
+                        banshee);
+
+        banshee->svga.bpp = 8;
+        banshee->svga.miscout = 1;
+
+        banshee->dramInit0 = 1 << 27;
+        if (has_sgram && mem_size == 16)
+                banshee->dramInit0 |= (1 << 26); /*2xSGRAM = 16 MB*/
+        if (!has_sgram)
+                banshee->dramInit1 = 1 << 30; /*SDRAM*/
+        banshee->svga.decode_mask = 0x1ffffff;
+
+        pci_add(banshee_pci_read, banshee_pci_write, banshee);
+
+        /*The 3D core shares the SVGA video memory for framebuffer and both texture units*/
+        banshee->voodoo = voodoo_2d3d_card_init(voodoo_type);
+        banshee->voodoo->p = banshee;
+        banshee->voodoo->vram = banshee->svga.vram;
+        banshee->voodoo->changedvram = banshee->svga.changedvram;
+        banshee->voodoo->fb_mem = banshee->svga.vram;
+        banshee->voodoo->fb_mask = banshee->svga.vram_mask;
+        banshee->voodoo->tex_mem[0] = banshee->svga.vram;
+        banshee->voodoo->tex_mem_w[0] = (uint16_t *)banshee->svga.vram;
+        banshee->voodoo->tex_mem[1] = banshee->svga.vram;
+        banshee->voodoo->tex_mem_w[1] = (uint16_t *)banshee->svga.vram;
+        banshee->voodoo->texture_mask = banshee->svga.vram_mask;
+        voodoo_generate_filter_v1(banshee->voodoo);
+
+        banshee->vidSerialParallelPort = VIDSERIAL_DDC_DCK_W | VIDSERIAL_DDC_DDA_W;
+
+        ddc_init();
+
+        /*Subsystem ID bytes returned by PCI config reads of 0x2c-0x2f*/
+        switch (type)
+        {
+                case TYPE_BANSHEE:
+                if (has_sgram)
+                {
+                        banshee->pci_regs[0x2c] = 0x1a;
+                        banshee->pci_regs[0x2d] = 0x12;
+                        banshee->pci_regs[0x2e] = 0x04;
+                        banshee->pci_regs[0x2f] = 0x00;
+                }
+                else
+                {
+                        banshee->pci_regs[0x2c] = 0x02;
+                        banshee->pci_regs[0x2d] = 0x11;
+                        banshee->pci_regs[0x2e] = 0x17;
+                        banshee->pci_regs[0x2f] = 0x10;
+                }
+                break;
+
+                case TYPE_V3_2000:
+                banshee->pci_regs[0x2c] = 0x1a;
+                banshee->pci_regs[0x2d] = 0x12;
+                banshee->pci_regs[0x2e] = 0x30;
+                banshee->pci_regs[0x2f] = 0x00;
+                break;
+
+                case TYPE_V3_3000:
+                banshee->pci_regs[0x2c] = 0x1a;
+                banshee->pci_regs[0x2d] = 0x12;
+                banshee->pci_regs[0x2e] = 0x3a;
+                banshee->pci_regs[0x2f] = 0x00;
+                break;
+
+                default:
+                /*Unknown type: subsystem ID stays zero*/
+                break;
+        }
+
+        return banshee;
+}
+
+/*Create the SGRAM Banshee (BIOS image "pci_sg.rom").*/
+static void *banshee_init()
+{
+        void *card = banshee_init_common("pci_sg.rom", 1, TYPE_BANSHEE, VOODOO_BANSHEE);
+
+        return card;
+}
+/*Create the SDRAM Banshee that uses the "blasterpci.rom" BIOS image.*/
+static void *creative_banshee_init()
+{
+        void *card = banshee_init_common("blasterpci.rom", 0, TYPE_BANSHEE, VOODOO_BANSHEE);
+
+        return card;
+}
+/*Create the Voodoo 3 2000 (SDRAM, BIOS image "voodoo3_2000/2k11sd.rom").*/
+static void *v3_2000_init()
+{
+        void *card = banshee_init_common("voodoo3_2000/2k11sd.rom", 0, TYPE_V3_2000, VOODOO_3);
+
+        return card;
+}
+/*Create the Voodoo 3 3000 (SDRAM, BIOS image "voodoo3_3000/3k12sd.rom").*/
+static void *v3_3000_init()
+{
+        void *card = banshee_init_common("voodoo3_3000/3k12sd.rom", 0, TYPE_V3_3000, VOODOO_3);
+
+        return card;
+}
+
+/*Report whether the BIOS image needed by banshee_init() is present.*/
+static int banshee_available()
+{
+        int have_rom = rom_present("pci_sg.rom");
+
+        return have_rom;
+}
+/*Report whether the BIOS image needed by creative_banshee_init() is present.*/
+static int creative_banshee_available()
+{
+        int have_rom = rom_present("blasterpci.rom");
+
+        return have_rom;
+}
+/*Report whether the BIOS image needed by v3_2000_init() is present.*/
+static int v3_2000_available()
+{
+        int have_rom = rom_present("voodoo3_2000/2k11sd.rom");
+
+        return have_rom;
+}
+/*Report whether the BIOS image needed by v3_3000_init() is present.*/
+static int v3_3000_available()
+{
+        int have_rom = rom_present("voodoo3_3000/3k12sd.rom");
+
+        return have_rom;
+}
+
+/*
+ * Tear down a card created by banshee_init_common(): close the 3D core, then
+ * the SVGA core, then release the banshee_t itself.
+ */
+static void banshee_close(void *p)
+{
+        banshee_t *card = (banshee_t *)p;
+        svga_t *vga = &card->svga;
+
+        voodoo_card_close(card->voodoo);
+        svga_close(vga);
+
+        free(card);
+}
+
+/*Have the SVGA core recompute its timings for this card.*/
+static void banshee_speed_changed(void *p)
+{
+        banshee_t *card = (banshee_t *)p;
+        svga_t *vga = &card->svga;
+
+        svga_recalctimings(vga);
+}
+
+/*Request a full redraw by loading svga.fullchange with changeframecount.*/
+static void banshee_force_redraw(void *p)
+{
+        banshee_t *card = (banshee_t *)p;
+        svga_t *vga = &card->svga;
+
+        vga->fullchange = changeframecount;
+}
+
+/*timer_read() value captured by the previous banshee_add_status_info() call*/
+static uint64_t status_time = 0;
+
+/*
+ * Append text to the NUL-terminated string in dst without letting dst grow
+ * past dst_size bytes, terminator included.  Text that does not fit is
+ * dropped.
+ */
+static void banshee_status_append(char *dst, int dst_size, const char *text)
+{
+        size_t used;
+
+        if (dst_size <= 0)
+                return;
+
+        used = strlen(dst);
+        if (used >= (size_t)dst_size - 1)
+                return;
+
+        /*strncat's count is the number of characters to append, not the buffer size*/
+        strncat(dst, text, (size_t)dst_size - 1 - used);
+}
+
+/*
+ * Append this card's status text to s: the SVGA status, pixel/texel/triangle
+ * throughput, FIFO and render thread load, frame rate and the current overlay
+ * filter mode.  The per-interval counters are reset afterwards.
+ *
+ * s       - NUL-terminated buffer to append to
+ * max_len - assumed to be the total capacity of s in bytes - TODO confirm
+ *           against the caller
+ * p       - the banshee_t instance
+ *
+ * All appends done here are bounded by the space actually left in the
+ * destination; the original passed the full buffer size as the strncat count,
+ * which could write past the end of both temps and s.
+ */
+static void banshee_add_status_info(char *s, int max_len, void *p)
+{
+        banshee_t *banshee = (banshee_t *)p;
+        voodoo_t *voodoo = banshee->voodoo;
+        char temps[512];
+        int pixel_count_current[4];
+        int pixel_count_total;
+        int texel_count_current[4];
+        int texel_count_total;
+        int render_time[4];
+        uint64_t new_time = timer_read();
+        uint64_t status_diff = new_time - status_time;
+        int c;
+        status_time = new_time;
+
+        svga_add_status_info(s, max_len, &banshee->svga);
+
+        /*Snapshot the live counters so totals and the saved "old" values agree*/
+        for (c = 0; c < 4; c++)
+        {
+                pixel_count_current[c] = voodoo->pixel_count[c];
+                texel_count_current[c] = voodoo->texel_count[c];
+                render_time[c] = voodoo->render_time[c];
+        }
+
+        pixel_count_total = (pixel_count_current[0] + pixel_count_current[1] + pixel_count_current[2] + pixel_count_current[3]) -
+                            (voodoo->pixel_count_old[0] + voodoo->pixel_count_old[1] + voodoo->pixel_count_old[2] + voodoo->pixel_count_old[3]);
+        texel_count_total = (texel_count_current[0] + texel_count_current[1] + texel_count_current[2] + texel_count_current[3]) -
+                            (voodoo->texel_count_old[0] + voodoo->texel_count_old[1] + voodoo->texel_count_old[2] + voodoo->texel_count_old[3]);
+        snprintf(temps, sizeof(temps), "%f Mpixels/sec (%f)\n%f Mtexels/sec (%f)\n%f ktris/sec\n%f%% CPU (%f%% real)\n%d frames/sec (%i)\n%f%% CPU (%f%% real)\n"/*%d reads/sec\n%d write/sec\n%d tex/sec\n*/,
+                (double)pixel_count_total/1000000.0,
+                ((double)pixel_count_total/1000000.0) / ((double)render_time[0] / status_diff),
+                (double)texel_count_total/1000000.0,
+                ((double)texel_count_total/1000000.0) / ((double)render_time[0] / status_diff),
+                (double)voodoo->tri_count/1000.0, ((double)voodoo->time * 100.0) / timer_freq, ((double)voodoo->time * 100.0) / status_diff, voodoo->frame_count, voodoo_recomp,
+                ((double)voodoo->render_time[0] * 100.0) / timer_freq, ((double)voodoo->render_time[0] * 100.0) / status_diff);
+        if (voodoo->render_threads >= 2)
+        {
+                char temps2[512];
+                snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n",
+                        ((double)voodoo->render_time[1] * 100.0) / timer_freq, ((double)voodoo->render_time[1] * 100.0) / status_diff);
+                banshee_status_append(temps, sizeof(temps), temps2);
+        }
+        if (voodoo->render_threads == 4)
+        {
+                char temps2[512];
+                snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n%f%% CPU (%f%% real)\n",
+                        ((double)voodoo->render_time[2] * 100.0) / timer_freq, ((double)voodoo->render_time[2] * 100.0) / status_diff,
+                        ((double)voodoo->render_time[3] * 100.0) / timer_freq, ((double)voodoo->render_time[3] * 100.0) / status_diff);
+                banshee_status_append(temps, sizeof(temps), temps2);
+        }
+
+        banshee_status_append(s, max_len, temps);
+
+        banshee_status_append(s, max_len, "Overlay mode: "); /* leilei debug additions */
+        if ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) == VIDPROCCFG_FILTER_MODE_DITHER_2X2)
+                banshee_status_append(s, max_len, "2x2 box filter\n");
+        if ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) == VIDPROCCFG_FILTER_MODE_DITHER_4X4)
+                banshee_status_append(s, max_len, "4x1 tap filter\n");
+        if ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) == VIDPROCCFG_FILTER_MODE_POINT)
+                banshee_status_append(s, max_len, "Nearest neighbor\n");
+        if ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) == VIDPROCCFG_FILTER_MODE_BILINEAR)
+                banshee_status_append(s, max_len, "Bilinear filtered\n");
+        if ((banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE))
+                banshee_status_append(s, max_len, "H scaled \n");
+        if ((banshee->vidProcCfg & VIDPROCCFG_V_SCALE_ENABLE))
+                banshee_status_append(s, max_len, "V scaled \n");
+        if ((banshee->vidProcCfg & VIDPROCCFG_2X_MODE))
+                banshee_status_append(s, max_len, "2X mode\n");
+
+        banshee_status_append(s, max_len, "\n");
+
+        /*Start a fresh measurement interval*/
+        for (c = 0; c < 4; c++)
+        {
+                voodoo->pixel_count_old[c] = pixel_count_current[c];
+                voodoo->texel_count_old[c] = texel_count_current[c];
+                voodoo->render_time[c] = 0;
+        }
+
+        voodoo->tri_count = voodoo->frame_count = 0;
+        voodoo->rd_count = voodoo->wr_count = voodoo->tex_count = 0;
+        voodoo->time = 0;
+
+        voodoo->read_time = pci_nonburst_time + pci_burst_time;
+
+        voodoo_recomp = 0;
+}
+
+device_t voodoo_banshee_device = /* descriptor for the SGRAM Banshee; NOTE(review): member order is defined by device_t, which is not visible here */
+{
+ "Voodoo Banshee PCI (reference)",
+ DEVICE_PCI,
+ banshee_init, /* builds the card from "pci_sg.rom" with has_sgram = 1 */
+ banshee_close, /* closes the 3D and SVGA cores and frees the instance */
+ banshee_available, /* true when "pci_sg.rom" is present */
+ banshee_speed_changed, /* recalculates SVGA timings */
+ banshee_force_redraw, /* sets svga.fullchange */
+ banshee_add_status_info, /* appends the status text */
+ banshee_sgram_config /* option list that includes the "memory" selection */
+};
+
+device_t creative_voodoo_banshee_device = /* descriptor for the SDRAM Banshee; NOTE(review): member order is defined by device_t, which is not visible here */
+{
+ "Creative Labs 3D Blaster Banshee PCI",
+ DEVICE_PCI,
+ creative_banshee_init, /* builds the card from "blasterpci.rom" with has_sgram = 0 */
+ banshee_close, /* closes the 3D and SVGA cores and frees the instance */
+ creative_banshee_available, /* true when "blasterpci.rom" is present */
+ banshee_speed_changed, /* recalculates SVGA timings */
+ banshee_force_redraw, /* sets svga.fullchange */
+ banshee_add_status_info, /* appends the status text */
+ banshee_sdram_config /* option list without a "memory" selection */
+};
+
+device_t voodoo_3_2000_device = /* descriptor for the Voodoo 3 2000; NOTE(review): member order is defined by device_t, which is not visible here */
+{
+ "Voodoo 3 2000 PCI",
+ DEVICE_PCI,
+ v3_2000_init, /* builds the card from "voodoo3_2000/2k11sd.rom" as TYPE_V3_2000 */
+ banshee_close, /* closes the 3D and SVGA cores and frees the instance */
+ v3_2000_available, /* true when "voodoo3_2000/2k11sd.rom" is present */
+ banshee_speed_changed, /* recalculates SVGA timings */
+ banshee_force_redraw, /* sets svga.fullchange */
+ banshee_add_status_info, /* appends the status text */
+ banshee_sdram_config /* option list without a "memory" selection */
+};
+
+device_t voodoo_3_3000_device = /* descriptor for the Voodoo 3 3000; NOTE(review): member order is defined by device_t, which is not visible here */
+{
+ "Voodoo 3 3000 PCI",
+ DEVICE_PCI,
+ v3_3000_init, /* builds the card from "voodoo3_3000/3k12sd.rom" as TYPE_V3_3000 */
+ banshee_close, /* closes the 3D and SVGA cores and frees the instance */
+ v3_3000_available, /* true when "voodoo3_3000/3k12sd.rom" is present */
+ banshee_speed_changed, /* recalculates SVGA timings */
+ banshee_force_redraw, /* sets svga.fullchange */
+ banshee_add_status_info, /* appends the status text */
+ banshee_sdram_config /* option list without a "memory" selection */
+};
--- /dev/null
+extern device_t voodoo_banshee_device; /* "Voodoo Banshee PCI (reference)" */
+extern device_t creative_voodoo_banshee_device; /* "Creative Labs 3D Blaster Banshee PCI" */
+extern device_t voodoo_3_2000_device; /* "Voodoo 3 2000 PCI" */
+extern device_t voodoo_3_3000_device; /* "Voodoo 3 3000 PCI" */
+
+void banshee_set_overlay_addr(void *p, uint32_t addr); /* p is the banshee_t instance; the implementation takes the overlay address from voodoo->leftOverlayBuf and does not read addr */
--- /dev/null
+/*Current issues :
+ - missing screen->screen scaled blits with format conversion
+ - missing YUV blits
+ - missing linestyle
+ - missing wait for vsync
+ - missing reversible lines
+
+ Notes :
+ - 16 bpp runs with tiled framebuffer - to aid 3D?
+ 8 and 32 bpp use linear
+*/
+#include <math.h>
+#include <stddef.h>
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_banshee_blitter.h"
+#include "vid_voodoo_render.h"
+
+#define COMMAND_CMD_MASK (0xf) /* low four bits hold one of the COMMAND_CMD_* operation codes below */
+#define COMMAND_CMD_NOP (0 << 0)
+#define COMMAND_CMD_SCREEN_TO_SCREEN_BLT (1 << 0)
+#define COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT (2 << 0)
+#define COMMAND_CMD_HOST_TO_SCREEN_BLT (3 << 0)
+#define COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT (4 << 0)
+#define COMMAND_CMD_RECTFILL (5 << 0)
+#define COMMAND_CMD_LINE (6 << 0)
+#define COMMAND_CMD_POLYLINE (7 << 0)
+#define COMMAND_CMD_POLYFILL (8 << 0)
+#define COMMAND_INITIATE (1 << 8)
+#define COMMAND_INC_X_START (1 << 10)
+#define COMMAND_INC_Y_START (1 << 11)
+#define COMMAND_STIPPLE_LINE (1 << 12)
+#define COMMAND_PATTERN_MONO (1 << 13) /* tested against banshee_blt.command in PLOT(): set = pick colorFore/colorBack from the pattern mask, clear = use the colour pattern table */
+#define COMMAND_DX (1 << 14)
+#define COMMAND_DY (1 << 15)
+#define COMMAND_TRANS_MONO (1 << 16)
+#define COMMAND_PATOFF_X_MASK (7 << 17) /* three-bit field; extract with COMMAND_PATOFF_X_SHIFT */
+#define COMMAND_PATOFF_X_SHIFT (17)
+#define COMMAND_PATOFF_Y_MASK (7 << 20) /* three-bit field; extract with COMMAND_PATOFF_Y_SHIFT */
+#define COMMAND_PATOFF_Y_SHIFT (20)
+#define COMMAND_CLIP_SEL (1 << 23)
+
+#define CMDEXTRA_SRC_COLORKEY (1 << 0) /* bit of banshee_blt.commandExtra: colorkey() tests the source colour only when set */
+#define CMDEXTRA_DST_COLORKEY (1 << 1) /* bit of banshee_blt.commandExtra: colorkey() tests the destination colour only when set */
+#define CMDEXTRA_FORCE_PAT_ROW0 (1 << 3)
+
+#define SRC_FORMAT_STRIDE_MASK (0x1fff) /* NOTE(review): presumably applied to the source format register - usage is not visible here */
+#define SRC_FORMAT_COL_MASK (0xf << 16) /* four-bit colour format field; the SRC_FORMAT_COL_* values below fit in it */
+#define SRC_FORMAT_COL_1_BPP (0 << 16)
+#define SRC_FORMAT_COL_8_BPP (1 << 16)
+#define SRC_FORMAT_COL_16_BPP (3 << 16)
+#define SRC_FORMAT_COL_24_BPP (4 << 16)
+#define SRC_FORMAT_COL_32_BPP (5 << 16)
+#define SRC_FORMAT_COL_YUYV (8 << 16)
+#define SRC_FORMAT_COL_UYVY (9 << 16)
+#define SRC_FORMAT_BYTE_SWIZZLE (1 << 20)
+#define SRC_FORMAT_WORD_SWIZZLE (1 << 21)
+#define SRC_FORMAT_PACKING_MASK (3 << 22) /* two-bit field; the SRC_FORMAT_PACKING_* values below fit in it */
+#define SRC_FORMAT_PACKING_STRIDE (0 << 22)
+#define SRC_FORMAT_PACKING_BYTE (1 << 22)
+#define SRC_FORMAT_PACKING_WORD (2 << 22)
+#define SRC_FORMAT_PACKING_DWORD (3 << 22)
+
+#define DST_FORMAT_STRIDE_MASK (0x1fff)
+#define DST_FORMAT_COL_MASK (0xf << 16) /* applied to banshee_blt.dstFormat in PLOT() to select the destination pixel size */
+#define DST_FORMAT_COL_8_BPP (1 << 16)
+#define DST_FORMAT_COL_16_BPP (3 << 16)
+#define DST_FORMAT_COL_24_BPP (4 << 16)
+#define DST_FORMAT_COL_32_BPP (5 << 16)
+
+#define BRES_ERROR_MASK (0xffff)
+#define BRES_ERROR_USE (1 << 31)
+
+enum /* pixel layouts understood by colorkey() */
+{
+ COLORKEY_8, /* only the low 8 bits are range-checked */
+ COLORKEY_16, /* 5-bit, 6-bit and 5-bit fields at bits 11, 5 and 0 are range-checked */
+ COLORKEY_32 /* three 8-bit fields at bits 16, 8 and 0 are range-checked; bits 24-31 are ignored */
+};
+
+/*
+ * Range-check one colour field: the field is taken from value, lo and hi by
+ * shifting right by shift and masking with mask.  Returns 1 when
+ * lo <= value <= hi for that field, otherwise 0.
+ */
+static int colorkey_field_in_range(uint32_t value, uint32_t lo, uint32_t hi, int shift, uint32_t mask)
+{
+        uint32_t field = (value >> shift) & mask;
+        uint32_t field_lo = (lo >> shift) & mask;
+        uint32_t field_hi = (hi >> shift) & mask;
+
+        return (field >= field_lo) && (field <= field_hi);
+}
+
+/*
+ * Test a colour against the source or destination colour key range.
+ *
+ * src          - the colour to test
+ * src_notdst   - non-zero: use the source key range and enable bit;
+ *                zero: use the destination ones
+ * color_format - COLORKEY_8, COLORKEY_16 or COLORKEY_32
+ *
+ * Returns 1 when keying is enabled in commandExtra and every colour field of
+ * src lies within [min, max]; otherwise 0.  Unknown formats return 0.
+ */
+static int colorkey(voodoo_t *voodoo, uint32_t src, int src_notdst, int color_format)
+{
+        uint32_t key_lo, key_hi;
+        uint32_t enable_bit;
+
+        if (src_notdst)
+        {
+                key_lo = voodoo->banshee_blt.srcColorkeyMin;
+                key_hi = voodoo->banshee_blt.srcColorkeyMax;
+                enable_bit = CMDEXTRA_SRC_COLORKEY;
+        }
+        else
+        {
+                key_lo = voodoo->banshee_blt.dstColorkeyMin;
+                key_hi = voodoo->banshee_blt.dstColorkeyMax;
+                enable_bit = CMDEXTRA_DST_COLORKEY;
+        }
+
+        if ((voodoo->banshee_blt.commandExtra & enable_bit) == 0)
+                return 0;
+
+        if (color_format == COLORKEY_8)
+                return colorkey_field_in_range(src, key_lo, key_hi, 0, 0xff);
+
+        if (color_format == COLORKEY_16)
+        {
+                return colorkey_field_in_range(src, key_lo, key_hi, 11, 0x1f) &&
+                       colorkey_field_in_range(src, key_lo, key_hi, 5, 0x3f) &&
+                       colorkey_field_in_range(src, key_lo, key_hi, 0, 0x1f);
+        }
+
+        if (color_format == COLORKEY_32)
+        {
+                return colorkey_field_in_range(src, key_lo, key_hi, 16, 0xff) &&
+                       colorkey_field_in_range(src, key_lo, key_hi, 8, 0xff) &&
+                       colorkey_field_in_range(src, key_lo, key_hi, 0, 0xff);
+        }
+
+        return 0;
+}
+
+/*
+ * Combine destination, source and pattern with a ternary raster operation.
+ *
+ * One of four ROP codes in banshee_blt.rops[] is chosen by the colour key
+ * results: index bit 1 is the source key test, index bit 0 the destination
+ * key test.  Bit n of the chosen ROP enables the minterm in which pattern,
+ * src and dest appear true or inverted according to bits 2, 1 and 0 of n.
+ *
+ * Returns the OR of all enabled minterms.
+ */
+static uint32_t MIX(voodoo_t *voodoo, uint32_t dest, uint32_t src, uint32_t pattern, int colour_format_src, int colour_format_dest)
+{
+        int rop_index = 0;
+        uint32_t rop_code;
+        uint32_t mixed = 0;
+        int term;
+
+        if (colorkey(voodoo, src, 1, colour_format_src))
+                rop_index |= 2;
+        if (colorkey(voodoo, dest, 0, colour_format_dest))
+                rop_index |= 1;
+
+        rop_code = voodoo->banshee_blt.rops[rop_index];
+
+        for (term = 0; term < 8; term++)
+        {
+                uint32_t pat_term, src_term, dest_term;
+
+                if (!(rop_code & (1u << term)))
+                        continue;
+
+                pat_term = (term & 4) ? pattern : ~pattern;
+                src_term = (term & 2) ? src : ~src;
+                dest_term = (term & 1) ? dest : ~dest;
+
+                mixed |= pat_term & src_term & dest_term;
+        }
+
+        return mixed;
+}
+
+/*
+ * Translate a byte column / row pair into a video memory address.
+ *
+ * x          - horizontal offset in bytes
+ * y          - row number
+ * src_notdst - non-zero: use the source base address, tiling flag and
+ *              src_stride; zero: use the destination base address, tiling
+ *              flag and banshee_blt.dst_stride
+ * src_stride - stride for the source surface (unused for destinations)
+ *
+ * Tiled surfaces are addressed in tiles 128 bytes wide and 32 rows high,
+ * stored as 128*32 consecutive bytes, with stride applied once per row of
+ * tiles.  The result is masked with voodoo->fb_mask.
+ */
+static uint32_t get_addr(voodoo_t *voodoo, int x, int y, int src_notdst, uint32_t src_stride)
+{
+        uint32_t stride, base_addr;
+        int tiled;
+        uint32_t offset;
+
+        if (src_notdst)
+        {
+                stride = src_stride;
+                base_addr = voodoo->banshee_blt.srcBaseAddr;
+                tiled = voodoo->banshee_blt.srcBaseAddr_tiled ? 1 : 0;
+        }
+        else
+        {
+                stride = voodoo->banshee_blt.dst_stride;
+                base_addr = voodoo->banshee_blt.dstBaseAddr;
+                tiled = voodoo->banshee_blt.dstBaseAddr_tiled ? 1 : 0;
+        }
+
+        if (!tiled)
+                return (base_addr + x + y*stride) & voodoo->fb_mask;
+
+        offset = base_addr;
+        offset += (x & 127);             /*byte within the tile row*/
+        offset += ((x >> 7) * 128*32);   /*which tile across*/
+        offset += ((y & 31) * 128);      /*row within the tile*/
+        offset += (y >> 5)*stride;       /*which row of tiles*/
+
+        return offset & voodoo->fb_mask;
+}
+
+/*
+ * Write one pixel at (x, y) of the destination surface.
+ *
+ * The pattern colour is either colorFore/colorBack, selected by the bit of
+ * pattern_mask for column pat_x (mono patterns, bit 7 is column 0), or the
+ * entry of the 8x8 colour pattern table for (pat_x, pat_y).  It is combined
+ * with src and the current destination pixel by MIX(), the result is stored
+ * and the touched 4 KB page is stamped with changeframecount.
+ *
+ * src_colorkey - colour key format of src, passed on to MIX()
+ * rop          - not used by this function
+ *
+ * Nothing happens when the destination format is not 8, 16, 24 or 32 bpp.
+ * 24 bpp pixels are read and written as 32-bit words with the top byte of
+ * the existing word kept.
+ */
+static void PLOT(voodoo_t *voodoo, int x, int y, int pat_x, int pat_y, uint8_t pattern_mask, uint8_t rop, uint32_t src, int src_colorkey)
+{
+        const int mono_pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) != 0;
+        const int pat_index = (pat_x & 7) + (pat_y & 7)*8;
+        uint32_t mono_colour = 0;
+        uint32_t addr, dest, pattern;
+
+        if (mono_pattern)
+        {
+                if (pattern_mask & (1 << (7-(pat_x & 7))))
+                        mono_colour = voodoo->banshee_blt.colorFore;
+                else
+                        mono_colour = voodoo->banshee_blt.colorBack;
+        }
+
+        switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK)
+        {
+                case DST_FORMAT_COL_8_BPP:
+                addr = get_addr(voodoo, x, y, 0, 0);
+                dest = voodoo->vram[addr];
+                pattern = mono_pattern ? mono_colour : voodoo->banshee_blt.colorPattern8[pat_index];
+                voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_8);
+                break;
+
+                case DST_FORMAT_COL_16_BPP:
+                addr = get_addr(voodoo, x*2, y, 0, 0);
+                dest = *(uint16_t *)&voodoo->vram[addr];
+                pattern = mono_pattern ? mono_colour : voodoo->banshee_blt.colorPattern16[pat_index];
+                *(uint16_t *)&voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_16);
+                break;
+
+                case DST_FORMAT_COL_24_BPP:
+                addr = get_addr(voodoo, x*3, y, 0, 0);
+                dest = *(uint32_t *)&voodoo->vram[addr];
+                pattern = mono_pattern ? mono_colour : voodoo->banshee_blt.colorPattern24[pat_index];
+                *(uint32_t *)&voodoo->vram[addr] = (MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_32) & 0xffffff) | (dest & 0xff000000);
+                break;
+
+                case DST_FORMAT_COL_32_BPP:
+                addr = get_addr(voodoo, x*4, y, 0, 0);
+                dest = *(uint32_t *)&voodoo->vram[addr];
+                pattern = mono_pattern ? mono_colour : voodoo->banshee_blt.colorPattern[pat_index];
+                *(uint32_t *)&voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_32);
+                break;
+
+                default:
+                return;
+        }
+
+        voodoo->changedvram[addr >> 12] = changeframecount;
+}
+
+/*
+ * Plot one pixel of a line at (x, y) on the 2D destination surface.
+ *
+ * The source operand given to MIX() is always colorFore; the caller supplies
+ * the pattern operand directly. rop is not referenced here. A dstFormat that
+ * is none of 8/16/24/32 bpp writes nothing.
+ */
+static void PLOT_LINE(voodoo_t *voodoo, int x, int y, uint8_t rop, uint32_t pattern, int src_colorkey)
+{
+    const uint32_t fore = voodoo->banshee_blt.colorFore;
+    uint32_t addr, dest;
+
+    switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK)
+    {
+        case DST_FORMAT_COL_8_BPP:
+            addr = get_addr(voodoo, x, y, 0, 0);
+            dest = voodoo->vram[addr];
+            voodoo->vram[addr] = MIX(voodoo, dest, fore, pattern, src_colorkey, COLORKEY_8);
+            break;
+
+        case DST_FORMAT_COL_16_BPP:
+            addr = get_addr(voodoo, x*2, y, 0, 0);
+            dest = *(uint16_t *)&voodoo->vram[addr];
+            *(uint16_t *)&voodoo->vram[addr] = MIX(voodoo, dest, fore, pattern, src_colorkey, COLORKEY_16);
+            break;
+
+        case DST_FORMAT_COL_24_BPP:
+            addr = get_addr(voodoo, x*3, y, 0, 0);
+            dest = *(uint32_t *)&voodoo->vram[addr];
+            /*The access is 32 bits wide; the top byte is written back unchanged*/
+            *(uint32_t *)&voodoo->vram[addr] = (MIX(voodoo, dest, fore, pattern, src_colorkey, COLORKEY_32) & 0xffffff) | (dest & 0xff000000);
+            break;
+
+        case DST_FORMAT_COL_32_BPP:
+            addr = get_addr(voodoo, x*4, y, 0, 0);
+            dest = *(uint32_t *)&voodoo->vram[addr];
+            *(uint32_t *)&voodoo->vram[addr] = MIX(voodoo, dest, fore, pattern, src_colorkey, COLORKEY_32);
+            break;
+
+        default:
+            return;
+    }
+
+    /*Flag the 4 KiB region containing the pixel as modified this frame*/
+    voodoo->changedvram[addr >> 12] = changeframecount;
+}
+
+/*
+ * Recompute the derived source row sizes from srcFormat, srcSizeX and dstSizeX.
+ *
+ * Writes src_stride_src / src_stride_dest (bytes per source row) and
+ * host_data_size_src / host_data_size_dest (bytes of pixel data per row).
+ * The "_src" values are sized from srcSizeX, the "_dest" values from dstSizeX.
+ *
+ * With stride packing the row pitch is the programmed src_stride and only the
+ * data size depends on the width. With byte, word or dword packing each row is
+ * the pixel data rounded up to 1, 2 or 4 bytes and the data size equals the
+ * pitch. An unrecognised colour format is treated as 16 bpp.
+ */
+static void update_src_stride(voodoo_t *voodoo)
+{
+    const uint32_t colour_format = voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK;
+    int bpp;
+    int unit_shift; /*log2 of the row alignment, in bits*/
+    int round_up, unit_bytes;
+
+    if (colour_format == SRC_FORMAT_COL_1_BPP)
+        bpp = 1;
+    else if (colour_format == SRC_FORMAT_COL_8_BPP)
+        bpp = 8;
+    else if (colour_format == SRC_FORMAT_COL_16_BPP)
+        bpp = 16;
+    else if (colour_format == SRC_FORMAT_COL_24_BPP)
+        bpp = 24;
+    else if (colour_format == SRC_FORMAT_COL_32_BPP)
+        bpp = 32;
+    else
+        bpp = 16;
+
+    switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK)
+    {
+        case SRC_FORMAT_PACKING_STRIDE:
+            voodoo->banshee_blt.src_stride_src = voodoo->banshee_blt.src_stride;
+            voodoo->banshee_blt.src_stride_dest = voodoo->banshee_blt.src_stride;
+            voodoo->banshee_blt.host_data_size_src = (voodoo->banshee_blt.srcSizeX * bpp + 7) >> 3;
+            voodoo->banshee_blt.host_data_size_dest = (voodoo->banshee_blt.dstSizeX * bpp + 7) >> 3;
+            return;
+
+        case SRC_FORMAT_PACKING_BYTE:
+            unit_shift = 3;
+            break;
+        case SRC_FORMAT_PACKING_WORD:
+            unit_shift = 4;
+            break;
+        case SRC_FORMAT_PACKING_DWORD:
+            unit_shift = 5;
+            break;
+
+        default:
+            return;
+    }
+
+    /*Round the row's bit count up to the packing unit, then convert units to bytes*/
+    round_up = (1 << unit_shift) - 1;
+    unit_bytes = 1 << (unit_shift - 3);
+
+    voodoo->banshee_blt.src_stride_src = ((voodoo->banshee_blt.srcSizeX * bpp + round_up) >> unit_shift) * unit_bytes;
+    voodoo->banshee_blt.src_stride_dest = ((voodoo->banshee_blt.dstSizeX * bpp + round_up) >> unit_shift) * unit_bytes;
+    voodoo->banshee_blt.host_data_size_src = voodoo->banshee_blt.src_stride_src;
+    voodoo->banshee_blt.host_data_size_dest = voodoo->banshee_blt.src_stride_dest;
+}
+
+/*
+ * Finish a 2D command by writing the current destination position back into
+ * the dstXY register.
+ *
+ * When COMMAND_INC_X_START is set the low 16 bits of dstXY are replaced with
+ * dstX; when COMMAND_INC_Y_START is set the high 16 bits are replaced with
+ * dstY. Without either flag dstXY is left alone.
+ */
+static void end_command(voodoo_t *voodoo)
+{
+    /*Update dest coordinates if required*/
+    if (voodoo->banshee_blt.command & COMMAND_INC_X_START)
+    {
+        voodoo->banshee_blt.dstXY &= 0xffff0000u;
+        voodoo->banshee_blt.dstXY |= ((uint32_t)voodoo->banshee_blt.dstX & 0xffff);
+    }
+
+    if (voodoo->banshee_blt.command & COMMAND_INC_Y_START)
+    {
+        voodoo->banshee_blt.dstXY &= 0x0000ffffu;
+        /*Shift as unsigned: left-shifting a negative signed value is undefined*/
+        voodoo->banshee_blt.dstXY |= ((uint32_t)voodoo->banshee_blt.dstY << 16);
+    }
+}
+
+/*
+ * Fill a dstSizeX by dstSizeY rectangle starting at (dstX, dstY).
+ *
+ * Every pixel is drawn through PLOT() with colorFore as the source colour.
+ * COMMAND_DX / COMMAND_DY select a negative step on the respective axis.
+ * Pixels outside the selected clip rectangle are skipped, as are pixels whose
+ * monochrome pattern bit is clear when both COMMAND_PATTERN_MONO and
+ * COMMAND_TRANS_MONO are set. CMDEXTRA_FORCE_PAT_ROW0 pins the pattern row
+ * to 0. cur_x / cur_y in banshee_blt serve as the loop counters, and
+ * end_command() is called once the rectangle is done.
+ */
+static void banshee_do_rectfill(voodoo_t *voodoo)
+{
+    clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0];
+    const uint8_t *mono_rows = (uint8_t *)voodoo->banshee_blt.colorPattern;
+    const int force_row0 = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) != 0;
+    const int mono_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) ==
+                           (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO);
+    const uint8_t rop = voodoo->banshee_blt.command >> 24;
+    const int x_step = (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1;
+    const int y_step = (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1;
+    int dst_y = voodoo->banshee_blt.dstY;
+    int pat_y = force_row0 ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY);
+
+    voodoo->banshee_blt.cur_y = 0;
+    while (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+    {
+        if (dst_y >= clip->y_min && dst_y < clip->y_max)
+        {
+            int dst_x = voodoo->banshee_blt.dstX;
+            int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX;
+            uint8_t pattern_mask = mono_rows[pat_y & 7];
+
+            for (voodoo->banshee_blt.cur_x = 0;
+                 voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX;
+                 voodoo->banshee_blt.cur_x++, dst_x += x_step, pat_x += x_step)
+            {
+                if (dst_x < clip->x_min || dst_x >= clip->x_max)
+                    continue;
+                /*Transparent mono pattern: a clear bit leaves the pixel untouched*/
+                if (mono_trans && !(pattern_mask & (0x80 >> (pat_x & 7))))
+                    continue;
+
+                PLOT(voodoo, dst_x, dst_y, pat_x, pat_y, pattern_mask, rop, voodoo->banshee_blt.colorFore, COLORKEY_32);
+            }
+        }
+
+        dst_y += y_step;
+        if (!force_row0)
+            pat_y += y_step;
+        voodoo->banshee_blt.cur_y++;
+    }
+
+    end_command(voodoo);
+}
+
+/*
+ * Copy one row of source pixels to destination row dstY, then step srcY and
+ * dstY by one in the COMMAND_DY direction.
+ *
+ * voodoo     - device state; blit registers are read from banshee_blt
+ * src_p      - start of the source row (VRAM or buffered host data)
+ * use_x_dir  - non-zero: honour COMMAND_DX for the X step; zero: always +1
+ * src_x      - first source pixel index within the row
+ * src_tiled  - non-zero: remap the source byte offset into 128-byte wide,
+ *              32-row tiles
+ *
+ * When source and destination colour formats match, the raw source value is
+ * plotted with the colour key format of that depth. Otherwise the source
+ * pixel is expanded first (1 bpp picks colorFore/colorBack, 16 bpp is widened
+ * to 8 bits per channel) and repacked to 5:6:5 for a 16 bpp destination.
+ * Pixels outside the clip rectangle, or masked by a transparent monochrome
+ * pattern, are skipped. dstSizeX pixels are processed with cur_x as counter;
+ * the row is skipped entirely when dstY is outside the clip rectangle.
+ */
+static void do_screen_to_screen_line(voodoo_t *voodoo, uint8_t *src_p, int use_x_dir, int src_x, int src_tiled)
+{
+    clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0];
+    const int dst_y = voodoo->banshee_blt.dstY;
+    const int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY);
+    const uint8_t *mono_rows = (uint8_t *)voodoo->banshee_blt.colorPattern;
+    const int mono_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) ==
+                           (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO);
+    const uint8_t rop = voodoo->banshee_blt.command >> 24;
+    const int same_format = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) ==
+                            (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK);
+    const int y_step = (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1;
+    int x_step = 1;
+    int src_colorkey;
+
+    if (use_x_dir && (voodoo->banshee_blt.command & COMMAND_DX))
+        x_step = -1;
+
+    /*Colour key format used for converted source pixels*/
+    if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_8_BPP)
+        src_colorkey = COLORKEY_8;
+    else if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_16_BPP)
+        src_colorkey = COLORKEY_16;
+    else
+        src_colorkey = COLORKEY_32;
+
+    if (dst_y >= clip->y_min && dst_y < clip->y_max)
+    {
+        int dst_x = voodoo->banshee_blt.dstX;
+        int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX;
+        uint8_t pattern_mask = mono_rows[pat_y & 7];
+
+        for (voodoo->banshee_blt.cur_x = 0;
+             voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX;
+             voodoo->banshee_blt.cur_x++, src_x += x_step, dst_x += x_step, pat_x += x_step)
+        {
+            int src_off;
+            uint32_t src_data = 0;
+
+            if (dst_x < clip->x_min || dst_x >= clip->x_max)
+                continue;
+            /*Transparent mono pattern: a clear bit leaves the pixel untouched*/
+            if (mono_trans && !(pattern_mask & (0x80 >> (pat_x & 7))))
+                continue;
+
+            /*Byte offset of the source pixel within its row*/
+            src_off = (src_x * voodoo->banshee_blt.src_bpp) >> 3;
+            if (src_tiled)
+                src_off = (src_off & 127) + ((src_off >> 7) * 128*32);
+
+            if (same_format)
+            {
+                /*No conversion required*/
+                int raw_colorkey;
+
+                switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK)
+                {
+                    case DST_FORMAT_COL_8_BPP:
+                        src_data = src_p[src_off];
+                        raw_colorkey = COLORKEY_8;
+                        break;
+                    case DST_FORMAT_COL_16_BPP:
+                        src_data = *(uint16_t *)&src_p[src_off];
+                        raw_colorkey = COLORKEY_16;
+                        break;
+                    case DST_FORMAT_COL_24_BPP:
+                    case DST_FORMAT_COL_32_BPP:
+                        src_data = *(uint32_t *)&src_p[src_off];
+                        raw_colorkey = COLORKEY_32;
+                        break;
+                    default:
+                        /*Unhandled depth: nothing is drawn for this pixel*/
+                        continue;
+                }
+
+                PLOT(voodoo, dst_x, dst_y, pat_x, pat_y, pattern_mask, rop, src_data, raw_colorkey);
+            }
+            else
+            {
+                /*Conversion required*/
+                int transparent = 0;
+
+                switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK)
+                {
+                    case SRC_FORMAT_COL_1_BPP:
+                    {
+                        uint8_t src_byte = src_p[src_off];
+                        int bit_set = (src_byte & (0x80 >> (src_x & 7))) != 0;
+
+                        src_data = bit_set ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack;
+                        if (voodoo->banshee_blt.command & COMMAND_TRANS_MONO)
+                            transparent = !bit_set;
+                        break;
+                    }
+                    case SRC_FORMAT_COL_8_BPP:
+                        src_data = src_p[src_off];
+                        break;
+                    case SRC_FORMAT_COL_16_BPP:
+                    {
+                        /*Widen 5:6:5 to 8 bits per channel by replicating the top bits*/
+                        uint16_t src_16 = *(uint16_t *)&src_p[src_off];
+                        int r = (src_16 >> 11);
+                        int g = (src_16 >> 5) & 0x3f;
+                        int b = src_16 & 0x1f;
+
+                        r = (r << 3) | (r >> 2);
+                        g = (g << 2) | (g >> 4);
+                        b = (b << 3) | (b >> 2);
+                        src_data = (r << 16) | (g << 8) | b;
+                        break;
+                    }
+                    case SRC_FORMAT_COL_24_BPP:
+                    case SRC_FORMAT_COL_32_BPP:
+                        src_data = *(uint32_t *)&src_p[src_off];
+                        break;
+#ifndef RELEASE_BUILD
+                    default:
+                        fatal("banshee_do_screen_to_screen_blt: unknown srcFormat %08x\n", voodoo->banshee_blt.srcFormat);
+#endif
+                }
+
+                /*A 16 bpp destination takes 5:6:5; mono sources already carry register colours*/
+                if ((voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) == DST_FORMAT_COL_16_BPP &&
+                    (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) != SRC_FORMAT_COL_1_BPP)
+                {
+                    int r = src_data >> 16;
+                    int g = (src_data >> 8) & 0xff;
+                    int b = src_data & 0xff;
+
+                    src_data = (b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11);
+                }
+
+                if (!transparent)
+                    PLOT(voodoo, dst_x, dst_y, pat_x, pat_y, pattern_mask, rop, src_data, src_colorkey);
+            }
+        }
+    }
+
+    /*The row counters advance even when the row was clipped away*/
+    voodoo->banshee_blt.srcY += y_step;
+    voodoo->banshee_blt.dstY += y_step;
+}
+
+/*
+ * Run a complete VRAM-to-VRAM blit of dstSizeY rows.
+ *
+ * For each row the source address of row srcY is resolved with get_addr()
+ * using src_stride_dest as the pitch, and the row is drawn by
+ * do_screen_to_screen_line(), which also steps srcY and dstY.
+ * end_command() is called after the last row.
+ */
+static void banshee_do_screen_to_screen_blt(voodoo_t *voodoo)
+{
+    voodoo->banshee_blt.cur_y = 0;
+    while (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+    {
+        uint32_t row_addr = get_addr(voodoo, 0, voodoo->banshee_blt.srcY, 1, voodoo->banshee_blt.src_stride_dest);
+
+        do_screen_to_screen_line(voodoo, &voodoo->vram[row_addr], 1, voodoo->banshee_blt.srcX, voodoo->banshee_blt.srcBaseAddr_tiled);
+        voodoo->banshee_blt.cur_y++;
+    }
+
+    end_command(voodoo);
+}
+
+/*
+ * Accept one 32-bit word of host data for a host-to-screen blit.
+ *
+ * voodoo - device state; blit registers are read from banshee_blt
+ * count  - not referenced by this function
+ * data   - the host word; byte and/or word swizzled first when srcFormat asks
+ *
+ * Words are buffered in host_data. Each time a whole source row is buffered it
+ * is drawn with do_screen_to_screen_line() and cur_y is incremented;
+ * end_command() is called when cur_y reaches dstSizeY. Rows arriving after
+ * that are consumed but not drawn.
+ *
+ * Stride packing: every row starts on a fresh word, so the buffer is reset
+ * after each row and srcX is advanced by the stride field of srcFormat
+ * (in bits for 1 bpp sources).
+ * Other packings: rows are src_stride_dest bytes back to back, so one word can
+ * finish several rows and the unused tail of the word seeds the next row.
+ *
+ * NOTE(review): host_data_count is not checked against the capacity of
+ * host_data before the store - confirm the buffer covers the widest row.
+ */
+static void banshee_do_host_to_screen_blt(voodoo_t *voodoo, int count, uint32_t data)
+{
+    const int mono_src = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP;
+
+    if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_BYTE_SWIZZLE)
+        data = (data >> 24) | ((data >> 8) & 0xff00) | ((data << 8) & 0xff0000) | (data << 24);
+    if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_WORD_SWIZZLE)
+        data = (data >> 16) | (data << 16);
+
+    if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK) == SRC_FORMAT_PACKING_STRIDE)
+    {
+        int last_byte;
+
+        /*Number of buffered bytes needed before the row is complete*/
+        if (mono_src)
+            last_byte = ((voodoo->banshee_blt.srcX & 31) + voodoo->banshee_blt.dstSizeX + 7) >> 3;
+        else
+            last_byte = (voodoo->banshee_blt.srcX & 3) + voodoo->banshee_blt.host_data_size_dest;
+
+        *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data;
+        voodoo->banshee_blt.host_data_count += 4;
+        if (voodoo->banshee_blt.host_data_count < last_byte)
+            return;
+
+        if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+        {
+            if (mono_src)
+                do_screen_to_screen_line(voodoo, &voodoo->banshee_blt.host_data[(voodoo->banshee_blt.srcX >> 3) & 3], 0, voodoo->banshee_blt.srcX & 7, 0);
+            else
+                do_screen_to_screen_line(voodoo, &voodoo->banshee_blt.host_data[voodoo->banshee_blt.srcX & 3], 0, 0, 0);
+            voodoo->banshee_blt.cur_y++;
+            if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY)
+                end_command(voodoo);
+        }
+
+        if (mono_src)
+            voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) << 3;
+        else
+            voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK);
+
+        voodoo->banshee_blt.host_data_count = 0;
+        return;
+    }
+
+    if (voodoo->banshee_blt.src_stride_dest <= 0)
+    {
+        /*A row of zero bytes (e.g. dstSizeX == 0) can never drain the buffer:
+          the row loop below would spin forever. Such rows need no host data,
+          so finish the outstanding rows now and discard the word.*/
+        while (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+        {
+            do_screen_to_screen_line(voodoo, voodoo->banshee_blt.host_data, 0, 0, 0);
+            voodoo->banshee_blt.cur_y++;
+            if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY)
+                end_command(voodoo);
+        }
+        voodoo->banshee_blt.host_data_count = 0;
+        return;
+    }
+
+    *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data;
+    voodoo->banshee_blt.host_data_count += 4;
+    while (voodoo->banshee_blt.host_data_count >= voodoo->banshee_blt.src_stride_dest)
+    {
+        voodoo->banshee_blt.host_data_count -= voodoo->banshee_blt.src_stride_dest;
+
+        if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+        {
+            do_screen_to_screen_line(voodoo, voodoo->banshee_blt.host_data, 0, 0, 0);
+            voodoo->banshee_blt.cur_y++;
+            if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY)
+                end_command(voodoo);
+        }
+
+        /*Move the bytes of this word that belong to the next row to the front*/
+        if (voodoo->banshee_blt.host_data_count)
+            *(uint32_t *)&voodoo->banshee_blt.host_data[0] = data >> (4-voodoo->banshee_blt.host_data_count)*8;
+    }
+}
+
+/*
+ * Draw one destination row of a stretch blit and step the vertical state.
+ *
+ * voodoo - device state; blit registers are read from banshee_blt
+ * src_p  - start of the source row
+ * src_x  - first source pixel index; scaled here by the destination depth
+ * src_y  - optional source row counter to advance; may be NULL
+ *
+ * Horizontally, an error term seeded with dstSizeX/2 loses srcSizeX per
+ * destination pixel and the source index advances while it is negative.
+ * Vertically, bres_error_0 is treated the same way with srcSizeY / dstSizeY,
+ * advancing *src_y in the COMMAND_DY direction; dstY always moves one row.
+ * The destination always steps +1 in X. Source pixels are read at the
+ * destination depth; no colour format conversion takes place.
+ *
+ * NOTE(review): unlike PLOT(), every depth indexes the colorPattern table
+ * here rather than colorPattern8/16/24 - confirm this is intentional.
+ */
+static void do_screen_to_screen_stretch_line(voodoo_t *voodoo,uint8_t *src_p, int src_x, int *src_y)
+{
+    clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0];
+    const int dst_y = voodoo->banshee_blt.dstY;
+    const int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY);
+    const uint8_t *mono_rows = (uint8_t *)voodoo->banshee_blt.colorPattern;
+    const int mono_pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) != 0;
+    const int mono_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) ==
+                           (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO);
+    const uint32_t *colorPattern = voodoo->banshee_blt.colorPattern;
+    const int y_step = (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1;
+
+    if (dst_y >= clip->y_min && dst_y < clip->y_max)
+    {
+        int dst_x = voodoo->banshee_blt.dstX;
+        int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX;
+        uint8_t pattern_mask = mono_rows[pat_y & 7];
+        int error_x = voodoo->banshee_blt.dstSizeX / 2;
+
+        for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++)
+        {
+            const int pat_bit = pattern_mask & (0x80 >> (pat_x & 7));
+
+            if (dst_x >= clip->x_min && dst_x < clip->x_max && (!mono_trans || pat_bit))
+            {
+                uint32_t pattern;
+                uint32_t dst_addr, src, dest;
+
+                if (mono_pattern)
+                    pattern = pat_bit ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack;
+                else
+                    pattern = colorPattern[(pat_x & 7) + (pat_y & 7)*8];
+
+                switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK)
+                {
+                    case DST_FORMAT_COL_8_BPP:
+                        dst_addr = get_addr(voodoo, dst_x, dst_y, 0, 0);
+                        src = src_p[src_x];
+                        dest = voodoo->vram[dst_addr];
+                        voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_8, COLORKEY_8);
+                        voodoo->changedvram[dst_addr >> 12] = changeframecount;
+                        break;
+
+                    case DST_FORMAT_COL_16_BPP:
+                        dst_addr = get_addr(voodoo, dst_x*2, dst_y, 0, 0);
+                        src = *(uint16_t *)&src_p[src_x*2];
+                        dest = *(uint16_t *)&voodoo->vram[dst_addr];
+                        *(uint16_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_16, COLORKEY_16);
+                        voodoo->changedvram[dst_addr >> 12] = changeframecount;
+                        break;
+
+                    case DST_FORMAT_COL_24_BPP:
+                        dst_addr = get_addr(voodoo, dst_x*3, dst_y, 0, 0);
+                        src = *(uint32_t *)&src_p[src_x*3];
+                        dest = *(uint32_t *)&voodoo->vram[dst_addr];
+                        /*The access is 32 bits wide; the top byte is written back unchanged*/
+                        *(uint32_t *)&voodoo->vram[dst_addr] = (MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32) & 0xffffff) | (*(uint32_t *)&voodoo->vram[dst_addr] & 0xff000000);
+                        voodoo->changedvram[dst_addr >> 12] = changeframecount;
+                        break;
+
+                    case DST_FORMAT_COL_32_BPP:
+                        dst_addr = get_addr(voodoo, dst_x*4, dst_y, 0, 0);
+                        src = *(uint32_t *)&src_p[src_x*4];
+                        dest = *(uint32_t *)&voodoo->vram[dst_addr];
+                        *(uint32_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32);
+                        voodoo->changedvram[dst_addr >> 12] = changeframecount;
+                        break;
+                }
+            }
+
+            /*Horizontal scaling: step the source while the error term is negative*/
+            error_x -= voodoo->banshee_blt.srcSizeX;
+            while (error_x < 0)
+            {
+                error_x += voodoo->banshee_blt.dstSizeX;
+                src_x++;
+            }
+            dst_x++;
+            pat_x++;
+        }
+    }
+
+    /*Vertical scaling: same scheme, carried across calls in bres_error_0*/
+    voodoo->banshee_blt.bres_error_0 -= voodoo->banshee_blt.srcSizeY;
+    while (voodoo->banshee_blt.bres_error_0 < 0)
+    {
+        voodoo->banshee_blt.bres_error_0 += voodoo->banshee_blt.dstSizeY;
+        if (src_y)
+            (*src_y) += y_step;
+    }
+    voodoo->banshee_blt.dstY += y_step;
+}
+
+/*
+ * Run a complete VRAM-to-VRAM stretch blit of dstSizeY destination rows.
+ *
+ * For each destination row the address of source row srcY is resolved with
+ * get_addr() using src_stride_src as the pitch. The row is drawn by
+ * do_screen_to_screen_stretch_line(), which is given srcY to advance as the
+ * vertical scaling requires. end_command() is called after the last row.
+ */
+static void banshee_do_screen_to_screen_stretch_blt(voodoo_t *voodoo)
+{
+    voodoo->banshee_blt.cur_y = 0;
+    while (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+    {
+        uint32_t row_addr = get_addr(voodoo, 0, voodoo->banshee_blt.srcY, 1, voodoo->banshee_blt.src_stride_src);
+
+        do_screen_to_screen_stretch_line(voodoo, &voodoo->vram[row_addr], voodoo->banshee_blt.srcX, &voodoo->banshee_blt.srcY);
+        voodoo->banshee_blt.cur_y++;
+    }
+
+    end_command(voodoo);
+}
+
+/*
+ * Accept one 32-bit word of host data for a host-to-screen stretch blit.
+ *
+ * voodoo - device state; blit registers are read from banshee_blt
+ * count  - not referenced by this function
+ * data   - the host word; byte and/or word swizzled first when srcFormat asks
+ *
+ * Words are buffered in host_data. Each time a whole source row is buffered
+ * one destination row is drawn with do_screen_to_screen_stretch_line() and
+ * cur_y is incremented; end_command() is called when cur_y reaches dstSizeY.
+ * Rows arriving after that are consumed but not drawn.
+ *
+ * Stride packing: the buffer is reset after each row and srcX is advanced by
+ * the stride field of srcFormat (in bits for 1 bpp sources).
+ * Other packings: rows are src_stride_src bytes back to back, so one word can
+ * finish several rows and the unused tail of the word seeds the next row.
+ *
+ * NOTE(review): host_data_count is not checked against the capacity of
+ * host_data before the store - confirm the buffer covers the widest row.
+ */
+static void banshee_do_host_to_screen_stretch_blt(voodoo_t *voodoo, int count, uint32_t data)
+{
+    const int mono_src = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP;
+
+    if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_BYTE_SWIZZLE)
+        data = (data >> 24) | ((data >> 8) & 0xff00) | ((data << 8) & 0xff0000) | (data << 24);
+    if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_WORD_SWIZZLE)
+        data = (data >> 16) | (data << 16);
+
+    if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK) == SRC_FORMAT_PACKING_STRIDE)
+    {
+        /*Number of buffered bytes needed before the row is complete*/
+        int last_byte = (voodoo->banshee_blt.srcX & 3) + voodoo->banshee_blt.host_data_size_src;
+
+        *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data;
+        voodoo->banshee_blt.host_data_count += 4;
+        if (voodoo->banshee_blt.host_data_count < last_byte)
+            return;
+
+        if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+        {
+            if (mono_src)
+                do_screen_to_screen_stretch_line(voodoo, &voodoo->banshee_blt.host_data[(voodoo->banshee_blt.srcX >> 3) & 3], voodoo->banshee_blt.srcX & 7, NULL);
+            else
+                do_screen_to_screen_stretch_line(voodoo, &voodoo->banshee_blt.host_data[voodoo->banshee_blt.srcX & 3], 0, NULL);
+            voodoo->banshee_blt.cur_y++;
+            if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY)
+                end_command(voodoo);
+        }
+
+        if (mono_src)
+            voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) << 3;
+        else
+            voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK);
+
+        voodoo->banshee_blt.host_data_count = 0;
+        return;
+    }
+
+    if (voodoo->banshee_blt.src_stride_src <= 0)
+    {
+        /*A row of zero bytes (e.g. srcSizeX == 0) can never drain the buffer:
+          the row loop below would spin forever. Such rows need no host data,
+          so finish the outstanding rows now and discard the word.*/
+        while (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+        {
+            do_screen_to_screen_stretch_line(voodoo, voodoo->banshee_blt.host_data, 0, NULL);
+            voodoo->banshee_blt.cur_y++;
+            if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY)
+                end_command(voodoo);
+        }
+        voodoo->banshee_blt.host_data_count = 0;
+        return;
+    }
+
+    *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data;
+    voodoo->banshee_blt.host_data_count += 4;
+    while (voodoo->banshee_blt.host_data_count >= voodoo->banshee_blt.src_stride_src)
+    {
+        voodoo->banshee_blt.host_data_count -= voodoo->banshee_blt.src_stride_src;
+
+        if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+        {
+            do_screen_to_screen_stretch_line(voodoo, voodoo->banshee_blt.host_data, 0, NULL);
+            voodoo->banshee_blt.cur_y++;
+            if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY)
+                end_command(voodoo);
+        }
+
+        /*Move the bytes of this word that belong to the next row to the front*/
+        if (voodoo->banshee_blt.host_data_count)
+            *(uint32_t *)&voodoo->banshee_blt.host_data[0] = data >> (4-voodoo->banshee_blt.host_data_count)*8;
+    }
+}
+
+/*
+ * Advance the line stipple position by one pixel.
+ *
+ * line_pix_pos counts up to line_rep_cnt; when it is already there it wraps
+ * to 0 and line_bit_pos moves on by one, itself wrapping to 0 once it equals
+ * line_bit_mask_size.
+ */
+static void step_line(voodoo_t *voodoo)
+{
+    if (voodoo->banshee_blt.line_pix_pos != voodoo->banshee_blt.line_rep_cnt)
+    {
+        /*Still repeating the current stipple bit*/
+        voodoo->banshee_blt.line_pix_pos++;
+        return;
+    }
+
+    voodoo->banshee_blt.line_pix_pos = 0;
+    if (voodoo->banshee_blt.line_bit_pos != voodoo->banshee_blt.line_bit_mask_size)
+        voodoo->banshee_blt.line_bit_pos++;
+    else
+        voodoo->banshee_blt.line_bit_pos = 0;
+}
+
+/*
+ * Draw one line pixel at (x, y) using the stipple bit at line_bit_pos.
+ *
+ * A set bit selects colorFore as the pattern operand, a clear bit colorBack.
+ * With COMMAND_TRANS_MONO a clear bit suppresses the pixel. Pixels outside
+ * clip are dropped.
+ */
+static void banshee_line_plot_stippled(voodoo_t *voodoo, clip_t *clip, int x, int y, uint8_t rop, uint32_t stipple)
+{
+    /*Unsigned shift with a bounded count: "1 << 31" on a signed int is undefined*/
+    const uint32_t bit = stipple & ((uint32_t)1 << (voodoo->banshee_blt.line_bit_pos & 31));
+
+    if ((voodoo->banshee_blt.command & COMMAND_TRANS_MONO) && !bit)
+        return;
+
+    if (y >= clip->y_min && y < clip->y_max && x >= clip->x_min && x < clip->x_max)
+        PLOT_LINE(voodoo, x, y, rop, bit ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack, COLORKEY_32);
+}
+
+/*
+ * Draw a line from (srcX, srcY) towards (dstX, dstY).
+ *
+ * voodoo          - device state; blit registers are read from banshee_blt
+ * draw_last_pixel - non-zero to also plot the end point
+ *
+ * The line is walked along its major axis with an integer error term. The
+ * stipple is lineStipple when COMMAND_STIPPLE_LINE is set and all ones
+ * otherwise, and step_line() advances the stipple position after each pixel
+ * of the walk. On return srcX, srcY and srcXY hold the end position reached.
+ */
+static void banshee_do_line(voodoo_t *voodoo, int draw_last_pixel)
+{
+    clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0];
+    uint8_t rop = voodoo->banshee_blt.command >> 24;
+    int dx = ABS(voodoo->banshee_blt.dstX - voodoo->banshee_blt.srcX);
+    int dy = ABS(voodoo->banshee_blt.dstY - voodoo->banshee_blt.srcY);
+    int x_inc = (voodoo->banshee_blt.dstX > voodoo->banshee_blt.srcX) ? 1 : -1;
+    int y_inc = (voodoo->banshee_blt.dstY > voodoo->banshee_blt.srcY) ? 1 : -1;
+    int x = voodoo->banshee_blt.srcX;
+    int y = voodoo->banshee_blt.srcY;
+    int error;
+    uint32_t stipple = (voodoo->banshee_blt.command & COMMAND_STIPPLE_LINE) ?
+                       voodoo->banshee_blt.lineStipple : 0xffffffffu;
+
+    if (dx > dy) /*X major*/
+    {
+        error = dx/2;
+        while (x != voodoo->banshee_blt.dstX)
+        {
+            banshee_line_plot_stippled(voodoo, clip, x, y, rop, stipple);
+
+            error -= dy;
+            if (error < 0)
+            {
+                error += dx;
+                y += y_inc;
+            }
+            x += x_inc;
+            step_line(voodoo);
+        }
+    }
+    else /*Y major*/
+    {
+        error = dy/2;
+        while (y != voodoo->banshee_blt.dstY)
+        {
+            banshee_line_plot_stippled(voodoo, clip, x, y, rop, stipple);
+
+            error -= dx;
+            if (error < 0)
+            {
+                error += dy;
+                x += x_inc;
+            }
+            y += y_inc;
+            step_line(voodoo);
+        }
+    }
+
+    if (draw_last_pixel)
+        banshee_line_plot_stippled(voodoo, clip, x, y, rop, stipple);
+
+    /*Pack as unsigned: left-shifting a negative y is undefined*/
+    voodoo->banshee_blt.srcXY = ((uint32_t)x & 0xffff) | ((uint32_t)y << 16);
+    voodoo->banshee_blt.srcX = x;
+    voodoo->banshee_blt.srcY = y;
+}
+
+/*Prime the polygon fill state from the current source and destination points.
+  Both vertices of the left edge start at (srcX,srcY) and both vertices of the
+  right edge start at (dstX,dstY); the span cursors start at srcX and dstX.*/
+static void banshee_polyfill_start(voodoo_t *voodoo)
+{
+        /*Left edge*/
+        voodoo->banshee_blt.lx[0] = voodoo->banshee_blt.lx[1] = voodoo->banshee_blt.srcX;
+        voodoo->banshee_blt.ly[0] = voodoo->banshee_blt.ly[1] = voodoo->banshee_blt.srcY;
+
+        /*Right edge*/
+        voodoo->banshee_blt.rx[0] = voodoo->banshee_blt.rx[1] = voodoo->banshee_blt.dstX;
+        voodoo->banshee_blt.ry[0] = voodoo->banshee_blt.ry[1] = voodoo->banshee_blt.dstY;
+
+        /*Current span extents*/
+        voodoo->banshee_blt.lx_cur = voodoo->banshee_blt.srcX;
+        voodoo->banshee_blt.rx_cur = voodoo->banshee_blt.dstX;
+}
+
+/*Feed the next polygon vertex to a POLYFILL in progress.
+    voodoo - device state; edge data lives in banshee_blt.lx/ly/rx/ry etc.
+    data   - launch area word holding the vertex : signed 13-bit X in bits
+             0-12, signed 13-bit Y in bits 16-28
+  The vertex extends the left edge when the right edge currently reaches at
+  least as far down as the left one, otherwise the right edge. Spans are then
+  filled from the lower of the two edge start rows up to (not including) the
+  higher of the two edge end rows, and any edge that has been fully consumed
+  has its end vertex promoted to its start vertex.*/
+static void banshee_polyfill_continue(voodoo_t *voodoo, uint32_t data)
+{
+        clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0];
+        uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern;
+        int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) ==
+                        (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO);
+        uint8_t rop = voodoo->banshee_blt.command >> 24;
+        int y = MAX(voodoo->banshee_blt.ly[0], voodoo->banshee_blt.ry[0]);
+        int y_end;
+        /*Sign-extend the two 13-bit coordinate fields*/
+        int32_t vert_x = ((int32_t)(data << 19)) >> 19;
+        int32_t vert_y = ((int32_t)(data << 3)) >> 19;
+
+        if (voodoo->banshee_blt.ry[1] >= voodoo->banshee_blt.ly[1])
+        {
+                /*Right edge ends at or below the left edge : new vertex goes on the left*/
+                voodoo->banshee_blt.lx[1] = vert_x;
+                voodoo->banshee_blt.ly[1] = vert_y;
+                voodoo->banshee_blt.dx[0] = ABS(voodoo->banshee_blt.lx[1] - voodoo->banshee_blt.lx[0]);
+                voodoo->banshee_blt.dy[0] = ABS(voodoo->banshee_blt.ly[1] - voodoo->banshee_blt.ly[0]);
+                voodoo->banshee_blt.x_inc[0] = (voodoo->banshee_blt.lx[1] > voodoo->banshee_blt.lx[0]) ? 1 : -1;
+                voodoo->banshee_blt.error[0] = voodoo->banshee_blt.dy[0] / 2;
+        }
+        else
+        {
+                /*Otherwise it goes on the right*/
+                voodoo->banshee_blt.rx[1] = vert_x;
+                voodoo->banshee_blt.ry[1] = vert_y;
+                voodoo->banshee_blt.dx[1] = ABS(voodoo->banshee_blt.rx[1] - voodoo->banshee_blt.rx[0]);
+                voodoo->banshee_blt.dy[1] = ABS(voodoo->banshee_blt.ry[1] - voodoo->banshee_blt.ry[0]);
+                voodoo->banshee_blt.x_inc[1] = (voodoo->banshee_blt.rx[1] > voodoo->banshee_blt.rx[0]) ? 1 : -1;
+                voodoo->banshee_blt.error[1] = voodoo->banshee_blt.dy[1] / 2;
+        }
+
+        y_end = MIN(voodoo->banshee_blt.ly[1], voodoo->banshee_blt.ry[1]);
+        while (y < y_end)
+        {
+                /*Fill the span lx_cur..rx_cur (right end exclusive) on rows inside the clip rectangle*/
+                if (y >= clip->y_min && y < clip->y_max)
+                {
+                        int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + y);
+                        uint8_t pattern_mask = pattern_mono[pat_y & 7];
+                        int x = voodoo->banshee_blt.lx_cur;
+
+                        while (x < voodoo->banshee_blt.rx_cur)
+                        {
+                                int pat_x = voodoo->banshee_blt.patoff_x + x;
+                                int pattern_trans = 1;
+
+                                /*Transparent mono pattern : only pixels whose pattern bit is set are drawn*/
+                                if (use_pattern_trans)
+                                        pattern_trans = pattern_mask & (1 << (7-(pat_x & 7)));
+
+                                if (x >= clip->x_min && x < clip->x_max && pattern_trans)
+                                        PLOT(voodoo, x, y, pat_x, pat_y, pattern_mask, rop, voodoo->banshee_blt.colorFore, COLORKEY_32);
+
+                                x++;
+                        }
+                }
+
+                /*Step the left edge down one row*/
+                voodoo->banshee_blt.error[0] -= voodoo->banshee_blt.dx[0];
+                while (voodoo->banshee_blt.error[0] < 0)
+                {
+                        voodoo->banshee_blt.error[0] += voodoo->banshee_blt.dy[0];
+                        voodoo->banshee_blt.lx_cur += voodoo->banshee_blt.x_inc[0];
+                }
+                /*Step the right edge down one row*/
+                voodoo->banshee_blt.error[1] -= voodoo->banshee_blt.dx[1];
+                while (voodoo->banshee_blt.error[1] < 0)
+                {
+                        voodoo->banshee_blt.error[1] += voodoo->banshee_blt.dy[1];
+                        voodoo->banshee_blt.rx_cur += voodoo->banshee_blt.x_inc[1];
+                }
+
+                y++;
+        }
+
+        /*Retire whichever edge(s) end on the row just reached : left when it ends
+          no lower than the right, right when it ends no lower than the left*/
+        if (voodoo->banshee_blt.ry[1] >= voodoo->banshee_blt.ly[1])
+        {
+                voodoo->banshee_blt.lx[0] = voodoo->banshee_blt.lx[1];
+                voodoo->banshee_blt.ly[0] = voodoo->banshee_blt.ly[1];
+        }
+        if (voodoo->banshee_blt.ry[1] <= voodoo->banshee_blt.ly[1])
+        {
+                voodoo->banshee_blt.rx[0] = voodoo->banshee_blt.rx[1];
+                voodoo->banshee_blt.ry[0] = voodoo->banshee_blt.ry[1];
+        }
+}
+
+/*Run the 2D command currently latched in banshee_blt.command.
+    voodoo - device state
+    count  - forwarded to the host-to-screen handlers only
+    data   - forwarded to the host-to-screen handlers only
+  LINE draws its final pixel, POLYLINE does not. Unknown commands are fatal
+  unless RELEASE_BUILD is defined, in which case they are ignored.*/
+static void banshee_do_2d_blit(voodoo_t *voodoo, int count, uint32_t data)
+{
+        switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK)
+        {
+                case COMMAND_CMD_NOP:
+                return;
+
+                case COMMAND_CMD_SCREEN_TO_SCREEN_BLT:
+                banshee_do_screen_to_screen_blt(voodoo);
+                return;
+
+                case COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT:
+                banshee_do_screen_to_screen_stretch_blt(voodoo);
+                return;
+
+                case COMMAND_CMD_HOST_TO_SCREEN_BLT:
+                banshee_do_host_to_screen_blt(voodoo, count, data);
+                return;
+
+                case COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT:
+                banshee_do_host_to_screen_stretch_blt(voodoo, count, data);
+                return;
+
+                case COMMAND_CMD_RECTFILL:
+                banshee_do_rectfill(voodoo);
+                return;
+
+                case COMMAND_CMD_LINE:
+                banshee_do_line(voodoo, 1);
+                return;
+
+                case COMMAND_CMD_POLYLINE:
+                banshee_do_line(voodoo, 0);
+                return;
+
+#ifndef RELEASE_BUILD
+                default:
+                fatal("banshee_do_2d_blit: unknown command=%08x\n", voodoo->banshee_blt.command);
+#endif
+        }
+}
+/*Handle a 32-bit write to the Banshee 2D register block.
+    voodoo - device state; everything is latched into voodoo->banshee_blt
+    addr   - register address, decoded as addr & 0x1fc
+    val    - value written
+  Most registers are stored raw and also decoded into ready-to-use fields.
+  Writing the command register (0x70) with COMMAND_INITIATE set, or writing
+  anywhere in the launch area (0x80-0xfc), executes the current command.
+  Undecoded addresses are fatal unless RELEASE_BUILD is defined.*/
+void voodoo_2d_reg_writel(voodoo_t *voodoo, uint32_t addr, uint32_t val)
+{
+// /*if ((addr & 0x1fc) != 0x80) */pclog("2D reg write %03x %08x\n", addr & 0x1fc, val);
+ switch (addr & 0x1fc)
+ {
+ case 0x08: /*clip0Min : X in bits 0-11, Y in bits 16-27*/
+ voodoo->banshee_blt.clip0Min = val;
+ voodoo->banshee_blt.clip[0].x_min = val & 0xfff;
+ voodoo->banshee_blt.clip[0].y_min = (val >> 16) & 0xfff;
+ break;
+ case 0x0c: /*clip0Max : same layout as clip0Min*/
+ voodoo->banshee_blt.clip0Max = val;
+ voodoo->banshee_blt.clip[0].x_max = val & 0xfff;
+ voodoo->banshee_blt.clip[0].y_max = (val >> 16) & 0xfff;
+ break;
+ case 0x10: /*dstBaseAddr : address in bits 0-23, bit 31 = tiled. The stride depends on the tiled flag so it is recomputed*/
+ voodoo->banshee_blt.dstBaseAddr = val & 0xffffff;
+ voodoo->banshee_blt.dstBaseAddr_tiled = val & 0x80000000;
+ if (voodoo->banshee_blt.dstBaseAddr_tiled)
+ voodoo->banshee_blt.dst_stride = (voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK) * 128*32;
+ else
+ voodoo->banshee_blt.dst_stride = voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK;
+// pclog("dstBaseAddr=%08x\n", val);
+ break;
+ case 0x14: /*dstFormat : stride field is scaled by 128*32 when the destination is tiled*/
+ voodoo->banshee_blt.dstFormat = val;
+ if (voodoo->banshee_blt.dstBaseAddr_tiled)
+ voodoo->banshee_blt.dst_stride = (voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK) * 128*32;
+ else
+ voodoo->banshee_blt.dst_stride = voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK;
+// pclog("dstFormat=%08x\n", val);
+ break;
+
+ case 0x18: /*srcColorkeyMin : low 24 bits kept*/
+ voodoo->banshee_blt.srcColorkeyMin = val & 0xffffff;
+ break;
+ case 0x1c: /*srcColorkeyMax : low 24 bits kept*/
+ voodoo->banshee_blt.srcColorkeyMax = val & 0xffffff;
+ break;
+ case 0x20: /*dstColorkeyMin : low 24 bits kept*/
+ voodoo->banshee_blt.dstColorkeyMin = val & 0xffffff;
+ break;
+ case 0x24: /*dstColorkeyMax : low 24 bits kept*/
+ voodoo->banshee_blt.dstColorkeyMax = val & 0xffffff;
+ break;
+
+ case 0x28: /*bresError0 : raw value plus its low 16 bits as bres_error_0*/
+ voodoo->banshee_blt.bresError0 = val;
+ voodoo->banshee_blt.bres_error_0 = val & 0xffff;
+ break;
+ case 0x2c: /*bresError1 : raw value plus its low 16 bits as bres_error_1*/
+ voodoo->banshee_blt.bresError1 = val;
+ voodoo->banshee_blt.bres_error_1 = val & 0xffff;
+ break;
+
+ case 0x30: /*rop : three 8-bit ROPs unpacked to rops[1..3]; rops[0] is set by the command register write*/
+ voodoo->banshee_blt.rop = val;
+ voodoo->banshee_blt.rops[1] = val & 0xff;
+ voodoo->banshee_blt.rops[2] = (val >> 8) & 0xff;
+ voodoo->banshee_blt.rops[3] = (val >> 16) & 0xff;
+// pclog("rop=%08x\n", val);
+ break;
+ case 0x34: /*srcBaseAddr : address in bits 0-23, bit 31 = tiled; source stride is recomputed*/
+ voodoo->banshee_blt.srcBaseAddr = val & 0xffffff;
+ voodoo->banshee_blt.srcBaseAddr_tiled = val & 0x80000000;
+ if (voodoo->banshee_blt.srcBaseAddr_tiled)
+ voodoo->banshee_blt.src_stride = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) * 128*32;
+ else
+ voodoo->banshee_blt.src_stride = voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK;
+ update_src_stride(voodoo);
+// pclog("srcBaseAddr=%08x\n", val);
+ break;
+ case 0x38: /*commandExtra*/
+ voodoo->banshee_blt.commandExtra = val;
+// pclog("commandExtra=%08x\n", val);
+ break;
+ case 0x3c: /*lineStipple*/
+ voodoo->banshee_blt.lineStipple = val;
+ break;
+ case 0x40: /*lineStyle : repeat count bits 0-7, bit mask size bits 8-12, pixel position bits 16-23, bit position bits 24-28*/
+ voodoo->banshee_blt.lineStyle = val;
+ voodoo->banshee_blt.line_rep_cnt = val & 0xff;
+ voodoo->banshee_blt.line_bit_mask_size = (val >> 8) & 0x1f;
+ voodoo->banshee_blt.line_pix_pos = (val >> 16) & 0xff;
+ voodoo->banshee_blt.line_bit_pos = (val >> 24) & 0x1f;
+ break;
+ case 0x44: /*Writes colorPattern[0] and refreshes the 24, 16 and 8 bit views of that dword*/
+ voodoo->banshee_blt.colorPattern[0] = val;
+// pclog("colorPattern0=%08x\n", val);
+ voodoo->banshee_blt.colorPattern24[0] = val & 0xffffff;
+ voodoo->banshee_blt.colorPattern24[1] = (voodoo->banshee_blt.colorPattern24[1] & 0xffff00) | (val >> 24); /*top byte is the low byte of 24-bit entry 1*/
+ voodoo->banshee_blt.colorPattern16[0] = val & 0xffff;
+ voodoo->banshee_blt.colorPattern16[1] = (val >> 16) & 0xffff;
+ voodoo->banshee_blt.colorPattern8[0] = val & 0xff;
+ voodoo->banshee_blt.colorPattern8[1] = (val >> 8) & 0xff;
+ voodoo->banshee_blt.colorPattern8[2] = (val >> 16) & 0xff;
+ voodoo->banshee_blt.colorPattern8[3] = (val >> 24) & 0xff;
+ break;
+ case 0x48: /*Writes colorPattern[1] and refreshes the 24, 16 and 8 bit views of that dword*/
+ voodoo->banshee_blt.colorPattern[1] = val;
+// pclog("colorPattern1=%08x\n", val);
+ voodoo->banshee_blt.colorPattern24[1] = (voodoo->banshee_blt.colorPattern24[1] & 0xff) | ((val & 0xffff) << 8); /*low 16 bits complete 24-bit entry 1*/
+ voodoo->banshee_blt.colorPattern24[2] = (voodoo->banshee_blt.colorPattern24[2] & 0xff0000) | (val >> 16); /*high 16 bits start 24-bit entry 2*/
+ voodoo->banshee_blt.colorPattern16[2] = val & 0xffff;
+ voodoo->banshee_blt.colorPattern16[3] = (val >> 16) & 0xffff;
+ voodoo->banshee_blt.colorPattern8[4] = val & 0xff;
+ voodoo->banshee_blt.colorPattern8[5] = (val >> 8) & 0xff;
+ voodoo->banshee_blt.colorPattern8[6] = (val >> 16) & 0xff;
+ voodoo->banshee_blt.colorPattern8[7] = (val >> 24) & 0xff;
+ break;
+ case 0x4c: /*clip1Min : X in bits 0-11, Y in bits 16-27*/
+ voodoo->banshee_blt.clip1Min = val;
+ voodoo->banshee_blt.clip[1].x_min = val & 0xfff;
+ voodoo->banshee_blt.clip[1].y_min = (val >> 16) & 0xfff;
+ break;
+ case 0x50: /*clip1Max : same layout as clip1Min*/
+ voodoo->banshee_blt.clip1Max = val;
+ voodoo->banshee_blt.clip[1].x_max = val & 0xfff;
+ voodoo->banshee_blt.clip[1].y_max = (val >> 16) & 0xfff;
+ break;
+ case 0x54: /*srcFormat : updates the source stride and decodes the colour depth into src_bpp*/
+ voodoo->banshee_blt.srcFormat = val;
+ if (voodoo->banshee_blt.srcBaseAddr_tiled)
+ voodoo->banshee_blt.src_stride = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) * 128*32;
+ else
+ voodoo->banshee_blt.src_stride = voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK;
+ update_src_stride(voodoo);
+ switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK)
+ {
+ case SRC_FORMAT_COL_1_BPP:
+ voodoo->banshee_blt.src_bpp = 1;
+ break;
+ case SRC_FORMAT_COL_8_BPP:
+ voodoo->banshee_blt.src_bpp = 8;
+ break;
+ case SRC_FORMAT_COL_24_BPP:
+ voodoo->banshee_blt.src_bpp = 24;
+ break;
+ case SRC_FORMAT_COL_32_BPP:
+ voodoo->banshee_blt.src_bpp = 32;
+ break;
+ case SRC_FORMAT_COL_16_BPP: default: /*any unrecognised format is treated as 16 bpp*/
+ voodoo->banshee_blt.src_bpp = 16;
+ break;
+ }
+// pclog("srcFormat=%08x\n", val);
+ break;
+ case 0x58: /*srcSize : 13-bit width in bits 0-12, 13-bit height in bits 16-28*/
+ voodoo->banshee_blt.srcSize = val;
+ voodoo->banshee_blt.srcSizeX = voodoo->banshee_blt.srcSize & 0x1fff;
+ voodoo->banshee_blt.srcSizeY = (voodoo->banshee_blt.srcSize >> 16) & 0x1fff;
+ update_src_stride(voodoo);
+// pclog("srcSize=%08x\n", val);
+ break;
+ case 0x5c: /*srcXY : X in bits 0-12 and Y in bits 16-28, both sign-extended from 13 bits*/
+ voodoo->banshee_blt.srcXY = val;
+ voodoo->banshee_blt.srcX = ((int32_t)(val << 19)) >> 19;
+ voodoo->banshee_blt.srcY = ((int32_t)(val << 3)) >> 19;
+ update_src_stride(voodoo);
+// pclog("srcXY=%08x\n", val);
+ break;
+ case 0x60: /*colorBack*/
+ voodoo->banshee_blt.colorBack = val;
+ break;
+ case 0x64: /*colorFore*/
+ voodoo->banshee_blt.colorFore = val;
+ break;
+ case 0x68: /*dstSize : 13-bit width in bits 0-12, 13-bit height in bits 16-28; also refreshes the source stride*/
+ voodoo->banshee_blt.dstSize = val;
+ voodoo->banshee_blt.dstSizeX = voodoo->banshee_blt.dstSize & 0x1fff;
+ voodoo->banshee_blt.dstSizeY = (voodoo->banshee_blt.dstSize >> 16) & 0x1fff;
+ update_src_stride(voodoo);
+// pclog("dstSize=%08x\n", val);
+ break;
+ case 0x6c: /*dstXY : X in bits 0-12 and Y in bits 16-28, both sign-extended from 13 bits*/
+ voodoo->banshee_blt.dstXY = val;
+ voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19;
+ voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19;
+// pclog("dstXY=%08x\n", val);
+ break;
+ case 0x70: /*command : waits for the render thread, latches the command, resets per-command state and runs it if COMMAND_INITIATE is set*/
+ voodoo_wait_for_render_thread_idle(voodoo);
+ voodoo->banshee_blt.command = val;
+ voodoo->banshee_blt.rops[0] = val >> 24;
+// pclog("command=%x %08x\n", voodoo->banshee_blt.command & COMMAND_CMD_MASK, val);
+ voodoo->banshee_blt.patoff_x = (val & COMMAND_PATOFF_X_MASK) >> COMMAND_PATOFF_X_SHIFT;
+ voodoo->banshee_blt.patoff_y = (val & COMMAND_PATOFF_Y_MASK) >> COMMAND_PATOFF_Y_SHIFT;
+ voodoo->banshee_blt.cur_x = 0;
+ voodoo->banshee_blt.cur_y = 0;
+ voodoo->banshee_blt.dstX = ((int32_t)(voodoo->banshee_blt.dstXY << 19)) >> 19; /*reload the working coordinates from the raw registers*/
+ voodoo->banshee_blt.dstY = ((int32_t)(voodoo->banshee_blt.dstXY << 3)) >> 19;
+ voodoo->banshee_blt.srcX = ((int32_t)(voodoo->banshee_blt.srcXY << 19)) >> 19;
+ voodoo->banshee_blt.srcY = ((int32_t)(voodoo->banshee_blt.srcXY << 3)) >> 19;
+ voodoo->banshee_blt.old_srcX = voodoo->banshee_blt.srcX;
+ voodoo->banshee_blt.host_data_remainder = 0;
+ voodoo->banshee_blt.host_data_count = 0;
+ switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK)
+ {
+/* case COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT:
+ if (voodoo->banshee_blt.bresError0 & BRES_ERROR_USE)
+ voodoo->banshee_blt.bres_error_0 = (int32_t)(int16_t)(voodoo->banshee_blt.bresError0 & BRES_ERROR_MASK);
+ else
+ voodoo->banshee_blt.bres_error_0 = voodoo->banshee_blt.dstSizeY / 2;
+ if (voodoo->banshee_blt.bresError1 & BRES_ERROR_USE)
+ voodoo->banshee_blt.bres_error_1 = (int32_t)(int16_t)(voodoo->banshee_blt.bresError1 & BRES_ERROR_MASK);
+ else
+ voodoo->banshee_blt.bres_error_1 = voodoo->banshee_blt.dstSizeX / 2;
+
+ if (val & COMMAND_INITIATE)
+ banshee_do_2d_blit(voodoo, -1, 0);
+ break;*/
+
+ case COMMAND_CMD_POLYFILL: /*POLYFILL only primes the edge state here; spans are drawn by launch area writes. With INITIATE the destination point is first set to the source point*/
+ if (val & COMMAND_INITIATE)
+ {
+ voodoo->banshee_blt.dstXY = voodoo->banshee_blt.srcXY;
+ voodoo->banshee_blt.dstX = voodoo->banshee_blt.srcX;
+ voodoo->banshee_blt.dstY = voodoo->banshee_blt.srcY;
+ }
+ banshee_polyfill_start(voodoo);
+ break;
+
+ default: /*every other command runs immediately when INITIATE is set; count of -1 means no host data accompanies it*/
+ if (val & COMMAND_INITIATE)
+ {
+ banshee_do_2d_blit(voodoo, -1, 0);
+ // fatal("Initiate command!\n");
+ }
+ break;
+ }
+ break;
+
+ case 0x80: case 0x84: case 0x88: case 0x8c: /*0x80-0xfc : launch area. The meaning of val depends on the current command*/
+ case 0x90: case 0x94: case 0x98: case 0x9c:
+ case 0xa0: case 0xa4: case 0xa8: case 0xac:
+ case 0xb0: case 0xb4: case 0xb8: case 0xbc:
+ case 0xc0: case 0xc4: case 0xc8: case 0xcc:
+ case 0xd0: case 0xd4: case 0xd8: case 0xdc:
+ case 0xe0: case 0xe4: case 0xe8: case 0xec:
+ case 0xf0: case 0xf4: case 0xf8: case 0xfc:
+// pclog("launch %08x %08x %08x %08x\n", voodoo->banshee_blt.command, voodoo->banshee_blt.commandExtra, voodoo->banshee_blt.srcColorkeyMin, voodoo->banshee_blt.srcColorkeyMax);
+ switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK)
+ {
+ case COMMAND_CMD_SCREEN_TO_SCREEN_BLT: /*val is a new srcXY*/
+ voodoo->banshee_blt.srcXY = val;
+ voodoo->banshee_blt.srcX = ((int32_t)(val << 19)) >> 19;
+ voodoo->banshee_blt.srcY = ((int32_t)(val << 3)) >> 19;
+ banshee_do_screen_to_screen_blt(voodoo);
+ break;
+
+ case COMMAND_CMD_HOST_TO_SCREEN_BLT: /*val is 32 bits of host pixel data*/
+ banshee_do_2d_blit(voodoo, 32, val);
+ break;
+
+ case COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT: /*val is 32 bits of host pixel data*/
+ banshee_do_2d_blit(voodoo, 32, val);
+ break;
+
+ case COMMAND_CMD_RECTFILL: /*val is a new dstXY*/
+ voodoo->banshee_blt.dstXY = val;
+ voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19;
+ voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19;
+ banshee_do_rectfill(voodoo);
+ break;
+
+ case COMMAND_CMD_LINE: /*val is the line end point; the last pixel is drawn*/
+ voodoo->banshee_blt.dstXY = val;
+ voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19;
+ voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19;
+ banshee_do_line(voodoo, 1);
+ break;
+
+ case COMMAND_CMD_POLYLINE: /*val is the segment end point; the last pixel is not drawn*/
+ voodoo->banshee_blt.dstXY = val;
+ voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19;
+ voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19;
+ banshee_do_line(voodoo, 0);
+ break;
+
+ case COMMAND_CMD_POLYFILL: /*val is the next polygon vertex*/
+ banshee_polyfill_continue(voodoo, val);
+ break;
+
+#ifndef RELEASE_BUILD
+ default:
+ fatal("launch area write, command=%08x\n", voodoo->banshee_blt.command);
+#endif
+ }
+ break;
+
+ case 0x100: case 0x104: case 0x108: case 0x10c: /*0x100-0x1fc : the 64 colorPattern dwords, indexed by (addr >> 2) & 63*/
+ case 0x110: case 0x114: case 0x118: case 0x11c:
+ case 0x120: case 0x124: case 0x128: case 0x12c:
+ case 0x130: case 0x134: case 0x138: case 0x13c:
+ case 0x140: case 0x144: case 0x148: case 0x14c:
+ case 0x150: case 0x154: case 0x158: case 0x15c:
+ case 0x160: case 0x164: case 0x168: case 0x16c:
+ case 0x170: case 0x174: case 0x178: case 0x17c:
+ case 0x180: case 0x184: case 0x188: case 0x18c:
+ case 0x190: case 0x194: case 0x198: case 0x19c:
+ case 0x1a0: case 0x1a4: case 0x1a8: case 0x1ac:
+ case 0x1b0: case 0x1b4: case 0x1b8: case 0x1bc:
+ case 0x1c0: case 0x1c4: case 0x1c8: case 0x1cc:
+ case 0x1d0: case 0x1d4: case 0x1d8: case 0x1dc:
+ case 0x1e0: case 0x1e4: case 0x1e8: case 0x1ec:
+ case 0x1f0: case 0x1f4: case 0x1f8: case 0x1fc:
+ voodoo->banshee_blt.colorPattern[(addr >> 2) & 63] = val;
+ if ((addr & 0x1fc) < 0x1c0) /*first 48 dwords also feed the 24-bit view : each group of 3 dwords holds 4 packed 24-bit entries, and the whole group is re-unpacked*/
+ {
+ int base_addr = (addr & 0xfc) / 0xc;
+ uintptr_t src_p = (uintptr_t)&voodoo->banshee_blt.colorPattern[base_addr * 3];
+ int col24 = base_addr * 4;
+
+ voodoo->banshee_blt.colorPattern24[col24] = *(uint32_t *)src_p & 0xffffff;
+ voodoo->banshee_blt.colorPattern24[col24 + 1] = *(uint32_t *)(src_p + 3) & 0xffffff; /*NOTE(review): the loads at +3, +6 and +9 are unaligned 32-bit reads and the & 0xffffff only picks the right bytes on a little-endian host - confirm this is acceptable for every target*/
+ voodoo->banshee_blt.colorPattern24[col24 + 2] = *(uint32_t *)(src_p + 6) & 0xffffff;
+ voodoo->banshee_blt.colorPattern24[col24 + 3] = *(uint32_t *)(src_p + 9) & 0xffffff; /*reads one byte into the following dword; the mask discards it*/
+ }
+ if ((addr & 0x1fc) < 0x180) /*first 32 dwords also feed the 16-bit view, two entries per dword*/
+ {
+ voodoo->banshee_blt.colorPattern16[(addr >> 1) & 62] = val & 0xffff;
+ voodoo->banshee_blt.colorPattern16[((addr >> 1) & 62) + 1] = (val >> 16) & 0xffff;
+ }
+ if ((addr & 0x1fc) < 0x140) /*first 16 dwords also feed the 8-bit view, four entries per dword*/
+ {
+ voodoo->banshee_blt.colorPattern8[addr & 60] = val & 0xff;
+ voodoo->banshee_blt.colorPattern8[(addr & 60) + 1] = (val >> 8) & 0xff;
+ voodoo->banshee_blt.colorPattern8[(addr & 60) + 2] = (val >> 16) & 0xff;
+ voodoo->banshee_blt.colorPattern8[(addr & 60) + 3] = (val >> 24) & 0xff;
+ }
+// pclog("colorPattern%02x=%08x\n", (addr >> 2) & 63, val);
+ break;
+
+#ifndef RELEASE_BUILD
+ default:
+ fatal("Unknown 2D reg write %03x %08x\n", addr & 0x1fc, val);
+#endif
+ }
+}
--- /dev/null
+void voodoo_2d_reg_writel(voodoo_t *voodoo, uint32_t addr, uint32_t val); /*Write the 32-bit value val to the Banshee 2D register selected by addr & 0x1fc; command and launch area writes may execute a 2D operation*/
--- /dev/null
+#include <math.h>
+#include <stddef.h>
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_blitter.h"
+#include "vid_voodoo_dither.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+/*Blit operation, compared against bltCommand & BLIT_COMMAND_MASK*/
+enum
+{
+ BLIT_COMMAND_SCREEN_TO_SCREEN = 0,
+ BLIT_COMMAND_CPU_TO_SCREEN = 1,
+ BLIT_COMMAND_RECT_FILL = 2,
+ BLIT_COMMAND_SGRAM_FILL = 3
+};
+/*Host data format for CPU to screen blits, compared against bltCommand & BLIT_SRC_FORMAT*/
+enum
+{
+ BLIT_SRC_1BPP = (0 << 3),
+ BLIT_SRC_1BPP_BYTE_PACKED = (1 << 3),
+ BLIT_SRC_16BPP = (2 << 3),
+ BLIT_SRC_24BPP = (3 << 3),
+ BLIT_SRC_24BPP_DITHER_2X2 = (4 << 3),
+ BLIT_SRC_24BPP_DITHER_4X4 = (5 << 3)
+};
+/*Host data channel order, compared against bltCommand & BLIT_SRC_RGB_FORMAT*/
+enum
+{
+ BLIT_SRC_RGB_ARGB = (0 << 6),
+ BLIT_SRC_RGB_ABGR = (1 << 6),
+ BLIT_SRC_RGB_RGBA = (2 << 6),
+ BLIT_SRC_RGB_BGRA = (3 << 6)
+};
+/*Field masks and single-bit flags within bltCommand*/
+enum
+{
+ BLIT_COMMAND_MASK = 7, /*bits 0-2 : operation*/
+ BLIT_SRC_FORMAT = (7 << 3), /*bits 3-5 : host data format*/
+ BLIT_SRC_RGB_FORMAT = (3 << 6), /*bits 6-7 : host data channel order*/
+ BLIT_SRC_CHROMA = (1 << 10), /*test source pixels against the source chroma range*/
+ BLIT_DST_CHROMA = (1 << 12), /*test destination pixels against the destination chroma range*/
+ BLIT_CLIPPING_ENABLED = (1 << 16) /*apply the bltClip* rectangle*/
+};
+/*Bits of the index into bltRop[], set when the matching chroma range test passes*/
+enum
+{
+ BLIT_ROP_DST_PASS = (1 << 0),
+ BLIT_ROP_SRC_PASS = (1 << 1)
+};
+
+#define MIX(src_dat, dst_dat, rop) \
+ switch (rop) \
+ { \
+ case 0x0: dst_dat = 0; break; \
+ case 0x1: dst_dat = ~(src_dat | dst_dat); break; \
+ case 0x2: dst_dat = ~src_dat & dst_dat; break; \
+ case 0x3: dst_dat = ~src_dat; break; \
+ case 0x4: dst_dat = src_dat & ~dst_dat; break; \
+ case 0x5: dst_dat = ~dst_dat; break; \
+ case 0x6: dst_dat = src_dat ^ dst_dat; break; \
+ case 0x7: dst_dat = ~(src_dat & dst_dat); break; \
+ case 0x8: dst_dat = src_dat & dst_dat; break; \
+ case 0x9: dst_dat = ~(src_dat ^ dst_dat); break; \
+ case 0xa: dst_dat = dst_dat; break; \
+ case 0xb: dst_dat = ~src_dat | dst_dat; break; \
+ case 0xc: dst_dat = src_dat; break; \
+ case 0xd: dst_dat = src_dat | ~dst_dat; break; \
+ case 0xe: dst_dat = src_dat | dst_dat; break; \
+ case 0xf: dst_dat = 0xffff; break; \
+ }
+
+/*Start the Voodoo 2 blit described by the blt* registers.
+    voodoo - device state
+  The render thread is drained first. The operation in bltCommand selects :
+    SCREEN_TO_SCREEN - copy a rectangle, applying clipping, chroma tests and ROP
+    CPU_TO_SCREEN    - only set up voodoo->blt; pixels arrive later through
+                       voodoo_v2_blit_data()
+    RECT_FILL        - fill a rectangle with bltColorFg, honouring clipping
+    SGRAM_FILL       - fill runs of 64-bit words with bltColorFg replicated
+  Sizes are inclusive : a size of N covers N+1 pixels or rows. A negative size
+  walks in the negative direction. Any other operation is fatal.*/
+void voodoo_v2_blit_start(voodoo_t *voodoo)
+{
+        uint64_t dat64;
+        int size_x = ABS(voodoo->bltSizeX), size_y = ABS(voodoo->bltSizeY);
+        int x_dir = (voodoo->bltSizeX > 0) ? 1 : -1;
+        int y_dir = (voodoo->bltSizeY > 0) ? 1 : -1;
+        int src_y = voodoo->bltSrcY & 0x7ff, dst_y = voodoo->bltDstY & 0x7ff;
+        /*Tiled surfaces give stride in tiles and base address in 4kb pages*/
+        int src_stride = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcXYStride & 0x3f) * 32*2) : (voodoo->bltSrcXYStride & 0xff8);
+        int dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8);
+        uint32_t src_base_addr = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcBaseAddr & 0x3ff) << 12) : (voodoo->bltSrcBaseAddr & 0x3ffff8);
+        uint32_t dst_base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8);
+        int x, y;
+
+        voodoo_wait_for_render_thread_idle(voodoo);
+
+        switch (voodoo->bltCommand & BLIT_COMMAND_MASK)
+        {
+                case BLIT_COMMAND_SCREEN_TO_SCREEN:
+                for (y = 0; y <= size_y; y++)
+                {
+                        uint16_t *src = (uint16_t *)&voodoo->fb_mem[src_base_addr + src_y*src_stride];
+                        uint16_t *dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + dst_y*dst_stride];
+                        int src_x = voodoo->bltSrcX, dst_x = voodoo->bltDstX;
+
+                        for (x = 0; x <= size_x; x++)
+                        {
+                                int clipped = (voodoo->bltCommand & BLIT_CLIPPING_ENABLED) &&
+                                              (dst_x < voodoo->bltClipLeft || dst_x >= voodoo->bltClipRight ||
+                                               dst_y < voodoo->bltClipLowY || dst_y >= voodoo->bltClipHighY);
+
+                                /*Pixels are only read once they are known to be inside the clip rectangle*/
+                                if (!clipped)
+                                {
+                                        uint16_t src_dat = src[src_x];
+                                        uint16_t dst_dat = dst[dst_x];
+                                        int rop = 0;
+
+                                        if (voodoo->bltCommand & BLIT_SRC_CHROMA)
+                                        {
+                                                int r = (src_dat >> 11);
+                                                int g = (src_dat >> 5) & 0x3f;
+                                                int b = src_dat & 0x1f;
+
+                                                if (r >= voodoo->bltSrcChromaMinR && r <= voodoo->bltSrcChromaMaxR &&
+                                                    g >= voodoo->bltSrcChromaMinG && g <= voodoo->bltSrcChromaMaxG &&
+                                                    b >= voodoo->bltSrcChromaMinB && b <= voodoo->bltSrcChromaMaxB)
+                                                        rop |= BLIT_ROP_SRC_PASS;
+                                        }
+                                        if (voodoo->bltCommand & BLIT_DST_CHROMA)
+                                        {
+                                                int r = (dst_dat >> 11);
+                                                int g = (dst_dat >> 5) & 0x3f;
+                                                int b = dst_dat & 0x1f;
+
+                                                if (r >= voodoo->bltDstChromaMinR && r <= voodoo->bltDstChromaMaxR &&
+                                                    g >= voodoo->bltDstChromaMinG && g <= voodoo->bltDstChromaMaxG &&
+                                                    b >= voodoo->bltDstChromaMinB && b <= voodoo->bltDstChromaMaxB)
+                                                        rop |= BLIT_ROP_DST_PASS;
+                                        }
+
+                                        /*The chroma results pick one of the four ROPs*/
+                                        MIX(src_dat, dst_dat, voodoo->bltRop[rop]);
+
+                                        dst[dst_x] = dst_dat;
+                                }
+
+                                src_x += x_dir;
+                                dst_x += x_dir;
+                        }
+
+                        src_y += y_dir;
+                        dst_y += y_dir;
+                }
+                break;
+
+                case BLIT_COMMAND_CPU_TO_SCREEN:
+                voodoo->blt.dst_x = voodoo->bltDstX;
+                voodoo->blt.dst_y = voodoo->bltDstY;
+                voodoo->blt.cur_x = 0;
+                voodoo->blt.size_x = size_x;
+                voodoo->blt.size_y = size_y;
+                voodoo->blt.x_dir = x_dir;
+                voodoo->blt.y_dir = y_dir;
+                voodoo->blt.dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8);
+                break;
+
+                case BLIT_COMMAND_RECT_FILL:
+                for (y = 0; y <= size_y; y++)
+                {
+                        int line_owned = 1;
+
+                        /*With SLI this card only holds alternate lines. The test must use
+                          the row being filled; voodoo->blt.dst_y belongs to CPU to screen
+                          blits and is stale here*/
+                        if (SLI_ENABLED)
+                        {
+                                if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (dst_y & 1)) ||
+                                    ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(dst_y & 1)))
+                                        line_owned = 0;
+                        }
+
+                        if (line_owned)
+                        {
+                                /*With SLI each card stores its lines at half the row number*/
+                                uint16_t *dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + (SLI_ENABLED ? (dst_y >> 1) : dst_y) * dst_stride];
+                                int dst_x = voodoo->bltDstX;
+
+                                for (x = 0; x <= size_x; x++)
+                                {
+                                        int clipped = (voodoo->bltCommand & BLIT_CLIPPING_ENABLED) &&
+                                                      (dst_x < voodoo->bltClipLeft || dst_x >= voodoo->bltClipRight ||
+                                                       dst_y < voodoo->bltClipLowY || dst_y >= voodoo->bltClipHighY);
+
+                                        if (!clipped)
+                                                dst[dst_x] = voodoo->bltColorFg;
+                                        dst_x += x_dir;
+                                }
+                        }
+
+                        dst_y += y_dir;
+                }
+                break;
+
+                case BLIT_COMMAND_SGRAM_FILL:
+                /*32x32 tiles - 2kb*/
+                dst_y = voodoo->bltDstY & 0x3ff;
+                size_x = voodoo->bltSizeX & 0x1ff; //512*8 = 4kb
+                size_y = voodoo->bltSizeY & 0x3ff;
+
+                dat64 = voodoo->bltColorFg | ((uint64_t)voodoo->bltColorFg << 16) |
+                        ((uint64_t)voodoo->bltColorFg << 32) | ((uint64_t)voodoo->bltColorFg << 48);
+
+                for (y = 0; y <= size_y; y++)
+                {
+                        uint64_t *dst;
+                        int dst_x;
+
+                        /*This may be wrong*/
+                        if (!y)
+                        {
+                                /*First row : from the start column to the end of the row*/
+                                dst_x = voodoo->bltDstX & 0x1ff;
+                                size_x = 511 - dst_x;
+                        }
+                        else if (y < size_y)
+                        {
+                                /*Middle rows : the whole row*/
+                                dst_x = 0;
+                                size_x = 511;
+                        }
+                        else
+                        {
+                                /*Last row : from the start of the row up to the X size*/
+                                dst_x = 0;
+                                size_x = voodoo->bltSizeX & 0x1ff;
+                        }
+
+                        dst = (uint64_t *)&voodoo->fb_mem[(dst_y*512*8 + dst_x*8) & voodoo->fb_mask];
+
+                        for (x = 0; x <= size_x; x++)
+                                dst[x] = dat64;
+
+                        dst_y++;
+                }
+                break;
+
+                default:
+                fatal("bad blit command %08x\n", voodoo->bltCommand);
+        }
+}
+
+/*Consume one 32-bit word of host data for a CPU to screen blit.
+    voodoo - device state; progress is kept in voodoo->blt, which is set up by
+             voodoo_v2_blit_start()
+    data   - host data word : 32 pixels at 1 bpp, 2 pixels at 16 bpp or one
+             pixel at 24 bpp
+  Does nothing unless the current operation is CPU_TO_SCREEN. Each decoded
+  pixel goes through the SLI line ownership test, clipping, the chroma tests
+  and the ROP. Bits left over when a row completes are discarded, so every row
+  starts on a fresh word. Once all rows are done further words are ignored.*/
+void voodoo_v2_blit_data(voodoo_t *voodoo, uint32_t data)
+{
+        int src_bits = 32;
+        uint32_t base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8);
+        uint32_t addr;
+        uint16_t *dst;
+
+        if ((voodoo->bltCommand & BLIT_COMMAND_MASK) != BLIT_COMMAND_CPU_TO_SCREEN)
+                return;
+
+        /*With SLI each card stores its lines at half the row number*/
+        if (SLI_ENABLED)
+        {
+                addr = base_addr + (voodoo->blt.dst_y >> 1) * voodoo->blt.dst_stride;
+                dst = (uint16_t *)&voodoo->fb_mem[addr];
+        }
+        else
+        {
+                addr = base_addr + voodoo->blt.dst_y*voodoo->blt.dst_stride;
+                dst = (uint16_t *)&voodoo->fb_mem[addr];
+        }
+
+        /*Flag the display line holding this address as dirty when it lies in the front buffer*/
+        if (addr >= voodoo->front_offset && voodoo->row_width)
+        {
+                int y = (addr - voodoo->front_offset) / voodoo->row_width;
+                if (y < voodoo->v_disp)
+                        voodoo->dirty_line[y] = 2;
+        }
+
+        while (src_bits && voodoo->blt.cur_x <= voodoo->blt.size_x)
+        {
+                int r = 0, g = 0, b = 0;
+                uint16_t src_dat = 0, dst_dat;
+                int x = (voodoo->blt.x_dir > 0) ? (voodoo->blt.dst_x + voodoo->blt.cur_x) : (voodoo->blt.dst_x - voodoo->blt.cur_x);
+                int rop = 0;
+
+                /*Decode the next pixel from the host word into RGB 5-6-5*/
+                switch (voodoo->bltCommand & BLIT_SRC_FORMAT)
+                {
+                        case BLIT_SRC_1BPP: case BLIT_SRC_1BPP_BYTE_PACKED:
+                        src_dat = (data & 1) ? voodoo->bltColorFg : voodoo->bltColorBg;
+                        data >>= 1;
+                        src_bits--;
+                        break;
+                        case BLIT_SRC_16BPP:
+                        switch (voodoo->bltCommand & BLIT_SRC_RGB_FORMAT)
+                        {
+                                case BLIT_SRC_RGB_ARGB: case BLIT_SRC_RGB_RGBA:
+                                src_dat = data & 0xffff;
+                                break;
+                                case BLIT_SRC_RGB_ABGR: case BLIT_SRC_RGB_BGRA:
+                                /*Swap the two 5-bit end fields and keep all 6 green bits. The
+                                  previous masks (0x07c0, 0x0038) dropped a green bit and moved the
+                                  wrong bits into red*/
+                                src_dat = ((data & 0xf800) >> 11) | (data & 0x07e0) | ((data & 0x001f) << 11);
+                                break;
+                        }
+                        data >>= 16;
+                        src_bits -= 16;
+                        break;
+                        case BLIT_SRC_24BPP: case BLIT_SRC_24BPP_DITHER_2X2: case BLIT_SRC_24BPP_DITHER_4X4:
+                        switch (voodoo->bltCommand & BLIT_SRC_RGB_FORMAT)
+                        {
+                                case BLIT_SRC_RGB_ARGB:
+                                r = (data >> 16) & 0xff;
+                                g = (data >> 8) & 0xff;
+                                b = data & 0xff;
+                                break;
+                                case BLIT_SRC_RGB_ABGR:
+                                r = data & 0xff;
+                                g = (data >> 8) & 0xff;
+                                b = (data >> 16) & 0xff;
+                                break;
+                                case BLIT_SRC_RGB_RGBA:
+                                r = (data >> 24) & 0xff;
+                                g = (data >> 16) & 0xff;
+                                b = (data >> 8) & 0xff;
+                                break;
+                                case BLIT_SRC_RGB_BGRA:
+                                r = (data >> 8) & 0xff;
+                                g = (data >> 16) & 0xff;
+                                b = (data >> 24) & 0xff;
+                                break;
+                        }
+                        switch (voodoo->bltCommand & BLIT_SRC_FORMAT)
+                        {
+                                case BLIT_SRC_24BPP:
+                                src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8);
+                                break;
+                                case BLIT_SRC_24BPP_DITHER_2X2:
+                                r = dither_rb2x2[r][voodoo->blt.dst_y & 1][x & 1];
+                                g = dither_g2x2[g][voodoo->blt.dst_y & 1][x & 1];
+                                b = dither_rb2x2[b][voodoo->blt.dst_y & 1][x & 1];
+                                src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8);
+                                break;
+                                case BLIT_SRC_24BPP_DITHER_4X4:
+                                r = dither_rb[r][voodoo->blt.dst_y & 3][x & 3];
+                                g = dither_g[g][voodoo->blt.dst_y & 3][x & 3];
+                                b = dither_rb[b][voodoo->blt.dst_y & 3][x & 3];
+                                src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8);
+                                break;
+                        }
+                        /*One 24 bpp pixel uses the whole word*/
+                        src_bits = 0;
+                        break;
+                }
+
+                /*With SLI, skip lines held by the other card*/
+                if (SLI_ENABLED)
+                {
+                        if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (voodoo->blt.dst_y & 1)) ||
+                            ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(voodoo->blt.dst_y & 1)))
+                                goto skip_pixel;
+                }
+
+                if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED)
+                {
+                        if (x < voodoo->bltClipLeft || x >= voodoo->bltClipRight ||
+                            voodoo->blt.dst_y < voodoo->bltClipLowY || voodoo->blt.dst_y >= voodoo->bltClipHighY)
+                                goto skip_pixel;
+                }
+
+                dst_dat = dst[x];
+
+                if (voodoo->bltCommand & BLIT_SRC_CHROMA)
+                {
+                        r = (src_dat >> 11);
+                        g = (src_dat >> 5) & 0x3f;
+                        b = src_dat & 0x1f;
+
+                        if (r >= voodoo->bltSrcChromaMinR && r <= voodoo->bltSrcChromaMaxR &&
+                            g >= voodoo->bltSrcChromaMinG && g <= voodoo->bltSrcChromaMaxG &&
+                            b >= voodoo->bltSrcChromaMinB && b <= voodoo->bltSrcChromaMaxB)
+                                rop |= BLIT_ROP_SRC_PASS;
+                }
+                if (voodoo->bltCommand & BLIT_DST_CHROMA)
+                {
+                        r = (dst_dat >> 11);
+                        g = (dst_dat >> 5) & 0x3f;
+                        b = dst_dat & 0x1f;
+
+                        if (r >= voodoo->bltDstChromaMinR && r <= voodoo->bltDstChromaMaxR &&
+                            g >= voodoo->bltDstChromaMinG && g <= voodoo->bltDstChromaMaxG &&
+                            b >= voodoo->bltDstChromaMinB && b <= voodoo->bltDstChromaMaxB)
+                                rop |= BLIT_ROP_DST_PASS;
+                }
+
+                /*The chroma results pick one of the four ROPs*/
+                MIX(src_dat, dst_dat, voodoo->bltRop[rop]);
+
+                dst[x] = dst_dat;
+
+skip_pixel:
+                voodoo->blt.cur_x++;
+        }
+
+        /*Row finished : move to the next one while rows remain. After the last
+          row cur_x stays past size_x, which makes later words no-ops*/
+        if (voodoo->blt.cur_x > voodoo->blt.size_x)
+        {
+                voodoo->blt.size_y--;
+                if (voodoo->blt.size_y >= 0)
+                {
+                        voodoo->blt.cur_x = 0;
+                        voodoo->blt.dst_y += voodoo->blt.y_dir;
+                }
+        }
+}
+
+
+/*Fill the clip rectangle of the colour and/or depth buffer.
+    voodoo - device state (framebuffer, row widths, tiling flags)
+    params - clip rectangle, fbzMode, color1, zaColor and buffer offsets
+  Rows clipLowY..clipHighY-1 and columns clipLeft..clipRight-1 are written.
+  When fbzMode bit 17 is set the row range is flipped about v_disp. The colour
+  buffer is filled with color1 reduced to RGB 5-6-5 if FBZ_RGB_WMASK is set;
+  the depth buffer with the low 16 bits of zaColor if FBZ_DEPTH_WMASK is set.
+  With SLI every second row is visited and stored at half its row number.*/
+void voodoo_fastfill(voodoo_t *voodoo, voodoo_params_t *params)
+{
+        int low_y = params->clipLowY;
+        int high_y = params->clipHighY;
+        int y;
+
+        if (params->fbzMode & (1 << 17))
+        {
+                /*Y origin flipped*/
+                high_y = voodoo->v_disp - params->clipLowY;
+                low_y = voodoo->v_disp - params->clipHighY;
+        }
+
+        if (params->fbzMode & FBZ_RGB_WMASK)
+        {
+                /*Keep the top 5/6/5 bits of each 8-bit channel*/
+                int red = (params->color1 >> 19) & 0x1f;
+                int green = (params->color1 >> 10) & 0x3f;
+                int blue = (params->color1 >> 3) & 0x1f;
+                uint16_t col = blue | (green << 5) | (red << 11);
+
+                if (SLI_ENABLED)
+                {
+                        for (y = low_y; y < high_y; y += 2)
+                        {
+                                uint16_t *line = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + (y >> 1) * voodoo->row_width) & voodoo->fb_mask];
+                                int px;
+
+                                for (px = params->clipLeft; px < params->clipRight; px++)
+                                        line[px] = col;
+                        }
+                }
+                else
+                {
+                        for (y = low_y; y < high_y; y++)
+                        {
+                                uint16_t *line;
+                                int px;
+
+                                if (voodoo->col_tiled)
+                                {
+                                        /*Tiled : 32 rows per tile row, 128 bytes per row within a tile*/
+                                        line = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + (y >> 5) * voodoo->row_width + (y & 31) * 128) & voodoo->fb_mask];
+
+                                        for (px = params->clipLeft; px < params->clipRight; px++)
+                                                line[(px & 63) | ((px >> 6) * 128*32/2)] = col;
+                                }
+                                else
+                                {
+                                        line = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + y * voodoo->row_width) & voodoo->fb_mask];
+
+                                        for (px = params->clipLeft; px < params->clipRight; px++)
+                                                line[px] = col;
+                                }
+                        }
+                }
+        }
+
+        if (params->fbzMode & FBZ_DEPTH_WMASK)
+        {
+                uint16_t depth = params->zaColor & 0xffff;
+
+                if (SLI_ENABLED)
+                {
+                        for (y = low_y; y < high_y; y += 2)
+                        {
+                                uint16_t *zline = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (y >> 1) * voodoo->row_width) & voodoo->fb_mask];
+                                int px;
+
+                                for (px = params->clipLeft; px < params->clipRight; px++)
+                                        zline[px] = depth;
+                        }
+                }
+                else
+                {
+                        for (y = low_y; y < high_y; y++)
+                        {
+                                uint16_t *zline;
+                                int px;
+
+                                if (voodoo->aux_tiled)
+                                {
+                                        zline = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (y >> 5) * voodoo->aux_row_width + (y & 31) * 128) & voodoo->fb_mask];
+
+                                        for (px = params->clipLeft; px < params->clipRight; px++)
+                                                zline[(px & 63) | ((px >> 6) * 128*32/2)] = depth;
+                                }
+                                else
+                                {
+                                        zline = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + y * voodoo->aux_row_width) & voodoo->fb_mask];
+
+                                        for (px = params->clipLeft; px < params->clipRight; px++)
+                                                zline[px] = depth;
+                                }
+                        }
+                }
+        }
+}
--- /dev/null
+/*Voodoo 2 blitter entry points. Presumably voodoo_v2_blit_start() begins a
+  blit from the current blitter registers and voodoo_v2_blit_data() feeds it
+  one 32-bit word of CPU-supplied data - verify against the definitions.*/
+void voodoo_v2_blit_start(voodoo_t *voodoo);
+void voodoo_v2_blit_data(voodoo_t *voodoo, uint32_t data);
+/*Fills the clip rectangle of the colour and/or depth buffer with a constant,
+  as selected by the write-mask bits in params->fbzMode.*/
+void voodoo_fastfill(voodoo_t *voodoo, voodoo_params_t *params);
--- /dev/null
+/*Registers :
+
+ alphaMode
+ fbzMode & 0x1f3fff
+ fbzColorPath
+*/
+
+#if defined(__linux__) || defined(__APPLE__)
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+#if WIN64
+#define BITMAP windows_BITMAP
+#include <windows.h>
+#undef BITMAP
+#endif
+
+#include <xmmintrin.h>
+
+/*BLOCK_NUM  - number of code blocks (presumably per cache set - confirm at the allocation site)
+  BLOCK_MASK - BLOCK_NUM-1; only usable as a wrap-around mask while BLOCK_NUM is a power of two
+  BLOCK_SIZE - size in bytes of one code_block buffer; the add*() emitters call fatal() on reaching it*/
+#define BLOCK_NUM 8
+#define BLOCK_MASK (BLOCK_NUM-1)
+#define BLOCK_SIZE 8192
+
+/*tLOD bits that select mirroring of the S and T texture coordinates*/
+#define LOD_MASK (LOD_TMIRROR_S | LOD_TMIRROR_T)
+
+/*One block of generated machine code together with a copy of register/mode
+  values. Presumably the copied values are those the code was generated for
+  and are compared to decide whether the block can be reused - confirm against
+  the lookup code, which is not shown here.*/
+typedef struct voodoo_x86_data_t
+{
+ uint8_t code_block[BLOCK_SIZE]; /*generated code, filled through addbyte()/addword()/addlong()/addquad()*/
+ int xdir;
+ uint32_t alphaMode;
+ uint32_t fbzMode;
+ uint32_t fogMode;
+ uint32_t fbzColorPath;
+ uint32_t textureMode[2]; /*one entry per TMU*/
+ uint32_t tLOD[2]; /*one entry per TMU*/
+ uint32_t trexInit1;
+ int is_tiled;
+} voodoo_x86_data_t;
+
+//static voodoo_x86_data_t voodoo_x86_data[2][BLOCK_NUM];
+
+/*Per-slot block bookkeeping (presumably one slot per render thread - confirm
+  at the use site). Only two initialisers are given; the remaining two
+  elements are zero-initialised as well.*/
+static int last_block[4] = {0, 0};
+static int next_block_to_write[4] = {0, 0};
+
+/*Appends one byte to code_block at block_pos and advances block_pos.
+  Calls fatal() once block_pos has reached BLOCK_SIZE.
+  Expects code_block and block_pos to be in scope at the point of use.*/
+#define addbyte(val) \
+    do { \
+        code_block[block_pos] = (val); \
+        block_pos += 1; \
+        if (!(block_pos < BLOCK_SIZE)) \
+            fatal("Over!\n"); \
+    } while (0)
+
+/*Appends a 16-bit value (host byte order) to code_block at block_pos and
+  advances block_pos by 2.
+  The bounds check is made before the store, so a nearly full block is never
+  written past its end: fatal() is called, and nothing is stored, whenever the
+  write would leave block_pos >= BLOCK_SIZE.
+  Expects code_block and block_pos to be in scope at the point of use.*/
+#define addword(val) \
+    do { \
+        if (block_pos + 2 >= BLOCK_SIZE) \
+            fatal("Over!\n"); \
+        else \
+        { \
+            *(uint16_t *)&code_block[block_pos] = (val); \
+            block_pos += 2; \
+        } \
+    } while (0)
+
+/*Appends a 32-bit value (host byte order) to code_block at block_pos and
+  advances block_pos by 4.
+  The bounds check is made before the store, so a nearly full block is never
+  written past its end: fatal() is called, and nothing is stored, whenever the
+  write would leave block_pos >= BLOCK_SIZE.
+  Expects code_block and block_pos to be in scope at the point of use.*/
+#define addlong(val) \
+    do { \
+        if (block_pos + 4 >= BLOCK_SIZE) \
+            fatal("Over!\n"); \
+        else \
+        { \
+            *(uint32_t *)&code_block[block_pos] = (val); \
+            block_pos += 4; \
+        } \
+    } while (0)
+
+/*Appends a 64-bit value (host byte order) to code_block at block_pos and
+  advances block_pos by 8.
+  The bounds check is made before the store, so a nearly full block is never
+  written past its end: fatal() is called, and nothing is stored, whenever the
+  write would leave block_pos >= BLOCK_SIZE.
+  Expects code_block and block_pos to be in scope at the point of use.*/
+#define addquad(val) \
+    do { \
+        if (block_pos + 8 >= BLOCK_SIZE) \
+            fatal("Over!\n"); \
+        else \
+        { \
+            *(uint64_t *)&code_block[block_pos] = (val); \
+            block_pos += 8; \
+        } \
+    } while (0)
+
+
+/*128-bit constants whose addresses are embedded in the generated code.
+  xmm_01_w, xmm_ff_w, xmm_ff_b and minus_254 are assigned at the top of
+  voodoo_generate(); the values shown are the low 64 bits, the high 64 bits
+  are zero.*/
+static __m128i xmm_01_w;// = 0x0001000100010001ull;
+static __m128i xmm_ff_w;// = 0x00ff00ff00ff00ffull;
+static __m128i xmm_ff_b;// = 0x0000000000ffffffull (voodoo_generate() sets the low dword to 0x00ffffff);
+
+/*alookup is addressed through R10 by the generated code; alookup[0] is read
+  there as a source of zero. aminuslookup is addressed through R11.*/
+static __m128i alookup[257], aminuslookup[256];
+static __m128i minus_254;// = 0xff02ff02ff02ff02ull;
+/*Two 16-byte weight vectors per entry; the generated bilinear code indexes it
+  with an 8-bit value built from the 4-bit S and T fractions, scaled by 32 bytes*/
+static __m128i bilinear_lookup[256*2];
+/*XOR masks addressed through R12 (vector form) and R13 (scalar form)*/
+static __m128i xmm_00_ff_w[2];
+static uint32_t i_00_ff_w[2] = {0, 0xff};
+
+/*Emits x86-64 machine code that computes the texture coordinates and LOD for
+  texture unit 'tmu' and, if FBZCP_TEXTURE_ENABLED is set in
+  params->fbzColorPath, fetches the texel into EAX.
+
+  code_block - buffer receiving the code (written through the add*() macros)
+  voodoo     - only bilinear_enabled is read here
+  params     - textureMode[tmu], tLOD[tmu] and fbzColorPath select what is emitted
+  state      - clamp_s[tmu] / clamp_t[tmu] select clamping or wrapping
+  block_pos  - write position in code_block on entry
+  tmu        - texture unit index (0 or 1)
+
+  Returns the write position after the emitted code.
+
+  In the emitted code RDI addresses voodoo_state_t and RSI addresses
+  voodoo_params_t. RSI is temporarily repurposed in the bilinear path and
+  reloaded from R15 at its end. XMM2 is used as a zero register for
+  unpacking.*/
+static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu)
+{
+ /*textureMode bit 0 set: S and T are divided by W (presumably the
+   perspective-correction enable - confirm against the register docs)*/
+ if (params->textureMode[tmu] & 1)
+ {
+ addbyte(0x48); /*MOV RBX, state->tmu0_s*/
+ addbyte(0x8b);
+ addbyte(0x9f);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s));
+ addbyte(0x48); /*MOV RAX, (1 << 48)*/
+ addbyte(0xb8);
+ addquad(1ULL << 48);
+ addbyte(0x48); /*XOR RDX, RDX*/
+ addbyte(0x31);
+ addbyte(0xd2);
+ addbyte(0x48); /*MOV RCX, state->tmu0_t*/
+ addbyte(0x8b);
+ addbyte(0x8f);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t));
+ addbyte(0x48); /*CMP state->tmu_w, 0*/
+ addbyte(0x83);
+ addbyte(0xbf);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w));
+ addbyte(0);
+ addbyte(0x74); /*JZ + - skips the 7-byte IDIV when W is zero, leaving RAX = 1 << 48*/
+ addbyte(7);
+ addbyte(0x48); /*IDIV state->tmu_w - RAX = (1 << 48) / W*/
+ addbyte(0xf7);
+ addbyte(0xbf);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w));
+ addbyte(0x48); /*SAR RBX, 14*/
+ addbyte(0xc1);
+ addbyte(0xfb);
+ addbyte(14);
+ addbyte(0x48); /*SAR RCX, 14*/
+ addbyte(0xc1);
+ addbyte(0xf9);
+ addbyte(14);
+ addbyte(0x48); /*IMUL RBX, RAX*/
+ addbyte(0x0f);
+ addbyte(0xaf);
+ addbyte(0xd8);
+ addbyte(0x48); /*IMUL RCX, RAX*/
+ addbyte(0x0f);
+ addbyte(0xaf);
+ addbyte(0xc8);
+ addbyte(0x48); /*SAR RBX, 30*/
+ addbyte(0xc1);
+ addbyte(0xfb);
+ addbyte(30);
+ addbyte(0x48); /*SAR RCX, 30*/
+ addbyte(0xc1);
+ addbyte(0xf9);
+ addbyte(30);
+ /*LOD from the reciprocal in RAX: the index of its highest set bit gives
+   the integer part, an 8-bit logtable lookup gives the fraction*/
+ addbyte(0x48); /*BSR RDX, RAX (64-bit, REX.W prefix)*/
+ addbyte(0x0f);
+ addbyte(0xbd);
+ addbyte(0xd0);
+ addbyte(0x48); /*SHL RAX, 8*/
+ addbyte(0xc1);
+ addbyte(0xe0);
+ addbyte(8);
+ addbyte(0x89); /*MOV state->tex_t, ECX*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, tex_t));
+ addbyte(0x89); /*MOV ECX, EDX*/
+ addbyte(0xd1);
+ addbyte(0x83); /*SUB EDX, 19*/
+ addbyte(0xea);
+ addbyte(19);
+ addbyte(0x48); /*SHR RAX, CL*/
+ addbyte(0xd3);
+ addbyte(0xe8);
+ addbyte(0xc1); /*SHL EDX, 8*/
+ addbyte(0xe2);
+ addbyte(8);
+ addbyte(0x25); /*AND EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x89); /*MOV state->tex_s, EBX*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_s));
+ addbyte(0x41); /*MOVZX EAX, R9(logtable)[RAX]*/
+ addbyte(0x0f);
+ addbyte(0xb6);
+ addbyte(0x04);
+ addbyte(0x01);
+ addbyte(0x09); /*OR EAX, EDX*/
+ addbyte(0xd0);
+ addbyte(0x03); /*ADD EAX, state->tmu[tmu].lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tmu[tmu].lod));
+ /*Clamp the result to [lod_min, lod_max] with signed compares*/
+ addbyte(0x3b); /*CMP EAX, state->lod_min*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_min[tmu]));
+ addbyte(0x0f); /*CMOVL EAX, state->lod_min*/
+ addbyte(0x4c);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_min[tmu]));
+ addbyte(0x3b); /*CMP EAX, state->lod_max*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_max[tmu]));
+ addbyte(0x0f); /*CMOVNL EAX, state->lod_max*/
+ addbyte(0x4d);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_max[tmu]));
+ addbyte(0xc1); /*SHR EAX, 8 - drop the 8 fraction bits*/
+ addbyte(0xe8);
+ addbyte(8);
+ addbyte(0x89); /*MOV state->lod, EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ }
+ else
+ {
+ /*No divide by W: S and T are just shifted down and LOD is lod_min >> 8*/
+ addbyte(0x48); /*MOV RAX, state->tmu0_s*/
+ addbyte(0x8b);
+ addbyte(0x87);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s));
+ addbyte(0x48); /*MOV RCX, state->tmu0_t*/
+ addbyte(0x8b);
+ addbyte(0x8f);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t));
+ addbyte(0x48); /*SHR RAX, 28*/
+ addbyte(0xc1);
+ addbyte(0xe8);
+ addbyte(28);
+ addbyte(0x8b); /*MOV EBX, state->lod_min*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, lod_min[tmu]));
+ addbyte(0x48); /*SHR RCX, 28*/
+ addbyte(0xc1);
+ addbyte(0xe9);
+ addbyte(28);
+ addbyte(0x48); /*MOV state->tex_s, RAX*/
+ addbyte(0x89);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_s));
+ addbyte(0xc1); /*SHR EBX, 8*/
+ addbyte(0xeb);
+ addbyte(8);
+ addbyte(0x48); /*MOV state->tex_t, RCX*/
+ addbyte(0x89);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, tex_t));
+ addbyte(0x89); /*MOV state->lod, EBX*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, lod));
+ }
+
+ if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED)
+ {
+ /*Bilinear path: taken when filtering is enabled on the card and either of
+   textureMode bits 1-2 is set (presumably the min/mag filter bits - confirm)*/
+ if (voodoo->bilinear_enabled && (params->textureMode[tmu] & 6))
+ {
+ addbyte(0xb2); /*MOV DL, 8*/
+ addbyte(8);
+ addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xbd); /*MOV EBP, 1*/
+ addlong(1);
+ addbyte(0x28); /*SUB DL, CL - DL = 8 - lod, used below as the row shift*/
+ addbyte(0xca);
+// addbyte(0x8a); /*MOV DL, params->tex_shift[RSI+ECX*4]*/
+// addbyte(0x94);
+// addbyte(0x8e);
+// addlong(offsetof(voodoo_params_t, tex_shift));
+ addbyte(0xd3); /*SHL EBP, CL*/
+ addbyte(0xe5);
+ addbyte(0x8b); /*MOV EAX, state->tex_s[RDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_s));
+ addbyte(0xc1); /*SHL EBP, 3 - EBP = 8 << lod, subtracted from S and T below*/
+ addbyte(0xe5);
+ addbyte(3);
+ addbyte(0x8b); /*MOV EBX, state->tex_t[RDI]*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_t));
+ if (params->tLOD[tmu] & LOD_TMIRROR_S)
+ {
+ addbyte(0xa9); /*TEST EAX, 0x1000*/
+ addlong(0x1000);
+ addbyte(0x74); /*JZ + - skips the 2-byte NOT*/
+ addbyte(2);
+ addbyte(0xf7); /*NOT EAX*/
+ addbyte(0xd0);
+ }
+ if (params->tLOD[tmu] & LOD_TMIRROR_T)
+ {
+ addbyte(0xf7); /*TEST EBX, 0x1000*/
+ addbyte(0xc3);
+ addlong(0x1000);
+ addbyte(0x74); /*JZ + - skips the 2-byte NOT*/
+ addbyte(2);
+ addbyte(0xf7); /*NOT EBX*/
+ addbyte(0xd3);
+ }
+ addbyte(0x29); /*SUB EAX, EBP*/
+ addbyte(0xe8);
+ addbyte(0x29); /*SUB EBX, EBP*/
+ addbyte(0xeb);
+ addbyte(0xd3); /*SAR EAX, CL*/
+ addbyte(0xf8);
+ addbyte(0xd3); /*SAR EBX, CL*/
+ addbyte(0xfb);
+ /*Build the 8-bit weight index: low 4 bits of S, low 4 bits of T above them.
+   The remaining bits of S and T become the integer texel coordinates.*/
+ addbyte(0x89); /*MOV EBP, EAX*/
+ addbyte(0xc5);
+ addbyte(0x89); /*MOV ECX, EBX*/
+ addbyte(0xd9);
+ addbyte(0x83); /*AND EBP, 0xf*/
+ addbyte(0xe5);
+ addbyte(0xf);
+ addbyte(0xc1); /*SHL ECX, 4*/
+ addbyte(0xe1);
+ addbyte(4);
+ addbyte(0xc1); /*SAR EAX, 4*/
+ addbyte(0xf8);
+ addbyte(4);
+ addbyte(0x81); /*AND ECX, 0xf0*/
+ addbyte(0xe1);
+ addlong(0xf0);
+ addbyte(0xc1); /*SAR EBX, 4*/
+ addbyte(0xfb);
+ addbyte(4);
+ addbyte(0x09); /*OR EBP, ECX*/
+ addbyte(0xcd);
+ addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xc1); /*SHL EBP, 5 - index * 32 bytes = byte offset into bilinear_lookup*/
+ addbyte(0xe5);
+ addbyte(5);
+ /*EAX = S, EBX = T, ECX = LOD, EDX = tex_shift, ESI=params, EDI=state, EBP = bilinear shift*/
+ addbyte(0x48); /*LEA RSI, [RSI+RCX*4] - params pointer advanced by lod*4 so the per-LOD masks below need no index register*/
+ addbyte(0x8d);
+ addbyte(0x34);
+ addbyte(0x8e);
+ addbyte(0x89); /*MOV ebp_store, EBP*/
+ addbyte(0xaf);
+ addlong(offsetof(voodoo_state_t, ebp_store));
+ addbyte(0x48); /*MOV RBP, state->tex[RDI+RCX*8]*/
+ addbyte(0x8b);
+ addbyte(0xac);
+ addbyte(0xcf);
+ addlong(offsetof(voodoo_state_t, tex[tmu]));
+ addbyte(0x88); /*MOV CL, DL*/
+ addbyte(0xd1);
+ addbyte(0x89); /*MOV EDX, EBX*/
+ addbyte(0xda);
+ if (!state->clamp_s[tmu])
+ {
+ addbyte(0x23); /*AND EAX, params->tex_w_mask[ESI]*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]));
+ }
+ addbyte(0x83); /*ADD EDX, 1 - EDX = T of the second row; also sets the flags for the CMOVS below*/
+ addbyte(0xc2);
+ addbyte(1);
+ if (state->clamp_t[tmu])
+ {
+ addbyte(0x41); /*CMOVS EDX, R10(alookup[0](zero))*/
+ addbyte(0x0f);
+ addbyte(0x48);
+ addbyte(0x12);
+ addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/
+ addbyte(0x96);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ addbyte(0x0f); /*CMOVA EDX, params->tex_h_mask[ESI]*/
+ addbyte(0x47);
+ addbyte(0x96);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ addbyte(0x85); /*TEST EBX,EBX*/
+ addbyte(0xdb);
+ addbyte(0x41); /*CMOVS EBX, R10(alookup[0](zero))*/
+ addbyte(0x0f);
+ addbyte(0x48);
+ addbyte(0x1a);
+ addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ addbyte(0x0f); /*CMOVA EBX, params->tex_h_mask[ESI]*/
+ addbyte(0x47);
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ }
+ else
+ {
+ addbyte(0x23); /*AND EDX, params->tex_h_mask[ESI]*/
+ addbyte(0x96);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ addbyte(0x23); /*AND EBX, params->tex_h_mask[ESI]*/
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ }
+ /*EAX = S, EBX = T0, EDX = T1*/
+ addbyte(0xd3); /*SHL EBX, CL*/
+ addbyte(0xe3);
+ addbyte(0xd3); /*SHL EDX, CL*/
+ addbyte(0xe2);
+ addbyte(0x48); /*LEA RBX,[RBP+RBX*4] - address of row T0 (4 bytes per texel)*/
+ addbyte(0x8d);
+ addbyte(0x5c);
+ addbyte(0x9d);
+ addbyte(0);
+ addbyte(0x48); /*LEA RDX,[RBP+RDX*4] - address of row T1*/
+ addbyte(0x8d);
+ addbyte(0x54);
+ addbyte(0x95);
+ addbyte(0);
+ if (state->clamp_s[tmu])
+ {
+ addbyte(0x8b); /*MOV EBP, params->tex_w_mask[ESI]*/
+ addbyte(0xae);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]));
+ addbyte(0x85); /*TEST EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0x8b); /*MOV ESI, state->ebp_store[RDI] - reload the bilinear_lookup byte offset saved above; RSI no longer addresses params*/
+ addbyte(0xb7);
+ addlong(offsetof(voodoo_state_t, ebp_store));
+ addbyte(0x41); /*CMOVS EAX, R10(alookup[0](zero))*/
+ addbyte(0x0f);
+ addbyte(0x48);
+ addbyte(0x02);
+ /*Distance = CMP(2) + CMOVAE(3) + JAE(2) + two MOVQ(5+5) + JMP(2)*/
+ addbyte(0x78); /*JS + - clamp on 0*/
+ addbyte(2+3+2+ 5+5+2);
+ addbyte(0x3b); /*CMP EAX, EBP*/
+ addbyte(0xc5);
+ addbyte(0x0f); /*CMOVAE EAX, EBP*/
+ addbyte(0x43);
+ addbyte(0xc5);
+ /*Distance = two MOVQ(5+5) + JMP(2)*/
+ addbyte(0x73); /*JAE + - clamp on +*/
+ addbyte(5+5+2);
+ }
+ else
+ {
+ addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI] - is S at texture edge (ie will wrap/clamp)?*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]));
+ addbyte(0x8b); /*MOV ESI, state->ebp_store[RDI] - reload the bilinear_lookup byte offset saved above; RSI no longer addresses params*/
+ addbyte(0xb7);
+ addlong(offsetof(voodoo_state_t, ebp_store));
+ /*Distance = two MOVQ(5+5) + JMP(2)*/
+ addbyte(0x74); /*JE +*/
+ addbyte(5+5+2);
+ }
+
+ /*Common case: the two texels of each row are adjacent, load both at once*/
+ addbyte(0xf3); /*MOVQ XMM0, [RBX+RAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0x04);
+ addbyte(0x83);
+ addbyte(0xf3); /*MOVQ XMM1, [RDX+RAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0x0c);
+ addbyte(0x82);
+
+ if (state->clamp_s[tmu])
+ {
+ /*Jump over the clamped-S variant: two MOVD(5+5) + two PUNPCKLDQ(4+4)*/
+ addbyte(0xeb); /*JMP +*/
+ addbyte(5+5+4+4);
+
+ /*S clamped - the two S coordinates are the same*/
+ addbyte(0x66); /*MOVD XMM0, [RBX+RAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x04);
+ addbyte(0x83);
+ addbyte(0x66); /*MOVD XMM1, [RDX+RAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x0c);
+ addbyte(0x82);
+ addbyte(0x66); /*PUNPCKLDQ XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x62);
+ addbyte(0xc0);
+ addbyte(0x66); /*PUNPCKLDQ XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x62);
+ addbyte(0xc9);
+ }
+ else
+ {
+ /*Jump over the wrapped-S variant: two MOVD(5+5) + four PINSRW(5+5+6+6)*/
+ addbyte(0xeb); /*JMP +*/
+ addbyte(5+5+5+5+6+6);
+
+ /*S wrapped - the two S coordinates are not contiguous*/
+ addbyte(0x66); /*MOVD XMM0, [RBX+EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x04);
+ addbyte(0x83);
+ addbyte(0x66); /*MOVD XMM1, [RDX+EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x0c);
+ addbyte(0x82);
+ /*The second texel is taken from the start of each row, 16 bits at a time*/
+ addbyte(0x66); /*PINSRW XMM0, [RBX], 2*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x03);
+ addbyte(0x02);
+ addbyte(0x66); /*PINSRW XMM1, [RDX], 2*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x0a);
+ addbyte(0x02);
+ addbyte(0x66); /*PINSRW XMM0, 2[RBX], 3*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x43);
+ addbyte(0x02);
+ addbyte(0x03);
+ addbyte(0x66); /*PINSRW XMM1, 2[RDX], 3*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x4a);
+ addbyte(0x02);
+ addbyte(0x03);
+ }
+
+ addbyte(0x49); /*MOV R8, bilinear_lookup*/
+ addbyte(0xb8);
+ addquad((uintptr_t)bilinear_lookup);
+
+ /*Widen the four texels' bytes to 16-bit words (XMM2 is zero)*/
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+ addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xca);
+
+ addbyte(0x4c); /*ADD RSI, R8 - RSI = address of this pixel's weight pair*/
+ addbyte(0x01);
+ addbyte(0xc6);
+
+ /*Weight both rows, add them, add the two halves, then scale back to bytes*/
+ addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x06);
+ addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x4e);
+ addbyte(0x10);
+ addbyte(0x66); /*PADDW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc0 | 1 | (0 << 3));
+ addbyte(0x66); /*MOVDQA XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0xc0 | 0 | (1 << 3));
+ addbyte(0x66); /*PSRLDQ XMM0, 8 - shift right by 8 bytes (64 bits)*/
+ addbyte(0x0f);
+ addbyte(0x73);
+ addbyte(0xd8);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc0 | 1 | (0 << 3));
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0 | 0);
+ addbyte(8);
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+
+ addbyte(0x4c); /*MOV RSI, R15 - restore RSI, which was repurposed above*/
+ addbyte(0x89);
+ addbyte(0xfe);
+
+ addbyte(0x66); /*MOVD EAX, XMM0 - filtered texel*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc0);
+ }
+ else
+ {
+ /*Point-sampled path: one texel at (S, T)*/
+ addbyte(0xb2); /*MOV DL, 8*/
+ addbyte(8);
+ addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0x48); /*MOV RBP, state->tex[RDI+RCX*8]*/
+ addbyte(0x8b);
+ addbyte(0xac);
+ addbyte(0xcf);
+ addlong(offsetof(voodoo_state_t, tex[tmu]));
+ addbyte(0x28); /*SUB DL, CL - DL = 8 - lod, used below as the row shift*/
+ addbyte(0xca);
+ addbyte(0x80); /*ADD CL, 4 - CL = lod + 4; this is why the [ESI+ECX*4] displacements below subtract 0x10*/
+ addbyte(0xc1);
+ addbyte(4);
+ addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_s));
+ addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_t));
+ if (params->tLOD[tmu] & LOD_TMIRROR_S)
+ {
+ addbyte(0xa9); /*TEST EAX, 0x1000*/
+ addlong(0x1000);
+ addbyte(0x74); /*JZ + - skips the 2-byte NOT*/
+ addbyte(2);
+ addbyte(0xf7); /*NOT EAX*/
+ addbyte(0xd0);
+ }
+ if (params->tLOD[tmu] & LOD_TMIRROR_T)
+ {
+ addbyte(0xf7); /*TEST EBX, 0x1000*/
+ addbyte(0xc3);
+ addlong(0x1000);
+ addbyte(0x74); /*JZ + - skips the 2-byte NOT*/
+ addbyte(2);
+ addbyte(0xf7); /*NOT EBX*/
+ addbyte(0xd3);
+ }
+ addbyte(0xd3); /*SHR EAX, CL*/
+ addbyte(0xe8);
+ addbyte(0xd3); /*SHR EBX, CL*/
+ addbyte(0xeb);
+ if (state->clamp_s[tmu])
+ {
+ addbyte(0x85); /*TEST EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0x41); /*CMOVS EAX, R10(alookup[0](zero))*/
+ addbyte(0x0f);
+ addbyte(0x48);
+ addbyte(0x02);
+ addbyte(0x3b); /*CMP EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/
+ addbyte(0x84);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10);
+ addbyte(0x0f); /*CMOVAE EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/
+ addbyte(0x43);
+ addbyte(0x84);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10);
+
+ }
+ else
+ {
+ addbyte(0x23); /*AND EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/
+ addbyte(0x84);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10);
+ }
+ if (state->clamp_t[tmu])
+ {
+ addbyte(0x85); /*TEST EBX, EBX*/
+ addbyte(0xdb);
+ addbyte(0x41); /*CMOVS EBX, R10(alookup[0](zero))*/
+ addbyte(0x0f);
+ addbyte(0x48);
+ addbyte(0x1a);
+ addbyte(0x3b); /*CMP EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/
+ addbyte(0x9c);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10);
+ addbyte(0x0f); /*CMOVAE EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/
+ addbyte(0x43);
+ addbyte(0x9c);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10);
+ }
+ else
+ {
+ addbyte(0x23); /*AND EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/
+ addbyte(0x9c);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10);
+ }
+ /*Texel index = (T << (8 - lod)) + S*/
+ addbyte(0x88); /*MOV CL, DL*/
+ addbyte(0xd1);
+ addbyte(0xd3); /*SHL EBX, CL*/
+ addbyte(0xe3);
+ addbyte(0x01); /*ADD EBX, EAX*/
+ addbyte(0xc3);
+
+ addbyte(0x8b); /*MOV EAX, [RBP+RBX*4] - fetched texel*/
+ addbyte(0x44);
+ addbyte(0x9d);
+ addbyte(0);
+ }
+ }
+
+ return block_pos;
+}
+
+static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop)
+{
+ int block_pos = 0;
+ int z_skip_pos = 0;
+ int a_skip_pos = 0;
+ int chroma_skip_pos = 0;
+ int depth_jump_pos = 0;
+ int depth_jump_pos2 = 0;
+ int loop_jump_pos = 0;
+// xmm_01_w = (__m128i)0x0001000100010001ull;
+// xmm_ff_w = (__m128i)0x00ff00ff00ff00ffull;
+// xmm_ff_b = (__m128i)0x00000000ffffffffull;
+ xmm_01_w = _mm_set_epi32(0, 0, 0x00010001, 0x00010001);
+ xmm_ff_w = _mm_set_epi32(0, 0, 0x00ff00ff, 0x00ff00ff);
+ xmm_ff_b = _mm_set_epi32(0, 0, 0, 0x00ffffff);
+ minus_254 = _mm_set_epi32(0, 0, 0xff02ff02, 0xff02ff02);
+// *(uint64_t *)&const_1_48 = 0x45b0000000000000ull;
+// block_pos = 0;
+// voodoo_get_depth = &code_block[block_pos];
+ /*W at (%esp+4)
+ Z at (%esp+12)
+ new_depth at (%esp+16)*/
+// if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depth_op == DEPTHOP_NEVER))
+// {
+// addbyte(0xC3); /*RET*/
+// return;
+// }
+ addbyte(0x55); /*PUSH RBP*/
+ addbyte(0x57); /*PUSH RDI*/
+ addbyte(0x56); /*PUSH RSI*/
+ addbyte(0x53); /*PUSH RBX*/
+ addbyte(0x41); /*PUSH R12*/
+ addbyte(0x54);
+ addbyte(0x41); /*PUSH R13*/
+ addbyte(0x55);
+ addbyte(0x41); /*PUSH R14*/
+ addbyte(0x56);
+ addbyte(0x41); /*PUSH R15*/
+ addbyte(0x57);
+
+ addbyte(0x49); /*MOV R15, xmm_01_w*/
+ addbyte(0xbf);
+ addquad((uint64_t)(uintptr_t)&xmm_01_w);
+ addbyte(0x66); /*MOVDQA XMM8, [R15]*/
+ addbyte(0x45);
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x07 | (0 << 3));
+ addbyte(0x49); /*MOV R15, xmm_ff_w*/
+ addbyte(0xbf);
+ addquad((uint64_t)(uintptr_t)&xmm_ff_w);
+ addbyte(0x66); /*MOVDQA XMM9, [R15]*/
+ addbyte(0x45);
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x07 | (1 << 3));
+ addbyte(0x49); /*MOV R15, xmm_ff_b*/
+ addbyte(0xbf);
+ addquad((uint64_t)(uintptr_t)&xmm_ff_b);
+ addbyte(0x66); /*MOVDQA XMM10, [R15]*/
+ addbyte(0x45);
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x07 | (2 << 3));
+ addbyte(0x49); /*MOV R15, minus_254*/
+ addbyte(0xbf);
+ addquad((uint64_t)(uintptr_t)&minus_254);
+ addbyte(0x66); /*MOVDQA XMM11, [R15]*/
+ addbyte(0x45);
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x07 | (3 << 3));
+
+#if WIN64
+ addbyte(0x48); /*MOV RDI, RCX (voodoo_state)*/
+ addbyte(0x89);
+ addbyte(0xcf);
+ addbyte(0x49); /*MOV R15, RDX (voodoo_params)*/
+ addbyte(0x89);
+ addbyte(0xd7);
+ addbyte(0x4d); /*MOV R14, R9 (real_y)*/
+ addbyte(0x89);
+ addbyte(0xce);
+#else
+ addbyte(0x49); /*MOV R14, RCX (real_y)*/
+ addbyte(0x89);
+ addbyte(0xce);
+ addbyte(0x49); /*MOV R15, RSI (voodoo_state)*/
+ addbyte(0x89);
+ addbyte(0xf7);
+#endif
+
+ addbyte(0x49); /*MOV R9, logtable*/
+ addbyte(0xb8 | (9 & 7));
+ addquad((uint64_t)(uintptr_t)&logtable);
+ addbyte(0x49); /*MOV R10, alookup*/
+ addbyte(0xb8 | (10 & 7));
+ addquad((uint64_t)(uintptr_t)&alookup);
+ addbyte(0x49); /*MOV R11, aminuslookup*/
+ addbyte(0xb8 | (11 & 7));
+ addquad((uint64_t)(uintptr_t)&aminuslookup);
+ addbyte(0x49); /*MOV R12, xmm_00_ff_w*/
+ addbyte(0xb8 | (12 & 7));
+ addquad((uint64_t)(uintptr_t)&xmm_00_ff_w);
+ addbyte(0x49); /*MOV R13, i_00_ff_w*/
+ addbyte(0xb8 | (13 & 7));
+ addquad((uint64_t)(uintptr_t)&i_00_ff_w);
+
+ loop_jump_pos = block_pos;
+ addbyte(0x4c); /*MOV RSI, R15*/
+ addbyte(0x89);
+ addbyte(0xfe);
+ if (params->col_tiled || params->aux_tiled)
+ {
+ addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x89); /*MOV EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x83); /*AND EAX, 63*/
+ addbyte(0xe0);
+ addbyte(63);
+ addbyte(0xc1); /*SHR EBX, 6*/
+ addbyte(0xeb);
+ addbyte(6);
+ addbyte(0xc1); /*SHL EBX, 11 - tile is 128*32, << 12, div 2 because word index*/
+ addbyte(0xe3);
+ addbyte(11);
+ addbyte(0x01); /*ADD EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x89); /*MOV state->x_tiled[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ }
+ addbyte(0x66); /*PXOR XMM2, XMM2*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xd2);
+
+ if ((params->fbzMode & FBZ_W_BUFFER) || (params->fogMode & (FOG_ENABLE|FOG_CONSTANT|FOG_Z|FOG_ALPHA)) == FOG_ENABLE)
+ {
+ addbyte(0xb8); /*MOV new_depth, 0*/
+ addlong(0);
+ addbyte(0x66); /*TEST w+4, 0xffff*/
+ addbyte(0xf7);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w)+4);
+ addword(0xffff);
+ addbyte(0x75); /*JNZ got_depth*/
+ depth_jump_pos = block_pos;
+ addbyte(0);
+// addbyte(4+5+2+3+2+5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3);
+ addbyte(0x8b); /*MOV EDX, w*/
+ addbyte(0x97);
+ addlong(offsetof(voodoo_state_t, w));
+ addbyte(0xb8); /*MOV new_depth, 0xf001*/
+ addlong(0xf001);
+ addbyte(0x89); /*MOV EBX, EDX*/
+ addbyte(0xd3);
+ addbyte(0xc1); /*SHR EDX, 16*/
+ addbyte(0xea);
+ addbyte(16);
+ addbyte(0x74); /*JZ got_depth*/
+ depth_jump_pos2 = block_pos;
+ addbyte(0);
+// addbyte(5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3);
+ addbyte(0xb9); /*MOV ECX, 19*/
+ addlong(19);
+ addbyte(0x0f); /*BSR EAX, EDX*/
+ addbyte(0xbd);
+ addbyte(0xc2);
+ addbyte(0xba); /*MOV EDX, 15*/
+ addlong(15);
+ addbyte(0xf7); /*NOT EBX*/
+ addbyte(0xd3);
+ addbyte(0x29); /*SUB EDX, EAX - EDX = exp*/
+ addbyte(0xc2);
+ addbyte(0x29); /*SUB ECX, EDX*/
+ addbyte(0xd1);
+ addbyte(0xc1); /*SHL EDX, 12*/
+ addbyte(0xe2);
+ addbyte(12);
+ addbyte(0xd3); /*SHR EBX, CL*/
+ addbyte(0xeb);
+ addbyte(0x81); /*AND EBX, 0xfff - EBX = mant*/
+ addbyte(0xe3);
+ addlong(0xfff);
+ addbyte(0x67); /*LEA EAX, 1[EDX, EBX]*/
+ addbyte(0x8d);
+ addbyte(0x44);
+ addbyte(0x13);
+ addbyte(1);
+ addbyte(0xbb); /*MOV EBX, 0xffff*/
+ addlong(0xffff);
+ addbyte(0x39); /*CMP EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x0f); /*CMOVA EAX, EBX*/
+ addbyte(0x47);
+ addbyte(0xc3);
+
+ if (depth_jump_pos)
+ *(uint8_t *)&code_block[depth_jump_pos] = (block_pos - depth_jump_pos) - 1;
+ if (depth_jump_pos)
+ *(uint8_t *)&code_block[depth_jump_pos2] = (block_pos - depth_jump_pos2) - 1;
+
+ if ((params->fogMode & (FOG_ENABLE|FOG_CONSTANT|FOG_Z|FOG_ALPHA)) == FOG_ENABLE)
+ {
+ addbyte(0x89); /*MOV state->w_depth[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w_depth));
+ }
+ }
+ if (!(params->fbzMode & FBZ_W_BUFFER))
+ {
+ addbyte(0x8b); /*MOV EAX, z*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, z));
+ addbyte(0xbb); /*MOV EBX, 0xffff*/
+ addlong(0xffff);
+ addbyte(0x31); /*XOR ECX, ECX*/
+ addbyte(0xc9);
+ addbyte(0xc1); /*SAR EAX, 12*/
+ addbyte(0xf8);
+ addbyte(12);
+ addbyte(0x0f); /*CMOVS EAX, ECX*/
+ addbyte(0x48);
+ addbyte(0xc1);
+ addbyte(0x39); /*CMP EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x0f); /*CMOVA EAX, EBX*/
+ addbyte(0x47);
+ addbyte(0xc3);
+ }
+
+ if (params->fbzMode & FBZ_DEPTH_BIAS)
+ {
+ addbyte(0x03); /*ADD EAX, params->zaColor[ESI]*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, zaColor));
+ addbyte(0x25); /*AND EAX, 0xffff*/
+ addlong(0xffff);
+ }
+
+ addbyte(0x89); /*MOV state->new_depth[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, new_depth));
+
+ if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop != DEPTHOP_ALWAYS) && (depthop != DEPTHOP_NEVER))
+ {
+ addbyte(0x8b); /*MOV EBX, state->x[EDI]*/
+ addbyte(0x9f);
+ if (params->aux_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x48); /*MOV RCX, aux_mem[RDI]*/
+ addbyte(0x8b);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, aux_mem));
+ addbyte(0x0f); /*MOVZX EBX, [ECX+EBX*2]*/
+ addbyte(0xb7);
+ addbyte(0x1c);
+ addbyte(0x59);
+ if (params->fbzMode & FBZ_DEPTH_SOURCE)
+ {
+ addbyte(0x0f); /*MOVZX EAX, zaColor[RSI]*/
+ addbyte(0xb7);
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, zaColor));
+ }
+ addbyte(0x39); /*CMP EAX, EBX*/
+ addbyte(0xd8);
+ if (depthop == DEPTHOP_LESSTHAN)
+ {
+ addbyte(0x0f); /*JAE skip*/
+ addbyte(0x83);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_EQUAL)
+ {
+ addbyte(0x0f); /*JNE skip*/
+ addbyte(0x85);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_LESSTHANEQUAL)
+ {
+ addbyte(0x0f); /*JA skip*/
+ addbyte(0x87);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_GREATERTHAN)
+ {
+ addbyte(0x0f); /*JBE skip*/
+ addbyte(0x86);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_NOTEQUAL)
+ {
+ addbyte(0x0f); /*JE skip*/
+ addbyte(0x84);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_GREATERTHANEQUAL)
+ {
+ addbyte(0x0f); /*JB skip*/
+ addbyte(0x82);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else
+ fatal("Bad depth_op\n");
+ }
+ else if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop == DEPTHOP_NEVER))
+ {
+ addbyte(0xC3); /*RET*/
+ }
+
+ /*XMM0 = colour*/
+ /*XMM2 = 0 (for unpacking*/
+
+ /*EDI = state, ESI = params*/
+
+ if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus)
+ {
+ /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/
+ block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0);
+
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ addbyte(0xc1); /*SHR EAX, 24*/
+ addbyte(0xe8);
+ addbyte(24);
+ addbyte(0x89); /*MOV state->tex_a[RDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ }
+ else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH)
+ {
+ /*TMU0 in pass-through mode, only sample TMU1*/
+ block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1);
+
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ addbyte(0xc1); /*SHR EAX, 24*/
+ addbyte(0xe8);
+ addbyte(24);
+ addbyte(0x89); /*MOV state->tex_a[RDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ }
+ else
+ {
+ block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1);
+
+ addbyte(0x66); /*MOVD XMM3, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xd8);
+ if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && tc_sub_clocal_1)
+ {
+ addbyte(0x8b); /*MOV EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ if (!tc_reverse_blend_1)
+ {
+ addbyte(0xbb); /*MOV EBX, 1*/
+ addlong(1);
+ }
+ else
+ {
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ }
+ addbyte(0x83); /*AND EAX, 1*/
+ addbyte(0xe0);
+ addbyte(1);
+ if (!tca_reverse_blend_1)
+ {
+ addbyte(0xb9); /*MOV ECX, 1*/
+ addlong(1);
+ }
+ else
+ {
+ addbyte(0x31); /*XOR ECX, ECX*/
+ addbyte(0xc9);
+ }
+ addbyte(0x31); /*XOR EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x31); /*XOR ECX, EAX*/
+ addbyte(0xc1);
+ addbyte(0xc1); /*SHL EBX, 4*/
+ addbyte(0xe3);
+ addbyte(4);
+ /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/
+ }
+ addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xda);
+ if (tc_sub_clocal_1)
+ {
+ switch (tc_mselect_1)
+ {
+ case TC_MSELECT_ZERO:
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ break;
+ case TC_MSELECT_CLOCAL:
+ addbyte(0xf3); /*MOVQ XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc3);
+ break;
+ case TC_MSELECT_AOTHER:
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ break;
+ case TC_MSELECT_ALOCAL:
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xc3);
+ addbyte(0xff);
+ break;
+ case TC_MSELECT_DETAIL:
+ addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/
+ addlong(params->detail_bias[1]);
+ addbyte(0x2b); /*SUB EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xba); /*MOV EDX, params->detail_max[1]*/
+ addlong(params->detail_max[1]);
+ addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/
+ addbyte(0xe0);
+ addbyte(params->detail_scale[1]);
+ addbyte(0x39); /*CMP EAX, EDX*/
+ addbyte(0xd0);
+ addbyte(0x0f); /*CMOVNL EAX, EDX*/
+ addbyte(0x4d);
+ addbyte(0xc2);
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xc0);
+ addbyte(0);
+ break;
+ case TC_MSELECT_LOD_FRAC:
+ addbyte(0x66); /*MOVD XMM0, state->lod_frac[1]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_frac[1]));
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xc0);
+ addbyte(0);
+ break;
+ }
+ if (params->textureMode[1] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x66); /*PXOR XMM0, R12(xmm_00_ff_w)[EBX]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0x04);
+ addbyte(0x1c);
+ }
+ else if (!tc_reverse_blend_1)
+ {
+ addbyte(0x66); /*PXOR XMM0, XMM9(xmm_ff_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc1);
+ }
+ addbyte(0x66); /*PADDW XMM0, XMM8(xmm_01_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc0);
+ addbyte(0xf3); /*MOVQ XMM1, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xca);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PMULLW XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xc3);
+ addbyte(0x66); /*PMULHW XMM5, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xe5);
+ addbyte(0xeb);
+ addbyte(0x66); /*PUNPCKLWD XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0x61);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRAD XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe0);
+ addbyte(8);
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ addbyte(0x66); /*PSUBW XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xc8);
+ if (tc_add_clocal_1)
+ {
+ addbyte(0x66); /*PADDW XMM1, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xcb);
+ }
+ else if (tc_add_alocal_1)
+ {
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xc3);
+ addbyte(0xff);
+ addbyte(0x66); /*PADDW XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc8);
+ }
+ addbyte(0x66); /*PACKUSWB XMM3, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xd9);
+ if (tca_sub_clocal_1)
+ {
+ addbyte(0x66); /*MOVD EBX, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xdb);
+ }
+ addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xda);
+ }
+
+ if (tca_sub_clocal_1)
+ {
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ switch (tca_mselect_1)
+ {
+ case TCA_MSELECT_ZERO:
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ break;
+ case TCA_MSELECT_CLOCAL:
+ addbyte(0x89); /*MOV EAX, EBX*/
+ addbyte(0xd8);
+ break;
+ case TCA_MSELECT_AOTHER:
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ break;
+ case TCA_MSELECT_ALOCAL:
+ addbyte(0x89); /*MOV EAX, EBX*/
+ addbyte(0xd8);
+ break;
+ case TCA_MSELECT_DETAIL:
+ addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/
+ addlong(params->detail_bias[1]);
+ addbyte(0x2b); /*SUB EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xba); /*MOV EDX, params->detail_max[1]*/
+ addlong(params->detail_max[1]);
+ addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/
+ addbyte(0xe0);
+ addbyte(params->detail_scale[1]);
+ addbyte(0x39); /*CMP EAX, EDX*/
+ addbyte(0xd0);
+ addbyte(0x0f); /*CMOVNL EAX, EDX*/
+ addbyte(0x4d);
+ addbyte(0xc2);
+ break;
+ case TCA_MSELECT_LOD_FRAC:
+ addbyte(0x8b); /*MOV EAX, state->lod_frac[1]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_frac[1]));
+ break;
+ }
+ if (params->textureMode[1] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x41); /*XOR EAX, R13(i_00_ff_w)[ECX*4]*/
+ addbyte(0x33);
+ addbyte(0x44);
+ addbyte(0x8d);
+ addbyte(0);
+ }
+ else if (!tc_reverse_blend_1)
+ {
+ addbyte(0x35); /*XOR EAX, 0xff*/
+ addlong(0xff);
+ }
+ addbyte(0x8e); /*ADD EAX, 1*/
+ addbyte(0xc0);
+ addbyte(1);
+ addbyte(0x0f); /*IMUL EAX, EBX*/
+ addbyte(0xaf);
+ addbyte(0xc3);
+ addbyte(0xb9); /*MOV ECX, 0xff*/
+ addlong(0xff);
+ addbyte(0xf7); /*NEG EAX*/
+ addbyte(0xd8);
+ addbyte(0xc1); /*SAR EAX, 8*/
+ addbyte(0xf8);
+ addbyte(8);
+ if (tca_add_clocal_1 || tca_add_alocal_1)
+ {
+ addbyte(0x01); /*ADD EAX, EBX*/
+ addbyte(0xd8);
+ }
+ addbyte(0x39); /*CMP ECX, EAX*/
+ addbyte(0xc1);
+ addbyte(0x0f); /*CMOVA ECX, EAX*/
+ addbyte(0x47);
+ addbyte(0xc8);
+ addbyte(0x66); /*PINSRW 3, XMM3, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0xd8);
+ addbyte(3);
+ }
+
+ block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0);
+
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ addbyte(0x66); /*MOVD XMM7, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xf8);
+
+ if (params->textureMode[0] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x8b); /*MOV EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ if (!tc_reverse_blend)
+ {
+ addbyte(0xbb); /*MOV EBX, 1*/
+ addlong(1);
+ }
+ else
+ {
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ }
+ addbyte(0x83); /*AND EAX, 1*/
+ addbyte(0xe0);
+ addbyte(1);
+ if (!tca_reverse_blend)
+ {
+ addbyte(0xb9); /*MOV ECX, 1*/
+ addlong(1);
+ }
+ else
+ {
+ addbyte(0x31); /*XOR ECX, ECX*/
+ addbyte(0xc9);
+ }
+ addbyte(0x31); /*XOR EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x31); /*XOR ECX, EAX*/
+ addbyte(0xc1);
+ addbyte(0xc1); /*SHL EBX, 4*/
+ addbyte(0xe3);
+ addbyte(4);
+ /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/
+ }
+
+ /*XMM0 = TMU0 output, XMM3 = TMU1 output*/
+
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+ if (tc_zero_other)
+ {
+ addbyte(0x66); /*PXOR XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc9);
+ }
+ else
+ {
+ addbyte(0xf3); /*MOV XMM1, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xcb);
+ }
+ if (tc_sub_clocal)
+ {
+ addbyte(0x66); /*PSUBW XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xc8);
+ }
+
+ switch (tc_mselect)
+ {
+ case TC_MSELECT_ZERO:
+ addbyte(0x66); /*PXOR XMM4, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xe4);
+ break;
+ case TC_MSELECT_CLOCAL:
+ addbyte(0xf3); /*MOV XMM4, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe0);
+ break;
+ case TC_MSELECT_AOTHER:
+ addbyte(0xf2); /*PSHUFLW XMM4, XMM3, 3, 3, 3, 3*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe3);
+ addbyte(0xff);
+ break;
+ case TC_MSELECT_ALOCAL:
+ addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe0);
+ addbyte(0xff);
+ break;
+ case TC_MSELECT_DETAIL:
+ addbyte(0xb8); /*MOV EAX, params->detail_bias[0]*/
+ addlong(params->detail_bias[0]);
+ addbyte(0x2b); /*SUB EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xba); /*MOV EDX, params->detail_max[0]*/
+ addlong(params->detail_max[0]);
+ addbyte(0xc1); /*SHL EAX, params->detail_scale[0]*/
+ addbyte(0xe0);
+ addbyte(params->detail_scale[0]);
+ addbyte(0x39); /*CMP EAX, EDX*/
+ addbyte(0xd0);
+ addbyte(0x0f); /*CMOVNL EAX, EDX*/
+ addbyte(0x4d);
+ addbyte(0xc2);
+ addbyte(0x66); /*MOVD XMM4, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xe0);
+ addbyte(0xf2); /*PSHUFLW XMM4, XMM4, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe4);
+ addbyte(0);
+ break;
+ case TC_MSELECT_LOD_FRAC:
+ addbyte(0x66); /*MOVD XMM0, state->lod_frac[0]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, lod_frac[0]));
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe4);
+ addbyte(0);
+ break;
+ }
+ if (params->textureMode[0] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x66); /*PXOR XMM4, R12(xmm_00_ff_w)[EBX]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0x24);
+ addbyte(0x1c);
+ }
+ else if (!tc_reverse_blend)
+ {
+ addbyte(0x66); /*PXOR XMM4, XMM9(xmm_ff_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xe1);
+ }
+ addbyte(0x66); /*PADDW XMM4, XMM8(xmm_01_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe0);
+ addbyte(0xf3); /*MOVQ XMM5, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe9);
+ addbyte(0x66); /*PMULLW XMM1, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xcc);
+
+ if (tca_sub_clocal)
+ {
+ addbyte(0x66); /*MOV EBX, XMM7*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xfb);
+ }
+
+ addbyte(0x66); /*PMULHW XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xe5);
+ addbyte(0xec);
+ addbyte(0x66); /*PUNPCKLWD XMM1, XMM5*/
+ addbyte(0x0f);
+ addbyte(0x61);
+ addbyte(0xcd);
+ addbyte(0x66); /*PSRAD XMM1, 8*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe1);
+ addbyte(8);
+ addbyte(0x66); /*PACKSSDW XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc9);
+
+ if (tca_sub_clocal)
+ {
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ }
+
+ if (tc_add_clocal)
+ {
+ addbyte(0x66); /*PADDW XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc8);
+ }
+ else if (tc_add_alocal)
+ {
+ addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe0);
+ addbyte(0xff);
+ addbyte(0x66); /*PADDW XMM1, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xfc);
+ addbyte(0xcc);
+ }
+ if (tc_invert_output)
+ {
+ addbyte(0x66); /*PXOR XMM1, XMM9(xmm_ff_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc9);
+ }
+
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ addbyte(0x66); /*PACKUSWB XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xdb);
+ addbyte(0x66); /*PACKUSWB XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc9);
+
+ if (tca_zero_other)
+ {
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ }
+ else
+ {
+ addbyte(0x66); /*MOV EAX, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xd8);
+ addbyte(0xc1); /*SHR EAX, 24*/
+ addbyte(0xe8);
+ addbyte(24);
+ }
+ if (tca_sub_clocal)
+ {
+ addbyte(0x29); /*SUB EAX, EBX*/
+ addbyte(0xd8);
+ }
+ switch (tca_mselect)
+ {
+ case TCA_MSELECT_ZERO:
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ break;
+ case TCA_MSELECT_CLOCAL:
+ addbyte(0x66); /*MOV EBX, XMM7*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xfb);
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ break;
+ case TCA_MSELECT_AOTHER:
+ addbyte(0x66); /*MOV EBX, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xdb);
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ break;
+ case TCA_MSELECT_ALOCAL:
+ addbyte(0x66); /*MOV EBX, XMM7*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xfb);
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ break;
+ case TCA_MSELECT_DETAIL:
+ addbyte(0xbb); /*MOV EBX, params->detail_bias[1]*/
+ addlong(params->detail_bias[1]);
+ addbyte(0x2b); /*SUB EBX, state->lod*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xba); /*MOV EDX, params->detail_max[1]*/
+ addlong(params->detail_max[1]);
+ addbyte(0xc1); /*SHL EBX, params->detail_scale[1]*/
+ addbyte(0xe3);
+ addbyte(params->detail_scale[1]);
+ addbyte(0x39); /*CMP EBX, EDX*/
+ addbyte(0xd3);
+ addbyte(0x0f); /*CMOVNL EBX, EDX*/
+ addbyte(0x4d);
+ addbyte(0xda);
+ break;
+ case TCA_MSELECT_LOD_FRAC:
+ addbyte(0x8b); /*MOV EBX, state->lod_frac[0]*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, lod_frac[0]));
+ break;
+ }
+ if (params->textureMode[0] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x41); /*XOR EBX, R13(i_00_ff_w)[ECX*4]*/
+ addbyte(0x33);
+ addbyte(0x5c);
+ addbyte(0x8d);
+ addbyte(0);
+ }
+ else if (!tca_reverse_blend)
+ {
+ addbyte(0x81); /*XOR EBX, 0xFF*/
+ addbyte(0xf3);
+ addlong(0xff);
+ }
+
+ addbyte(0x83); /*ADD EBX, 1*/
+ addbyte(0xc3);
+ addbyte(1);
+ addbyte(0x0f); /*IMUL EAX, EBX*/
+ addbyte(0xaf);
+ addbyte(0xc3);
+ addbyte(0x31); /*XOR EDX, EDX*/
+ addbyte(0xd2);
+ addbyte(0xc1); /*SAR EAX, 8*/
+ addbyte(0xf8);
+ addbyte(8);
+ if (tca_add_clocal || tca_add_alocal)
+ {
+ addbyte(0x66); /*MOV EBX, XMM7*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xfb);
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ addbyte(0x01); /*ADD EAX, EBX*/
+ addbyte(0xd8);
+ }
+ addbyte(0x0f); /*CMOVS EAX, EDX*/
+ addbyte(0x48);
+ addbyte(0xc2);
+ addbyte(0xba); /*MOV EDX, 0xff*/
+ addlong(0xff);
+ addbyte(0x3d); /*CMP EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x0f); /*CMOVA EAX, EDX*/
+ addbyte(0x47);
+ addbyte(0xc2);
+ if (tca_invert_output)
+ {
+ addbyte(0x35); /*XOR EAX, 0xff*/
+ addlong(0xff);
+ }
+
+ addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+
+ addbyte(0xf3); /*MOVQ XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc1);
+ }
+ if (cc_mselect == CC_MSELECT_TEXRGB)
+ {
+ addbyte(0xf3); /*MOVD XMM4, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe0);
+ }
+
+ if ((params->fbzMode & FBZ_CHROMAKEY))
+ {
+ switch (_rgb_sel)
+ {
+ case CC_LOCALSELECT_ITER_RGB:
+ addbyte(0xf3); /*MOVDQU XMM0, ib*/ /* ir, ig and ib must be in same dqword!*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0x66); /*PSRAD XMM0, 12*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe0);
+ addbyte(12);
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ addbyte(0x66); /*MOVD EAX, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc0);
+ break;
+ case CC_LOCALSELECT_COLOR1:
+ addbyte(0x8b); /*MOV EAX, params->color1[RSI]*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, color1));
+ break;
+ case CC_LOCALSELECT_TEX:
+ addbyte(0x66); /*MOVD EAX, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc0);
+ break;
+ }
+ addbyte(0x8b); /*MOV EBX, params->chromaKey[ESI]*/
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, chromaKey));
+ addbyte(0x31); /*XOR EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x81); /*AND EBX, 0xffffff*/
+ addbyte(0xe3);
+ addlong(0xffffff);
+ addbyte(0x0f); /*JE skip*/
+ addbyte(0x84);
+ chroma_skip_pos = block_pos;
+ addlong(0);
+ }
+
+ if (voodoo->trexInit1[0] & (1 << 18))
+ {
+ addbyte(0xb8); /*MOV EAX, tmuConfig*/
+ addlong(voodoo->tmuConfig);
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ }
+
+ if (params->alphaMode & ((1 << 0) | (1 << 4)))
+ {
+ /*EBX = a_other*/
+ switch (a_sel)
+ {
+ case A_SEL_ITER_A:
+ addbyte(0x8b); /*MOV EBX, state->ia*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, ia));
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0xba); /*MOV EDX, 0xff*/
+ addlong(0xff);
+ addbyte(0xc1); /*SAR EBX, 12*/
+ addbyte(0xfb);
+ addbyte(12);
+ addbyte(0x0f); /*CMOVS EBX, EAX*/
+ addbyte(0x48);
+ addbyte(0xd8);
+ addbyte(0x39); /*CMP EBX, EDX*/
+ addbyte(0xd3);
+ addbyte(0x0f); /*CMOVA EBX, EDX*/
+ addbyte(0x47);
+ addbyte(0xda);
+ break;
+ case A_SEL_TEX:
+ addbyte(0x8b); /*MOV EBX, state->tex_a*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ break;
+ case A_SEL_COLOR1:
+ addbyte(0x0f); /*MOVZX EBX, params->color1+3*/
+ addbyte(0xb6);
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, color1)+3);
+ break;
+ default:
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ break;
+ }
+ /*ECX = a_local*/
+ switch (cca_localselect)
+ {
+ case CCA_LOCALSELECT_ITER_A:
+ if (a_sel == A_SEL_ITER_A)
+ {
+ addbyte(0x89); /*MOV ECX, EBX*/
+ addbyte(0xd9);
+ }
+ else
+ {
+ addbyte(0x8b); /*MOV ECX, state->ia*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ia));
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0xba); /*MOV EDX, 0xff*/
+ addlong(0xff);
+ addbyte(0xc1);/*SAR ECX, 12*/
+ addbyte(0xf9);
+ addbyte(12);
+ addbyte(0x0f); /*CMOVS ECX, EAX*/
+ addbyte(0x48);
+ addbyte(0xc8);
+ addbyte(0x39); /*CMP ECX, EDX*/
+ addbyte(0xd1);
+ addbyte(0x0f); /*CMOVA ECX, EDX*/
+ addbyte(0x47);
+ addbyte(0xca);
+ }
+ break;
+ case CCA_LOCALSELECT_COLOR0:
+ addbyte(0x0f); /*MOVZX ECX, params->color0+3*/
+ addbyte(0xb6);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, color0)+3);
+ break;
+ case CCA_LOCALSELECT_ITER_Z:
+ addbyte(0x8b); /*MOV ECX, state->z*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, z));
+ if (a_sel != A_SEL_ITER_A)
+ {
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0xba); /*MOV EDX, 0xff*/
+ addlong(0xff);
+ }
+ addbyte(0xc1);/*SAR ECX, 20*/
+ addbyte(0xf9);
+ addbyte(20);
+ addbyte(0x0f); /*CMOVS ECX, EAX*/
+ addbyte(0x48);
+ addbyte(0xc8);
+ addbyte(0x39); /*CMP ECX, EDX*/
+ addbyte(0xd1);
+ addbyte(0x0f); /*CMOVA ECX, EDX*/
+ addbyte(0x47);
+ addbyte(0xca);
+ break;
+
+ default:
+ addbyte(0xb9); /*MOV ECX, 0xff*/
+ addlong(0xff);
+ break;
+ }
+
+ if (cca_zero_other)
+ {
+ addbyte(0x31); /*XOR EDX, EDX*/
+ addbyte(0xd2);
+ }
+ else
+ {
+ addbyte(0x89); /*MOV EDX, EBX*/
+ addbyte(0xda);
+ }
+
+ if (cca_sub_clocal)
+ {
+ addbyte(0x29); /*SUB EDX, ECX*/
+ addbyte(0xca);
+ }
+ }
+
+ if (cc_sub_clocal || cc_mselect == 1 || cc_add == 1)
+ {
+ /*XMM1 = local*/
+ if (!cc_localselect_override)
+ {
+ if (cc_localselect)
+ {
+ addbyte(0x66); /*MOVD XMM1, params->color0*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, color0));
+ }
+ else
+ {
+ addbyte(0xf3); /*MOVDQU XMM1, ib*/ /* ir, ig and ib must be in same dqword!*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0x66); /*PSRAD XMM1, 12*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe1);
+ addbyte(12);
+ addbyte(0x66); /*PACKSSDW XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc9);
+ addbyte(0x66); /*PACKUSWB XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc9);
+ }
+ }
+ else
+ {
+ addbyte(0xf6); /*TEST state->tex_a, 0x80*/
+ addbyte(0x87);
+ addbyte(0x23);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ addbyte(0x80);
+ addbyte(0x74);/*JZ !cc_localselect*/
+ addbyte(8+2);
+ addbyte(0x66); /*MOVD XMM1, params->color0*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, color0));
+ addbyte(0xeb); /*JMP +*/
+ addbyte(8+5+4+4);
+ /*!cc_localselect:*/
+ addbyte(0xf3); /*MOVDQU XMM1, ib*/ /* ir, ig and ib must be in same dqword!*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0x66); /*PSRAD XMM1, 12*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe1);
+ addbyte(12);
+ addbyte(0x66); /*PACKSSDW XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc9);
+ addbyte(0x66); /*PACKUSWB XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc9);
+ }
+ addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xca);
+ }
+ if (!cc_zero_other)
+ {
+ if (_rgb_sel == CC_LOCALSELECT_ITER_RGB)
+ {
+ addbyte(0xf3); /*MOVDQU XMM0, ib*/ /* ir, ig and ib must be in same dqword!*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0x66); /*PSRAD XMM0, 12*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe0);
+ addbyte(12);
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ }
+ else if (_rgb_sel == CC_LOCALSELECT_TEX)
+ {
+#if 0
+ addbyte(0xf3); /*MOVDQU XMM0, state->tex_b*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_b));
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+#endif
+ }
+ else if (_rgb_sel == CC_LOCALSELECT_COLOR1)
+ {
+ addbyte(0x66); /*MOVD XMM0, params->color1*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, color1));
+ }
+ else
+ {
+ /*MOVD XMM0, src_r*/
+ }
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+ if (cc_sub_clocal)
+ {
+ addbyte(0x66); /*PSUBW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xc1);
+ }
+ }
+ else
+ {
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ if (cc_sub_clocal)
+ {
+ addbyte(0x66); /*PSUBW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xc1);
+ }
+ }
+
+ if (params->alphaMode & ((1 << 0) | (1 << 4)))
+ {
+ if (!(cca_mselect == 0 && cca_reverse_blend == 0))
+ {
+ switch (cca_mselect)
+ {
+ case CCA_MSELECT_ALOCAL:
+ addbyte(0x89); /*MOV EAX, ECX*/
+ addbyte(0xc8);
+ break;
+ case CCA_MSELECT_AOTHER:
+ addbyte(0x89); /*MOV EAX, EBX*/
+ addbyte(0xd8);
+ break;
+ case CCA_MSELECT_ALOCAL2:
+ addbyte(0x89); /*MOV EAX, ECX*/
+ addbyte(0xc8);
+ break;
+ case CCA_MSELECT_TEX:
+ addbyte(0x0f); /*MOVZX EAX, state->tex_a*/
+ addbyte(0xb6);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ break;
+
+ case CCA_MSELECT_ZERO:
+ default:
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ break;
+ }
+ if (!cca_reverse_blend)
+ {
+ addbyte(0x35); /*XOR EAX, 0xff*/
+ addlong(0xff);
+ }
+ addbyte(0x83); /*ADD EAX, 1*/
+ addbyte(0xc0);
+ addbyte(1);
+ addbyte(0x0f); /*IMUL EDX, EAX*/
+ addbyte(0xaf);
+ addbyte(0xd0);
+ addbyte(0xc1); /*SHR EDX, 8*/
+ addbyte(0xea);
+ addbyte(8);
+ }
+ }
+
+ if ((params->alphaMode & ((1 << 0) | (1 << 4))))
+ {
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ }
+
+ if (!(cc_mselect == 0 && cc_reverse_blend == 0) && cc_mselect == CC_MSELECT_AOTHER)
+ {
+ /*Copy a_other to XMM3 before it gets modified*/
+ addbyte(0x66); /*MOVD XMM3, EDX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xda);
+ addbyte(0xf2); /*PSHUFLW XMM3, XMM3, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xdb);
+ addbyte(0x00);
+ }
+
+ if (cca_add && (params->alphaMode & ((1 << 0) | (1 << 4))))
+ {
+ addbyte(0x01); /*ADD EDX, ECX*/
+ addbyte(0xca);
+ }
+
+ if ((params->alphaMode & ((1 << 0) | (1 << 4))))
+ {
+ addbyte(0x85); /*TEST EDX, EDX*/
+ addbyte(0xd2);
+ addbyte(0x0f); /*CMOVS EDX, EAX*/
+ addbyte(0x48);
+ addbyte(0xd0);
+ addbyte(0xb8); /*MOV EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x81); /*CMP EDX, 0xff*/
+ addbyte(0xfa);
+ addlong(0xff);
+ addbyte(0x0f); /*CMOVA EDX, EAX*/
+ addbyte(0x47);
+ addbyte(0xd0);
+ if (cca_invert_output)
+ {
+ addbyte(0x81); /*XOR EDX, 0xff*/
+ addbyte(0xf2);
+ addlong(0xff);
+ }
+ }
+
+ if (!(cc_mselect == 0 && cc_reverse_blend == 0))
+ {
+ switch (cc_mselect)
+ {
+ case CC_MSELECT_ZERO:
+ addbyte(0x66); /*PXOR XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xdb);
+ break;
+ case CC_MSELECT_CLOCAL:
+ addbyte(0xf3); /*MOV XMM3, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xd9);
+ break;
+ case CC_MSELECT_ALOCAL:
+ addbyte(0x66); /*MOVD XMM3, ECX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xd9);
+ addbyte(0xf2); /*PSHUFLW XMM3, XMM3, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xdb);
+ addbyte(0x00);
+ break;
+ case CC_MSELECT_AOTHER:
+ /*Handled above*/
+ break;
+ case CC_MSELECT_TEX:
+ addbyte(0x66); /*PINSRW XMM3, state->tex_a, 0*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ addbyte(0);
+ addbyte(0x66); /*PINSRW XMM3, state->tex_a, 1*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ addbyte(1);
+ addbyte(0x66); /*PINSRW XMM3, state->tex_a, 2*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ addbyte(2);
+ break;
+ case CC_MSELECT_TEXRGB:
+ addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xe2);
+ addbyte(0xf3); /*MOVQ XMM3, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xdc);
+ break;
+ default:
+ addbyte(0x66); /*PXOR XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xdb);
+ break;
+ }
+ addbyte(0xf3); /*MOV XMM4, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe0);
+ if (!cc_reverse_blend)
+ {
+ addbyte(0x66); /*PXOR XMM3, XMM9(xmm_ff_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xd9);
+ }
+ addbyte(0x66); /*PADDW XMM3, XMM8(xmm_01_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xd8);
+ addbyte(0x66); /*PMULLW XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xc3);
+ addbyte(0x66); /*PMULHW XMM4, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xe5);
+ addbyte(0xe3);
+ addbyte(0x66); /*PUNPCKLWD XMM0, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x61);
+ addbyte(0xc4);
+ addbyte(0x66); /*PSRLD XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe0);
+ addbyte(8);
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ }
+
+ if (cc_add == 1)
+ {
+ addbyte(0x66); /*PADDW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc1);
+ }
+
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+
+ if (cc_invert_output)
+ {
+ addbyte(0x66); /*PXOR XMM0, XMM10(xmm_ff_b)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc2);
+ }
+
+ if (params->fogMode & FOG_ENABLE)
+ {
+ if (params->fogMode & FOG_CONSTANT)
+ {
+ addbyte(0x66); /*MOVD XMM3, params->fogColor[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, fogColor));
+ addbyte(0x66); /*PADDUSB XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xdc);
+ addbyte(0xc3);
+ }
+ else
+ {
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+
+ if (!(params->fogMode & FOG_ADD))
+ {
+ addbyte(0x66); /*MOVD XMM3, params->fogColor[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, fogColor));
+ addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xda);
+ }
+ else
+ {
+ addbyte(0x66); /*PXOR XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xdb);
+ }
+
+ if (!(params->fogMode & FOG_MULT))
+ {
+ addbyte(0x66); /*PSUBW XMM3, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xd8);
+ }
+
+ /*Divide by 2 to prevent overflow on multiply*/
+ addbyte(0x66); /*PSRAW XMM3, 1*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xe3);
+ addbyte(1);
+
+ switch (params->fogMode & (FOG_Z|FOG_ALPHA))
+ {
+ case 0:
+ addbyte(0x8b); /*MOV EBX, state->w_depth[EDI]*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, w_depth));
+ addbyte(0x89); /*MOV EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0xc1); /*SHR EBX, 10*/
+ addbyte(0xeb);
+ addbyte(10);
+ addbyte(0xc1); /*SHR EAX, 2*/
+ addbyte(0xe8);
+ addbyte(2);
+ addbyte(0x83); /*AND EBX, 0x3f*/
+ addbyte(0xe3);
+ addbyte(0x3f);
+ addbyte(0x25); /*AND EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0xf6); /*MUL params->fogTable+1[ESI+EBX*2]*/
+ addbyte(0xa4);
+ addbyte(0x5e);
+ addlong(offsetof(voodoo_params_t, fogTable)+1);
+ addbyte(0x0f); /*MOVZX EBX, params->fogTable[ESI+EBX*2]*/
+ addbyte(0xb6);
+ addbyte(0x9c);
+ addbyte(0x5e);
+ addlong(offsetof(voodoo_params_t, fogTable));
+ addbyte(0xc1); /*SHR EAX, 10*/
+ addbyte(0xe8);
+ addbyte(10);
+ addbyte(0x01); /*ADD EAX, EBX*/
+ addbyte(0xd8);
+/* int fog_idx = (w_depth >> 10) & 0x3f;
+
+ fog_a = params->fogTable[fog_idx].fog;
+ fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10;*/
+ break;
+
+ case FOG_Z:
+ addbyte(0x8b); /*MOV EAX, state->z[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, z));
+ addbyte(0xc1); /*SHR EAX, 12*/
+ addbyte(0xe8);
+ addbyte(12);
+ addbyte(0x25); /*AND EAX, 0xff*/
+ addlong(0xff);
+// fog_a = (z >> 20) & 0xff;
+ break;
+
+ case FOG_ALPHA:
+ addbyte(0x8b); /*MOV EAX, state->ia[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, ia));
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ addbyte(0xc1); /*SAR EAX, 12*/
+ addbyte(0xf8);
+ addbyte(12);
+ addbyte(0x0f); /*CMOVS EAX, EBX*/
+ addbyte(0x48);
+ addbyte(0xc3);
+ addbyte(0xbb); /*MOV EBX, 0xff*/
+ addlong(0xff);
+ addbyte(0x3d); /*CMP EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x0f); /*CMOVAE EAX, EBX*/
+ addbyte(0x43);
+ addbyte(0xc3);
+// fog_a = CLAMP(ia >> 12);
+ break;
+
+ case FOG_W:
+ addbyte(0x8b); /*MOV EAX, state->w[EDI]+4*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w)+4);
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ addbyte(0x09); /*OR EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0x0f); /*CMOVS EAX, EBX*/
+ addbyte(0x48);
+ addbyte(0xc3);
+ addbyte(0xbb); /*MOV EBX, 0xff*/
+ addlong(0xff);
+ addbyte(0x3d); /*CMP EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x0f); /*CMOVAE EAX, EBX*/
+ addbyte(0x43);
+ addbyte(0xc3);
+// fog_a = CLAMP(w >> 32);
+ break;
+ }
+ addbyte(0x01); /*ADD EAX, EAX*/
+ addbyte(0xc0);
+
+ addbyte(0x66); /*PMULLW XMM3, alookup+4[EAX*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x5c);
+ addbyte(0xc2);
+ addbyte(16);
+ addbyte(0x66); /*PSRAW XMM3, 7*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xe3);
+ addbyte(7);
+
+ if (params->fogMode & FOG_MULT)
+ {
+ addbyte(0xf3); /*MOV XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc3);
+ }
+ else
+ {
+ addbyte(0x66); /*PADDW XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc3);
+ }
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ }
+ }
+
+ if ((params->alphaMode & 1) && (alpha_func != AFUNC_NEVER) && (alpha_func != AFUNC_ALWAYS))
+ {
+ addbyte(0x0f); /*MOVZX ECX, params->alphaMode+3*/
+ addbyte(0xb6);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, alphaMode) + 3);
+ addbyte(0x39); /*CMP EDX, ECX*/
+ addbyte(0xca);
+
+ switch (alpha_func)
+ {
+ case AFUNC_LESSTHAN:
+ addbyte(0x0f); /*JAE skip*/
+ addbyte(0x83);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_EQUAL:
+ addbyte(0x0f); /*JNE skip*/
+ addbyte(0x85);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_LESSTHANEQUAL:
+ addbyte(0x0f); /*JA skip*/
+ addbyte(0x87);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_GREATERTHAN:
+ addbyte(0x0f); /*JBE skip*/
+ addbyte(0x86);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_NOTEQUAL:
+ addbyte(0x0f); /*JE skip*/
+ addbyte(0x84);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_GREATERTHANEQUAL:
+ addbyte(0x0f); /*JB skip*/
+ addbyte(0x82);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ }
+ }
+ else if ((params->alphaMode & 1) && (alpha_func == AFUNC_NEVER))
+ {
+ addbyte(0xC3); /*RET*/
+ }
+
+ if (params->alphaMode & (1 << 4))
+ {
+ addbyte(0x49); /*MOV R8, rgb565*/
+ addbyte(0xb8);
+ addquad((uintptr_t)rgb565);
+ addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
+ addbyte(0x87);
+ if (params->col_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x48); /*MOV RBP, fb_mem*/
+ addbyte(0x8b);
+ addbyte(0xaf);
+ addlong(offsetof(voodoo_state_t, fb_mem));
+ addbyte(0x01); /*ADD EDX, EDX*/
+ addbyte(0xd2);
+ addbyte(0x0f); /*MOVZX EAX, [RBP+RAX*2]*/
+ addbyte(0xb7);
+ addbyte(0x44);
+ addbyte(0x45);
+ addbyte(0);
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+ addbyte(0x66); /*MOVD XMM4, rgb565[EAX*4]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x24);
+ addbyte(0x80);
+ addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xe2);
+ addbyte(0xf3); /*MOV XMM6, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xf4);
+
+ switch (dest_afunc)
+ {
+ case AFUNC_AZERO:
+ addbyte(0x66); /*PXOR XMM4, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xe4);
+ break;
+ case AFUNC_ASRC_ALPHA:
+ addbyte(0x66); /*PMULLW XMM4, R10(alookup)[EDX*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x24);
+ addbyte(0xd2);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, R10(alookup)[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x62);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ break;
+ case AFUNC_A_COLOR:
+ addbyte(0x66); /*PMULLW XMM4, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xe0);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, R10(alookup)[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x62);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ break;
+ case AFUNC_ADST_ALPHA:
+ break;
+ case AFUNC_AONE:
+ break;
+ case AFUNC_AOMSRC_ALPHA:
+ addbyte(0x66); /*PMULLW XMM4, R11(aminuslookup)[EDX*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x24);
+ addbyte(0xd3);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, R10(alookup)[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x62);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ break;
+ case AFUNC_AOM_COLOR:
+ addbyte(0xf3); /*MOVQ XMM5, XMM9(xmm_ff_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe9);
+ addbyte(0x66); /*PSUBW XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xe8);
+ addbyte(0x66); /*PMULLW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xe5);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x62);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ break;
+ case AFUNC_AOMDST_ALPHA:
+ addbyte(0x66); /*PXOR XMM4, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xe4);
+ break;
+ case AFUNC_ASATURATE:
+ addbyte(0x66); /*PMULLW XMM4, XMM11(minus_254)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xe3);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x62);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ }
+
+ switch (src_afunc)
+ {
+ case AFUNC_AZERO:
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ break;
+ case AFUNC_ASRC_ALPHA:
+ addbyte(0x66); /*PMULLW XMM0, R10(alookup)[EDX*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x04);
+ addbyte(0xd2);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PADDW XMM0, R10(alookup)[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x42);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0);
+ addbyte(8);
+ break;
+ case AFUNC_A_COLOR:
+ addbyte(0x66); /*PMULLW XMM0, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xc6);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PADDW XMM0, R10(alookup)[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x42);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0);
+ addbyte(8);
+ break;
+ case AFUNC_ADST_ALPHA:
+ break;
+ case AFUNC_AONE:
+ break;
+ case AFUNC_AOMSRC_ALPHA:
+ addbyte(0x66); /*PMULLW XMM0, R11(aminuslookup)[EDX*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x04);
+ addbyte(0xd3);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x42);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0);
+ addbyte(8);
+ break;
+ case AFUNC_AOM_COLOR:
+ addbyte(0xf3); /*MOVQ XMM5, XMM9(xmm_ff_w)*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe9);
+ addbyte(0x66); /*PSUBW XMM5, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xee);
+ addbyte(0x66); /*PMULLW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xc5);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/
+ addbyte(0x41);
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x42);
+ addbyte(8*2);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0);
+ addbyte(8);
+ break;
+ case AFUNC_AOMDST_ALPHA:
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ break;
+ case AFUNC_ACOLORBEFOREFOG:
+ break;
+ }
+
+ addbyte(0x66); /*PADDW XMM0, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc4);
+
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ }
+
+ addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
+ addbyte(0x97);
+ if (params->col_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+
+ addbyte(0x66); /*MOV EAX, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc0);
+
+ if (params->fbzMode & FBZ_RGB_WMASK)
+ {
+ if (dither)
+ {
+ addbyte(0x49); /*MOV R8, dither_rb*/
+ addbyte(0xb8);
+ addquad(dither2x2 ? (uintptr_t)dither_rb2x2 : (uintptr_t)dither_rb);
+ addbyte(0x4c); /*MOV ESI, real_y (R14)*/
+ addbyte(0x89);
+ addbyte(0xf6);
+ addbyte(0x0f); /*MOVZX EBX, AH*/ /*G*/
+ addbyte(0xb6);
+ addbyte(0xdc);
+ if (dither2x2)
+ {
+ addbyte(0x83); /*AND EDX, 1*/
+ addbyte(0xe2);
+ addbyte(1);
+ addbyte(0x83); /*AND ESI, 1*/
+ addbyte(0xe6);
+ addbyte(1);
+ addbyte(0xc1); /*SHL EBX, 2*/
+ addbyte(0xe3);
+ addbyte(2);
+ }
+ else
+ {
+ addbyte(0x83); /*AND EDX, 3*/
+ addbyte(0xe2);
+ addbyte(3);
+ addbyte(0x83); /*AND ESI, 3*/
+ addbyte(0xe6);
+ addbyte(3);
+ addbyte(0xc1); /*SHL EBX, 4*/
+ addbyte(0xe3);
+ addbyte(4);
+ }
+ addbyte(0x0f); /*MOVZX ECX, AL*/ /*R*/
+ addbyte(0xb6);
+ addbyte(0xc8);
+ if (dither2x2)
+ {
+ addbyte(0xc1); /*SHR EAX, 14*/
+ addbyte(0xe8);
+ addbyte(14);
+ addbyte(0x8d); /*LEA ESI, RDX+RSI*2*/
+ addbyte(0x34);
+ addbyte(0x72);
+ }
+ else
+ {
+ addbyte(0xc1); /*SHR EAX, 12*/
+ addbyte(0xe8);
+ addbyte(12);
+ addbyte(0x8d); /*LEA ESI, RDX+RSI*4*/
+ addbyte(0x34);
+ addbyte(0xb2);
+ }
+ addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
+ addbyte(0x97);
+ if (params->col_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x4c); /*ADD RSI, R8*/
+ addbyte(0x01);
+ addbyte(0xc6);
+ if (dither2x2)
+ {
+ addbyte(0xc1); /*SHL ECX, 2*/
+ addbyte(0xe1);
+ addbyte(2);
+ addbyte(0x25); /*AND EAX, 0x3fc*/ /*B*/
+ addlong(0x3fc);
+ }
+ else
+ {
+ addbyte(0xc1); /*SHL ECX, 4*/
+ addbyte(0xe1);
+ addbyte(4);
+ addbyte(0x25); /*AND EAX, 0xff0*/ /*B*/
+ addlong(0xff0);
+ }
+ addbyte(0x0f); /*MOVZX EBX, dither_g[EBX+ESI]*/
+ addbyte(0xb6);
+ addbyte(0x9c);
+ addbyte(0x1e);
+ addlong(dither2x2 ? ((uintptr_t)dither_g2x2 - (uintptr_t)dither_rb2x2) : ((uintptr_t)dither_g - (uintptr_t)dither_rb));
+ addbyte(0x0f); /*MOVZX ECX, dither_rb[RCX+RSI]*/
+ addbyte(0xb6);
+ addbyte(0x0c);
+ addbyte(0x0e);
+ addbyte(0x0f); /*MOVZX EAX, dither_rb[RAX+RSI]*/
+ addbyte(0xb6);
+ addbyte(0x04);
+ addbyte(0x06);
+ addbyte(0xc1); /*SHL EBX, 5*/
+ addbyte(0xe3);
+ addbyte(5);
+ addbyte(0xc1); /*SHL EAX, 11*/
+ addbyte(0xe0);
+ addbyte(11);
+ addbyte(0x09); /*OR EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x09); /*OR EAX, ECX*/
+ addbyte(0xc8);
+ }
+ else
+ {
+ addbyte(0x89); /*MOV EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x0f); /*MOVZX ECX, AH*/
+ addbyte(0xb6);
+ addbyte(0xcc);
+ addbyte(0xc1); /*SHR EAX, 3*/
+ addbyte(0xe8);
+ addbyte(3);
+ addbyte(0xc1); /*SHR EBX, 8*/
+ addbyte(0xeb);
+ addbyte(8);
+ addbyte(0xc1); /*SHL ECX, 3*/
+ addbyte(0xe1);
+ addbyte(3);
+ addbyte(0x81); /*AND EAX, 0x001f*/
+ addbyte(0xe0);
+ addlong(0x001f);
+ addbyte(0x81); /*AND EBX, 0xf800*/
+ addbyte(0xe3);
+ addlong(0xf800);
+ addbyte(0x81); /*AND ECX, 0x07e0*/
+ addbyte(0xe1);
+ addlong(0x07e0);
+ addbyte(0x09); /*OR EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x09); /*OR EAX, ECX*/
+ addbyte(0xc8);
+ }
+ addbyte(0x48); /*MOV RSI, fb_mem*/
+ addbyte(0x8b);
+ addbyte(0xb7);
+ addlong(offsetof(voodoo_state_t, fb_mem));
+ addbyte(0x66); /*MOV [ESI+EDX*2], AX*/
+ addbyte(0x89);
+ addbyte(0x04);
+ addbyte(0x56);
+ }
+
+ if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE))
+ {
+ addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
+ addbyte(0x97);
+ if (params->aux_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x66); /*MOV AX, new_depth*/
+ addbyte(0x8b);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, new_depth));
+ addbyte(0x48); /*MOV RSI, aux_mem*/
+ addbyte(0x8b);
+ addbyte(0xb7);
+ addlong(offsetof(voodoo_state_t, aux_mem));
+ addbyte(0x66); /*MOV [ESI+EDX*2], AX*/
+ addbyte(0x89);
+ addbyte(0x04);
+ addbyte(0x56);
+ }
+
+ if (z_skip_pos)
+ *(uint32_t *)&code_block[z_skip_pos] = (block_pos - z_skip_pos) - 4;
+ if (a_skip_pos)
+ *(uint32_t *)&code_block[a_skip_pos] = (block_pos - a_skip_pos) - 4;
+ if (chroma_skip_pos)
+ *(uint32_t *)&code_block[chroma_skip_pos] = (block_pos - chroma_skip_pos) - 4;
+
+ addbyte(0x4c); /*MOV RSI, R15*/
+ addbyte(0x89);
+ addbyte(0xfe);
+
+ addbyte(0xf3); /*MOVDQU XMM1, state->ib[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0xf3); /*MOVDQU XMM3, state->tmu0_s[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tmu0_s));
+ addbyte(0xf3); /*MOVQ XMM4, state->tmu0_w[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tmu0_w));
+ addbyte(0xf3); /*MOVDQU XMM0, params->dBdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, dBdX));
+ addbyte(0x8b); /*MOV EAX, params->dZdX[ESI]*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, dZdX));
+ addbyte(0xf3); /*MOVDQU XMM5, params->tmu[0].dSdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0xae);
+ addlong(offsetof(voodoo_params_t, tmu[0].dSdX));
+ addbyte(0xf3); /*MOVQ XMM6, params->tmu[0].dWdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xb6);
+ addlong(offsetof(voodoo_params_t, tmu[0].dWdX));
+
+ if (state->xdir > 0)
+ {
+ addbyte(0x66); /*PADDD XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xfe);
+ addbyte(0xc8);
+ }
+ else
+ {
+ addbyte(0x66); /*PSUBD XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xfa);
+ addbyte(0xc8);
+ }
+
+ addbyte(0xf3); /*MOVQ XMM0, state->w*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w));
+ addbyte(0xf3); /*MOVDQU state->ib, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7f);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0xf3); /*MOVQ XMM7, params->dWdX*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xbe);
+ addlong(offsetof(voodoo_params_t, dWdX));
+
+ if (state->xdir > 0)
+ {
+ addbyte(0x66); /*PADDQ XMM3, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xdd);
+ addbyte(0x66); /*PADDQ XMM4, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xe6);
+ addbyte(0x66); /*PADDQ XMM0, XMM7*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xc7);
+ addbyte(0x01); /*ADD state->z[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, z));
+ }
+ else
+ {
+ addbyte(0x66); /*PSUBQ XMM3, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xdd);
+ addbyte(0x66); /*PSUBQ XMM4, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xe6);
+ addbyte(0x66); /*PSUBQ XMM0, XMM7*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xc7);
+ addbyte(0x29); /*SUB state->z[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, z));
+ }
+
+ if (voodoo->dual_tmus)
+ {
+ addbyte(0xf3); /*MOVDQU XMM5, params->tmu[1].dSdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0xae);
+ addlong(offsetof(voodoo_params_t, tmu[1].dSdX));
+ addbyte(0xf3); /*MOVQ XMM6, params->tmu[1].dWdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xb6);
+ addlong(offsetof(voodoo_params_t, tmu[1].dWdX));
+ }
+
+ addbyte(0xf3); /*MOVDQU state->tmu0_s, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7f);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tmu0_s));
+ addbyte(0x66); /*MOVQ state->tmu0_w, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xd6);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tmu0_w));
+ addbyte(0x66); /*MOVQ state->w, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xd6);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w));
+
+ if (voodoo->dual_tmus)
+ {
+ addbyte(0xf3); /*MOVDQU XMM3, state->tmu1_s[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tmu1_s));
+ addbyte(0xf3); /*MOVQ XMM4, state->tmu1_w[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tmu1_w));
+
+ if (state->xdir > 0)
+ {
+ addbyte(0x66); /*PADDQ XMM3, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xdd);
+ addbyte(0x66); /*PADDQ XMM4, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xe6);
+ }
+ else
+ {
+ addbyte(0x66); /*PSUBQ XMM3, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xdd);
+ addbyte(0x66); /*PSUBQ XMM4, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xe6);
+ }
+
+ addbyte(0xf3); /*MOVDQU state->tmu1_s, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7f);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tmu1_s));
+ addbyte(0x66); /*MOVQ state->tmu1_w, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xd6);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tmu1_w));
+ }
+
+ addbyte(0x83); /*ADD state->pixel_count[EDI], 1*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, pixel_count));
+ addbyte(1);
+
+ if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED)
+ {
+ if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH ||
+ (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL)
+ {
+ addbyte(0x83); /*ADD state->texel_count[EDI], 1*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, texel_count));
+ addbyte(1);
+ }
+ else
+ {
+ addbyte(0x83); /*ADD state->texel_count[EDI], 2*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, texel_count));
+ addbyte(2);
+ }
+ }
+
+ addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x));
+
+ if (state->xdir > 0)
+ {
+ addbyte(0x83); /*ADD state->x[EDI], 1*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(1);
+ }
+ else
+ {
+ addbyte(0x83); /*SUB state->x[EDI], 1*/
+ addbyte(0xaf);
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(1);
+ }
+
+ addbyte(0x3b); /*CMP EAX, state->x2[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x2));
+ addbyte(0x0f); /*JNZ loop_jump_pos*/
+ addbyte(0x85);
+ addlong(loop_jump_pos - (block_pos + 4));
+
+ addbyte(0x41); /*POP R15*/
+ addbyte(0x5f);
+ addbyte(0x41); /*POP R14*/
+ addbyte(0x5e);
+ addbyte(0x41); /*POP R13*/
+ addbyte(0x5d);
+ addbyte(0x41); /*POP R12*/
+ addbyte(0x5c);
+ addbyte(0x5b); /*POP RBX*/
+ addbyte(0x5e); /*POP RSI*/
+ addbyte(0x5f); /*POP RDI*/
+ addbyte(0x5d); /*POP RBP*/
+
+ addbyte(0xC3); /*RET*/
+}
+/*Running count of pixel pipeline blocks that had to be generated.
+  voodoo_get_block() increments it each time no cached block matches.*/
+int voodoo_recomp = 0;
+/*Return the recompiled pixel pipeline for the current render state, generating
+  it first if no cached copy exists.
+
+  voodoo->codegen_data is used as an array of cache entries indexed as
+  [odd_even + slot*4], giving each odd_even value (NOTE(review): presumably
+  0-3, one per render thread - confirm against callers) its own BLOCK_NUM slots.
+  An entry matches when every piece of state recorded in it equals the
+  corresponding value in params/state/voodoo.
+
+  The search starts at the slot that matched last time (last_block[odd_even])
+  and wraps round all BLOCK_NUM slots, so repeated requests for the same state
+  hit on the first compare.
+
+  On a miss the slot at next_block_to_write[odd_even] is regenerated
+  (round-robin replacement), its key is updated and voodoo_recomp is bumped.
+  The increment is a plain, unsynchronised ++.
+
+  Returns a pointer to the first byte of the generated code.*/
+static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even)
+{
+        int c;
+        int b = last_block[odd_even];
+        int is_tiled = (params->col_tiled || params->aux_tiled) ? 1 : 0;
+        voodoo_x86_data_t *voodoo_x86_data = voodoo->codegen_data;
+        voodoo_x86_data_t *data;
+
+        for (c = 0; c < BLOCK_NUM; c++)
+        {
+                /*Index by b, not c, so the scan really does begin at the
+                  most recently used slot*/
+                data = &voodoo_x86_data[odd_even + b*4];
+
+                if (state->xdir == data->xdir &&
+                    params->alphaMode == data->alphaMode &&
+                    params->fbzMode == data->fbzMode &&
+                    params->fogMode == data->fogMode &&
+                    params->fbzColorPath == data->fbzColorPath &&
+                    (voodoo->trexInit1[0] & (1 << 18)) == data->trexInit1 &&
+                    params->textureMode[0] == data->textureMode[0] &&
+                    params->textureMode[1] == data->textureMode[1] &&
+                    (params->tLOD[0] & LOD_MASK) == data->tLOD[0] &&
+                    (params->tLOD[1] & LOD_MASK) == data->tLOD[1] &&
+                    is_tiled == data->is_tiled)
+                {
+                        last_block[odd_even] = b;
+                        return data->code_block;
+                }
+
+                b = (b + 1) % BLOCK_NUM;
+        }
+
+        /*Cache miss - compile a new block over the oldest slot*/
+        voodoo_recomp++;
+        data = &voodoo_x86_data[odd_even + next_block_to_write[odd_even]*4];
+
+        voodoo_generate(data->code_block, voodoo, params, state, depth_op);
+
+        /*Record the state the block was built for; must mirror the
+          comparison in the search loop above*/
+        data->xdir = state->xdir;
+        data->alphaMode = params->alphaMode;
+        data->fbzMode = params->fbzMode;
+        data->fogMode = params->fogMode;
+        data->fbzColorPath = params->fbzColorPath;
+        data->trexInit1 = voodoo->trexInit1[0] & (1 << 18);
+        data->textureMode[0] = params->textureMode[0];
+        data->textureMode[1] = params->textureMode[1];
+        data->tLOD[0] = params->tLOD[0] & LOD_MASK;
+        data->tLOD[1] = params->tLOD[1] & LOD_MASK;
+        data->is_tiled = is_tiled;
+
+        next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) % BLOCK_NUM;
+
+        return data->code_block;
+}
+
+/*Set up the recompiler: allocate the executable block cache and fill in the
+  constant tables referenced by generated code.
+
+  The cache holds BLOCK_NUM*4 voodoo_x86_data_t entries in memory that is
+  readable, writable and executable. Allocation failure is reported through
+  fatal() instead of being left to fault on first use.
+
+  Tables built here :
+    alookup[a]      - a in each of the low four 16-bit words (a = 0..256)
+    aminuslookup[a] - 255-a in each of the low four 16-bit words (a = 0..255)
+    bilinear_lookup - for each packed fraction c (low nibble ds, high nibble
+                      dt) two vectors of eight 16-bit weights :
+                        [c*2]   = (16-ds)*(16-dt) x4 (low), ds*(16-dt) x4 (high)
+                        [c*2+1] = (16-ds)*dt x4 (low),      ds*dt x4 (high)
+                      The four distinct weights always sum to 256.
+    xmm_00_ff_w     - entry 0 all zero, entry 1 0x00ff in the low four words*/
+void voodoo_codegen_init(voodoo_t *voodoo)
+{
+        const size_t codegen_size = sizeof(voodoo_x86_data_t) * BLOCK_NUM * 4;
+        int c;
+
+#if WIN64
+        voodoo->codegen_data = VirtualAlloc(NULL, codegen_size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+        if (!voodoo->codegen_data)
+                fatal("voodoo_codegen_init : VirtualAlloc failed\n");
+#else
+        /*Anonymous mappings should pass fd -1; some systems reject any other value*/
+        voodoo->codegen_data = mmap(0, codegen_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0);
+        if (voodoo->codegen_data == MAP_FAILED)
+                fatal("voodoo_codegen_init : mmap failed\n");
+#endif
+
+        for (c = 0; c < 256; c++)
+        {
+                int d[4];
+                int frac_s = c & 0xf;
+                int frac_t = c >> 4;
+                int inv_c = 255 - c;
+
+                alookup[c] = _mm_set_epi32(0, 0, c | (c << 16), c | (c << 16));
+                aminuslookup[c] = _mm_set_epi32(0, 0, inv_c | (inv_c << 16), inv_c | (inv_c << 16));
+
+                /*Bilinear weights for the four neighbouring texels*/
+                d[0] = (16 - frac_s) * (16 - frac_t);
+                d[1] = frac_s * (16 - frac_t);
+                d[2] = (16 - frac_s) * frac_t;
+                d[3] = frac_s * frac_t;
+
+                bilinear_lookup[c*2] = _mm_set_epi32(d[1] | (d[1] << 16), d[1] | (d[1] << 16), d[0] | (d[0] << 16), d[0] | (d[0] << 16));
+                bilinear_lookup[c*2 + 1] = _mm_set_epi32(d[3] | (d[3] << 16), d[3] | (d[3] << 16), d[2] | (d[2] << 16), d[2] | (d[2] << 16));
+        }
+        /*Extra entry so an index of 256 is also valid*/
+        alookup[256] = _mm_set_epi32(0, 0, 256 | (256 << 16), 256 | (256 << 16));
+        xmm_00_ff_w[0] = _mm_set_epi32(0, 0, 0, 0);
+        xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16));
+}
+
+/*Release the executable block cache allocated by voodoo_codegen_init().
+
+  Does nothing if the cache pointer is already NULL, and clears the pointer
+  afterwards so a repeated call cannot release the region twice and stale
+  users see NULL rather than an unmapped address.*/
+void voodoo_codegen_close(voodoo_t *voodoo)
+{
+        if (!voodoo->codegen_data)
+                return;
+
+#if WIN64
+        VirtualFree(voodoo->codegen_data, 0, MEM_RELEASE);
+#else
+        munmap(voodoo->codegen_data, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4);
+#endif
+        voodoo->codegen_data = NULL;
+}
+
--- /dev/null
+/*Registers :
+
+ alphaMode
+ fbzMode & 0x1f3fff
+ fbzColorPath
+*/
+
+#if defined(__linux__) || defined(__APPLE__)
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+#if defined WIN32 || defined _WIN32 || defined _WIN32
+#define BITMAP windows_BITMAP
+#include <windows.h>
+#undef BITMAP
+#endif
+
+#include <xmmintrin.h>
+
+/*BLOCK_NUM  - NOTE(review): looks like the number of cached code blocks kept
+               per renderer slot; confirm against the block lookup code
+  BLOCK_MASK - BLOCK_NUM-1; only usable as a wrap-around mask while BLOCK_NUM
+               is a power of two
+  BLOCK_SIZE - size in bytes of the code_block buffer in voodoo_x86_data_t, and
+               the limit enforced by the addbyte/addword/addlong/addquad macros*/
+#define BLOCK_NUM 8
+#define BLOCK_MASK (BLOCK_NUM-1)
+#define BLOCK_SIZE 8192
+
+/*The tLOD bits tested when emitting texture fetch code (S and T mirroring)*/
+#define LOD_MASK (LOD_TMIRROR_S | LOD_TMIRROR_T)
+
+/*One cached block of generated code.
+  NOTE(review): the fields after code_block appear to record the render state
+  the code was generated for, so a later request can be matched against it -
+  confirm against the block lookup code.*/
+typedef struct voodoo_x86_data_t
+{
+ uint8_t code_block[BLOCK_SIZE]; /*Generated machine code*/
+ int xdir; /*Compared with state->xdir*/
+ uint32_t alphaMode;
+ uint32_t fbzMode;
+ uint32_t fogMode;
+ uint32_t fbzColorPath;
+ uint32_t textureMode[2]; /*One entry per TMU*/
+ uint32_t tLOD[2]; /*One entry per TMU; presumably masked with LOD_MASK*/
+ uint32_t trexInit1;
+ int is_tiled;
+} voodoo_x86_data_t;
+
+/*Per-slot cache bookkeeping, four entries each. Only two initialisers are
+  written, but the remaining elements are zero-initialised as well.
+  NOTE(review): presumably indexed by the odd_even argument of the block
+  lookup; last_block is the slot that matched most recently and
+  next_block_to_write the slot to overwrite on the next miss - confirm.*/
+static int last_block[4] = {0, 0};
+static int next_block_to_write[4] = {0, 0};
+
+/*Code emission helpers.
+
+  Each macro appends a value of the given width (1, 2, 4 or 8 bytes, stored in
+  host byte order) to code_block at offset block_pos and advances block_pos.
+  Both code_block and block_pos must be in scope where the macro is used.
+
+  The space check is made BEFORE the store, so a block that outgrows
+  BLOCK_SIZE is reported through fatal() before anything is written past the
+  end of the buffer. The limit is the same as before : the position after the
+  write must stay below BLOCK_SIZE.
+
+  val is evaluated exactly once and may itself refer to block_pos (as the
+  relative jump emitters do); it then sees the position before the advance.*/
+#define addbyte(val)                                            \
+        do {                                                    \
+                if (block_pos + 1 >= BLOCK_SIZE)                \
+                        fatal("Over!\n");                       \
+                code_block[block_pos++] = (val);                \
+        } while (0)
+
+#define addword(val)                                            \
+        do {                                                    \
+                if (block_pos + 2 >= BLOCK_SIZE)                \
+                        fatal("Over!\n");                       \
+                *(uint16_t *)&code_block[block_pos] = (val);    \
+                block_pos += 2;                                 \
+        } while (0)
+
+#define addlong(val)                                            \
+        do {                                                    \
+                if (block_pos + 4 >= BLOCK_SIZE)                \
+                        fatal("Over!\n");                       \
+                *(uint32_t *)&code_block[block_pos] = (val);    \
+                block_pos += 4;                                 \
+        } while (0)
+
+#define addquad(val)                                            \
+        do {                                                    \
+                if (block_pos + 8 >= BLOCK_SIZE)                \
+                        fatal("Over!\n");                       \
+                *(uint64_t *)&code_block[block_pos] = (val);    \
+                block_pos += 8;                                 \
+        } while (0)
+
+
+/*Constant vectors for the generated code. The trailing comments give the
+  intended 64-bit contents; no initialiser is present at the definition.*/
+static __m128i xmm_01_w;// = 0x0001000100010001ull;
+static __m128i xmm_ff_w;// = 0x00ff00ff00ff00ffull;
+static __m128i xmm_ff_b;// = 0x00000000ffffffffull;
+
+/*Memory operand for the "CMOVS reg, zero" clamps emitted by the texture fetch code*/
+static uint32_t zero = 0;
+/*Loaded with FLD and divided by the TMU W value in the perspective texture path.
+  NOTE(review): the name suggests 2^48 but the initialiser is 1 << 4 (16.0) -
+  confirm which is intended.*/
+static double const_1_48 = (double)(1ull << 4);
+
+/*alookup has one more entry than aminuslookup (257 vs 256)*/
+static __m128i alookup[257], aminuslookup[256];
+static __m128i minus_254;// = 0xff02ff02ff02ff02ull;
+/*Two vectors per entry; the texture fetch code indexes it by a byte offset of
+  (4-bit S fraction | 4-bit T fraction << 4) * 32*/
+static __m128i bilinear_lookup[256*2];
+static __m128i xmm_00_ff_w[2];
+static uint32_t i_00_ff_w[2] = {0, 0xff};
+
+static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu)
+{
+ if (params->textureMode[tmu] & 1)
+ {
+ addbyte(0xdf); /*FILDq state->tmu0_w*/
+ addbyte(0xaf);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w));
+ addbyte(0xdd); /*FLDq const_1_48*/
+ addbyte(0x05);
+ addlong((uint32_t)&const_1_48);
+ addbyte(0xde); /*FDIV ST(1)*/
+ addbyte(0xf1);
+ addbyte(0xdf); /*FILDq state->tmu0_s*/
+ addbyte(0xaf);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s));
+ addbyte(0xdf); /*FILDq state->tmu0_t*/ /*ST(0)=t, ST(1)=s, ST(2)=1/w*/
+ addbyte(0xaf);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t));
+ addbyte(0xd9); /*FXCH ST(1)*/ /*ST(0)=s, ST(1)=t, ST(2)=1/w*/
+ addbyte(0xc9);
+ addbyte(0xd8); /*FMUL ST(2)*/ /*ST(0)=s/w, ST(1)=t, ST(2)=1/w*/
+ addbyte(0xca);
+ addbyte(0xd9); /*FXCH ST(1)*/ /*ST(0)=t, ST(1)=s/w, ST(2)=1/w*/
+ addbyte(0xc9);
+ addbyte(0xd8); /*FMUL ST(2)*/ /*ST(0)=t/w, ST(1)=s/w, ST(2)=1/w*/
+ addbyte(0xca);
+ addbyte(0xd9); /*FXCH ST(2)*/ /*ST(0)=1/w, ST(1)=s/w, ST(2)=t/w*/
+ addbyte(0xca);
+ addbyte(0xd9); /*FSTPs log_temp*/ /*ST(0)=s/w, ST(1)=t/w*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, log_temp));
+ addbyte(0xdf); /*FSITPq state->tex_s*/
+ addbyte(0xbf);
+ addlong(offsetof(voodoo_state_t, tex_s));
+ addbyte(0x8b); /*MOV EAX, log_temp*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, log_temp));
+ addbyte(0xdf); /*FSITPq state->tex_t*/
+ addbyte(0xbf);
+ addlong(offsetof(voodoo_state_t, tex_t));
+ addbyte(0xc1); /*SHR EAX, 23-8*/
+ addbyte(0xe8);
+ addbyte(15);
+ addbyte(0x0f); /*MOVZX EBX, AL*/
+ addbyte(0xb6);
+ addbyte(0xd8);
+ addbyte(0x25); /*AND EAX, 0xff00*/
+ addlong(0xff00);
+ addbyte(0x2d); /*SUB EAX, (127-44)<<8*/
+ addlong((127-44+19) << 8);
+ addbyte(0x0f); /*MOVZX EBX, logtable[EBX]*/
+ addbyte(0xb6);
+ addbyte(0x9b);
+ addlong((uint32_t)logtable);
+ addbyte(0x09); /*OR EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x03); /*ADD EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tmu[tmu].lod));
+ addbyte(0x3b); /*CMP EAX, state->lod_min*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_min[tmu]));
+ addbyte(0x0f); /*CMOVL EAX, state->lod_min*/
+ addbyte(0x4c);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_min[tmu]));
+ addbyte(0x3b); /*CMP EAX, state->lod_max*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_max[tmu]));
+ addbyte(0x0f); /*CMOVNL EAX, state->lod_max*/
+ addbyte(0x4d);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_max[tmu]));
+ addbyte(0x0f); /*MOVZX EBX, AL*/
+ addbyte(0xb6);
+ addbyte(0xd8);
+ addbyte(0xc1); /*SHR EAX, 8*/
+ addbyte(0xe8);
+ addbyte(8);
+ addbyte(0x89); /*MOV state->lod_frac[tmu], EBX*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, lod_frac[tmu]));
+ addbyte(0x89); /*MOV state->lod, EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ }
+ else
+ {
+ addbyte(0xf3); /*MOVQ XMM4, state->tmu0_s*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xa7);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s));
+ addbyte(0xf3); /*MOVQ XMM5, state->tmu0_t*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xaf);
+ addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t));
+ addbyte(0xc7); /*MOV state->lod[tmu], 0*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_frac[tmu]));
+ addlong(0);
+ addbyte(0x8b); /*MOV EAX, state->lod_min*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_min[tmu]));
+ addbyte(0x66); /*SHRQ XMM4, 28*/
+ addbyte(0x0f);
+ addbyte(0x73);
+ addbyte(0xd4);
+ addbyte(28);
+ addbyte(0x66); /*SHRQ XMM5, 28*/
+ addbyte(0x0f);
+ addbyte(0x73);
+ addbyte(0xd5);
+ addbyte(28);
+ addbyte(0x0f); /*MOVZX EBX, AL*/
+ addbyte(0xb6);
+ addbyte(0xd8);
+ addbyte(0xc1); /*SHR EAX, 8*/
+ addbyte(0xe8);
+ addbyte(8);
+ addbyte(0x66); /*MOVQ state->tex_s, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xd6);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tex_s));
+ addbyte(0x66); /*MOVQ state->tex_t, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd6);
+ addbyte(0xaf);
+ addlong(offsetof(voodoo_state_t, tex_t));
+ addbyte(0x89); /*MOV state->lod_frac[tmu], EBX*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, lod_frac[tmu]));
+ addbyte(0x89); /*MOV state->lod, EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ }
+ /*EAX = state->lod*/
+ if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED)
+ {
+ if (voodoo->bilinear_enabled && (params->textureMode[tmu] & 6))
+ {
+ addbyte(0x8b); /*MOV ECX, state->tex_lod[tmu]*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, tex_lod[tmu]));
+ addbyte(0xb2); /*MOV DL, 8*/
+ addbyte(8);
+ addbyte(0x8b); /*MOV ECX, [ECX+EAX*4]*/
+ addbyte(0x0c);
+ addbyte(0x81);
+ addbyte(0xbd); /*MOV EBP, 8*/
+ addlong(8);
+ addbyte(0x28); /*SUB DL, CL*/
+ addbyte(0xca);
+ addbyte(0xd3); /*SHL EBP, CL*/
+ addbyte(0xe5);
+ addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_s));
+ addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_t));
+ if (params->tLOD[tmu] & LOD_TMIRROR_S)
+ {
+ addbyte(0xa9); /*TEST EAX, 0x1000*/
+ addlong(0x1000);
+ addbyte(0x74); /*JZ +*/
+ addbyte(2);
+ addbyte(0xf7); /*NOT EAX*/
+ addbyte(0xd0);
+ }
+ if (params->tLOD[tmu] & LOD_TMIRROR_T)
+ {
+ addbyte(0xf7); /*TEST EBX, 0x1000*/
+ addbyte(0xc3);
+ addlong(0x1000);
+ addbyte(0x74); /*JZ +*/
+ addbyte(2);
+ addbyte(0xf7); /*NOT EBX*/
+ addbyte(0xd3);
+ }
+ addbyte(0x29); /*SUB EAX, EBP*/
+ addbyte(0xe8);
+ addbyte(0x29); /*SUB EBX, EBP*/
+ addbyte(0xeb);
+ addbyte(0xd3); /*SAR EAX, CL*/
+ addbyte(0xf8);
+ addbyte(0xd3); /*SAR EBX, CL*/
+ addbyte(0xfb);
+ addbyte(0x89); /*MOV EBP, EAX*/
+ addbyte(0xc5);
+ addbyte(0x89); /*MOV ECX, EBX*/
+ addbyte(0xd9);
+ addbyte(0x83); /*AND EBP, 0xf*/
+ addbyte(0xe5);
+ addbyte(0xf);
+ addbyte(0xc1); /*SHL ECX, 4*/
+ addbyte(0xe1);
+ addbyte(4);
+ addbyte(0xc1); /*SAR EAX, 4*/
+ addbyte(0xf8);
+ addbyte(4);
+ addbyte(0x81); /*AND ECX, 0xf0*/
+ addbyte(0xe1);
+ addlong(0xf0);
+ addbyte(0xc1); /*SAR EBX, 4*/
+ addbyte(0xfb);
+ addbyte(4);
+ addbyte(0x09); /*OR EBP, ECX*/
+ addbyte(0xcd);
+ addbyte(0x8b); /*MOV ECX, state->lod[EDI]*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xc1); /*SHL EBP, 5*/
+ addbyte(0xe5);
+ addbyte(5);
+ /*EAX = S, EBX = T, ECX = LOD, EDX = tex_shift, ESI=params, EDI=state, EBP = bilinear shift*/
+ addbyte(0x8d); /*LEA ESI, [ESI+ECX*4]*/
+ addbyte(0x34);
+ addbyte(0x8e);
+ addbyte(0x89); /*MOV ebp_store, EBP*/
+ addbyte(0xaf);
+ addlong(offsetof(voodoo_state_t, ebp_store));
+ addbyte(0x8b); /*MOV EBP, state->tex[EDI+ECX*4]*/
+ addbyte(0xac);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, tex[tmu]));
+ addbyte(0x88); /*MOV CL, DL*/
+ addbyte(0xd1);
+ addbyte(0x89); /*MOV EDX, EBX*/
+ addbyte(0xda);
+ if (!state->clamp_s[tmu])
+ {
+ addbyte(0x23); /*AND EAX, params->tex_w_mask[ESI]*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]));
+ }
+ addbyte(0x83); /*ADD EDX, 1*/
+ addbyte(0xc2);
+ addbyte(1);
+ if (state->clamp_t[tmu])
+ {
+ addbyte(0x0f); /*CMOVS EDX, zero*/
+ addbyte(0x48);
+ addbyte(0x15);
+ addlong((uint32_t)&zero);
+ addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/
+ addbyte(0x96);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ addbyte(0x0f); /*CMOVA EDX, params->tex_h_mask[ESI]*/
+ addbyte(0x47);
+ addbyte(0x96);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ addbyte(0x85); /*TEST EBX,EBX*/
+ addbyte(0xdb);
+ addbyte(0x0f); /*CMOVS EBX, zero*/
+ addbyte(0x48);
+ addbyte(0x1d);
+ addlong((uint32_t)&zero);
+ addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ addbyte(0x0f); /*CMOVA EBX, params->tex_h_mask[ESI]*/
+ addbyte(0x47);
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ }
+ else
+ {
+ addbyte(0x23); /*AND EDX, params->tex_h_mask[ESI]*/
+ addbyte(0x96);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ addbyte(0x23); /*AND EBX, params->tex_h_mask[ESI]*/
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]));
+ }
+ /*EAX = S, EBX = T0, EDX = T1*/
+ addbyte(0xd3); /*SHL EBX, CL*/
+ addbyte(0xe3);
+ addbyte(0xd3); /*SHL EDX, CL*/
+ addbyte(0xe2);
+ addbyte(0x8d); /*LEA EBX,[EBP+EBX*2]*/
+ addbyte(0x5c);
+ addbyte(0x9d);
+ addbyte(0);
+ addbyte(0x8d); /*LEA EDX,[EBP+EDX*2]*/
+ addbyte(0x54);
+ addbyte(0x95);
+ addbyte(0);
+ if (state->clamp_s[tmu])
+ {
+ addbyte(0x8b); /*MOV EBP, params->tex_w_mask[ESI]*/
+ addbyte(0xae);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]));
+ addbyte(0x85); /*TEST EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0x8b); /*MOV ESI, ebp_store*/
+ addbyte(0xb7);
+ addlong(offsetof(voodoo_state_t, ebp_store));
+ addbyte(0x0f); /*CMOVS EAX, zero*/
+ addbyte(0x48);
+ addbyte(0x05);
+ addlong((uint32_t)&zero);
+ addbyte(0x78); /*JS + - clamp on 0*/
+ addbyte(2+3+2+ 5+5+2);
+ addbyte(0x3b); /*CMP EAX, EBP*/
+ addbyte(0xc5);
+ addbyte(0x0f); /*CMOVAE EAX, EBP*/
+ addbyte(0x43);
+ addbyte(0xc5);
+ addbyte(0x73); /*JAE + - clamp on +*/
+ addbyte(5+5+2);
+ }
+ else
+ {
+ addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI] - is S at texture edge (ie will wrap/clamp)?*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]));
+ addbyte(0x8b); /*MOV ESI, ebp_store*/
+ addbyte(0xb7);
+ addlong(offsetof(voodoo_state_t, ebp_store));
+ addbyte(0x74); /*JE +*/
+ addbyte(5+5+2);
+ }
+
+ addbyte(0xf3); /*MOVQ XMM0, [EBX+EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0x04);
+ addbyte(0x83);
+ addbyte(0xf3); /*MOVQ XMM1, [EDX+EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0x0c);
+ addbyte(0x82);
+
+ if (state->clamp_s[tmu])
+ {
+ addbyte(0xeb); /*JMP +*/
+ addbyte(5+5+4+4);
+
+ /*S clamped - the two S coordinates are the same*/
+ addbyte(0x66); /*MOVD XMM0, [EBX+EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x04);
+ addbyte(0x83);
+ addbyte(0x66); /*MOVD XMM1, [EDX+EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x0c);
+ addbyte(0x82);
+ addbyte(0x66); /*PUNPCKLDQ XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x62);
+ addbyte(0xc0);
+ addbyte(0x66); /*PUNPCKLDQ XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x62);
+ addbyte(0xc9);
+ }
+ else
+ {
+ addbyte(0xeb); /*JMP +*/
+ addbyte(5+5+5+5+6+6);
+
+ /*S wrapped - the two S coordinates are not contiguous*/
+ addbyte(0x66); /*MOVD XMM0, [EBX+EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x04);
+ addbyte(0x83);
+ addbyte(0x66); /*MOVD XMM1, [EDX+EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x0c);
+ addbyte(0x82);
+ addbyte(0x66); /*PINSRW XMM0, [EBX], 2*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x03);
+ addbyte(0x02);
+ addbyte(0x66); /*PINSRW XMM1, [EDX], 2*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x0a);
+ addbyte(0x02);
+ addbyte(0x66); /*PINSRW XMM0, 2[EBX], 3*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x43);
+ addbyte(0x02);
+ addbyte(0x03);
+ addbyte(0x66); /*PINSRW XMM1, 2[EDX], 3*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x4a);
+ addbyte(0x02);
+ addbyte(0x03);
+ }
+
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+ addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xca);
+
+ addbyte(0x81); /*ADD ESI, bilinear_lookup*/
+ addbyte(0xc6);
+ addlong((uint32_t)bilinear_lookup);
+
+ addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x06);
+ addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x4e);
+ addbyte(0x10);
+ addbyte(0x66); /*PADDW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc0 | 1 | (0 << 3));
+ addbyte(0x66); /*MOV XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0xc0 | 0 | (1 << 3));
+ addbyte(0x66); /*PSRLDQ XMM0, 64*/
+ addbyte(0x0f);
+ addbyte(0x73);
+ addbyte(0xd8);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc0 | 1 | (0 << 3));
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0 | 0);
+ addbyte(8);
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+
+ addbyte(0x8b); /*MOV ESI, [ESP+8]*/
+ addbyte(0x74);
+ addbyte(0x24);
+ addbyte(8+16); /*CHECK!*/
+
+ addbyte(0x66); /*MOV EAX, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc0);
+ }
+ else
+ {
+ addbyte(0x8b); /*MOV ECX, state->tex_lod[tmu]*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, tex_lod[tmu]));
+ addbyte(0xb2); /*MOV DL, 8*/
+ addbyte(8);
+ addbyte(0x8b); /*MOV ECX, [ECX+EAX*4]*/
+ addbyte(0x0c);
+ addbyte(0x81);
+ addbyte(0x8b); /*MOV EBP, state->tex[EDI+ECX*4]*/
+ addbyte(0xac);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, tex[tmu]));
+ addbyte(0x28); /*SUB DL, CL*/
+ addbyte(0xca);
+ addbyte(0x80); /*ADD CL, 4*/
+ addbyte(0xc1);
+ addbyte(4);
+ addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_s));
+ addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_t));
+ if (params->tLOD[tmu] & LOD_TMIRROR_S)
+ {
+ addbyte(0xa9); /*TEST EAX, 0x1000*/
+ addlong(0x1000);
+ addbyte(0x74); /*JZ +*/
+ addbyte(2);
+ addbyte(0xf7); /*NOT EAX*/
+ addbyte(0xd0);
+ }
+ if (params->tLOD[tmu] & LOD_TMIRROR_T)
+ {
+ addbyte(0xf7); /*TEST EBX, 0x1000*/
+ addbyte(0xc3);
+ addlong(0x1000);
+ addbyte(0x74); /*JZ +*/
+ addbyte(2);
+ addbyte(0xf7); /*NOT EBX*/
+ addbyte(0xd3);
+ }
+ addbyte(0xd3); /*SHR EAX, CL*/
+ addbyte(0xe8);
+ addbyte(0xd3); /*SHR EBX, CL*/
+ addbyte(0xeb);
+ if (state->clamp_s[tmu])
+ {
+ addbyte(0x85); /*TEST EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0x0f); /*CMOVS EAX, zero*/
+ addbyte(0x48);
+ addbyte(0x05);
+ addlong((uint32_t)&zero);
+ addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/
+ addbyte(0x84);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10);
+ addbyte(0x0f); /*CMOVAE EAX, params->tex_w_mask[ESI+ECX*4]*/
+ addbyte(0x43);
+ addbyte(0x84);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10);
+
+ }
+ else
+ {
+ addbyte(0x23); /*AND EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/
+ addbyte(0x84);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10);
+ }
+ if (state->clamp_t[tmu])
+ {
+ addbyte(0x85); /*TEST EBX, EBX*/
+ addbyte(0xdb);
+ addbyte(0x0f); /*CMOVS EBX, zero*/
+ addbyte(0x48);
+ addbyte(0x1d);
+ addlong((uint32_t)&zero);
+ addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/
+ addbyte(0x9c);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10);
+ addbyte(0x0f); /*CMOVAE EBX, params->tex_h_mask[ESI+ECX*4]*/
+ addbyte(0x43);
+ addbyte(0x9c);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10);
+ }
+ else
+ {
+ addbyte(0x23); /*AND EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/
+ addbyte(0x9c);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10);
+ }
+ addbyte(0x88); /*MOV CL, DL*/
+ addbyte(0xd1);
+ addbyte(0xd3); /*SHL EBX, CL*/
+ addbyte(0xe3);
+ addbyte(0x01); /*ADD EBX, EAX*/
+ addbyte(0xc3);
+
+ addbyte(0x8b); /*MOV EAX,[EBP+EBX*4]*/
+ addbyte(0x44);
+ addbyte(0x9d);
+ addbyte(0);
+ }
+ }
+
+ return block_pos;
+}
+
+static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop)
+{
+ int block_pos = 0;
+ int z_skip_pos = 0;
+ int a_skip_pos = 0;
+ int chroma_skip_pos = 0;
+ int depth_jump_pos = 0;
+ int depth_jump_pos2 = 0;
+ int loop_jump_pos = 0;
+// xmm_01_w = (__m128i)0x0001000100010001ull;
+// xmm_ff_w = (__m128i)0x00ff00ff00ff00ffull;
+// xmm_ff_b = (__m128i)0x00000000ffffffffull;
+ xmm_01_w = _mm_set_epi32(0, 0, 0x00010001, 0x00010001);
+ xmm_ff_w = _mm_set_epi32(0, 0, 0x00ff00ff, 0x00ff00ff);
+ xmm_ff_b = _mm_set_epi32(0, 0, 0, 0x00ffffff);
+ minus_254 = _mm_set_epi32(0, 0, 0xff02ff02, 0xff02ff02);
+// *(uint64_t *)&const_1_48 = 0x45b0000000000000ull;
+// block_pos = 0;
+// voodoo_get_depth = &code_block[block_pos];
+ /*W at (%esp+4)
+ Z at (%esp+12)
+ new_depth at (%esp+16)*/
+// if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depth_op == DEPTHOP_NEVER))
+// {
+// addbyte(0xC3); /*RET*/
+// return;
+// }
+ addbyte(0x55); /*PUSH EBP*/
+ addbyte(0x57); /*PUSH EDI*/
+ addbyte(0x56); /*PUSH ESI*/
+ addbyte(0x53); /*PUSH EBX*/
+
+ addbyte(0x8b); /*MOV EDI, [ESP+4]*/
+ addbyte(0x7c);
+ addbyte(0x24);
+ addbyte(4+16);
+ loop_jump_pos = block_pos;
+ addbyte(0x8b); /*MOV ESI, [ESP+8]*/
+ addbyte(0x74);
+ addbyte(0x24);
+ addbyte(8+16);
+ if (params->col_tiled || params->aux_tiled)
+ {
+ addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x89); /*MOV EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x83); /*AND EAX, 63*/
+ addbyte(0xe0);
+ addbyte(63);
+ addbyte(0xc1); /*SHR EBX, 6*/
+ addbyte(0xeb);
+ addbyte(6);
+ addbyte(0xc1); /*SHL EBX, 11 - tile is 128*32, << 12, div 2 because word index*/
+ addbyte(0xe3);
+ addbyte(11);
+ addbyte(0x01); /*ADD EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x89); /*MOV state->x_tiled[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ }
+ addbyte(0x66); /*PXOR XMM2, XMM2*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xd2);
+
+ if ((params->fbzMode & FBZ_W_BUFFER) || (params->fogMode & (FOG_ENABLE|FOG_CONSTANT|FOG_Z|FOG_ALPHA)) == FOG_ENABLE)
+ {
+ addbyte(0xb8); /*MOV new_depth, 0*/
+ addlong(0);
+ addbyte(0x66); /*TEST w+4, 0xffff*/
+ addbyte(0xf7);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w)+4);
+ addword(0xffff);
+ addbyte(0x75); /*JNZ got_depth*/
+ depth_jump_pos = block_pos;
+ addbyte(0);
+// addbyte(4+5+2+3+2+5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3);
+ addbyte(0x8b); /*MOV EDX, w*/
+ addbyte(0x97);
+ addlong(offsetof(voodoo_state_t, w));
+ addbyte(0xb8); /*MOV new_depth, 0xf001*/
+ addlong(0xf001);
+ addbyte(0x89); /*MOV EBX, EDX*/
+ addbyte(0xd3);
+ addbyte(0xc1); /*SHR EDX, 16*/
+ addbyte(0xea);
+ addbyte(16);
+ addbyte(0x74); /*JZ got_depth*/
+ depth_jump_pos2 = block_pos;
+ addbyte(0);
+// addbyte(5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3);
+ addbyte(0xb9); /*MOV ECX, 19*/
+ addlong(19);
+ addbyte(0x0f); /*BSR EAX, EDX*/
+ addbyte(0xbd);
+ addbyte(0xc2);
+ addbyte(0xba); /*MOV EDX, 15*/
+ addlong(15);
+ addbyte(0xf7); /*NOT EBX*/
+ addbyte(0xd3);
+ addbyte(0x29); /*SUB EDX, EAX - EDX = exp*/
+ addbyte(0xc2);
+ addbyte(0x29); /*SUB ECX, EDX*/
+ addbyte(0xd1);
+ addbyte(0xc1); /*SHL EDX, 12*/
+ addbyte(0xe2);
+ addbyte(12);
+ addbyte(0xd3); /*SHR EBX, CL*/
+ addbyte(0xeb);
+ addbyte(0x81); /*AND EBX, 0xfff - EBX = mant*/
+ addbyte(0xe3);
+ addlong(0xfff);
+ addbyte(0x8d); /*LEA EAX, 1[EDX, EBX]*/
+ addbyte(0x44);
+ addbyte(0x13);
+ addbyte(1);
+ addbyte(0xbb); /*MOV EBX, 0xffff*/
+ addlong(0xffff);
+ addbyte(0x39); /*CMP EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x0f); /*CMOVA EAX, EBX*/
+ addbyte(0x47);
+ addbyte(0xc3);
+
+ if (depth_jump_pos)
+ *(uint8_t *)&code_block[depth_jump_pos] = (block_pos - depth_jump_pos) - 1;
+ if (depth_jump_pos)
+ *(uint8_t *)&code_block[depth_jump_pos2] = (block_pos - depth_jump_pos2) - 1;
+
+ if ((params->fogMode & (FOG_ENABLE|FOG_CONSTANT|FOG_Z|FOG_ALPHA)) == FOG_ENABLE)
+ {
+ addbyte(0x89); /*MOV state->w_depth[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w_depth));
+ }
+ }
+ if (!(params->fbzMode & FBZ_W_BUFFER))
+ {
+ addbyte(0x8b); /*MOV EAX, z*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, z));
+ addbyte(0xbb); /*MOV EBX, 0xffff*/
+ addlong(0xffff);
+ addbyte(0x31); /*XOR ECX, ECX*/
+ addbyte(0xc9);
+ addbyte(0xc1); /*SAR EAX, 12*/
+ addbyte(0xf8);
+ addbyte(12);
+ addbyte(0x0f); /*CMOVS EAX, ECX*/
+ addbyte(0x48);
+ addbyte(0xc1);
+ addbyte(0x39); /*CMP EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x0f); /*CMOVA EAX, EBX*/
+ addbyte(0x47);
+ addbyte(0xc3);
+ }
+
+ if (params->fbzMode & FBZ_DEPTH_BIAS)
+ {
+ addbyte(0x0f); /*MOVSX EDX, params->zaColor[ESI]*/
+ addbyte(0xbf);
+ addbyte(0x96);
+ addlong(offsetof(voodoo_params_t, zaColor));
+ if (params->fbzMode & FBZ_W_BUFFER)
+ {
+ addbyte(0xbb); /*MOV EBX, 0xffff*/
+ addlong(0xffff);
+ addbyte(0x31); /*XOR ECX, ECX*/
+ addbyte(0xc9);
+ }
+ addbyte(0x01); /*ADD EAX, EDX*/
+ addbyte(0xd0);
+ addbyte(0x0f); /*CMOVS EAX, ECX*/
+ addbyte(0x48);
+ addbyte(0xc1);
+ addbyte(0x39); /*CMP EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x0f); /*CMOVA EAX, EBX*/
+ addbyte(0x47);
+ addbyte(0xc3);
+ }
+
+ addbyte(0x89); /*MOV state->new_depth[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, new_depth));
+
+ if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop != DEPTHOP_ALWAYS) && (depthop != DEPTHOP_NEVER))
+ {
+ addbyte(0x8b); /*MOV EBX, state->x[EDI]*/
+ addbyte(0x9f);
+ if (params->aux_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x8b);/*MOV ECX, aux_mem[EDI]*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, aux_mem));
+ addbyte(0x0f); /*MOVZX EBX, [ECX+EBX*2]*/
+ addbyte(0xb7);
+ addbyte(0x1c);
+ addbyte(0x59);
+ if (params->fbzMode & FBZ_DEPTH_SOURCE)
+ {
+ addbyte(0x0f); /*MOVZX EAX, zaColor[ESI]*/
+ addbyte(0xb7);
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, zaColor));
+ }
+ addbyte(0x39); /*CMP EAX, EBX*/
+ addbyte(0xd8);
+ if (depthop == DEPTHOP_LESSTHAN)
+ {
+ addbyte(0x0f); /*JAE skip*/
+ addbyte(0x83);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_EQUAL)
+ {
+ addbyte(0x0f); /*JNE skip*/
+ addbyte(0x85);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_LESSTHANEQUAL)
+ {
+ addbyte(0x0f); /*JA skip*/
+ addbyte(0x87);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_GREATERTHAN)
+ {
+ addbyte(0x0f); /*JBE skip*/
+ addbyte(0x86);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_NOTEQUAL)
+ {
+ addbyte(0x0f); /*JE skip*/
+ addbyte(0x84);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else if (depthop == DEPTHOP_GREATERTHANEQUAL)
+ {
+ addbyte(0x0f); /*JB skip*/
+ addbyte(0x82);
+ z_skip_pos = block_pos;
+ addlong(0);
+ }
+ else
+ fatal("Bad depth_op\n");
+ }
+ else if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop == DEPTHOP_NEVER))
+ {
+ addbyte(0xC3); /*RET*/
+// addbyte(0x30); /*XOR EAX, EAX*/
+// addbyte(0xc0);
+ }
+// else
+// {
+// addbyte(0xb0); /*MOV AL, 1*/
+// addbyte(1);
+// }
+
+
+// voodoo_combine = &code_block[block_pos];
+ /*XMM0 = colour*/
+ /*XMM2 = 0 (for unpacking)*/
+
+ /*EDI = state, ESI = params*/
+
+ if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus)
+ {
+ /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/
+ block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0);
+
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ addbyte(0xc1); /*SHR EAX, 24*/
+ addbyte(0xe8);
+ addbyte(24);
+ addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ }
+ else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH)
+ {
+ /*TMU0 in pass-through mode, only sample TMU1*/
+ block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1);
+
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ addbyte(0xc1); /*SHR EAX, 24*/
+ addbyte(0xe8);
+ addbyte(24);
+ addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ }
+ else
+ {
+ block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1);
+
+ addbyte(0x66); /*MOVD XMM3, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xd8);
+ if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && tc_sub_clocal_1)
+ {
+ addbyte(0x8b); /*MOV EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ if (!tc_reverse_blend_1)
+ {
+ addbyte(0xbb); /*MOV EBX, 1*/
+ addlong(1);
+ }
+ else
+ {
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ }
+ addbyte(0x83); /*AND EAX, 1*/
+ addbyte(0xe0);
+ addbyte(1);
+ if (!tca_reverse_blend_1)
+ {
+ addbyte(0xb9); /*MOV ECX, 1*/
+ addlong(1);
+ }
+ else
+ {
+ addbyte(0x31); /*XOR ECX, ECX*/
+ addbyte(0xc9);
+ }
+ addbyte(0x31); /*XOR EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x31); /*XOR ECX, EAX*/
+ addbyte(0xc1);
+ addbyte(0xc1); /*SHL EBX, 4*/
+ addbyte(0xe3);
+ addbyte(4);
+ /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/
+ }
+ addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xda);
+ if (tc_sub_clocal_1)
+ {
+ switch (tc_mselect_1)
+ {
+ case TC_MSELECT_ZERO:
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ break;
+ case TC_MSELECT_CLOCAL:
+ addbyte(0xf3); /*MOVQ XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc3);
+ break;
+ case TC_MSELECT_AOTHER:
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ break;
+ case TC_MSELECT_ALOCAL:
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xc3);
+ addbyte(0xff);
+ break;
+ case TC_MSELECT_DETAIL:
+ addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/
+ addlong(params->detail_bias[1]);
+ addbyte(0x2b); /*SUB EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xba); /*MOV EDX, params->detail_max[1]*/
+ addlong(params->detail_max[1]);
+ addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/
+ addbyte(0xe0);
+ addbyte(params->detail_scale[1]);
+ addbyte(0x39); /*CMP EAX, EDX*/
+ addbyte(0xd0);
+ addbyte(0x0f); /*CMOVNL EAX, EDX*/
+ addbyte(0x4d);
+ addbyte(0xc2);
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xc0);
+ addbyte(0);
+ break;
+ case TC_MSELECT_LOD_FRAC:
+ addbyte(0x66); /*MOVD XMM0, state->lod_frac[1]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_frac[1]));
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xc0);
+ addbyte(0);
+ break;
+ }
+ if (params->textureMode[1] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x66); /*PXOR XMM0, xmm_00_ff_w[EBX]*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0x83);
+ addlong((uint32_t)&xmm_00_ff_w[0]);
+ }
+ else if (!tc_reverse_blend_1)
+ {
+ addbyte(0x66); /*PXOR XMM0, xmm_ff_w*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0x05);
+ addlong((uint32_t)&xmm_ff_w);
+ }
+ addbyte(0x66); /*PADD XMM0, xmm_01_w*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x05);
+ addlong((uint32_t)&xmm_01_w);
+ addbyte(0xf3); /*MOVQ XMM1, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xca);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PMULLW XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xc3);
+ addbyte(0x66); /*PMULHW XMM5, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xe5);
+ addbyte(0xeb);
+ addbyte(0x66); /*PUNPCKLWD XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0x61);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRAD XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe0);
+ addbyte(8);
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ addbyte(0x66); /*PSUBW XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xc8);
+ if (tc_add_clocal_1)
+ {
+ addbyte(0x66); /*PADDW XMM1, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xcb);
+ }
+ else if (tc_add_alocal_1)
+ {
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xc3);
+ addbyte(0xff);
+ addbyte(0x66); /*PADDW XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc8);
+ }
+ addbyte(0xf3); /*MOVD XMM3, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xd9);
+ addbyte(0x66); /*PACKUSWB XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xdb);
+ if (tca_sub_clocal_1)
+ {
+ addbyte(0x66); /*MOVD EBX, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xdb);
+ }
+ addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xda);
+ }
+
+ if (tca_sub_clocal_1)
+ {
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ switch (tca_mselect_1)
+ {
+ case TCA_MSELECT_ZERO:
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ break;
+ case TCA_MSELECT_CLOCAL:
+ addbyte(0x89); /*MOV EAX, EBX*/
+ addbyte(0xd8);
+ break;
+ case TCA_MSELECT_AOTHER:
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ break;
+ case TCA_MSELECT_ALOCAL:
+ addbyte(0x89); /*MOV EAX, EBX*/
+ addbyte(0xd8);
+ break;
+ case TCA_MSELECT_DETAIL:
+ addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/
+ addlong(params->detail_bias[1]);
+ addbyte(0x2b); /*SUB EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xba); /*MOV EDX, params->detail_max[1]*/
+ addlong(params->detail_max[1]);
+ addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/
+ addbyte(0xe0);
+ addbyte(params->detail_scale[1]);
+ addbyte(0x39); /*CMP EAX, EDX*/
+ addbyte(0xd0);
+ addbyte(0x0f); /*CMOVNL EAX, EDX*/
+ addbyte(0x4d);
+ addbyte(0xc2);
+ break;
+ case TCA_MSELECT_LOD_FRAC:
+ addbyte(0x8b); /*MOV EAX, state->lod_frac[1]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod_frac[1]));
+ break;
+ }
+ if (params->textureMode[1] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x33); /*XOR EAX, i_00_ff_w[ECX*4]*/
+ addbyte(0x04);
+ addbyte(0x8d);
+ addlong((uint32_t)i_00_ff_w);
+ }
+ else if (!tc_reverse_blend_1)
+ {
+ addbyte(0x35); /*XOR EAX, 0xff*/
+ addlong(0xff);
+ }
+ addbyte(0x83); /*ADD EAX, 1*/
+ addbyte(0xc0);
+ addbyte(1);
+ addbyte(0x0f); /*IMUL EAX, EBX*/
+ addbyte(0xaf);
+ addbyte(0xc3);
+ addbyte(0xb9); /*MOV ECX, 0xff*/
+ addlong(0xff);
+ addbyte(0xf7); /*NEG EAX*/
+ addbyte(0xd8);
+ addbyte(0xc1); /*SAR EAX, 8*/
+ addbyte(0xf8);
+ addbyte(8);
+ if (tca_add_clocal_1 || tca_add_alocal_1)
+ {
+ addbyte(0x01); /*ADD EAX, EBX*/
+ addbyte(0xd8);
+ }
+ addbyte(0x39); /*CMP ECX, EAX*/
+ addbyte(0xc1);
+ addbyte(0x0f); /*CMOVA ECX, EAX*/
+ addbyte(0x47);
+ addbyte(0xc8);
+ addbyte(0x66); /*PINSRW 3, XMM3, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0xd8);
+ addbyte(3);
+ }
+
+ block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0);
+
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ addbyte(0x66); /*MOVD XMM7, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xf8);
+
+ if (params->textureMode[0] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x8b); /*MOV EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ if (!tc_reverse_blend)
+ {
+ addbyte(0xbb); /*MOV EBX, 1*/
+ addlong(1);
+ }
+ else
+ {
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ }
+ addbyte(0x83); /*AND EAX, 1*/
+ addbyte(0xe0);
+ addbyte(1);
+ if (!tca_reverse_blend)
+ {
+ addbyte(0xb9); /*MOV ECX, 1*/
+ addlong(1);
+ }
+ else
+ {
+ addbyte(0x31); /*XOR ECX, ECX*/
+ addbyte(0xc9);
+ }
+ addbyte(0x31); /*XOR EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x31); /*XOR ECX, EAX*/
+ addbyte(0xc1);
+ addbyte(0xc1); /*SHL EBX, 4*/
+ addbyte(0xe3);
+ addbyte(4);
+ /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/
+ }
+
+ /*XMM0 = TMU0 output, XMM3 = TMU1 output*/
+
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+ if (tc_zero_other)
+ {
+ addbyte(0x66); /*PXOR XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc9);
+ }
+ else
+ {
+ addbyte(0xf3); /*MOV XMM1, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xcb);
+ }
+ if (tc_sub_clocal)
+ {
+ addbyte(0x66); /*PSUBW XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xc8);
+ }
+
+ switch (tc_mselect)
+ {
+ case TC_MSELECT_ZERO:
+ addbyte(0x66); /*PXOR XMM4, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xe4);
+ break;
+ case TC_MSELECT_CLOCAL:
+ addbyte(0xf3); /*MOV XMM4, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe0);
+ break;
+ case TC_MSELECT_AOTHER:
+ addbyte(0xf2); /*PSHUFLW XMM4, XMM3, 3, 3, 3, 3*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe3);
+ addbyte(0xff);
+ break;
+ case TC_MSELECT_ALOCAL:
+ addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe0);
+ addbyte(0xff);
+ break;
+ case TC_MSELECT_DETAIL:
+ addbyte(0xb8); /*MOV EAX, params->detail_bias[0]*/
+ addlong(params->detail_bias[0]);
+ addbyte(0x2b); /*SUB EAX, state->lod*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xba); /*MOV EDX, params->detail_max[0]*/
+ addlong(params->detail_max[0]);
+ addbyte(0xc1); /*SHL EAX, params->detail_scale[0]*/
+ addbyte(0xe0);
+ addbyte(params->detail_scale[0]);
+ addbyte(0x39); /*CMP EAX, EDX*/
+ addbyte(0xd0);
+ addbyte(0x0f); /*CMOVNL EAX, EDX*/
+ addbyte(0x4d);
+ addbyte(0xc2);
+ addbyte(0x66); /*MOVD XMM4, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xe0);
+ addbyte(0xf2); /*PSHUFLW XMM4, XMM4, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe4);
+ addbyte(0);
+ break;
+ case TC_MSELECT_LOD_FRAC:
+ addbyte(0x66); /*MOVD XMM0, state->lod_frac[0]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, lod_frac[0]));
+ addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe4);
+ addbyte(0);
+ break;
+ }
+ if (params->textureMode[0] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x66); /*PXOR XMM4, xmm_00_ff_w[EBX]*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xa3);
+ addlong((uint32_t)&xmm_00_ff_w[0]);
+ }
+ else if (!tc_reverse_blend)
+ {
+ addbyte(0x66); /*PXOR XMM4, FF*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0x25);
+ addlong((uint32_t)&xmm_ff_w);
+ }
+ addbyte(0x66); /*PADDW XMM4, 1*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x25);
+ addlong((uint32_t)&xmm_01_w);
+ addbyte(0xf3); /*MOVQ XMM5, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe9);
+ addbyte(0x66); /*PMULLW XMM1, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xcc);
+
+ if (tca_sub_clocal)
+ {
+ addbyte(0x66); /*MOV EBX, XMM7*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xfb);
+ }
+
+ addbyte(0x66); /*PMULHW XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xe5);
+ addbyte(0xec);
+ addbyte(0x66); /*PUNPCKLWD XMM1, XMM5*/
+ addbyte(0x0f);
+ addbyte(0x61);
+ addbyte(0xcd);
+ addbyte(0x66); /*PSRAD XMM1, 8*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe1);
+ addbyte(8);
+ addbyte(0x66); /*PACKSSDW XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc9);
+
+ if (tca_sub_clocal)
+ {
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ }
+
+ if (tc_add_clocal)
+ {
+ addbyte(0x66); /*PADDW XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc8);
+ }
+ else if (tc_add_alocal)
+ {
+ addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xe0);
+ addbyte(0xff);
+ addbyte(0x66); /*PADDW XMM1, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xcc);
+ }
+
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ addbyte(0x66); /*PACKUSWB XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xdb);
+ addbyte(0x66); /*PACKUSWB XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc9);
+ if (tc_invert_output)
+ {
+ addbyte(0x66); /*PXOR XMM1, FF*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0x0d);
+ addlong((uint32_t)&xmm_ff_b);
+ }
+
+ if (tca_zero_other)
+ {
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ }
+ else
+ {
+ addbyte(0x66); /*MOV EAX, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xd8);
+ addbyte(0xc1); /*SHR EAX, 24*/
+ addbyte(0xe8);
+ addbyte(24);
+ }
+ if (tca_sub_clocal)
+ {
+ addbyte(0x29); /*SUB EAX, EBX*/
+ addbyte(0xd8);
+ }
+ switch (tca_mselect)
+ {
+ case TCA_MSELECT_ZERO:
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ break;
+ case TCA_MSELECT_CLOCAL:
+ addbyte(0x66); /*MOV EBX, XMM7*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xfb);
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ break;
+ case TCA_MSELECT_AOTHER:
+ addbyte(0x66); /*MOV EBX, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xdb);
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ break;
+ case TCA_MSELECT_ALOCAL:
+ addbyte(0x66); /*MOV EBX, XMM7*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xfb);
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ break;
+ case TCA_MSELECT_DETAIL:
+ addbyte(0xbb); /*MOV EBX, params->detail_bias[1]*/
+ addlong(params->detail_bias[1]);
+ addbyte(0x2b); /*SUB EBX, state->lod*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, lod));
+ addbyte(0xba); /*MOV EDX, params->detail_max[1]*/
+ addlong(params->detail_max[1]);
+ addbyte(0xc1); /*SHL EBX, params->detail_scale[1]*/
+ addbyte(0xe3);
+ addbyte(params->detail_scale[1]);
+ addbyte(0x39); /*CMP EBX, EDX*/
+ addbyte(0xd3);
+ addbyte(0x0f); /*CMOVNL EBX, EDX*/
+ addbyte(0x4d);
+ addbyte(0xda);
+ break;
+ case TCA_MSELECT_LOD_FRAC:
+ addbyte(0x8b); /*MOV EBX, state->lod_frac[0]*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, lod_frac[0]));
+ break;
+ }
+ if (params->textureMode[0] & TEXTUREMODE_TRILINEAR)
+ {
+ addbyte(0x33); /*XOR EBX, i_00_ff_w[ECX*4]*/
+ addbyte(0x1c);
+ addbyte(0x8d);
+ addlong((uint32_t)i_00_ff_w);
+ }
+ else if (!tca_reverse_blend)
+ {
+ addbyte(0x81); /*XOR EBX, 0xFF*/
+ addbyte(0xf3);
+ addlong(0xff);
+ }
+
+ addbyte(0x83); /*ADD EBX, 1*/
+ addbyte(0xc3);
+ addbyte(1);
+ addbyte(0x0f); /*IMUL EAX, EBX*/
+ addbyte(0xaf);
+ addbyte(0xc3);
+ addbyte(0x31); /*XOR EDX, EDX*/
+ addbyte(0xd2);
+ addbyte(0xc1); /*SAR EAX, 8*/
+ addbyte(0xf8);
+ addbyte(8);
+ if (tca_add_clocal || tca_add_alocal)
+ {
+ addbyte(0x66); /*MOV EBX, XMM7*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xfb);
+ addbyte(0xc1); /*SHR EBX, 24*/
+ addbyte(0xeb);
+ addbyte(24);
+ addbyte(0x01); /*ADD EAX, EBX*/
+ addbyte(0xd8);
+ }
+ addbyte(0x0f); /*CMOVS EAX, EDX*/
+ addbyte(0x48);
+ addbyte(0xc2);
+ addbyte(0xba); /*MOV EDX, 0xff*/
+ addlong(0xff);
+ addbyte(0x3d); /*CMP EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x0f); /*CMOVA EAX, EDX*/
+ addbyte(0x47);
+ addbyte(0xc2);
+ if (tca_invert_output)
+ {
+ addbyte(0x35); /*XOR EAX, 0xff*/
+ addlong(0xff);
+ }
+
+ addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+
+ addbyte(0xf3); /*MOVQ XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc1);
+ }
+ if (cc_mselect == CC_MSELECT_TEXRGB)
+ {
+ addbyte(0xf3); /*MOVD XMM4, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe0);
+ }
+
+ if ((params->fbzMode & FBZ_CHROMAKEY))
+ {
+ switch (_rgb_sel)
+ {
+ case CC_LOCALSELECT_ITER_RGB:
+ addbyte(0xf3); /*MOVDQU XMM0, ib*/ /* ir, ig and ib must be in same dqword!*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0x66); /*PSRAD XMM0, 12*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe0);
+ addbyte(12);
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ addbyte(0x66); /*MOVD EAX, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc0);
+ break;
+ case CC_LOCALSELECT_COLOR1:
+ addbyte(0x8b); /*MOV EAX, params->color1[ESI]*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, color1));
+ break;
+ case CC_LOCALSELECT_TEX:
+ addbyte(0x66); /*MOVD EAX, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc0);
+ break;
+ }
+ addbyte(0x8b); /*MOV EBX, params->chromaKey[ESI]*/
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, chromaKey));
+ addbyte(0x31); /*XOR EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x81); /*AND EBX, 0xffffff*/
+ addbyte(0xe3);
+ addlong(0xffffff);
+ addbyte(0x0f); /*JE skip*/
+ addbyte(0x84);
+ chroma_skip_pos = block_pos;
+ addlong(0);
+ }
+
+ if (voodoo->trexInit1[0] & (1 << 18))
+ {
+ addbyte(0xb8); /*MOV EAX, tmuConfig*/
+ addlong(voodoo->tmuConfig);
+ addbyte(0x66); /*MOVD XMM0, EAX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xc0);
+ }
+
+ if ((params->alphaMode & ((1 << 0) | (1 << 4))) || (!(cc_mselect == 0 && cc_reverse_blend == 0) && (cc_mselect == CC_MSELECT_AOTHER || cc_mselect == CC_MSELECT_ALOCAL)))
+ {
+ /*EBX = a_other*/
+ switch (a_sel)
+ {
+ case A_SEL_ITER_A:
+ addbyte(0x8b); /*MOV EBX, state->ia*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, ia));
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0xba); /*MOV EDX, 0xff*/
+ addlong(0xff);
+ addbyte(0xc1); /*SAR EBX, 12*/
+ addbyte(0xfb);
+ addbyte(12);
+ addbyte(0x0f); /*CMOVS EBX, EAX*/
+ addbyte(0x48);
+ addbyte(0xd8);
+ addbyte(0x39); /*CMP EBX, EDX*/
+ addbyte(0xd3);
+ addbyte(0x0f); /*CMOVA EBX, EDX*/
+ addbyte(0x47);
+ addbyte(0xda);
+ break;
+ case A_SEL_TEX:
+ addbyte(0x8b); /*MOV EBX, state->tex_a*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ break;
+ case A_SEL_COLOR1:
+ addbyte(0x0f); /*MOVZX EBX, params->color1+3*/
+ addbyte(0xb6);
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, color1)+3);
+ break;
+ default:
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ break;
+ }
+ /*ECX = a_local*/
+ switch (cca_localselect)
+ {
+ case CCA_LOCALSELECT_ITER_A:
+ if (a_sel == A_SEL_ITER_A)
+ {
+ addbyte(0x89); /*MOV ECX, EBX*/
+ addbyte(0xd9);
+ }
+ else
+ {
+ addbyte(0x8b); /*MOV ECX, state->ia*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ia));
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0xba); /*MOV EDX, 0xff*/
+ addlong(0xff);
+ addbyte(0xc1);/*SAR ECX, 12*/
+ addbyte(0xf9);
+ addbyte(12);
+ addbyte(0x0f); /*CMOVS ECX, EAX*/
+ addbyte(0x48);
+ addbyte(0xc8);
+ addbyte(0x39); /*CMP ECX, EDX*/
+ addbyte(0xd1);
+ addbyte(0x0f); /*CMOVA ECX, EDX*/
+ addbyte(0x47);
+ addbyte(0xca);
+ }
+ break;
+ case CCA_LOCALSELECT_COLOR0:
+ addbyte(0x0f); /*MOVZX ECX, params->color0+3*/
+ addbyte(0xb6);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, color0)+3);
+ break;
+ case CCA_LOCALSELECT_ITER_Z:
+ addbyte(0x8b); /*MOV ECX, state->z*/
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, z));
+ if (a_sel != A_SEL_ITER_A)
+ {
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0xba); /*MOV EDX, 0xff*/
+ addlong(0xff);
+ }
+ addbyte(0xc1);/*SAR ECX, 20*/
+ addbyte(0xf9);
+ addbyte(20);
+ addbyte(0x0f); /*CMOVS ECX, EAX*/
+ addbyte(0x48);
+ addbyte(0xc8);
+ addbyte(0x39); /*CMP ECX, EDX*/
+ addbyte(0xd1);
+ addbyte(0x0f); /*CMOVA ECX, EDX*/
+ addbyte(0x47);
+ addbyte(0xca);
+ break;
+
+ default:
+ addbyte(0xb9); /*MOV ECX, 0xff*/
+ addlong(0xff);
+ break;
+ }
+
+ if (cca_zero_other)
+ {
+ addbyte(0x31); /*XOR EDX, EDX*/
+ addbyte(0xd2);
+ }
+ else
+ {
+ addbyte(0x89); /*MOV EDX, EBX*/
+ addbyte(0xda);
+ }
+
+ if (cca_sub_clocal)
+ {
+ addbyte(0x29); /*SUB EDX, ECX*/
+ addbyte(0xca);
+ }
+ }
+
+ if (cc_sub_clocal || cc_mselect == 1 || cc_add == 1)
+ {
+ /*XMM1 = local*/
+ if (!cc_localselect_override)
+ {
+ if (cc_localselect)
+ {
+ addbyte(0x66); /*MOVD XMM1, params->color0*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, color0));
+ }
+ else
+ {
+ addbyte(0xf3); /*MOVDQU XMM1, ib*/ /* ir, ig and ib must be in same dqword!*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0x66); /*PSRAD XMM1, 12*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe1);
+ addbyte(12);
+ addbyte(0x66); /*PACKSSDW XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc9);
+ addbyte(0x66); /*PACKUSWB XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc9);
+ }
+ }
+ else
+ {
+ addbyte(0xf6); /*TEST state->tex_a, 0x80*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ addbyte(0x80);
+ addbyte(0x74);/*JZ !cc_localselect*/
+ addbyte(8+2);
+ addbyte(0x66); /*MOVD XMM1, params->color0*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, color0));
+ addbyte(0xeb); /*JMP +*/
+ addbyte(8+5+4+4);
+ /*!cc_localselect:*/
+ addbyte(0xf3); /*MOVDQU XMM1, ib*/ /* ir, ig and ib must be in same dqword!*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0x66); /*PSRAD XMM1, 12*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe1);
+ addbyte(12);
+ addbyte(0x66); /*PACKSSDW XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc9);
+ addbyte(0x66); /*PACKUSWB XMM1, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc9);
+ }
+ addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xca);
+ }
+ if (!cc_zero_other)
+ {
+ if (_rgb_sel == CC_LOCALSELECT_ITER_RGB)
+ {
+ addbyte(0xf3); /*MOVDQU XMM0, ib*/ /* ir, ig and ib must be in same dqword!*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0x66); /*PSRAD XMM0, 12*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe0);
+ addbyte(12);
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ }
+ else if (_rgb_sel == CC_LOCALSELECT_TEX)
+ {
+#if 0
+ addbyte(0xf3); /*MOVDQU XMM0, state->tex_b*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_b));
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+#endif
+ }
+ else if (_rgb_sel == CC_LOCALSELECT_COLOR1)
+ {
+ addbyte(0x66); /*MOVD XMM0, params->color1*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, color1));
+ }
+ else
+ {
+ /*MOVD XMM0, src_r*/
+ }
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+ if (cc_sub_clocal)
+ {
+ addbyte(0x66); /*PSUBW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xc1);
+ }
+ }
+ else
+ {
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ if (cc_sub_clocal)
+ {
+ addbyte(0x66); /*PSUBW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xc1);
+ }
+ }
+
+ if (params->alphaMode & ((1 << 0) | (1 << 4)))
+ {
+ if (!(cca_mselect == 0 && cca_reverse_blend == 0))
+ {
+ switch (cca_mselect)
+ {
+ case CCA_MSELECT_ALOCAL:
+ addbyte(0x89); /*MOV EAX, ECX*/
+ addbyte(0xc8);
+ break;
+ case CCA_MSELECT_AOTHER:
+ addbyte(0x89); /*MOV EAX, EBX*/
+ addbyte(0xd8);
+ break;
+ case CCA_MSELECT_ALOCAL2:
+ addbyte(0x89); /*MOV EAX, ECX*/
+ addbyte(0xc8);
+ break;
+ case CCA_MSELECT_TEX:
+ addbyte(0x0f); /*MOVZX EAX, state->tex_a*/
+ addbyte(0xb6);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ break;
+
+ case CCA_MSELECT_ZERO:
+ default:
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ break;
+ }
+ if (!cca_reverse_blend)
+ {
+ addbyte(0x35); /*XOR EAX, 0xff*/
+ addlong(0xff);
+ }
+ addbyte(0x83); /*ADD EAX, 1*/
+ addbyte(0xc0);
+ addbyte(1);
+ addbyte(0x0f); /*IMUL EDX, EAX*/
+ addbyte(0xaf);
+ addbyte(0xd0);
+ addbyte(0xc1); /*SHR EDX, 8*/
+ addbyte(0xea);
+ addbyte(8);
+ }
+ }
+
+ if ((params->alphaMode & ((1 << 0) | (1 << 4))))
+ {
+ addbyte(0x31); /*XOR EAX, EAX*/
+ addbyte(0xc0);
+ }
+
+ if (!(cc_mselect == 0 && cc_reverse_blend == 0) && cc_mselect == CC_MSELECT_AOTHER)
+ {
+ /*Copy a_other to XMM3 before it gets modified*/
+ addbyte(0x66); /*MOVD XMM3, EDX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xda);
+ addbyte(0xf2); /*PSHUFLW XMM3, XMM3, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xdb);
+ addbyte(0x00);
+ }
+
+ if (cca_add && (params->alphaMode & ((1 << 0) | (1 << 4))))
+ {
+ addbyte(0x01); /*ADD EDX, ECX*/
+ addbyte(0xca);
+ }
+
+ if ((params->alphaMode & ((1 << 0) | (1 << 4))))
+ {
+ addbyte(0x85); /*TEST EDX, EDX*/
+ addbyte(0xd2);
+ addbyte(0x0f); /*CMOVS EDX, EAX*/
+ addbyte(0x48);
+ addbyte(0xd0);
+ addbyte(0xb8); /*MOV EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x81); /*CMP EDX, 0xff*/
+ addbyte(0xfa);
+ addlong(0xff);
+ addbyte(0x0f); /*CMOVA EDX, EAX*/
+ addbyte(0x47);
+ addbyte(0xd0);
+
+ if (cca_invert_output)
+ {
+ addbyte(0x81); /*XOR EDX, 0xff*/
+ addbyte(0xf2);
+ addlong(0xff);
+ }
+ }
+
+ if (!(cc_mselect == 0 && cc_reverse_blend == 0))
+ {
+ switch (cc_mselect)
+ {
+ case CC_MSELECT_ZERO:
+ addbyte(0x66); /*PXOR XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xdb);
+ break;
+ case CC_MSELECT_CLOCAL:
+ addbyte(0xf3); /*MOV XMM3, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xd9);
+ break;
+ case CC_MSELECT_ALOCAL:
+ addbyte(0x66); /*MOVD XMM3, ECX*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0xd9);
+ addbyte(0xf2); /*PSHUFLW XMM3, XMM3, 0*/
+ addbyte(0x0f);
+ addbyte(0x70);
+ addbyte(0xdb);
+ addbyte(0x00);
+ break;
+ case CC_MSELECT_AOTHER:
+ /*Handled above*/
+ break;
+ case CC_MSELECT_TEX:
+ addbyte(0x66); /*PINSRW XMM3, state->tex_a, 0*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ addbyte(0);
+ addbyte(0x66); /*PINSRW XMM3, state->tex_a, 1*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ addbyte(1);
+ addbyte(0x66); /*PINSRW XMM3, state->tex_a, 2*/
+ addbyte(0x0f);
+ addbyte(0xc4);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tex_a));
+ addbyte(2);
+ break;
+ case CC_MSELECT_TEXRGB:
+ addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xe2);
+ addbyte(0xf3); /*MOVQ XMM3, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xdc);
+ break;
+ default:
+ addbyte(0x66); /*PXOR XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xdb);
+ break;
+ }
+ addbyte(0xf3); /*MOV XMM4, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe0);
+ if (!cc_reverse_blend)
+ {
+ addbyte(0x66); /*PXOR XMM3, 0xff*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0x1d);
+ addlong((uint32_t)&xmm_ff_w);
+ }
+ addbyte(0x66); /*PADDW XMM3, 1*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x1d);
+ addlong((uint32_t)&xmm_01_w);
+ addbyte(0x66); /*PMULLW XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xc3);
+ addbyte(0x66); /*PMULHW XMM4, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xe5);
+ addbyte(0xe3);
+ addbyte(0x66); /*PUNPCKLWD XMM0, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x61);
+ addbyte(0xc4);
+ addbyte(0x66); /*PSRLD XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x72);
+ addbyte(0xe0);
+ addbyte(8);
+ addbyte(0x66); /*PACKSSDW XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x6b);
+ addbyte(0xc0);
+ }
+
+ if (cc_add == 1)
+ {
+ addbyte(0x66); /*PADDW XMM0, XMM1*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc1);
+ }
+
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+
+ if (cc_invert_output)
+ {
+ addbyte(0x66); /*PXOR XMM0, 0xff*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0x05);
+ addlong((uint32_t)&xmm_ff_b);
+ }
+//#if 0
+// addbyte(0x66); /*MOVD state->out[EDI], XMM0*/
+// addbyte(0x0f);
+// addbyte(0x7e);
+// addbyte(0x87);
+// addlong(offsetof(voodoo_state_t, out));
+ if (params->fogMode & FOG_ENABLE)
+ {
+ if (params->fogMode & FOG_CONSTANT)
+ {
+ addbyte(0x66); /*MOVD XMM3, params->fogColor[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, fogColor));
+ addbyte(0x66); /*PADDUSB XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xdc);
+ addbyte(0xc3);
+/* src_r += params->fogColor.r;
+ src_g += params->fogColor.g;
+ src_b += params->fogColor.b; */
+ }
+ else
+ {
+ /*int fog_r, fog_g, fog_b, fog_a; */
+
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+
+ if (!(params->fogMode & FOG_ADD))
+ {
+ addbyte(0x66); /*MOVD XMM3, params->fogColor[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x9e);
+ addlong(offsetof(voodoo_params_t, fogColor));
+ addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xda);
+ }
+ else
+ {
+ addbyte(0x66); /*PXOR XMM3, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xdb);
+ }
+
+ if (!(params->fogMode & FOG_MULT))
+ {
+ addbyte(0x66); /*PSUBW XMM3, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xd8);
+ }
+
+ /*Divide by 2 to prevent overflow on multiply*/
+ addbyte(0x66); /*PSRAW XMM3, 1*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xe3);
+ addbyte(1);
+
+ switch (params->fogMode & (FOG_Z|FOG_ALPHA))
+ {
+ case 0:
+ addbyte(0x8b); /*MOV EBX, state->w_depth[EDI]*/
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, w_depth));
+ addbyte(0x89); /*MOV EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0xc1); /*SHR EBX, 10*/
+ addbyte(0xeb);
+ addbyte(10);
+ addbyte(0xc1); /*SHR EAX, 2*/
+ addbyte(0xe8);
+ addbyte(2);
+ addbyte(0x83); /*AND EBX, 0x3f*/
+ addbyte(0xe3);
+ addbyte(0x3f);
+ addbyte(0x25); /*AND EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0xf6); /*MUL params->fogTable+1[ESI+EBX*2]*/
+ addbyte(0xa4);
+ addbyte(0x5e);
+ addlong(offsetof(voodoo_params_t, fogTable)+1);
+ addbyte(0x0f); /*MOVZX EBX, params->fogTable[ESI+EBX*2]*/
+ addbyte(0xb6);
+ addbyte(0x9c);
+ addbyte(0x5e);
+ addlong(offsetof(voodoo_params_t, fogTable));
+ addbyte(0xc1); /*SHR EAX, 10*/
+ addbyte(0xe8);
+ addbyte(10);
+ addbyte(0x01); /*ADD EAX, EBX*/
+ addbyte(0xd8);
+
+/* int fog_idx = (w_depth >> 10) & 0x3f;
+
+ fog_a = params->fogTable[fog_idx].fog;
+ fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10;*/
+ break;
+
+ case FOG_Z:
+ addbyte(0x8b); /*MOV EAX, state->z[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, z));
+ addbyte(0xc1); /*SHR EAX, 12*/
+ addbyte(0xe8);
+ addbyte(12);
+ addbyte(0x25); /*AND EAX, 0xff*/
+ addlong(0xff);
+// fog_a = (z >> 20) & 0xff;
+ break;
+
+ case FOG_ALPHA:
+ addbyte(0x8b); /*MOV EAX, state->ia[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, ia));
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ addbyte(0xc1); /*SAR EAX, 12*/
+ addbyte(0xf8);
+ addbyte(12);
+ addbyte(0x0f); /*CMOVS EAX, EBX*/
+ addbyte(0x48);
+ addbyte(0xc3);
+ addbyte(0xbb); /*MOV EBX, 0xff*/
+ addlong(0xff);
+ addbyte(0x3d); /*CMP EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x0f); /*CMOVAE EAX, EBX*/
+ addbyte(0x43);
+ addbyte(0xc3);
+// fog_a = CLAMP(ia >> 12);
+ break;
+
+ case FOG_W:
+ addbyte(0x8b); /*MOV EAX, state->w[EDI]+4*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w)+4);
+ addbyte(0x31); /*XOR EBX, EBX*/
+ addbyte(0xdb);
+ addbyte(0x09); /*OR EAX, EAX*/
+ addbyte(0xc0);
+ addbyte(0x0f); /*CMOVS EAX, EBX*/
+ addbyte(0x48);
+ addbyte(0xc3);
+ addbyte(0xbb); /*MOV EBX, 0xff*/
+ addlong(0xff);
+ addbyte(0x3d); /*CMP EAX, 0xff*/
+ addlong(0xff);
+ addbyte(0x0f); /*CMOVAE EAX, EBX*/
+ addbyte(0x43);
+ addbyte(0xc3);
+// fog_a = CLAMP(w >> 32);
+ break;
+ }
+ addbyte(0x01); /*ADD EAX, EAX*/
+ addbyte(0xc0);
+// fog_a++;
+
+ addbyte(0x66); /*PMULLW XMM3, alookup+4[EAX*8]*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x1c);
+ addbyte(0xc5);
+ addlong(((uintptr_t)alookup) + 16);
+ addbyte(0x66); /*PSRAW XMM3, 7*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xe3);
+ addbyte(7);
+/* fog_r = (fog_r * fog_a) >> 8;
+ fog_g = (fog_g * fog_a) >> 8;
+ fog_b = (fog_b * fog_a) >> 8;*/
+
+ if (params->fogMode & FOG_MULT)
+ {
+ addbyte(0xf3); /*MOV XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc3);
+ }
+ else
+ {
+ addbyte(0x66); /*PADDW XMM0, XMM3*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc3);
+/* src_r += fog_r;
+ src_g += fog_g;
+ src_b += fog_b;*/
+ }
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ }
+
+/* src_r = CLAMP(src_r);
+ src_g = CLAMP(src_g);
+ src_b = CLAMP(src_b);*/
+ }
+
+ if ((params->alphaMode & 1) && (alpha_func != AFUNC_NEVER) && (alpha_func != AFUNC_ALWAYS))
+ {
+ addbyte(0x0f); /*MOVZX ECX, params->alphaMode+3*/
+ addbyte(0xb6);
+ addbyte(0x8e);
+ addlong(offsetof(voodoo_params_t, alphaMode) + 3);
+ addbyte(0x39); /*CMP EDX, ECX*/
+ addbyte(0xca);
+
+ switch (alpha_func)
+ {
+ case AFUNC_LESSTHAN:
+ addbyte(0x0f); /*JAE skip*/
+ addbyte(0x83);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_EQUAL:
+ addbyte(0x0f); /*JNE skip*/
+ addbyte(0x85);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_LESSTHANEQUAL:
+ addbyte(0x0f); /*JA skip*/
+ addbyte(0x87);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_GREATERTHAN:
+ addbyte(0x0f); /*JBE skip*/
+ addbyte(0x86);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_NOTEQUAL:
+ addbyte(0x0f); /*JE skip*/
+ addbyte(0x84);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ case AFUNC_GREATERTHANEQUAL:
+ addbyte(0x0f); /*JB skip*/
+ addbyte(0x82);
+ a_skip_pos = block_pos;
+ addlong(0);
+ break;
+ }
+ }
+ else if ((params->alphaMode & 1) && (alpha_func == AFUNC_NEVER))
+ {
+ addbyte(0xC3); /*RET*/
+ }
+
+ if (params->alphaMode & (1 << 4))
+ {
+ addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
+ addbyte(0x87);
+ if (params->col_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x8b); /*MOV EBP, fb_mem*/
+ addbyte(0xaf);
+ addlong(offsetof(voodoo_state_t, fb_mem));
+ addbyte(0x01); /*ADD EDX, EDX*/
+ addbyte(0xd2);
+ addbyte(0x0f); /*MOVZX EAX, [EBP+EAX*2]*/
+ addbyte(0xb7);
+ addbyte(0x44);
+ addbyte(0x45);
+ addbyte(0);
+ addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xc2);
+ addbyte(0x66); /*MOVD XMM4, rgb565[EAX*4]*/
+ addbyte(0x0f);
+ addbyte(0x6e);
+ addbyte(0x24);
+ addbyte(0x85);
+ addlong((uint32_t)rgb565);
+ addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/
+ addbyte(0x0f);
+ addbyte(0x60);
+ addbyte(0xe2);
+ addbyte(0xf3); /*MOV XMM6, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xf4);
+
+ switch (dest_afunc)
+ {
+ case AFUNC_AZERO:
+ addbyte(0x66); /*PXOR XMM4, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xe4);
+ break;
+ case AFUNC_ASRC_ALPHA:
+ addbyte(0x66); /*PMULLW XMM4, alookup[EDX*8]*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x24);
+ addbyte(0xd5);
+ addlong((uint32_t)alookup);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x25);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ break;
+ case AFUNC_A_COLOR:
+ addbyte(0x66); /*PMULLW XMM4, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xe0);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x25);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ break;
+ case AFUNC_ADST_ALPHA:
+ break;
+ case AFUNC_AONE:
+ break;
+ case AFUNC_AOMSRC_ALPHA:
+ addbyte(0x66); /*PMULLW XMM4, aminuslookup[EDX*8]*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x24);
+ addbyte(0xd5);
+ addlong((uint32_t)aminuslookup);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x25);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ break;
+ case AFUNC_AOM_COLOR:
+ addbyte(0xf3); /*MOVQ XMM5, xmm_ff_w*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0x2d);
+ addlong((uint32_t)&xmm_ff_w);
+ addbyte(0x66); /*PSUBW XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xe8);
+ addbyte(0x66); /*PMULLW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xe5);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x25);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ break;
+ case AFUNC_AOMDST_ALPHA:
+ addbyte(0x66); /*PXOR XMM4, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xe4);
+ break;
+ case AFUNC_ASATURATE:
+ addbyte(0x66); /*PMULLW XMM4, minus_254*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x25);
+ addlong((uint32_t)&minus_254);
+ addbyte(0xf3); /*MOVQ XMM5, XMM4*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xec);
+ addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x25);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM4, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xe5);
+ addbyte(0x66); /*PSRLW XMM4, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd4);
+ addbyte(8);
+ }
+
+ switch (src_afunc)
+ {
+ case AFUNC_AZERO:
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ break;
+ case AFUNC_ASRC_ALPHA:
+ addbyte(0x66); /*PMULLW XMM0, alookup[EDX*8]*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x04);
+ addbyte(0xd5);
+ addlong((uint32_t)alookup);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x05);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0);
+ addbyte(8);
+ break;
+ case AFUNC_A_COLOR:
+ addbyte(0x66); /*PMULLW XMM0, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xc6);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x05);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0);
+ addbyte(8);
+ break;
+ case AFUNC_ADST_ALPHA:
+ break;
+ case AFUNC_AONE:
+ break;
+ case AFUNC_AOMSRC_ALPHA:
+ addbyte(0x66); /*PMULLW XMM0, aminuslookup[EDX*8]*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0x04);
+ addbyte(0xd5);
+ addlong((uint32_t)aminuslookup);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x05);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0);
+ addbyte(8);
+ break;
+ case AFUNC_AOM_COLOR:
+ addbyte(0xf3); /*MOVQ XMM5, xmm_ff_w*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0x2d);
+ addlong((uint32_t)&xmm_ff_w);
+ addbyte(0x66); /*PSUBW XMM5, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xf9);
+ addbyte(0xee);
+ addbyte(0x66); /*PMULLW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd5);
+ addbyte(0xc5);
+ addbyte(0xf3); /*MOVQ XMM5, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xe8);
+ addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0x05);
+ addlong((uint32_t)alookup + 16);
+ addbyte(0x66); /*PSRLW XMM5, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd5);
+ addbyte(8);
+ addbyte(0x66); /*PADDW XMM0, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc5);
+ addbyte(0x66); /*PSRLW XMM0, 8*/
+ addbyte(0x0f);
+ addbyte(0x71);
+ addbyte(0xd0);
+ addbyte(8);
+ break;
+ case AFUNC_AOMDST_ALPHA:
+ addbyte(0x66); /*PXOR XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xef);
+ addbyte(0xc0);
+ break;
+ case AFUNC_ACOLORBEFOREFOG:
+ break;
+ }
+
+ addbyte(0x66); /*PADDW XMM0, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xfd);
+ addbyte(0xc4);
+
+ addbyte(0x66); /*PACKUSWB XMM0, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x67);
+ addbyte(0xc0);
+ }
+//#endif
+
+// addbyte(0x8b); /*MOV EDX, x (ESP+12)*/
+// addbyte(0x54);
+// addbyte(0x24);
+// addbyte(12);
+
+
+ addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
+ addbyte(0x97);
+ if (params->col_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+
+ addbyte(0x66); /*MOV EAX, XMM0*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xc0);
+
+ if (params->fbzMode & FBZ_RGB_WMASK)
+ {
+// addbyte(0x89); /*MOV state->rgb_out[EDI], EAX*/
+// addbyte(0x87);
+// addlong(offsetof(voodoo_state_t, rgb_out));
+
+ if (dither)
+ {
+ addbyte(0x8b); /*MOV ESI, real_y (ESP+16)*/
+ addbyte(0x74);
+ addbyte(0x24);
+ addbyte(16+16);
+ addbyte(0x0f); /*MOVZX EBX, AH*/ /*G*/
+ addbyte(0xb6);
+ addbyte(0xdc);
+ if (dither2x2)
+ {
+ addbyte(0x83); /*AND EDX, 1*/
+ addbyte(0xe2);
+ addbyte(1);
+ addbyte(0x83); /*AND ESI, 1*/
+ addbyte(0xe6);
+ addbyte(1);
+ addbyte(0xc1); /*SHL EBX, 2*/
+ addbyte(0xe3);
+ addbyte(2);
+ }
+ else
+ {
+ addbyte(0x83); /*AND EDX, 3*/
+ addbyte(0xe2);
+ addbyte(3);
+ addbyte(0x83); /*AND ESI, 3*/
+ addbyte(0xe6);
+ addbyte(3);
+ addbyte(0xc1); /*SHL EBX, 4*/
+ addbyte(0xe3);
+ addbyte(4);
+ }
+ addbyte(0x0f); /*MOVZX ECX, AL*/ /*R*/
+ addbyte(0xb6);
+ addbyte(0xc8);
+ if (dither2x2)
+ {
+ addbyte(0xc1); /*SHR EAX, 14*/
+ addbyte(0xe8);
+ addbyte(14);
+ addbyte(0x8d); /*LEA ESI, EDX+ESI*2*/
+ addbyte(0x34);
+ addbyte(0x72);
+ }
+ else
+ {
+ addbyte(0xc1); /*SHR EAX, 12*/
+ addbyte(0xe8);
+ addbyte(12);
+ addbyte(0x8d); /*LEA ESI, EDX+ESI*4*/
+ addbyte(0x34);
+ addbyte(0xb2);
+ }
+ addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
+ addbyte(0x97);
+ if (params->col_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+ if (dither2x2)
+ {
+ addbyte(0xc1); /*SHL ECX, 2*/
+ addbyte(0xe1);
+ addbyte(2);
+ addbyte(0x25); /*AND EAX, 0x3fc*/ /*B*/
+ addlong(0x3fc);
+ }
+ else
+ {
+ addbyte(0xc1); /*SHL ECX, 4*/
+ addbyte(0xe1);
+ addbyte(4);
+ addbyte(0x25); /*AND EAX, 0xff0*/ /*B*/
+ addlong(0xff0);
+ }
+ addbyte(0x0f); /*MOVZX EBX, dither_g[EBX+ESI]*/
+ addbyte(0xb6);
+ addbyte(0x9c);
+ addbyte(0x33);
+ addlong(dither2x2 ? (uint32_t)dither_g2x2 : (uint32_t)dither_g);
+ addbyte(0x0f); /*MOVZX ECX, dither_rb[ECX+ESI]*/
+ addbyte(0xb6);
+ addbyte(0x8c);
+ addbyte(0x31);
+ addlong(dither2x2 ? (uint32_t)dither_rb2x2 : (uint32_t)dither_rb);
+ addbyte(0x0f); /*MOVZX EAX, dither_rb[EAX+ESI]*/
+ addbyte(0xb6);
+ addbyte(0x84);
+ addbyte(0x30);
+ addlong(dither2x2 ? (uint32_t)dither_rb2x2 : (uint32_t)dither_rb);
+ addbyte(0xc1); /*SHL EBX, 5*/
+ addbyte(0xe3);
+ addbyte(5);
+ addbyte(0xc1); /*SHL EAX, 11*/
+ addbyte(0xe0);
+ addbyte(11);
+ addbyte(0x09); /*OR EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x09); /*OR EAX, ECX*/
+ addbyte(0xc8);
+ }
+ else
+ {
+ addbyte(0x89); /*MOV EBX, EAX*/
+ addbyte(0xc3);
+ addbyte(0x0f); /*MOVZX ECX, AH*/
+ addbyte(0xb6);
+ addbyte(0xcc);
+ addbyte(0xc1); /*SHR EAX, 3*/
+ addbyte(0xe8);
+ addbyte(3);
+ addbyte(0xc1); /*SHR EBX, 8*/
+ addbyte(0xeb);
+ addbyte(8);
+ addbyte(0xc1); /*SHL ECX, 3*/
+ addbyte(0xe1);
+ addbyte(3);
+ addbyte(0x81); /*AND EAX, 0x001f*/
+ addbyte(0xe0);
+ addlong(0x001f);
+ addbyte(0x81); /*AND EBX, 0xf800*/
+ addbyte(0xe3);
+ addlong(0xf800);
+ addbyte(0x81); /*AND ECX, 0x07e0*/
+ addbyte(0xe1);
+ addlong(0x07e0);
+ addbyte(0x09); /*OR EAX, EBX*/
+ addbyte(0xd8);
+ addbyte(0x09); /*OR EAX, ECX*/
+ addbyte(0xc8);
+ }
+ addbyte(0x8b); /*MOV ESI, fb_mem*/
+ addbyte(0xb7);
+ addlong(offsetof(voodoo_state_t, fb_mem));
+ addbyte(0x66); /*MOV [ESI+EDX*2], AX*/
+ addbyte(0x89);
+ addbyte(0x04);
+ addbyte(0x56);
+ }
+
+ if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE))
+ {
+ addbyte(0x8b); /*MOV EDX, state->x[EDI]*/
+ addbyte(0x97);
+ if (params->aux_tiled)
+ addlong(offsetof(voodoo_state_t, x_tiled));
+ else
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(0x66); /*MOV AX, new_depth*/
+ addbyte(0x8b);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, new_depth));
+ addbyte(0x8b); /*MOV ESI, aux_mem*/
+ addbyte(0xb7);
+ addlong(offsetof(voodoo_state_t, aux_mem));
+ addbyte(0x66); /*MOV [ESI+EDX*2], AX*/
+ addbyte(0x89);
+ addbyte(0x04);
+ addbyte(0x56);
+ }
+
+ if (z_skip_pos)
+ *(uint32_t *)&code_block[z_skip_pos] = (block_pos - z_skip_pos) - 4;
+ if (a_skip_pos)
+ *(uint32_t *)&code_block[a_skip_pos] = (block_pos - a_skip_pos) - 4;
+ if (chroma_skip_pos)
+ *(uint32_t *)&code_block[chroma_skip_pos] = (block_pos - chroma_skip_pos) - 4;
+
+
+ addbyte(0x8b); /*MOV ESI, [ESP+8]*/
+ addbyte(0x74);
+ addbyte(0x24);
+ addbyte(8+16);
+
+ if (voodoo->dual_tmus)
+ {
+ addbyte(0xf3); /*MOVDQU XMM3, state->tmu1_s[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tmu1_s));
+ addbyte(0xf3); /*MOVQ XMM4, state->tmu1_w[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tmu1_w));
+ addbyte(0xf3); /*MOVDQU XMM5, params->tmu[1].dSdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0xae);
+ addlong(offsetof(voodoo_params_t, tmu[1].dSdX));
+ addbyte(0xf3); /*MOVQ XMM6, params->tmu[1].dWdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xb6);
+ addlong(offsetof(voodoo_params_t, tmu[1].dWdX));
+ if (state->xdir > 0)
+ {
+ addbyte(0x66); /*PADDQ XMM3, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xdd);
+ addbyte(0x66); /*PADDQ XMM4, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xe6);
+ }
+ else
+ {
+ addbyte(0x66); /*PSUBQ XMM3, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xdd);
+ addbyte(0x66); /*PSUBQ XMM4, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xe6);
+ }
+ addbyte(0xf3); /*MOVDQU state->tmu1_s, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7f);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tmu1_s));
+ addbyte(0x66); /*MOVQ state->tmu1_w, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xd6);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tmu1_w));
+ }
+
+ addbyte(0xf3); /*MOVDQU XMM1, state->ib[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0xf3); /*MOVDQU XMM3, state->tmu0_s[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tmu0_s));
+ addbyte(0xf3); /*MOVQ XMM4, state->tmu0_w[EDI]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tmu0_w));
+ addbyte(0xf3); /*MOVDQU XMM0, params->dBdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, dBdX));
+ addbyte(0x8b); /*MOV EAX, params->dZdX[ESI]*/
+ addbyte(0x86);
+ addlong(offsetof(voodoo_params_t, dZdX));
+ addbyte(0xf3); /*MOVDQU XMM5, params->tmu[0].dSdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x6f);
+ addbyte(0xae);
+ addlong(offsetof(voodoo_params_t, tmu[0].dSdX));
+ addbyte(0xf3); /*MOVQ XMM6, params->tmu[0].dWdX[ESI]*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xb6);
+ addlong(offsetof(voodoo_params_t, tmu[0].dWdX));
+
+ if (state->xdir > 0)
+ {
+ addbyte(0x66); /*PADDD XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xfe);
+ addbyte(0xc8);
+ }
+ else
+ {
+ addbyte(0x66); /*PSUBD XMM1, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xfa);
+ addbyte(0xc8);
+ }
+
+ addbyte(0xf3); /*MOVQ XMM0, state->w*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w));
+ addbyte(0xf3); /*MOVDQU state->ib, XMM1*/
+ addbyte(0x0f);
+ addbyte(0x7f);
+ addbyte(0x8f);
+ addlong(offsetof(voodoo_state_t, ib));
+ addbyte(0xf3); /*MOVQ XMM7, params->dWdX*/
+ addbyte(0x0f);
+ addbyte(0x7e);
+ addbyte(0xbe);
+ addlong(offsetof(voodoo_params_t, dWdX));
+
+ if (state->xdir > 0)
+ {
+ addbyte(0x66); /*PADDQ XMM3, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xdd);
+ addbyte(0x66); /*PADDQ XMM4, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xe6);
+ addbyte(0x66); /*PADDQ XMM0, XMM7*/
+ addbyte(0x0f);
+ addbyte(0xd4);
+ addbyte(0xc7);
+ addbyte(0x01); /*ADD state->z[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, z));
+ }
+ else
+ {
+ addbyte(0x66); /*PSUBQ XMM3, XMM5*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xdd);
+ addbyte(0x66); /*PSUBQ XMM4, XMM6*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xe6);
+ addbyte(0x66); /*PSUBQ XMM0, XMM7*/
+ addbyte(0x0f);
+ addbyte(0xfb);
+ addbyte(0xc7);
+ addbyte(0x29); /*SUB state->z[EDI], EAX*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, z));
+ }
+
+ addbyte(0xf3); /*MOVDQU state->tmu0_s, XMM3*/
+ addbyte(0x0f);
+ addbyte(0x7f);
+ addbyte(0x9f);
+ addlong(offsetof(voodoo_state_t, tmu0_s));
+ addbyte(0x66); /*MOVQ state->tmu0_w, XMM4*/
+ addbyte(0x0f);
+ addbyte(0xd6);
+ addbyte(0xa7);
+ addlong(offsetof(voodoo_state_t, tmu0_w));
+ addbyte(0x66); /*MOVQ state->w, XMM0*/
+ addbyte(0x0f);
+ addbyte(0xd6);
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, w));
+
+ addbyte(0x83); /*ADD state->pixel_count[EDI], 1*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, pixel_count));
+ addbyte(1);
+
+ if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED)
+ {
+ if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH ||
+ (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL)
+ {
+ addbyte(0x83); /*ADD state->texel_count[EDI], 1*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, texel_count));
+ addbyte(1);
+ }
+ else
+ {
+ addbyte(0x83); /*ADD state->texel_count[EDI], 2*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, texel_count));
+ addbyte(2);
+ }
+ }
+ addbyte(0x8b); /*MOV EAX, state->x[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x));
+
+ if (state->xdir > 0)
+ {
+ addbyte(0x83); /*ADD state->x[EDI], 1*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(1);
+ }
+ else
+ {
+ addbyte(0x83); /*SUB state->x[EDI], 1*/
+ addbyte(0xaf);
+ addlong(offsetof(voodoo_state_t, x));
+ addbyte(1);
+ }
+
+ addbyte(0x3b); /*CMP EAX, state->x2[EDI]*/
+ addbyte(0x87);
+ addlong(offsetof(voodoo_state_t, x2));
+ addbyte(0x0f); /*JNZ loop_jump_pos*/
+ addbyte(0x85);
+ addlong(loop_jump_pos - (block_pos + 4));
+
+ addbyte(0x5b); /*POP EBX*/
+ addbyte(0x5e); /*POP ESI*/
+ addbyte(0x5f); /*POP EDI*/
+ addbyte(0x5d); /*POP EBP*/
+
+ addbyte(0xC3); /*RET*/
+
+ if (params->textureMode[1] & TEXTUREMODE_TRILINEAR)
+ cs = cs;
+}
+/*Recompile counter: voodoo_get_block() increments this once each time none of
+  the cached code blocks matches and voodoo_generate() has to be called*/
+int voodoo_recomp = 0;
+
+/*
+ * Return the generated x86 routine that matches the current rendering state,
+ * compiling a new one when none of the cached routines fits.
+ *
+ * voodoo   - device; supplies the codegen_data cache and trexInit1[0] bit 18
+ * params   - register state that forms most of the cache key
+ * state    - only xdir takes part in the key; also handed to voodoo_generate()
+ * odd_even - selects the element of last_block[] / next_block_to_write[] and
+ *            the base index into the cache (entries live at odd_even + slot*4)
+ *
+ * Up to 8 slots are probed, starting with the slot that matched last time.
+ * On a miss voodoo_recomp is incremented, the slot named by
+ * next_block_to_write[] is overwritten and that index advances modulo 8.
+ *
+ * Returns the code_block of the matching or freshly generated entry.
+ *
+ * NOTE(review): the key folds col_tiled and aux_tiled into a single is_tiled
+ * flag, while the code generator above appears to test params->col_tiled and
+ * params->aux_tiled separately when choosing x_tiled - confirm the two flags
+ * can never differ, otherwise a stale block could be reused.
+ */
+static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even)
+{
+        voodoo_x86_data_t *blocks = voodoo->codegen_data;
+        voodoo_x86_data_t *entry;
+        const int want_tiled = (params->col_tiled || params->aux_tiled) ? 1 : 0;
+        int slot = last_block[odd_even];
+        int probe;
+
+        /*Walk all 8 slots once, wrapping around, beginning at the last hit*/
+        for (probe = 0; probe < 8; probe++, slot = (slot + 1) & 7)
+        {
+                entry = &blocks[odd_even + slot*4];
+
+                if (entry->xdir != state->xdir)
+                        continue;
+                if (entry->alphaMode != params->alphaMode ||
+                    entry->fbzMode != params->fbzMode ||
+                    entry->fogMode != params->fogMode ||
+                    entry->fbzColorPath != params->fbzColorPath)
+                        continue;
+                if (entry->trexInit1 != (voodoo->trexInit1[0] & (1 << 18)))
+                        continue;
+                if (entry->textureMode[0] != params->textureMode[0] ||
+                    entry->textureMode[1] != params->textureMode[1])
+                        continue;
+                if (entry->tLOD[0] != (params->tLOD[0] & LOD_MASK) ||
+                    entry->tLOD[1] != (params->tLOD[1] & LOD_MASK))
+                        continue;
+                if (entry->is_tiled != want_tiled)
+                        continue;
+
+                /*Hit: remember the slot so the next lookup starts here*/
+                last_block[odd_even] = slot;
+                return entry->code_block;
+        }
+
+        /*Miss: regenerate into the slot that is next in line for replacement*/
+        voodoo_recomp++;
+        entry = &blocks[odd_even + next_block_to_write[odd_even]*4];
+
+        voodoo_generate(entry->code_block, voodoo, params, state, depth_op);
+
+        /*Store the key this block was generated for*/
+        entry->is_tiled       = want_tiled;
+        entry->tLOD[1]        = params->tLOD[1] & LOD_MASK;
+        entry->tLOD[0]        = params->tLOD[0] & LOD_MASK;
+        entry->textureMode[1] = params->textureMode[1];
+        entry->textureMode[0] = params->textureMode[0];
+        entry->trexInit1      = voodoo->trexInit1[0] & (1 << 18);
+        entry->fbzColorPath   = params->fbzColorPath;
+        entry->fogMode        = params->fogMode;
+        entry->fbzMode        = params->fbzMode;
+        entry->alphaMode      = params->alphaMode;
+        entry->xdir           = state->xdir;
+
+        next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) & 7;
+
+        return entry->code_block;
+}
+
+/*
+ * Set up the recompiler for one card.
+ *
+ * Allocates a read/write/execute region large enough for BLOCK_NUM*4
+ * voodoo_x86_data_t entries and stores it in voodoo->codegen_data, then fills
+ * the SSE lookup tables (alookup, aminuslookup, bilinear_lookup, xmm_00_ff_w)
+ * used by the generated code.
+ *
+ * If the region cannot be allocated, codegen_data is left NULL and
+ * voodoo->use_recompiler is cleared.
+ * NOTE(review): this assumes callers consult use_recompiler before calling
+ * voodoo_get_block() - confirm against the renderer.
+ */
+void voodoo_codegen_init(voodoo_t *voodoo)
+{
+        int c;
+
+#if defined WIN32 || defined _WIN32
+        voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+#else
+        /* Anonymous mappings must pass fd -1 to be portable; some systems reject any other value. */
+        voodoo->codegen_data = mmap(0, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0);
+        if (voodoo->codegen_data == MAP_FAILED)
+                voodoo->codegen_data = NULL;
+#endif
+        if (!voodoo->codegen_data)
+                voodoo->use_recompiler = 0;
+
+        for (c = 0; c < 256; c++)
+        {
+                int d[4];
+                int _ds = c & 0xf;      /* low nibble of the table index */
+                int dt = c >> 4;        /* high nibble of the table index */
+
+                /* c (and 255-c) replicated into the four 16-bit lanes of the low 64 bits. */
+                alookup[c] = _mm_set_epi32(0, 0, c | (c << 16), c | (c << 16));
+                aminuslookup[c] = _mm_set_epi32(0, 0, (255-c) | ((255-c) << 16), (255-c) | ((255-c) << 16));
+
+                /* Four weights built from the two nibbles; they always sum to 256. */
+                d[0] = (16 - _ds) * (16 - dt);
+                d[1] = _ds * (16 - dt);
+                d[2] = (16 - _ds) * dt;
+                d[3] = _ds * dt;
+
+                /* Two vectors per index: d[0]/d[1] in the first, d[2]/d[3] in the second. */
+                bilinear_lookup[c*2] = _mm_set_epi32(d[1] | (d[1] << 16), d[1] | (d[1] << 16), d[0] | (d[0] << 16), d[0] | (d[0] << 16));
+                bilinear_lookup[c*2 + 1] = _mm_set_epi32(d[3] | (d[3] << 16), d[3] | (d[3] << 16), d[2] | (d[2] << 16), d[2] | (d[2] << 16));
+        }
+        /* Extra alookup entry for the value 256. */
+        alookup[256] = _mm_set_epi32(0, 0, 256 | (256 << 16), 256 | (256 << 16));
+        xmm_00_ff_w[0] = _mm_set_epi32(0, 0, 0, 0);
+        xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16));
+}
+
+/*
+ * Release the executable region held in voodoo->codegen_data.
+ *
+ * Does nothing when no region is held, skips the release when the pointer is
+ * the MAP_FAILED sentinel, and clears the pointer afterwards so that a second
+ * call cannot free it twice.
+ */
+void voodoo_codegen_close(voodoo_t *voodoo)
+{
+        if (!voodoo->codegen_data)
+                return;
+
+#if defined WIN32 || defined _WIN32
+        VirtualFree(voodoo->codegen_data, 0, MEM_RELEASE);
+#else
+        if (voodoo->codegen_data != MAP_FAILED)
+                munmap(voodoo->codegen_data, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4);
+#endif
+        voodoo->codegen_data = NULL;
+}
--- /dev/null
+/* Drop any MIN/CLAMP definitions made by earlier headers so the versions below are the ones in effect. */
+#ifdef MIN
+#undef MIN
+#endif
+#ifdef CLAMP
+#undef CLAMP
+#endif
+
+/* Smaller of a and b. Function-like macro: an argument may be evaluated more than once. */
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/* Saturate x to 0..0xff (CLAMP) or 0..0xffff (CLAMP16). x may be evaluated more than once. */
+#define CLAMP(x) (((x) < 0) ? 0 : (((x) > 0xff) ? 0xff : (x)))
+#define CLAMP16(x) (((x) < 0) ? 0 : (((x) > 0xffff) ? 0xffff : (x)))
+
+
+/* Sizes the per-LOD arrays in voodoo_params_t, which are declared with LOD_MAX+2 entries. */
+#define LOD_MAX 8
+
+/* NOTE(review): not referenced in this header; presumably a shift applied to texture addresses for dirty tracking - confirm. */
+#define TEX_DIRTY_SHIFT 10
+
+/* Second dimension of voodoo_t.texture_cache. */
+#define TEX_CACHE_MAX 64
+
+/*
+ * Card model identifiers, stored in voodoo_t.type.
+ * The numeric order matters: code uses range tests such as
+ * type >= VOODOO_BANSHEE.
+ */
+enum
+{
+ VOODOO_1 = 0,
+ VOODOO_SB50,
+ VOODOO_2,
+ VOODOO_BANSHEE,
+ VOODOO_3
+};
+
+/* Views the same 32 bits either as an unsigned integer (i) or as a float (f). */
+typedef union int_float
+{
+ uint32_t i;
+ float f;
+} int_float;
+
+/* 8-bit colour triple stored in b, g, r order, with one extra byte to make the struct four bytes. */
+typedef struct rgb_t
+{
+ uint8_t b, g, r;
+ uint8_t pad;
+} rgb_t;
+/* 8-bit-per-channel colour stored in b, g, r, a order; element type of the rgb332/ai44/rgb565/... tables. */
+typedef struct rgba8_t
+{
+ uint8_t b, g, r, a;
+} rgba8_t;
+
+/* A colour accessible per channel (rgba.b/g/r/a) or as one 32-bit word (u). */
+typedef union rgba_u
+{
+ struct
+ {
+ uint8_t b, g, r, a;
+ } rgba;
+ uint32_t u;
+} rgba_u;
+
+/* Number of entries in voodoo_t.fifo; a power of two so that FIFO_MASK can wrap an index. */
+#define FIFO_SIZE 65536
+#define FIFO_MASK (FIFO_SIZE - 1)
+/* Bit 31 as an unsigned constant; shifting a signed 1 into the sign bit would be undefined behaviour. */
+#define FIFO_ENTRY_SIZE (1u << 31)
+
+/*
+ * Occupancy helpers. They expect a variable named `voodoo` in scope and work
+ * on the difference between the write and read indices.
+ * FIFO_FULL triggers four entries before the FIFO is completely full.
+ */
+#define FIFO_ENTRIES (voodoo->fifo_write_idx - voodoo->fifo_read_idx)
+#define FIFO_FULL ((voodoo->fifo_write_idx - voodoo->fifo_read_idx) >= FIFO_SIZE-4)
+#define FIFO_EMPTY (voodoo->fifo_read_idx == voodoo->fifo_write_idx)
+
+/* Masks for the two parts of fifo_entry_t.addr_type: entry type in the top byte, address in the low 24 bits. */
+#define FIFO_TYPE 0xff000000
+#define FIFO_ADDR 0x00ffffff
+
+/*
+ * FIFO entry type tags. Each value occupies bits 24..31, i.e. the byte
+ * selected by the FIFO_TYPE mask.
+ */
+enum
+{
+ FIFO_INVALID = (0x00 << 24),
+ FIFO_WRITEL_REG = (0x01 << 24),
+ FIFO_WRITEW_FB = (0x02 << 24),
+ FIFO_WRITEL_FB = (0x03 << 24),
+ FIFO_WRITEL_TEX = (0x04 << 24),
+ FIFO_WRITEL_2DREG = (0x05 << 24)
+};
+
+/* Number of entries in voodoo_t.params_buffer; a power of two so that PARAM_MASK can wrap an index. */
+#define PARAM_SIZE 1024
+#define PARAM_MASK (PARAM_SIZE - 1)
+/* Bit 31 as an unsigned constant; shifting a signed 1 into the sign bit would be undefined behaviour. */
+#define PARAM_ENTRY_SIZE (1u << 31)
+
+/*
+ * Occupancy helpers for reader x (an index into params_read_idx[]).
+ * They expect a variable named `voodoo` in scope.
+ */
+#define PARAM_ENTRIES(x) (voodoo->params_write_idx - voodoo->params_read_idx[x])
+#define PARAM_FULL(x) ((voodoo->params_write_idx - voodoo->params_read_idx[x]) >= PARAM_SIZE)
+#define PARAM_EMPTY(x) (voodoo->params_read_idx[x] == voodoo->params_write_idx)
+
+/* One slot of voodoo_t.fifo: a combined address/type word (see FIFO_TYPE and FIFO_ADDR) and a 32-bit value. */
+typedef struct
+{
+ uint32_t addr_type;
+ uint32_t val;
+} fifo_entry_t;
+
+/*
+ * Snapshot of the rendering parameters for one drawing command.
+ * voodoo_t holds one live copy (params) and a ring of PARAM_SIZE copies
+ * (params_buffer).
+ */
+typedef struct voodoo_params_t
+{
+ int command;
+
+ /* Triangle vertex coordinates. */
+ int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy;
+
+ /* Start values and per-X / per-Y increments of the iterated colour, alpha, Z and W. */
+ uint32_t startR, startG, startB, startZ, startA;
+
+ int32_t dBdX, dGdX, dRdX, dAdX, dZdX;
+
+ int32_t dBdY, dGdY, dRdY, dAdY, dZdY;
+
+ int64_t startW, dWdX, dWdY;
+
+ /* Texture coordinate start values and increments, one set per array index.
+    NOTE(review): p1/p2/p3 look like padding to keep each row four 64-bit words - confirm. */
+ struct
+ {
+ int64_t startS, startT, startW, p1;
+ int64_t dSdX, dTdX, dWdX, p2;
+ int64_t dSdY, dTdY, dWdY, p3;
+ } tmu[2];
+
+ uint32_t color0, color1;
+
+ uint32_t fbzMode;
+ uint32_t fbzColorPath;
+
+ uint32_t fogMode;
+ rgb_t fogColor;
+ struct
+ {
+ uint8_t fog, dfog;
+ } fogTable[64];
+
+ uint32_t alphaMode;
+
+ uint32_t zaColor;
+
+ int chromaKey_r, chromaKey_g, chromaKey_b;
+ uint32_t chromaKey;
+
+ uint32_t textureMode[2];
+ uint32_t tLOD[2];
+
+ uint32_t texBaseAddr[2], texBaseAddr1[2], texBaseAddr2[2], texBaseAddr38[2];
+
+ /* Per-LOD texture tables; the second dimension has LOD_MAX+2 entries. */
+ uint32_t tex_base[2][LOD_MAX+2];
+ uint32_t tex_end[2][LOD_MAX+2];
+ int tex_width[2];
+ int tex_w_mask[2][LOD_MAX+2];
+ int tex_w_nmask[2][LOD_MAX+2];
+ int tex_h_mask[2][LOD_MAX+2];
+ int tex_shift[2][LOD_MAX+2];
+ int tex_lod[2][LOD_MAX+2];
+ int tex_entry[2];
+ int detail_max[2], detail_bias[2], detail_scale[2];
+
+ uint32_t draw_offset, aux_offset;
+
+ int tformat[2];
+
+ int clipLeft, clipRight, clipLowY, clipHighY;
+ int clipLeft1, clipRight1, clipLowY1, clipHighY1;
+
+ int sign;
+
+ uint32_t front_offset;
+
+ uint32_t swapbufferCMD;
+
+ uint32_t stipple;
+
+ int col_tiled, aux_tiled;
+ int row_width, aux_row_width;
+} voodoo_params_t;
+
+/*
+ * One entry of voodoo_t.texture_cache.
+ * refcount and refcount_r[] are volatile.
+ * NOTE(review): presumably because they are shared between threads (refcount_r
+ * has four entries, like the render_thread array) - confirm.
+ */
+typedef struct texture_t
+{
+ uint32_t base;
+ uint32_t tLOD;
+ volatile int refcount, refcount_r[4];
+ int is16;
+ uint32_t palette_checksum;
+ uint32_t addr_start[4], addr_end[4];
+ uint32_t *data;
+} texture_t;
+
+/* Floating-point vertex record; voodoo_t keeps four of these in verts[]. */
+typedef struct vert_t
+{
+ float sVx, sVy;
+ float sRed, sGreen, sBlue, sAlpha;
+ float sVz, sWb;
+ float sW0, sS0, sT0;
+ float sW1, sS1, sT1;
+} vert_t;
+
+/* Clip rectangle given as minimum/maximum X and Y; used by banshee_blt.clip[]. */
+typedef struct clip_t
+{
+ int x_min, x_max;
+ int y_min, y_max;
+} clip_t;
+
+/*
+ * Complete state of one emulated card: register shadows, memory pointers,
+ * display timing, FIFO/parameter rings, worker thread handles, 2D blitter
+ * state, screen filter tables and the texture cache.
+ */
+typedef struct voodoo_t
+{
+ mem_mapping_t mapping;
+
+ int pci_enable;
+
+ /* DAC registers; dac_pll_regs[0] and dac_data[6] feed voodoo_pixelclock_update(). */
+ uint8_t dac_data[8];
+ int dac_reg, dac_reg_ff;
+ uint8_t dac_readdata;
+ uint16_t dac_pll_regs[16];
+
+ /* Both written by voodoo_pixelclock_update(); line_time is the timer period used by voodoo_callback(). */
+ float pixel_clock;
+ uint64_t line_time;
+
+ voodoo_params_t params;
+
+ uint32_t fbiInit0, fbiInit1, fbiInit2, fbiInit3, fbiInit4;
+ uint32_t fbiInit5, fbiInit6, fbiInit7; /*Voodoo 2*/
+
+ uint32_t initEnable;
+
+ uint32_t lfbMode;
+
+ uint32_t memBaseAddr;
+
+ int_float fvertexAx, fvertexAy, fvertexBx, fvertexBy, fvertexCx, fvertexCy;
+
+ uint32_t front_offset, back_offset;
+
+ uint32_t fb_read_offset, fb_write_offset;
+
+ int row_width, aux_row_width;
+ int block_width;
+
+ int col_tiled, aux_tiled;
+
+ uint8_t *fb_mem, *tex_mem[2];
+ uint16_t *tex_mem_w[2];
+
+ int rgb_sel;
+
+ uint32_t trexInit1[2];
+
+ uint32_t tmuConfig;
+
+ /* swap_mutex guards the swap_pending/swap_count/front_offset update in voodoo_callback(). */
+ mutex_t *swap_mutex;
+ int swap_count;
+
+ int disp_buffer, draw_buffer;
+ pc_timer_t timer;
+
+ /* Current scanline, advanced once per voodoo_callback() call. */
+ int line;
+ svga_t *svga;
+
+ uint32_t backPorch;
+ uint32_t videoDimensions;
+ uint32_t hSync, vSync;
+
+ int h_total, v_total, v_disp;
+ int h_disp;
+ int v_retrace;
+
+ /* Source tables for voodoo_update_ncc(), indexed [tmu][table]. */
+ struct
+ {
+ uint32_t y[4], i[4], q[4];
+ } nccTable[2][2];
+
+ rgba_u palette[2][256];
+
+ /* Output of voodoo_update_ncc(), indexed [tmu][table][colour index]. */
+ rgba_u ncc_lookup[2][2][256];
+ int ncc_dirty[2];
+
+ thread_t *fifo_thread;
+ thread_t *render_thread[4];
+ event_t *wake_fifo_thread;
+ event_t *wake_main_thread;
+ event_t *fifo_not_full_event;
+ event_t *render_not_full_event[4];
+ event_t *wake_render_thread[4];
+
+ int voodoo_busy;
+ int render_voodoo_busy[4];
+
+ int render_threads;
+ int odd_even_mask;
+
+ int pixel_count[4], texel_count[4], tri_count, frame_count;
+ int pixel_count_old[4], texel_count_old[4];
+ int wr_count, rd_count, tex_count;
+
+ /* Buffer swap bookkeeping, consumed by voodoo_callback() at the start of vertical retrace. */
+ int retrace_count;
+ int swap_interval;
+ uint32_t swap_offset;
+ int swap_pending;
+
+ int bilinear_enabled;
+
+ int fb_size;
+ uint32_t fb_mask;
+
+ int texture_size;
+ uint32_t texture_mask;
+
+ int dual_tmus;
+ /* One of the VOODOO_* card model values. */
+ int type;
+
+ /* Ring of FIFO_SIZE entries; see the FIFO_* macros for the index arithmetic. */
+ fifo_entry_t fifo[FIFO_SIZE];
+ volatile int fifo_read_idx, fifo_write_idx;
+ volatile int cmd_read, cmd_written, cmd_written_fifo;
+
+ /* Ring of PARAM_SIZE parameter snapshots with four read cursors; see the PARAM_* macros. */
+ voodoo_params_t params_buffer[PARAM_SIZE];
+ volatile int params_read_idx[4], params_write_idx;
+
+ uint32_t cmdfifo_base, cmdfifo_end, cmdfifo_size;
+ int cmdfifo_rp, cmdfifo_ret_addr;
+ int cmdfifo_in_sub;
+ volatile int cmdfifo_depth_rd, cmdfifo_depth_wr;
+ volatile int cmdfifo_enabled;
+ uint32_t cmdfifo_amin, cmdfifo_amax;
+ int cmdfifo_holecount;
+
+ uint32_t sSetupMode;
+ vert_t verts[4];
+ unsigned int vertex_ages[3];
+ unsigned int vertex_next_age;
+ int num_verticies;
+ int cull_pingpong;
+
+ int flush;
+
+ /* Screen filter controls; scrfilterThreshold packs R, G and B thresholds (see voodoo_threshold_check()). */
+ int scrfilter;
+ int scrfilterEnabled;
+ int scrfilterThreshold;
+ int scrfilterThresholdOld;
+
+ uint32_t last_write_addr;
+
+ uint32_t fbiPixelsIn;
+ uint32_t fbiChromaFail;
+ uint32_t fbiZFuncFail;
+ uint32_t fbiAFuncFail;
+ uint32_t fbiPixelsOut;
+
+ uint32_t bltSrcBaseAddr;
+ uint32_t bltDstBaseAddr;
+ int bltSrcXYStride, bltDstXYStride;
+ uint32_t bltSrcChromaRange, bltDstChromaRange;
+ int bltSrcChromaMinR, bltSrcChromaMinG, bltSrcChromaMinB;
+ int bltSrcChromaMaxR, bltSrcChromaMaxG, bltSrcChromaMaxB;
+ int bltDstChromaMinR, bltDstChromaMinG, bltDstChromaMinB;
+ int bltDstChromaMaxR, bltDstChromaMaxG, bltDstChromaMaxB;
+
+ int bltClipRight, bltClipLeft;
+ int bltClipHighY, bltClipLowY;
+
+ int bltSrcX, bltSrcY;
+ int bltDstX, bltDstY;
+ int bltSizeX, bltSizeY;
+ int bltRop[4];
+ uint16_t bltColorFg, bltColorBg;
+
+ uint32_t bltCommand;
+
+ uint32_t leftOverlayBuf;
+
+ struct
+ {
+ int dst_x, dst_y;
+ int cur_x;
+ int size_x, size_y;
+ int x_dir, y_dir;
+ int dst_stride;
+ } blt;
+
+ /* 2D blitter state: register values first, then values derived from them. */
+ struct
+ {
+ uint32_t bresError0, bresError1;
+ uint32_t clip0Min, clip0Max;
+ uint32_t clip1Min, clip1Max;
+ uint32_t colorBack, colorFore;
+ uint32_t command, commandExtra;
+ uint32_t dstBaseAddr;
+ uint32_t dstFormat;
+ uint32_t dstSize;
+ uint32_t dstXY;
+ uint32_t lineStipple;
+ uint32_t lineStyle;
+ uint32_t rop;
+ uint32_t srcBaseAddr;
+ uint32_t srcFormat;
+ uint32_t srcSize;
+ uint32_t srcXY;
+
+ uint32_t colorPattern[64];
+
+ int bres_error_0, bres_error_1;
+ uint32_t colorPattern8[64], colorPattern16[64], colorPattern24[64];
+ int cur_x, cur_y;
+ uint32_t dstBaseAddr_tiled;
+ uint32_t dstColorkeyMin, dstColorkeyMax;
+ int dstSizeX, dstSizeY;
+ int dstX, dstY;
+ int dst_stride;
+ int patoff_x, patoff_y;
+ uint8_t rops[4];
+ uint32_t srcBaseAddr_tiled;
+ uint32_t srcColorkeyMin, srcColorkeyMax;
+ int srcSizeX, srcSizeY;
+ int srcX, srcY;
+ int src_stride;
+ int old_srcX;
+
+ /*Used for handling packed 24bpp host data*/
+ int host_data_remainder;
+ uint32_t old_host_data;
+
+ /*Polyfill coordinates*/
+ int lx[2], rx[2];
+ int ly[2], ry[2];
+
+ /*Polyfill state*/
+ int error[2];
+ int dx[2], dy[2];
+ int x_inc[2]; /*y_inc is always 1 for polyfill*/
+ int lx_cur, rx_cur;
+
+ clip_t clip[2];
+
+ uint8_t host_data[16384];
+ int host_data_count;
+ int host_data_size_src, host_data_size_dest;
+ int src_stride_src, src_stride_dest;
+
+ int src_bpp;
+
+ int line_pix_pos, line_bit_pos;
+ int line_rep_cnt, line_bit_mask_size;
+ } banshee_blt;
+
+ /* Video overlay state: register values first, then values derived from them. */
+ struct
+ {
+ uint32_t vidOverlayStartCoords;
+ uint32_t vidOverlayEndScreenCoords;
+ uint32_t vidOverlayDudx, vidOverlayDudxOffsetSrcWidth;
+ uint32_t vidOverlayDvdy, vidOverlayDvdyOffset;
+ //uint32_t vidDesktopOverlayStride;
+
+ int start_x, start_y;
+ int end_x, end_y;
+ int size_x, size_y;
+ int overlay_bytes;
+
+ unsigned int src_y;
+ } overlay;
+
+ /* 33-entry CLUT, expanded by voodoo_calc_clutData() into clutData256 and the 16bpp-to-32bpp table video_16to32. */
+ rgb_t clutData[33];
+ int clutData_dirty;
+ rgb_t clutData256[256];
+ uint32_t video_16to32[0x10000];
+
+ /* Per-scanline redraw flags and the range of lines redrawn this frame (see voodoo_callback()). */
+ uint8_t dirty_line[2048];
+ int dirty_line_low, dirty_line_high;
+
+ int fb_write_buffer, fb_draw_buffer;
+ int buffer_cutoff;
+
+ uint32_t tile_base, tile_stride;
+ int tile_stride_shift, tile_x, tile_x_real;
+
+ int read_time, write_time, burst_time;
+
+ pc_timer_t wake_timer;
+
+ /* screen filter tables */
+ uint8_t thefilter[256][256];
+ uint8_t thefilterg[256][256];
+ uint8_t thefilterb[256][256];
+ uint16_t purpleline[256][3];
+
+ texture_t texture_cache[2][TEX_CACHE_MAX];
+ uint8_t texture_present[2][16384];
+ int texture_last_removed;
+
+ uint32_t palette_checksum[2];
+ int palette_dirty[2];
+
+ uint64_t time;
+ int render_time[4];
+
+ /* codegen_data is allocated by voodoo_codegen_init() and released by voodoo_codegen_close(). */
+ int use_recompiler;
+ void *codegen_data;
+
+ /* Set this card belongs to; voodoo_callback() reaches the other card through it. */
+ struct voodoo_set_t *set;
+
+
+ uint8_t *vram, *changedvram;
+
+ void *p;
+} voodoo_t;
+
+/* Up to two cards managed together; voodoo_callback() treats voodoos[0] and voodoos[1] as the SLI pair. */
+typedef struct voodoo_set_t
+{
+ voodoo_t *voodoos[2];
+
+ mem_mapping_t snoop_mapping;
+
+ int nr_cards;
+} voodoo_set_t;
+
+
+/* Colour tables, one rgba8_t per possible 8-bit or 16-bit source value (sizes 0x100 and 0x10000).
+   NOTE(review): presumably texel-format expansion tables named after the source format - confirm. */
+extern rgba8_t rgb332[0x100], ai44[0x100], rgb565[0x10000], argb1555[0x10000], argb4444[0x10000], ai88[0x10000];
+
+
+/* Called by voodoo_threshold_check() for Banshee and later cards with the red and green thresholds. */
+void voodoo_generate_vb_filters(voodoo_t *voodoo, int fcr, int fcg);
+
+/* Recomputes buffer offsets from the current register state. */
+void voodoo_recalc(voodoo_t *voodoo);
+/* Rebuilds voodoo->ncc_lookup[tmu] from voodoo->nccTable[tmu]. */
+void voodoo_update_ncc(voodoo_t *voodoo, int tmu);
+
+/* Card creation and teardown; the init function's return value is an opaque pointer. */
+void *voodoo_2d3d_card_init(int type);
+void voodoo_card_close(voodoo_t *voodoo);
--- /dev/null
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_display.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+
+/* Extract the 9-bit field at bit `shift` of `word` and sign-extend it to an int. */
+static int voodoo_ncc_component(uint32_t word, int shift)
+{
+        int value = (word >> shift) & 0x1ff;
+
+        if (value & 0x100)
+                value |= 0xfffffe00;
+
+        return value;
+}
+
+/*
+ * Rebuild both ncc_lookup tables of one TMU from its nccTable registers.
+ *
+ * Each 8-bit colour index is split into a 4-bit Y selector (bits 7..4), a
+ * 2-bit I selector (bits 3..2) and a 2-bit Q selector (bits 1..0).  Y is an
+ * unsigned byte picked out of the packed y[] words; the I and Q entries each
+ * carry three signed 9-bit offsets (red at bit 18, green at bit 9, blue at
+ * bit 0).  Every output channel is Y plus the matching I and Q offsets,
+ * clamped to 0..255, with alpha forced to 0xff.
+ */
+void voodoo_update_ncc(voodoo_t *voodoo, int tmu)
+{
+        int tbl, col;
+
+        for (tbl = 0; tbl < 2; tbl++)
+        {
+                for (col = 0; col < 256; col++)
+                {
+                        int y_sel = col >> 4;
+                        uint32_t i_word = voodoo->nccTable[tmu][tbl].i[(col >> 2) & 3];
+                        uint32_t q_word = voodoo->nccTable[tmu][tbl].q[col & 3];
+                        int y = (voodoo->nccTable[tmu][tbl].y[y_sel >> 2] >> ((y_sel & 3) * 8)) & 0xff;
+                        int red   = y + voodoo_ncc_component(i_word, 18) + voodoo_ncc_component(q_word, 18);
+                        int green = y + voodoo_ncc_component(i_word, 9)  + voodoo_ncc_component(q_word, 9);
+                        int blue  = y + voodoo_ncc_component(i_word, 0)  + voodoo_ncc_component(q_word, 0);
+
+                        voodoo->ncc_lookup[tmu][tbl][col].rgba.r = CLAMP(red);
+                        voodoo->ncc_lookup[tmu][tbl][col].rgba.g = CLAMP(green);
+                        voodoo->ncc_lookup[tmu][tbl][col].rgba.b = CLAMP(blue);
+                        voodoo->ncc_lookup[tmu][tbl][col].rgba.a = 0xff;
+                }
+        }
+}
+
+/*
+ * Recompute the pixel clock and per-scanline timer period.
+ *
+ * The clock comes from DAC PLL register 0: m in bits 0..6 (+2), n1 in bits
+ * 8..12 (+2) and a power-of-two post divider n2 in bits 13..15, applied to a
+ * 14318184 Hz base.  The result is halved when the upper nibble of
+ * dac_data[6] is 0x2, 0x6 or 0x7.
+ *
+ * The line length is the sum of the low byte of hSync and bits 16..25 of
+ * hSync.  line_time is that length in CPU clocks (cpuclock / pixel clock),
+ * scaled by 2^32.
+ */
+void voodoo_pixelclock_update(voodoo_t *voodoo)
+{
+        uint16_t pll = voodoo->dac_pll_regs[0];
+        int m = (pll & 0x7f) + 2;
+        int n1 = ((pll >> 8) & 0x1f) + 2;
+        int n2 = ((pll >> 13) & 0x07);
+        float t = (14318184.0 * ((float)m / (float)n1)) / (float)(1 << n2);
+        int line_length = (voodoo->hSync & 0xff) + ((voodoo->hSync >> 16) & 0x3ff);
+        double clock_const;
+
+        switch (voodoo->dac_data[6] & 0xf0)
+        {
+                case 0x20:
+                case 0x60:
+                case 0x70:
+                t /= 2.0f;
+                break;
+
+                default:
+                break;
+        }
+
+        voodoo->pixel_clock = t;
+
+        clock_const = cpuclock / t;
+        voodoo->line_time = (uint64_t)((double)line_length * clock_const * (double)(1ull << 32));
+}
+
+/*
+ * Expand the 33-entry CLUT into the tables used for display.
+ *
+ * clutData256[i] is a linear blend of CLUT entries i/8 and i/8 + 1, weighted
+ * by the low three bits of i.  video_16to32[] then maps every 16-bit 5:6:5
+ * pixel to a 32-bit value by looking each channel up in clutData256 (the
+ * channel value is used left-aligned in eight bits, low bits zero).
+ */
+static void voodoo_calc_clutData(voodoo_t *voodoo)
+{
+        int idx;
+
+        for (idx = 0; idx < 256; idx++)
+        {
+                const rgb_t *lo = &voodoo->clutData[idx >> 3];
+                const rgb_t *hi = &voodoo->clutData[(idx >> 3)+1];
+                int w_hi = idx & 7;
+                int w_lo = 8 - w_hi;
+                rgb_t *out = &voodoo->clutData256[idx];
+
+                out->r = (lo->r*w_lo + hi->r*w_hi) >> 3;
+                out->g = (lo->g*w_lo + hi->g*w_hi) >> 3;
+                out->b = (lo->b*w_lo + hi->b*w_hi) >> 3;
+        }
+
+        for (idx = 0; idx < 65536; idx++)
+        {
+                int red_idx   = (idx >> 8) & 0xf8;
+                int green_idx = (idx >> 3) & 0xfc;
+                int blue_idx  = (idx << 3) & 0xf8;
+
+                voodoo->video_16to32[idx] = (voodoo->clutData256[red_idx].r << 16) |
+                                            (voodoo->clutData256[green_idx].g << 8) |
+                                            voodoo->clutData256[blue_idx].b;
+        }
+}
+
+
+
+/* Number of levels per colour channel covered by the filter tables. */
+#define FILTDIV 256
+
+/* Only FILTCAPB has an explicit initialiser; all three start at 0 because they have static storage.
+   They are set from scrfilterThreshold by voodoo_threshold_check(). */
+static int FILTCAP, FILTCAPG, FILTCAPB = 0; /* color filter threshold values */
+
+/*
+ * Compute one filter table entry for a single channel.
+ *
+ * diff is (neighbour - target).  It is limited to [-cap, cap]; if it then
+ * passes the `limit` test, half of it is added to the target value.  The
+ * result is clamped to 0..FILTDIV-1.
+ */
+static float voodoo_filter_v1_channel(int target, float diff, int cap, float limit)
+{
+        float out = target;
+
+        if (diff > cap)
+                diff = cap;
+        if (diff < -cap)
+                diff = -cap;
+
+        /* Both halves of this test are the same comparison written two ways. */
+        if ((diff < limit) || (-diff > -limit))
+                out = target + (diff / 2); /* need these divides so we can actually undither! */
+
+        if (out < 0)
+                out = 0;
+        if (out > FILTDIV-1)
+                out = FILTDIV-1;
+
+        return out;
+}
+
+/*
+ * Build the first-generation screen filter tables.
+ *
+ * thefilter / thefilterg / thefilterb are indexed [target][neighbour] and use
+ * the FILTCAP, FILTCAPG and FILTCAPB thresholds respectively.  purpleline[v]
+ * holds v+4 (capped at 255) in entries 0 and 2 and v itself in entry 1.
+ */
+void voodoo_generate_filter_v1(voodoo_t *voodoo)
+{
+        int g, h;
+        float fcr = FILTCAP * 5;
+        float fcg = FILTCAPG * 6;
+        float fcb = FILTCAPB * 5;
+
+        for (g = 0; g < FILTDIV; g++) /* pixel 1 */
+        {
+                int boosted = g + 4;
+
+                for (h = 0; h < FILTDIV; h++) /* pixel 2 */
+                {
+                        float difference = (float)(h - g);
+
+                        voodoo->thefilter[g][h] = voodoo_filter_v1_channel(g, difference, FILTCAP, fcr);
+                        voodoo->thefilterg[g][h] = voodoo_filter_v1_channel(g, difference, FILTCAPG, fcg);
+                        voodoo->thefilterb[g][h] = voodoo_filter_v1_channel(g, difference, FILTCAPB, fcb);
+                }
+
+                if (boosted > 255)
+                        boosted = 255;
+                voodoo->purpleline[g][0] = boosted;
+                voodoo->purpleline[g][2] = boosted;
+
+                /* g never exceeds 255 here, so the middle entry is simply g. */
+                voodoo->purpleline[g][1] = g;
+        }
+}
+
+/*
+ * Compute one second-generation filter table entry for a single channel.
+ *
+ * g is the target pixel, h the neighbour.  Only a brighter neighbour
+ * (h > g) can change the result: the target is raised by avgdiff, limited to
+ * the threshold `cap` (itself pre-limited to 32).  If the absolute
+ * difference exceeds the threshold, the target is returned unchanged.  The
+ * result is clamped to 0..255.
+ */
+static float voodoo_filter_v2_channel(int g, int h, float avgdiff, float difference, int cap)
+{
+        float limit = cap;
+        float out = g;
+
+        /* pre-clamping */
+        if (limit > 32)
+                limit = 32;
+
+        /* try lighten */
+        if (h > g)
+        {
+                float step = avgdiff;
+
+                if (step > limit)
+                        step = limit;
+
+                out = g + step;
+
+                if (out > g + cap)
+                        out = g + cap;
+                if (out > g + avgdiff)
+                        out = g + avgdiff;
+        }
+
+        if (difference > cap)
+                out = g;
+
+        /* clamp */
+        if (out < 0)
+                out = 0;
+        if (out > 255)
+                out = 255;
+
+        return out;
+}
+
+/*
+ * Build the second-generation screen filter tables.
+ *
+ * thefilter / thefilterg / thefilterb are indexed [target][neighbour] and use
+ * the FILTCAP, FILTCAPG and FILTCAPB thresholds respectively.
+ */
+void voodoo_generate_filter_v2(voodoo_t *voodoo)
+{
+        int g, h;
+
+        for (g = 0; g < 256; g++) /* pixel 1 - our target pixel we want to bleed into */
+        {
+                for (h = 0; h < 256; h++) /* pixel 2 - our main pixel */
+                {
+                        float difference = (float)(g - h);
+                        float avg = (float)((g + g + g + g + h) / 5);
+                        float avgdiff = avg - (float)((g + h + h + h + h) / 5);
+
+                        /* Work with magnitudes from here on. */
+                        if (avgdiff < 0)
+                                avgdiff = -avgdiff;
+                        if (difference < 0)
+                                difference = -difference;
+
+                        /* add to the table */
+                        voodoo->thefilter[g][h] = voodoo_filter_v2_channel(g, h, avgdiff, difference, FILTCAP);
+                        voodoo->thefilterg[g][h] = voodoo_filter_v2_channel(g, h, avgdiff, difference, FILTCAPG);
+                        voodoo->thefilterb[g][h] = voodoo_filter_v2_channel(g, h, avgdiff, difference, FILTCAPB);
+                }
+        }
+}
+
+/*
+ * Regenerate the screen filter tables when the threshold setting changes.
+ *
+ * Does nothing while the filter is disabled or the threshold equals the value
+ * seen on the previous regeneration.  Otherwise the packed threshold is split
+ * into red (bits 16..23), green (bits 8..15) and blue (bits 0..7), stored in
+ * FILTCAP / FILTCAPG / FILTCAPB, and the tables are rebuilt: the v2 generator
+ * for VOODOO_2, the v1 generator for everything else, plus the Banshee
+ * filters for VOODOO_BANSHEE and later.
+ */
+void voodoo_threshold_check(voodoo_t *voodoo)
+{
+        int r, g, b;
+
+        /* Disabled, or nothing changed since the tables were last built. */
+        if (!voodoo->scrfilterEnabled || voodoo->scrfilterThreshold == voodoo->scrfilterThresholdOld)
+                return;
+
+        r = (voodoo->scrfilterThreshold >> 16) & 0xFF;
+        g = (voodoo->scrfilterThreshold >> 8 ) & 0xFF;
+        b = voodoo->scrfilterThreshold & 0xFF;
+
+        FILTCAP = r;
+        FILTCAPG = g;
+        FILTCAPB = b;
+
+        pclog("Voodoo Filter Threshold Check: %06x - RED %i GREEN %i BLUE %i\n", voodoo->scrfilterThreshold, r, g, b);
+
+        voodoo->scrfilterThresholdOld = voodoo->scrfilterThreshold;
+
+        if (voodoo->type == VOODOO_2)
+                voodoo_generate_filter_v2(voodoo);
+        else
+                voodoo_generate_filter_v1(voodoo);
+
+        if (voodoo->type >= VOODOO_BANSHEE)
+                voodoo_generate_vb_filters(voodoo, FILTCAP, FILTCAPG);
+}
+
+/*
+ * One filter pass: for every pixel x in [first, last), write to `out` the
+ * table lookup of in[x] against in[x + neighbour], channel by channel
+ * (byte 0 via thefilterb, byte 1 via thefilterg, byte 2 via thefilter).
+ * `out` and `in` must be different buffers.
+ */
+static void voodoo_filterline_v1_pass(voodoo_t *voodoo, uint8_t *out, const uint8_t *in, int first, int last, int neighbour)
+{
+        int x;
+
+        for (x = first; x < last; x++)
+        {
+                out[x*3]   = voodoo->thefilterb[in[x*3]][in[(x + neighbour)*3]];
+                out[x*3+1] = voodoo->thefilterg[in[x*3+1]][in[(x + neighbour)*3+1]];
+                out[x*3+2] = voodoo->thefilter[in[x*3+2]][in[(x + neighbour)*3+2]];
+        }
+}
+
+/*
+ * Filter one scanline with the first-generation screen filter.
+ *
+ * fil    - output, three bytes per pixel in blue, green, red order
+ * column - number of pixels to process
+ * src    - 16-bit 5:6:5 source pixels
+ * line   - scanline number; odd lines get the purpleline adjustment
+ *
+ * The line is expanded to 8 bits per channel, then run through three passes
+ * against the left neighbour and a final pass against the right neighbour,
+ * alternating between fil and a scratch copy so that no pass reads values it
+ * has just written.
+ */
+static void voodoo_filterline_v1(voodoo_t *voodoo, uint8_t *fil, int column, uint16_t *src, int line)
+{
+        int x;
+
+        /* Scratchpad for avoiding feedback streaks */
+        uint8_t fil3[(voodoo->h_disp) * 3];
+
+        /* 16 to 32-bit, mirrored into the scratchpad */
+        for (x = 0; x < column; x++)
+        {
+                fil3[x*3+0] = fil[x*3] = ((src[x] & 31) << 3);
+                fil3[x*3+1] = fil[x*3+1] = (((src[x] >> 5) & 63) << 2);
+                fil3[x*3+2] = fil[x*3+2] = (((src[x] >> 11) & 31) << 3);
+        }
+
+        /* lines: only fil is adjusted, the scratchpad keeps the plain values */
+        if (line & 1)
+        {
+                for (x = 0; x < column; x++)
+                {
+                        fil[x*3] = voodoo->purpleline[fil[x*3]][0];
+                        fil[x*3+1] = voodoo->purpleline[fil[x*3+1]][1];
+                        fil[x*3+2] = voodoo->purpleline[fil[x*3+2]][2];
+                }
+        }
+
+        /* filtering time */
+        voodoo_filterline_v1_pass(voodoo, fil3, fil, 1, column, -1);
+        voodoo_filterline_v1_pass(voodoo, fil, fil3, 1, column, -1);
+        voodoo_filterline_v1_pass(voodoo, fil3, fil, 1, column, -1);
+        voodoo_filterline_v1_pass(voodoo, fil, fil3, 0, column-1, 1);
+}
+
+
+/*
+ * Filter one scanline with the second-generation screen filter.
+ *
+ * fil    - output, three bytes per pixel in blue, green, red order
+ * column - number of pixels to process
+ * src    - 16-bit 5:6:5 source pixels
+ * line   - scanline number (unused)
+ *
+ * The line is expanded to 8 bits per channel; each source pixel x is then
+ * blended into its neighbours at x+3, x+2, x+1 and x-1 through the filter
+ * tables, alternating between fil and a scratch buffer.
+ *
+ * Lines narrower than three pixels are only expanded, not filtered: the edge
+ * handling below indexes column-3 and would otherwise write before the start
+ * of the scratch buffer.
+ */
+static void voodoo_filterline_v2(voodoo_t *voodoo, uint8_t *fil, int column, uint16_t *src, int line)
+{
+        int x;
+
+        if (column < 3)
+        {
+                /* 16 to 32-bit only; too narrow for the filter taps */
+                for (x = 0; x < column; x++)
+                {
+                        fil[x*3+0] = ((src[x] & 31) << 3);
+                        fil[x*3+1] = (((src[x] >> 5) & 63) << 2);
+                        fil[x*3+2] = (((src[x] >> 11) & 31) << 3);
+                }
+                return;
+        }
+
+        /* Scratchpad for blending filter, sized from the pixel count actually processed */
+        uint8_t fil3[column * 3];
+
+        /* 16 to 32-bit */
+        for (x = 0; x < column; x++)
+        {
+                /* Blank scratchpads */
+                fil3[x*3+0] = fil[x*3+0] = ((src[x] & 31) << 3);
+                fil3[x*3+1] = fil[x*3+1] = (((src[x] >> 5) & 63) << 2);
+                fil3[x*3+2] = fil[x*3+2] = (((src[x] >> 11) & 31) << 3);
+        }
+
+        /* filtering time */
+
+        for (x = 1; x < column-3; x++)
+        {
+                /* Current source pixel, expanded to 8 bits per channel */
+                int b = (src[x] & 31) << 3;
+                int g = ((src[x] >> 5) & 63) << 2;
+                int r = ((src[x] >> 11) & 31) << 3;
+
+                fil3[(x+3)*3]   = voodoo->thefilterb [((src[x+3] & 31) << 3)] [b];
+                fil3[(x+3)*3+1] = voodoo->thefilterg [(((src[x+3] >> 5) & 63) << 2)] [g];
+                fil3[(x+3)*3+2] = voodoo->thefilter [(((src[x+3] >> 11) & 31) << 3)] [r];
+
+                fil[(x+2)*3]   = voodoo->thefilterb [fil3[(x+2)*3]][b];
+                fil[(x+2)*3+1] = voodoo->thefilterg [fil3[(x+2)*3+1]][g];
+                fil[(x+2)*3+2] = voodoo->thefilter [fil3[(x+2)*3+2]][r];
+
+                fil3[(x+1)*3]   = voodoo->thefilterb [fil[(x+1)*3]][b];
+                fil3[(x+1)*3+1] = voodoo->thefilterg [fil[(x+1)*3+1]][g];
+                fil3[(x+1)*3+2] = voodoo->thefilter [fil[(x+1)*3+2]][r];
+
+                fil[(x-1)*3]   = voodoo->thefilterb [fil3[(x-1)*3]][b];
+                fil[(x-1)*3+1] = voodoo->thefilterg [fil3[(x-1)*3+1]][g];
+                fil[(x-1)*3+2] = voodoo->thefilter [fil3[(x-1)*3+2]][r];
+        }
+
+        /* unroll for edge cases */
+        {
+                /* NOTE(review): src[column] is one pixel past the processed line; kept as in the
+                   original so the output is unchanged - confirm whether column-1 was intended. */
+                int eb = (src[column] & 31) << 3;
+                int eg = ((src[column] >> 5) & 63) << 2;
+                int er = ((src[column] >> 11) & 31) << 3;
+
+                fil3[(column-3)*3]   = voodoo->thefilterb [((src[column-3] & 31) << 3)] [eb];
+                fil3[(column-3)*3+1] = voodoo->thefilterg [(((src[column-3] >> 5) & 63) << 2)] [eg];
+                fil3[(column-3)*3+2] = voodoo->thefilter [(((src[column-3] >> 11) & 31) << 3)] [er];
+
+                fil3[(column-2)*3]   = voodoo->thefilterb [((src[column-2] & 31) << 3)] [eb];
+                fil3[(column-2)*3+1] = voodoo->thefilterg [(((src[column-2] >> 5) & 63) << 2)] [eg];
+                fil3[(column-2)*3+2] = voodoo->thefilter [(((src[column-2] >> 11) & 31) << 3)] [er];
+
+                fil3[(column-1)*3]   = voodoo->thefilterb [((src[column-1] & 31) << 3)] [eb];
+                fil3[(column-1)*3+1] = voodoo->thefilterg [(((src[column-1] >> 5) & 63) << 2)] [eg];
+                fil3[(column-1)*3+2] = voodoo->thefilter [(((src[column-1] >> 11) & 31) << 3)] [er];
+
+                fil[(column-2)*3]   = voodoo->thefilterb [fil3[(column-2)*3]][eb];
+                fil[(column-2)*3+1] = voodoo->thefilterg [fil3[(column-2)*3+1]][eg];
+                fil[(column-2)*3+2] = voodoo->thefilter [fil3[(column-2)*3+2]][er];
+
+                fil[(column-1)*3]   = voodoo->thefilterb [fil3[(column-1)*3]][eb];
+                fil[(column-1)*3+1] = voodoo->thefilterg [fil3[(column-1)*3+1]][eg];
+                fil[(column-1)*3+2] = voodoo->thefilter [fil3[(column-1)*3+2]][er];
+
+                /* This last step only updates the scratchpad, which is not read again. */
+                fil3[(column-1)*3]   = voodoo->thefilterb [fil[(column-1)*3]][eb];
+                fil3[(column-1)*3+1] = voodoo->thefilterg [fil[(column-1)*3+1]][eg];
+                fil3[(column-1)*3+2] = voodoo->thefilter [fil[(column-1)*3+2]][er];
+        }
+}
+
+/*
+ * Per-scanline timer callback. p is the voodoo_t of the card.
+ *
+ * Each call handles the current value of voodoo->line:
+ *  - while FBIINIT0_VGA_PASS is set and the line is inside the visible area,
+ *    a dirty line is converted from the 16-bit front buffer into buffer32;
+ *  - on the first line after the visible area, a pending buffer swap is
+ *    carried out and v_retrace is raised;
+ *  - the line counter advances, the frame is blitted once the last visible
+ *    line has been passed, and the counter wraps at v_total;
+ *  - the timer is re-armed for the next line.
+ */
+void voodoo_callback(void *p)
+{
+ voodoo_t *voodoo = (voodoo_t *)p;
+
+ if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS)
+ {
+ if (voodoo->line < voodoo->v_disp)
+ {
+ voodoo_t *draw_voodoo;
+ int draw_line;
+
+ if (SLI_ENABLED)
+ {
+ /* In SLI only the first card of the set draws; the second skips straight to the retrace handling. */
+ if (voodoo == voodoo->set->voodoos[1])
+ goto skip_draw;
+
+ /* Lines alternate between the two cards; each card holds every other line, hence line >> 1. */
+ if (((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) ? 1 : 0) == (voodoo->line & 1))
+ draw_voodoo = voodoo;
+ else
+ draw_voodoo = voodoo->set->voodoos[1];
+ draw_line = voodoo->line >> 1;
+ }
+ else
+ {
+ /* Nothing is drawn while bit 0 of fbiInit0 is clear. */
+ if (!(voodoo->fbiInit0 & 1))
+ goto skip_draw;
+ draw_voodoo = voodoo;
+ draw_line = voodoo->line;
+ }
+
+ if (draw_voodoo->dirty_line[draw_line])
+ {
+ /* Destination starts 32 pixels into the output line; this local p shadows the function parameter. */
+ uint32_t *p = &((uint32_t *)buffer32->line[voodoo->line])[32];
+ uint16_t *src = (uint16_t *)&draw_voodoo->fb_mem[draw_voodoo->front_offset + draw_line*draw_voodoo->row_width];
+ int x;
+
+ draw_voodoo->dirty_line[draw_line] = 0;
+
+ /* Track the range of lines redrawn this frame; the buffer wait happens when the lower bound moves. */
+ if (voodoo->line < voodoo->dirty_line_low)
+ {
+ voodoo->dirty_line_low = voodoo->line;
+ video_wait_for_buffer();
+ }
+ if (voodoo->line > voodoo->dirty_line_high)
+ voodoo->dirty_line_high = voodoo->line;
+
+ if (voodoo->scrfilter && voodoo->scrfilterEnabled)
+ {
+ uint8_t fil[(voodoo->h_disp) * 3]; /* interleaved 24-bit RGB */
+
+ if (voodoo->type == VOODOO_2)
+ voodoo_filterline_v2(voodoo, fil, voodoo->h_disp, src, voodoo->line);
+ else
+ voodoo_filterline_v1(voodoo, fil, voodoo->h_disp, src, voodoo->line);
+
+ /* Filtered bytes are in blue, green, red order; each goes through the expanded CLUT. */
+ for (x = 0; x < voodoo->h_disp; x++)
+ {
+ p[x] = (voodoo->clutData256[fil[x*3]].b << 0 | voodoo->clutData256[fil[x*3+1]].g << 8 | voodoo->clutData256[fil[x*3+2]].r << 16);
+ }
+ }
+ else
+ {
+ /* Unfiltered path: direct 16bpp to 32bpp table lookup. */
+ for (x = 0; x < voodoo->h_disp; x++)
+ {
+ p[x] = draw_voodoo->video_16to32[src[x]];
+ }
+ }
+ }
+ }
+ }
+skip_draw:
+ if (voodoo->line == voodoo->v_disp)
+ {
+// pclog("retrace %i %i %08x %i\n", voodoo->retrace_count, voodoo->swap_interval, voodoo->swap_offset, voodoo->swap_pending);
+ voodoo->retrace_count++;
+ if (SLI_ENABLED && (voodoo->fbiInit2 & FBIINIT2_SWAP_ALGORITHM_MASK) == FBIINIT2_SWAP_ALGORITHM_SLI_SYNC)
+ {
+ /* Synchronised SLI swap: handled only from the first card's callback, for both cards at once. */
+ if (voodoo == voodoo->set->voodoos[0])
+ {
+ voodoo_t *voodoo_1 = voodoo->set->voodoos[1];
+
+ thread_lock_mutex(voodoo->swap_mutex);
+ /*Only swap if both Voodoos are waiting for buffer swap*/
+ if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval) &&
+ voodoo_1->swap_pending && (voodoo_1->retrace_count > voodoo_1->swap_interval))
+ {
+ /* Marks the first 1024 entries of dirty_line so those lines are redrawn from the new front buffer. */
+ memset(voodoo->dirty_line, 1, 1024);
+ voodoo->retrace_count = 0;
+ voodoo->front_offset = voodoo->swap_offset;
+ if (voodoo->swap_count > 0)
+ voodoo->swap_count--;
+ voodoo->swap_pending = 0;
+
+ memset(voodoo_1->dirty_line, 1, 1024);
+ voodoo_1->retrace_count = 0;
+ voodoo_1->front_offset = voodoo_1->swap_offset;
+ if (voodoo_1->swap_count > 0)
+ voodoo_1->swap_count--;
+ voodoo_1->swap_pending = 0;
+ thread_unlock_mutex(voodoo->swap_mutex);
+
+ /* Wake both FIFO threads now that the swap has completed. */
+ thread_set_event(voodoo->wake_fifo_thread);
+ thread_set_event(voodoo_1->wake_fifo_thread);
+
+ voodoo->frame_count++;
+ voodoo_1->frame_count++;
+ }
+ else
+ thread_unlock_mutex(voodoo->swap_mutex);
+ }
+ }
+ else
+ {
+ /* Single-card swap: performed once more retraces than swap_interval have elapsed. */
+ thread_lock_mutex(voodoo->swap_mutex);
+ if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval))
+ {
+ voodoo->front_offset = voodoo->swap_offset;
+ if (voodoo->swap_count > 0)
+ voodoo->swap_count--;
+ voodoo->swap_pending = 0;
+ thread_unlock_mutex(voodoo->swap_mutex);
+
+ memset(voodoo->dirty_line, 1, 1024);
+ voodoo->retrace_count = 0;
+ thread_set_event(voodoo->wake_fifo_thread);
+ voodoo->frame_count++;
+ }
+ else
+ thread_unlock_mutex(voodoo->swap_mutex);
+ }
+ voodoo->v_retrace = 1;
+ }
+ voodoo->line++;
+
+ if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS)
+ {
+ /* line has just been incremented, so this fires right after the last visible line was handled. */
+ if (voodoo->line == voodoo->v_disp)
+ {
+ /* Blit only when the dirty range is non-empty; the strict '>' means a single dirty line does not trigger it. */
+ if (voodoo->dirty_line_high > voodoo->dirty_line_low)
+ svga_doblit(0, voodoo->v_disp, voodoo->h_disp, voodoo->v_disp-1, voodoo->svga);
+ if (voodoo->clutData_dirty)
+ {
+ voodoo->clutData_dirty = 0;
+ voodoo_calc_clutData(voodoo);
+ }
+ /* Reset the dirty range to "empty" for the next frame. */
+ voodoo->dirty_line_high = -1;
+ voodoo->dirty_line_low = 2000;
+ }
+ }
+
+ if (voodoo->line >= voodoo->v_total)
+ {
+ voodoo->line = 0;
+ voodoo->v_retrace = 0;
+ }
+ /* Re-arm for the next line; a fixed 32 * TIMER_USEC interval is used while line_time is zero. */
+ if (voodoo->line_time)
+ timer_advance_u64(&voodoo->timer, voodoo->line_time);
+ else
+ timer_advance_u64(&voodoo->timer, TIMER_USEC * 32);
+}
--- /dev/null
+/* Rebuilds voodoo->ncc_lookup[tmu] from voodoo->nccTable[tmu]. */
+void voodoo_update_ncc(voodoo_t *voodoo, int tmu);
+/* Recomputes pixel_clock and line_time from the DAC PLL registers and hSync. */
+void voodoo_pixelclock_update(voodoo_t *voodoo);
+/* Build the screen filter tables from the current FILTCAP thresholds (first- and second-generation variants). */
+void voodoo_generate_filter_v1(voodoo_t *voodoo);
+void voodoo_generate_filter_v2(voodoo_t *voodoo);
+/* Regenerates the filter tables when the filter is enabled and scrfilterThreshold has changed. */
+void voodoo_threshold_check(voodoo_t *voodoo);
+/* Per-scanline timer callback; p is the voodoo_t of the card. */
+void voodoo_callback(void *p);
--- /dev/null
+static const uint8_t dither_rb[256][4][4] =
+{
+ {
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ },
+ {
+ {0, 0, 0, 0},
+ {0, 0, 1, 0},
+ {0, 0, 0, 0},
+ {1, 0, 0, 0},
+ },
+ {
+ {0, 0, 0, 0},
+ {1, 0, 1, 0},
+ {0, 0, 0, 0},
+ {1, 0, 1, 0},
+ },
+ {
+ {0, 0, 0, 1},
+ {1, 0, 1, 0},
+ {0, 1, 0, 0},
+ {1, 0, 1, 0},
+ },
+ {
+ {0, 1, 0, 1},
+ {1, 0, 1, 0},
+ {0, 1, 0, 1},
+ {1, 0, 1, 0},
+ },
+ {
+ {0, 1, 0, 1},
+ {1, 0, 1, 1},
+ {0, 1, 0, 1},
+ {1, 1, 1, 0},
+ },
+ {
+ {0, 1, 0, 1},
+ {1, 1, 1, 1},
+ {0, 1, 0, 1},
+ {1, 1, 1, 1},
+ },
+ {
+ {0, 1, 1, 1},
+ {1, 1, 1, 1},
+ {1, 1, 0, 1},
+ {1, 1, 1, 1},
+ },
+ {
+ {1, 1, 1, 1},
+ {1, 1, 1, 1},
+ {1, 1, 1, 1},
+ {1, 1, 1, 1},
+ },
+ {
+ {1, 1, 1, 1},
+ {1, 1, 2, 1},
+ {1, 1, 1, 1},
+ {2, 1, 1, 1},
+ },
+ {
+ {1, 1, 1, 1},
+ {2, 1, 2, 1},
+ {1, 1, 1, 1},
+ {2, 1, 2, 1},
+ },
+ {
+ {1, 1, 1, 2},
+ {2, 1, 2, 1},
+ {1, 2, 1, 1},
+ {2, 1, 2, 1},
+ },
+ {
+ {1, 2, 1, 2},
+ {2, 1, 2, 1},
+ {1, 2, 1, 2},
+ {2, 1, 2, 1},
+ },
+ {
+ {1, 2, 1, 2},
+ {2, 1, 2, 2},
+ {1, 2, 1, 2},
+ {2, 2, 2, 1},
+ },
+ {
+ {1, 2, 1, 2},
+ {2, 2, 2, 2},
+ {1, 2, 1, 2},
+ {2, 2, 2, 2},
+ },
+ {
+ {1, 2, 2, 2},
+ {2, 2, 2, 2},
+ {2, 2, 1, 2},
+ {2, 2, 2, 2},
+ },
+ {
+ {1, 2, 2, 2},
+ {2, 2, 2, 2},
+ {2, 2, 2, 2},
+ {2, 2, 2, 2},
+ },
+ {
+ {2, 2, 2, 2},
+ {2, 2, 2, 2},
+ {2, 2, 2, 2},
+ {3, 2, 2, 2},
+ },
+ {
+ {2, 2, 2, 2},
+ {2, 2, 3, 2},
+ {2, 2, 2, 2},
+ {3, 2, 3, 2},
+ },
+ {
+ {2, 2, 2, 2},
+ {3, 2, 3, 2},
+ {2, 3, 2, 2},
+ {3, 2, 3, 2},
+ },
+ {
+ {2, 2, 2, 3},
+ {3, 2, 3, 2},
+ {2, 3, 2, 3},
+ {3, 2, 3, 2},
+ },
+ {
+ {2, 3, 2, 3},
+ {3, 2, 3, 2},
+ {2, 3, 2, 3},
+ {3, 3, 3, 2},
+ },
+ {
+ {2, 3, 2, 3},
+ {3, 2, 3, 3},
+ {2, 3, 2, 3},
+ {3, 3, 3, 3},
+ },
+ {
+ {2, 3, 2, 3},
+ {3, 3, 3, 3},
+ {3, 3, 2, 3},
+ {3, 3, 3, 3},
+ },
+ {
+ {2, 3, 3, 3},
+ {3, 3, 3, 3},
+ {3, 3, 3, 3},
+ {3, 3, 3, 3},
+ },
+ {
+ {3, 3, 3, 3},
+ {3, 3, 3, 3},
+ {3, 3, 3, 3},
+ {4, 3, 3, 3},
+ },
+ {
+ {3, 3, 3, 3},
+ {3, 3, 4, 3},
+ {3, 3, 3, 3},
+ {4, 3, 4, 3},
+ },
+ {
+ {3, 3, 3, 3},
+ {4, 3, 4, 3},
+ {3, 4, 3, 3},
+ {4, 3, 4, 3},
+ },
+ {
+ {3, 3, 3, 4},
+ {4, 3, 4, 3},
+ {3, 4, 3, 4},
+ {4, 3, 4, 3},
+ },
+ {
+ {3, 4, 3, 4},
+ {4, 3, 4, 3},
+ {3, 4, 3, 4},
+ {4, 4, 4, 3},
+ },
+ {
+ {3, 4, 3, 4},
+ {4, 3, 4, 4},
+ {3, 4, 3, 4},
+ {4, 4, 4, 4},
+ },
+ {
+ {3, 4, 3, 4},
+ {4, 4, 4, 4},
+ {4, 4, 3, 4},
+ {4, 4, 4, 4},
+ },
+ {
+ {3, 4, 4, 4},
+ {4, 4, 4, 4},
+ {4, 4, 3, 4},
+ {4, 4, 4, 4},
+ },
+ {
+ {4, 4, 4, 4},
+ {4, 4, 4, 4},
+ {4, 4, 4, 4},
+ {4, 4, 4, 4},
+ },
+ {
+ {4, 4, 4, 4},
+ {4, 4, 5, 4},
+ {4, 4, 4, 4},
+ {5, 4, 4, 4},
+ },
+ {
+ {4, 4, 4, 4},
+ {5, 4, 5, 4},
+ {4, 4, 4, 4},
+ {5, 4, 5, 4},
+ },
+ {
+ {4, 4, 4, 5},
+ {5, 4, 5, 4},
+ {4, 5, 4, 4},
+ {5, 4, 5, 4},
+ },
+ {
+ {4, 5, 4, 5},
+ {5, 4, 5, 4},
+ {4, 5, 4, 5},
+ {5, 4, 5, 4},
+ },
+ {
+ {4, 5, 4, 5},
+ {5, 4, 5, 5},
+ {4, 5, 4, 5},
+ {5, 5, 5, 4},
+ },
+ {
+ {4, 5, 4, 5},
+ {5, 5, 5, 5},
+ {4, 5, 4, 5},
+ {5, 5, 5, 5},
+ },
+ {
+ {4, 5, 5, 5},
+ {5, 5, 5, 5},
+ {5, 5, 4, 5},
+ {5, 5, 5, 5},
+ },
+ {
+ {5, 5, 5, 5},
+ {5, 5, 5, 5},
+ {5, 5, 5, 5},
+ {5, 5, 5, 5},
+ },
+ {
+ {5, 5, 5, 5},
+ {5, 5, 6, 5},
+ {5, 5, 5, 5},
+ {6, 5, 5, 5},
+ },
+ {
+ {5, 5, 5, 5},
+ {6, 5, 6, 5},
+ {5, 5, 5, 5},
+ {6, 5, 6, 5},
+ },
+ {
+ {5, 5, 5, 6},
+ {6, 5, 6, 5},
+ {5, 6, 5, 5},
+ {6, 5, 6, 5},
+ },
+ {
+ {5, 6, 5, 6},
+ {6, 5, 6, 5},
+ {5, 6, 5, 6},
+ {6, 5, 6, 5},
+ },
+ {
+ {5, 6, 5, 6},
+ {6, 5, 6, 6},
+ {5, 6, 5, 6},
+ {6, 6, 6, 5},
+ },
+ {
+ {5, 6, 5, 6},
+ {6, 6, 6, 6},
+ {5, 6, 5, 6},
+ {6, 6, 6, 6},
+ },
+ {
+ {5, 6, 5, 6},
+ {6, 6, 6, 6},
+ {6, 6, 5, 6},
+ {6, 6, 6, 6},
+ },
+ {
+ {5, 6, 6, 6},
+ {6, 6, 6, 6},
+ {6, 6, 6, 6},
+ {6, 6, 6, 6},
+ },
+ {
+ {6, 6, 6, 6},
+ {6, 6, 6, 6},
+ {6, 6, 6, 6},
+ {7, 6, 6, 6},
+ },
+ {
+ {6, 6, 6, 6},
+ {6, 6, 7, 6},
+ {6, 6, 6, 6},
+ {7, 6, 7, 6},
+ },
+ {
+ {6, 6, 6, 6},
+ {7, 6, 7, 6},
+ {6, 7, 6, 6},
+ {7, 6, 7, 6},
+ },
+ {
+ {6, 6, 6, 7},
+ {7, 6, 7, 6},
+ {6, 7, 6, 7},
+ {7, 6, 7, 6},
+ },
+ {
+ {6, 7, 6, 7},
+ {7, 6, 7, 6},
+ {6, 7, 6, 7},
+ {7, 7, 7, 6},
+ },
+ {
+ {6, 7, 6, 7},
+ {7, 6, 7, 7},
+ {6, 7, 6, 7},
+ {7, 7, 7, 7},
+ },
+ {
+ {6, 7, 6, 7},
+ {7, 7, 7, 7},
+ {7, 7, 6, 7},
+ {7, 7, 7, 7},
+ },
+ {
+ {6, 7, 7, 7},
+ {7, 7, 7, 7},
+ {7, 7, 7, 7},
+ {7, 7, 7, 7},
+ },
+ {
+ {7, 7, 7, 7},
+ {7, 7, 7, 7},
+ {7, 7, 7, 7},
+ {8, 7, 7, 7},
+ },
+ {
+ {7, 7, 7, 7},
+ {7, 7, 8, 7},
+ {7, 7, 7, 7},
+ {8, 7, 8, 7},
+ },
+ {
+ {7, 7, 7, 7},
+ {8, 7, 8, 7},
+ {7, 8, 7, 7},
+ {8, 7, 8, 7},
+ },
+ {
+ {7, 7, 7, 8},
+ {8, 7, 8, 7},
+ {7, 8, 7, 8},
+ {8, 7, 8, 7},
+ },
+ {
+ {7, 8, 7, 8},
+ {8, 7, 8, 7},
+ {7, 8, 7, 8},
+ {8, 8, 8, 7},
+ },
+ {
+ {7, 8, 7, 8},
+ {8, 7, 8, 8},
+ {7, 8, 7, 8},
+ {8, 8, 8, 8},
+ },
+ {
+ {7, 8, 7, 8},
+ {8, 8, 8, 8},
+ {7, 8, 7, 8},
+ {8, 8, 8, 8},
+ },
+ {
+ {7, 8, 8, 8},
+ {8, 8, 8, 8},
+ {8, 8, 7, 8},
+ {8, 8, 8, 8},
+ },
+ {
+ {8, 8, 8, 8},
+ {8, 8, 8, 8},
+ {8, 8, 8, 8},
+ {8, 8, 8, 8},
+ },
+ {
+ {8, 8, 8, 8},
+ {8, 8, 9, 8},
+ {8, 8, 8, 8},
+ {9, 8, 8, 8},
+ },
+ {
+ {8, 8, 8, 8},
+ {9, 8, 9, 8},
+ {8, 8, 8, 8},
+ {9, 8, 9, 8},
+ },
+ {
+ {8, 8, 8, 9},
+ {9, 8, 9, 8},
+ {8, 9, 8, 8},
+ {9, 8, 9, 8},
+ },
+ {
+ {8, 9, 8, 9},
+ {9, 8, 9, 8},
+ {8, 9, 8, 9},
+ {9, 8, 9, 8},
+ },
+ {
+ {8, 9, 8, 9},
+ {9, 8, 9, 9},
+ {8, 9, 8, 9},
+ {9, 9, 9, 8},
+ },
+ {
+ {8, 9, 8, 9},
+ {9, 9, 9, 9},
+ {8, 9, 8, 9},
+ {9, 9, 9, 9},
+ },
+ {
+ {8, 9, 9, 9},
+ {9, 9, 9, 9},
+ {9, 9, 8, 9},
+ {9, 9, 9, 9},
+ },
+ {
+ {9, 9, 9, 9},
+ {9, 9, 9, 9},
+ {9, 9, 9, 9},
+ {9, 9, 9, 9},
+ },
+ {
+ {9, 9, 9, 9},
+ {9, 9, 10, 9},
+ {9, 9, 9, 9},
+ {10, 9, 9, 9},
+ },
+ {
+ {9, 9, 9, 9},
+ {10, 9, 10, 9},
+ {9, 9, 9, 9},
+ {10, 9, 10, 9},
+ },
+ {
+ {9, 9, 9, 10},
+ {10, 9, 10, 9},
+ {9, 10, 9, 9},
+ {10, 9, 10, 9},
+ },
+ {
+ {9, 10, 9, 10},
+ {10, 9, 10, 9},
+ {9, 10, 9, 10},
+ {10, 9, 10, 9},
+ },
+ {
+ {9, 10, 9, 10},
+ {10, 9, 10, 10},
+ {9, 10, 9, 10},
+ {10, 10, 10, 9},
+ },
+ {
+ {9, 10, 9, 10},
+ {10, 9, 10, 10},
+ {9, 10, 9, 10},
+ {10, 10, 10, 10},
+ },
+ {
+ {9, 10, 9, 10},
+ {10, 10, 10, 10},
+ {10, 10, 9, 10},
+ {10, 10, 10, 10},
+ },
+ {
+ {9, 10, 10, 10},
+ {10, 10, 10, 10},
+ {10, 10, 10, 10},
+ {10, 10, 10, 10},
+ },
+ {
+ {10, 10, 10, 10},
+ {10, 10, 10, 10},
+ {10, 10, 10, 10},
+ {11, 10, 10, 10},
+ },
+ {
+ {10, 10, 10, 10},
+ {10, 10, 11, 10},
+ {10, 10, 10, 10},
+ {11, 10, 11, 10},
+ },
+ {
+ {10, 10, 10, 10},
+ {11, 10, 11, 10},
+ {10, 11, 10, 10},
+ {11, 10, 11, 10},
+ },
+ {
+ {10, 10, 10, 11},
+ {11, 10, 11, 10},
+ {10, 11, 10, 11},
+ {11, 10, 11, 10},
+ },
+ {
+ {10, 11, 10, 11},
+ {11, 10, 11, 10},
+ {10, 11, 10, 11},
+ {11, 11, 11, 10},
+ },
+ {
+ {10, 11, 10, 11},
+ {11, 10, 11, 11},
+ {10, 11, 10, 11},
+ {11, 11, 11, 11},
+ },
+ {
+ {10, 11, 10, 11},
+ {11, 11, 11, 11},
+ {11, 11, 10, 11},
+ {11, 11, 11, 11},
+ },
+ {
+ {10, 11, 11, 11},
+ {11, 11, 11, 11},
+ {11, 11, 11, 11},
+ {11, 11, 11, 11},
+ },
+ {
+ {11, 11, 11, 11},
+ {11, 11, 11, 11},
+ {11, 11, 11, 11},
+ {12, 11, 11, 11},
+ },
+ {
+ {11, 11, 11, 11},
+ {11, 11, 12, 11},
+ {11, 11, 11, 11},
+ {12, 11, 12, 11},
+ },
+ {
+ {11, 11, 11, 11},
+ {12, 11, 12, 11},
+ {11, 12, 11, 11},
+ {12, 11, 12, 11},
+ },
+ {
+ {11, 11, 11, 12},
+ {12, 11, 12, 11},
+ {11, 12, 11, 12},
+ {12, 11, 12, 11},
+ },
+ {
+ {11, 12, 11, 12},
+ {12, 11, 12, 11},
+ {11, 12, 11, 12},
+ {12, 12, 12, 11},
+ },
+ {
+ {11, 12, 11, 12},
+ {12, 11, 12, 12},
+ {11, 12, 11, 12},
+ {12, 12, 12, 11},
+ },
+ {
+ {11, 12, 11, 12},
+ {12, 12, 12, 12},
+ {11, 12, 11, 12},
+ {12, 12, 12, 12},
+ },
+ {
+ {11, 12, 12, 12},
+ {12, 12, 12, 12},
+ {12, 12, 11, 12},
+ {12, 12, 12, 12},
+ },
+ {
+ {12, 12, 12, 12},
+ {12, 12, 12, 12},
+ {12, 12, 12, 12},
+ {12, 12, 12, 12},
+ },
+ {
+ {12, 12, 12, 12},
+ {12, 12, 13, 12},
+ {12, 12, 12, 12},
+ {13, 12, 12, 12},
+ },
+ {
+ {12, 12, 12, 12},
+ {13, 12, 13, 12},
+ {12, 12, 12, 12},
+ {13, 12, 13, 12},
+ },
+ {
+ {12, 12, 12, 13},
+ {13, 12, 13, 12},
+ {12, 13, 12, 12},
+ {13, 12, 13, 12},
+ },
+ {
+ {12, 13, 12, 13},
+ {13, 12, 13, 12},
+ {12, 13, 12, 13},
+ {13, 12, 13, 12},
+ },
+ {
+ {12, 13, 12, 13},
+ {13, 12, 13, 13},
+ {12, 13, 12, 13},
+ {13, 13, 13, 12},
+ },
+ {
+ {12, 13, 12, 13},
+ {13, 13, 13, 13},
+ {12, 13, 12, 13},
+ {13, 13, 13, 13},
+ },
+ {
+ {12, 13, 13, 13},
+ {13, 13, 13, 13},
+ {13, 13, 12, 13},
+ {13, 13, 13, 13},
+ },
+ {
+ {13, 13, 13, 13},
+ {13, 13, 13, 13},
+ {13, 13, 13, 13},
+ {13, 13, 13, 13},
+ },
+ {
+ {13, 13, 13, 13},
+ {13, 13, 14, 13},
+ {13, 13, 13, 13},
+ {14, 13, 13, 13},
+ },
+ {
+ {13, 13, 13, 13},
+ {14, 13, 14, 13},
+ {13, 13, 13, 13},
+ {14, 13, 14, 13},
+ },
+ {
+ {13, 13, 13, 14},
+ {14, 13, 14, 13},
+ {13, 14, 13, 13},
+ {14, 13, 14, 13},
+ },
+ {
+ {13, 14, 13, 14},
+ {14, 13, 14, 13},
+ {13, 14, 13, 14},
+ {14, 13, 14, 13},
+ },
+ {
+ {13, 14, 13, 14},
+ {14, 13, 14, 13},
+ {13, 14, 13, 14},
+ {14, 14, 14, 13},
+ },
+ {
+ {13, 14, 13, 14},
+ {14, 13, 14, 14},
+ {13, 14, 13, 14},
+ {14, 14, 14, 14},
+ },
+ {
+ {13, 14, 13, 14},
+ {14, 14, 14, 14},
+ {14, 14, 13, 14},
+ {14, 14, 14, 14},
+ },
+ {
+ {13, 14, 14, 14},
+ {14, 14, 14, 14},
+ {14, 14, 14, 14},
+ {14, 14, 14, 14},
+ },
+ {
+ {14, 14, 14, 14},
+ {14, 14, 14, 14},
+ {14, 14, 14, 14},
+ {15, 14, 14, 14},
+ },
+ {
+ {14, 14, 14, 14},
+ {14, 14, 15, 14},
+ {14, 14, 14, 14},
+ {15, 14, 15, 14},
+ },
+ {
+ {14, 14, 14, 14},
+ {15, 14, 15, 14},
+ {14, 15, 14, 14},
+ {15, 14, 15, 14},
+ },
+ {
+ {14, 14, 14, 15},
+ {15, 14, 15, 14},
+ {14, 15, 14, 15},
+ {15, 14, 15, 14},
+ },
+ {
+ {14, 15, 14, 15},
+ {15, 14, 15, 14},
+ {14, 15, 14, 15},
+ {15, 15, 15, 14},
+ },
+ {
+ {14, 15, 14, 15},
+ {15, 14, 15, 15},
+ {14, 15, 14, 15},
+ {15, 15, 15, 15},
+ },
+ {
+ {14, 15, 14, 15},
+ {15, 15, 15, 15},
+ {15, 15, 14, 15},
+ {15, 15, 15, 15},
+ },
+ {
+ {14, 15, 15, 15},
+ {15, 15, 15, 15},
+ {15, 15, 15, 15},
+ {15, 15, 15, 15},
+ },
+ {
+ {15, 15, 15, 15},
+ {15, 15, 15, 15},
+ {15, 15, 15, 15},
+ {16, 15, 15, 15},
+ },
+ {
+ {15, 15, 15, 15},
+ {15, 15, 16, 15},
+ {15, 15, 15, 15},
+ {16, 15, 16, 15},
+ },
+ {
+ {15, 15, 15, 15},
+ {16, 15, 16, 15},
+ {15, 16, 15, 15},
+ {16, 15, 16, 15},
+ },
+ {
+ {15, 15, 15, 16},
+ {16, 15, 16, 15},
+ {15, 16, 15, 16},
+ {16, 15, 16, 15},
+ },
+ {
+ {15, 16, 15, 16},
+ {16, 15, 16, 15},
+ {15, 16, 15, 16},
+ {16, 16, 16, 15},
+ },
+ {
+ {15, 16, 15, 16},
+ {16, 15, 16, 16},
+ {15, 16, 15, 16},
+ {16, 16, 16, 16},
+ },
+ {
+ {15, 16, 15, 16},
+ {16, 16, 16, 16},
+ {16, 16, 15, 16},
+ {16, 16, 16, 16},
+ },
+ {
+ {15, 16, 16, 16},
+ {16, 16, 16, 16},
+ {16, 16, 16, 16},
+ {16, 16, 16, 16},
+ },
+ {
+ {16, 16, 16, 16},
+ {16, 16, 16, 16},
+ {16, 16, 16, 16},
+ {17, 16, 16, 16},
+ },
+ {
+ {16, 16, 16, 16},
+ {16, 16, 17, 16},
+ {16, 16, 16, 16},
+ {17, 16, 17, 16},
+ },
+ {
+ {16, 16, 16, 16},
+ {17, 16, 17, 16},
+ {16, 17, 16, 16},
+ {17, 16, 17, 16},
+ },
+ {
+ {16, 16, 16, 17},
+ {17, 16, 17, 16},
+ {16, 17, 16, 17},
+ {17, 16, 17, 16},
+ },
+ {
+ {16, 17, 16, 17},
+ {17, 16, 17, 16},
+ {16, 17, 16, 17},
+ {17, 17, 17, 16},
+ },
+ {
+ {16, 17, 16, 17},
+ {17, 16, 17, 17},
+ {16, 17, 16, 17},
+ {17, 17, 17, 17},
+ },
+ {
+ {16, 17, 16, 17},
+ {17, 17, 17, 17},
+ {17, 17, 16, 17},
+ {17, 17, 17, 17},
+ },
+ {
+ {16, 17, 17, 17},
+ {17, 17, 17, 17},
+ {17, 17, 17, 17},
+ {17, 17, 17, 17},
+ },
+ {
+ {17, 17, 17, 17},
+ {17, 17, 17, 17},
+ {17, 17, 17, 17},
+ {18, 17, 17, 17},
+ },
+ {
+ {17, 17, 17, 17},
+ {17, 17, 18, 17},
+ {17, 17, 17, 17},
+ {18, 17, 18, 17},
+ },
+ {
+ {17, 17, 17, 17},
+ {18, 17, 18, 17},
+ {17, 18, 17, 17},
+ {18, 17, 18, 17},
+ },
+ {
+ {17, 17, 17, 18},
+ {18, 17, 18, 17},
+ {17, 18, 17, 18},
+ {18, 17, 18, 17},
+ },
+ {
+ {17, 18, 17, 18},
+ {18, 17, 18, 17},
+ {17, 18, 17, 18},
+ {18, 17, 18, 17},
+ },
+ {
+ {17, 18, 17, 18},
+ {18, 17, 18, 18},
+ {17, 18, 17, 18},
+ {18, 18, 18, 17},
+ },
+ {
+ {17, 18, 17, 18},
+ {18, 18, 18, 18},
+ {17, 18, 17, 18},
+ {18, 18, 18, 18},
+ },
+ {
+ {17, 18, 18, 18},
+ {18, 18, 18, 18},
+ {18, 18, 17, 18},
+ {18, 18, 18, 18},
+ },
+ {
+ {18, 18, 18, 18},
+ {18, 18, 18, 18},
+ {18, 18, 18, 18},
+ {18, 18, 18, 18},
+ },
+ {
+ {18, 18, 18, 18},
+ {18, 18, 19, 18},
+ {18, 18, 18, 18},
+ {19, 18, 18, 18},
+ },
+ {
+ {18, 18, 18, 18},
+ {19, 18, 19, 18},
+ {18, 18, 18, 18},
+ {19, 18, 19, 18},
+ },
+ {
+ {18, 18, 18, 19},
+ {19, 18, 19, 18},
+ {18, 19, 18, 18},
+ {19, 18, 19, 18},
+ },
+ {
+ {18, 19, 18, 19},
+ {19, 18, 19, 18},
+ {18, 19, 18, 19},
+ {19, 18, 19, 18},
+ },
+ {
+ {18, 19, 18, 19},
+ {19, 18, 19, 19},
+ {18, 19, 18, 19},
+ {19, 19, 19, 18},
+ },
+ {
+ {18, 19, 18, 19},
+ {19, 19, 19, 19},
+ {18, 19, 18, 19},
+ {19, 19, 19, 19},
+ },
+ {
+ {18, 19, 19, 19},
+ {19, 19, 19, 19},
+ {19, 19, 18, 19},
+ {19, 19, 19, 19},
+ },
+ {
+ {19, 19, 19, 19},
+ {19, 19, 19, 19},
+ {19, 19, 19, 19},
+ {19, 19, 19, 19},
+ },
+ {
+ {19, 19, 19, 19},
+ {19, 19, 20, 19},
+ {19, 19, 19, 19},
+ {20, 19, 19, 19},
+ },
+ {
+ {19, 19, 19, 19},
+ {20, 19, 20, 19},
+ {19, 19, 19, 19},
+ {20, 19, 20, 19},
+ },
+ {
+ {19, 19, 19, 20},
+ {20, 19, 20, 19},
+ {19, 20, 19, 19},
+ {20, 19, 20, 19},
+ },
+ {
+ {19, 19, 19, 20},
+ {20, 19, 20, 19},
+ {19, 20, 19, 20},
+ {20, 19, 20, 19},
+ },
+ {
+ {19, 20, 19, 20},
+ {20, 19, 20, 19},
+ {19, 20, 19, 20},
+ {20, 20, 20, 19},
+ },
+ {
+ {19, 20, 19, 20},
+ {20, 19, 20, 20},
+ {19, 20, 19, 20},
+ {20, 20, 20, 20},
+ },
+ {
+ {19, 20, 19, 20},
+ {20, 20, 20, 20},
+ {20, 20, 19, 20},
+ {20, 20, 20, 20},
+ },
+ {
+ {19, 20, 20, 20},
+ {20, 20, 20, 20},
+ {20, 20, 20, 20},
+ {20, 20, 20, 20},
+ },
+ {
+ {20, 20, 20, 20},
+ {20, 20, 20, 20},
+ {20, 20, 20, 20},
+ {21, 20, 20, 20},
+ },
+ {
+ {20, 20, 20, 20},
+ {20, 20, 21, 20},
+ {20, 20, 20, 20},
+ {21, 20, 21, 20},
+ },
+ {
+ {20, 20, 20, 20},
+ {21, 20, 21, 20},
+ {20, 21, 20, 20},
+ {21, 20, 21, 20},
+ },
+ {
+ {20, 20, 20, 21},
+ {21, 20, 21, 20},
+ {20, 21, 20, 21},
+ {21, 20, 21, 20},
+ },
+ {
+ {20, 21, 20, 21},
+ {21, 20, 21, 20},
+ {20, 21, 20, 21},
+ {21, 21, 21, 20},
+ },
+ {
+ {20, 21, 20, 21},
+ {21, 20, 21, 21},
+ {20, 21, 20, 21},
+ {21, 21, 21, 21},
+ },
+ {
+ {20, 21, 20, 21},
+ {21, 21, 21, 21},
+ {21, 21, 20, 21},
+ {21, 21, 21, 21},
+ },
+ {
+ {20, 21, 21, 21},
+ {21, 21, 21, 21},
+ {21, 21, 21, 21},
+ {21, 21, 21, 21},
+ },
+ {
+ {21, 21, 21, 21},
+ {21, 21, 21, 21},
+ {21, 21, 21, 21},
+ {22, 21, 21, 21},
+ },
+ {
+ {21, 21, 21, 21},
+ {21, 21, 22, 21},
+ {21, 21, 21, 21},
+ {22, 21, 22, 21},
+ },
+ {
+ {21, 21, 21, 21},
+ {22, 21, 22, 21},
+ {21, 22, 21, 21},
+ {22, 21, 22, 21},
+ },
+ {
+ {21, 21, 21, 22},
+ {22, 21, 22, 21},
+ {21, 22, 21, 21},
+ {22, 21, 22, 21},
+ },
+ {
+ {21, 22, 21, 22},
+ {22, 21, 22, 21},
+ {21, 22, 21, 22},
+ {22, 21, 22, 21},
+ },
+ {
+ {21, 22, 21, 22},
+ {22, 21, 22, 22},
+ {21, 22, 21, 22},
+ {22, 22, 22, 21},
+ },
+ {
+ {21, 22, 21, 22},
+ {22, 22, 22, 22},
+ {21, 22, 21, 22},
+ {22, 22, 22, 22},
+ },
+ {
+ {21, 22, 22, 22},
+ {22, 22, 22, 22},
+ {22, 22, 21, 22},
+ {22, 22, 22, 22},
+ },
+ {
+ {22, 22, 22, 22},
+ {22, 22, 22, 22},
+ {22, 22, 22, 22},
+ {22, 22, 22, 22},
+ },
+ {
+ {22, 22, 22, 22},
+ {22, 22, 23, 22},
+ {22, 22, 22, 22},
+ {23, 22, 22, 22},
+ },
+ {
+ {22, 22, 22, 22},
+ {23, 22, 23, 22},
+ {22, 22, 22, 22},
+ {23, 22, 23, 22},
+ },
+ {
+ {22, 22, 22, 23},
+ {23, 22, 23, 22},
+ {22, 23, 22, 22},
+ {23, 22, 23, 22},
+ },
+ {
+ {22, 23, 22, 23},
+ {23, 22, 23, 22},
+ {22, 23, 22, 23},
+ {23, 22, 23, 22},
+ },
+ {
+ {22, 23, 22, 23},
+ {23, 22, 23, 23},
+ {22, 23, 22, 23},
+ {23, 23, 23, 22},
+ },
+ {
+ {22, 23, 22, 23},
+ {23, 23, 23, 23},
+ {22, 23, 22, 23},
+ {23, 23, 23, 23},
+ },
+ {
+ {22, 23, 23, 23},
+ {23, 23, 23, 23},
+ {23, 23, 22, 23},
+ {23, 23, 23, 23},
+ },
+ {
+ {23, 23, 23, 23},
+ {23, 23, 23, 23},
+ {23, 23, 23, 23},
+ {23, 23, 23, 23},
+ },
+ {
+ {23, 23, 23, 23},
+ {23, 23, 24, 23},
+ {23, 23, 23, 23},
+ {24, 23, 23, 23},
+ },
+ {
+ {23, 23, 23, 23},
+ {24, 23, 24, 23},
+ {23, 23, 23, 23},
+ {24, 23, 24, 23},
+ },
+ {
+ {23, 23, 23, 23},
+ {24, 23, 24, 23},
+ {23, 24, 23, 23},
+ {24, 23, 24, 23},
+ },
+ {
+ {23, 23, 23, 24},
+ {24, 23, 24, 23},
+ {23, 24, 23, 24},
+ {24, 23, 24, 23},
+ },
+ {
+ {23, 24, 23, 24},
+ {24, 23, 24, 23},
+ {23, 24, 23, 24},
+ {24, 24, 24, 23},
+ },
+ {
+ {23, 24, 23, 24},
+ {24, 23, 24, 24},
+ {23, 24, 23, 24},
+ {24, 24, 24, 24},
+ },
+ {
+ {23, 24, 23, 24},
+ {24, 24, 24, 24},
+ {24, 24, 23, 24},
+ {24, 24, 24, 24},
+ },
+ {
+ {23, 24, 24, 24},
+ {24, 24, 24, 24},
+ {24, 24, 24, 24},
+ {24, 24, 24, 24},
+ },
+ {
+ {24, 24, 24, 24},
+ {24, 24, 24, 24},
+ {24, 24, 24, 24},
+ {25, 24, 24, 24},
+ },
+ {
+ {24, 24, 24, 24},
+ {24, 24, 25, 24},
+ {24, 24, 24, 24},
+ {25, 24, 25, 24},
+ },
+ {
+ {24, 24, 24, 24},
+ {25, 24, 25, 24},
+ {24, 25, 24, 24},
+ {25, 24, 25, 24},
+ },
+ {
+ {24, 24, 24, 25},
+ {25, 24, 25, 24},
+ {24, 25, 24, 25},
+ {25, 24, 25, 24},
+ },
+ {
+ {24, 25, 24, 25},
+ {25, 24, 25, 24},
+ {24, 25, 24, 25},
+ {25, 25, 25, 24},
+ },
+ {
+ {24, 25, 24, 25},
+ {25, 24, 25, 25},
+ {24, 25, 24, 25},
+ {25, 25, 25, 25},
+ },
+ {
+ {24, 25, 24, 25},
+ {25, 25, 25, 25},
+ {25, 25, 24, 25},
+ {25, 25, 25, 25},
+ },
+ {
+ {24, 25, 25, 25},
+ {25, 25, 25, 25},
+ {25, 25, 25, 25},
+ {25, 25, 25, 25},
+ },
+ {
+ {25, 25, 25, 25},
+ {25, 25, 25, 25},
+ {25, 25, 25, 25},
+ {26, 25, 25, 25},
+ },
+ {
+ {25, 25, 25, 25},
+ {25, 25, 26, 25},
+ {25, 25, 25, 25},
+ {26, 25, 26, 25},
+ },
+ {
+ {25, 25, 25, 25},
+ {26, 25, 26, 25},
+ {25, 25, 25, 25},
+ {26, 25, 26, 25},
+ },
+ {
+ {25, 25, 25, 26},
+ {26, 25, 26, 25},
+ {25, 26, 25, 25},
+ {26, 25, 26, 25},
+ },
+ {
+ {25, 26, 25, 26},
+ {26, 25, 26, 25},
+ {25, 26, 25, 26},
+ {26, 25, 26, 25},
+ },
+ {
+ {25, 26, 25, 26},
+ {26, 25, 26, 26},
+ {25, 26, 25, 26},
+ {26, 26, 26, 25},
+ },
+ {
+ {25, 26, 25, 26},
+ {26, 26, 26, 26},
+ {25, 26, 25, 26},
+ {26, 26, 26, 26},
+ },
+ {
+ {25, 26, 26, 26},
+ {26, 26, 26, 26},
+ {26, 26, 25, 26},
+ {26, 26, 26, 26},
+ },
+ {
+ {26, 26, 26, 26},
+ {26, 26, 26, 26},
+ {26, 26, 26, 26},
+ {26, 26, 26, 26},
+ },
+ {
+ {26, 26, 26, 26},
+ {26, 26, 27, 26},
+ {26, 26, 26, 26},
+ {27, 26, 26, 26},
+ },
+ {
+ {26, 26, 26, 26},
+ {27, 26, 27, 26},
+ {26, 26, 26, 26},
+ {27, 26, 27, 26},
+ },
+ {
+ {26, 26, 26, 27},
+ {27, 26, 27, 26},
+ {26, 27, 26, 26},
+ {27, 26, 27, 26},
+ },
+ {
+ {26, 27, 26, 27},
+ {27, 26, 27, 26},
+ {26, 27, 26, 27},
+ {27, 26, 27, 26},
+ },
+ {
+ {26, 27, 26, 27},
+ {27, 26, 27, 27},
+ {26, 27, 26, 27},
+ {27, 27, 27, 26},
+ },
+ {
+ {26, 27, 26, 27},
+ {27, 27, 27, 27},
+ {26, 27, 26, 27},
+ {27, 27, 27, 27},
+ },
+ {
+ {26, 27, 27, 27},
+ {27, 27, 27, 27},
+ {27, 27, 26, 27},
+ {27, 27, 27, 27},
+ },
+ {
+ {27, 27, 27, 27},
+ {27, 27, 27, 27},
+ {27, 27, 27, 27},
+ {27, 27, 27, 27},
+ },
+ {
+ {27, 27, 27, 27},
+ {27, 27, 28, 27},
+ {27, 27, 27, 27},
+ {28, 27, 27, 27},
+ },
+ {
+ {27, 27, 27, 27},
+ {27, 27, 28, 27},
+ {27, 27, 27, 27},
+ {28, 27, 28, 27},
+ },
+ {
+ {27, 27, 27, 27},
+ {28, 27, 28, 27},
+ {27, 28, 27, 27},
+ {28, 27, 28, 27},
+ },
+ {
+ {27, 27, 27, 28},
+ {28, 27, 28, 27},
+ {27, 28, 27, 28},
+ {28, 27, 28, 27},
+ },
+ {
+ {27, 28, 27, 28},
+ {28, 27, 28, 27},
+ {27, 28, 27, 28},
+ {28, 28, 28, 27},
+ },
+ {
+ {27, 28, 27, 28},
+ {28, 27, 28, 28},
+ {27, 28, 27, 28},
+ {28, 28, 28, 28},
+ },
+ {
+ {27, 28, 27, 28},
+ {28, 28, 28, 28},
+ {28, 28, 27, 28},
+ {28, 28, 28, 28},
+ },
+ {
+ {27, 28, 28, 28},
+ {28, 28, 28, 28},
+ {28, 28, 28, 28},
+ {28, 28, 28, 28},
+ },
+ {
+ {28, 28, 28, 28},
+ {28, 28, 28, 28},
+ {28, 28, 28, 28},
+ {29, 28, 28, 28},
+ },
+ {
+ {28, 28, 28, 28},
+ {28, 28, 29, 28},
+ {28, 28, 28, 28},
+ {29, 28, 29, 28},
+ },
+ {
+ {28, 28, 28, 28},
+ {29, 28, 29, 28},
+ {28, 29, 28, 28},
+ {29, 28, 29, 28},
+ },
+ {
+ {28, 28, 28, 29},
+ {29, 28, 29, 28},
+ {28, 29, 28, 29},
+ {29, 28, 29, 28},
+ },
+ {
+ {28, 29, 28, 29},
+ {29, 28, 29, 28},
+ {28, 29, 28, 29},
+ {29, 29, 29, 28},
+ },
+ {
+ {28, 29, 28, 29},
+ {29, 28, 29, 29},
+ {28, 29, 28, 29},
+ {29, 29, 29, 29},
+ },
+ {
+ {28, 29, 28, 29},
+ {29, 29, 29, 29},
+ {29, 29, 28, 29},
+ {29, 29, 29, 29},
+ },
+ {
+ {28, 29, 29, 29},
+ {29, 29, 29, 29},
+ {29, 29, 29, 29},
+ {29, 29, 29, 29},
+ },
+ {
+ {29, 29, 29, 29},
+ {29, 29, 29, 29},
+ {29, 29, 29, 29},
+ {30, 29, 29, 29},
+ },
+ {
+ {29, 29, 29, 29},
+ {29, 29, 30, 29},
+ {29, 29, 29, 29},
+ {30, 29, 29, 29},
+ },
+ {
+ {29, 29, 29, 29},
+ {30, 29, 30, 29},
+ {29, 29, 29, 29},
+ {30, 29, 30, 29},
+ },
+ {
+ {29, 29, 29, 30},
+ {30, 29, 30, 29},
+ {29, 30, 29, 29},
+ {30, 29, 30, 29},
+ },
+ {
+ {29, 30, 29, 30},
+ {30, 29, 30, 29},
+ {29, 30, 29, 30},
+ {30, 29, 30, 29},
+ },
+ {
+ {29, 30, 29, 30},
+ {30, 29, 30, 30},
+ {29, 30, 29, 30},
+ {30, 30, 30, 29},
+ },
+ {
+ {29, 30, 29, 30},
+ {30, 30, 30, 30},
+ {29, 30, 29, 30},
+ {30, 30, 30, 30},
+ },
+ {
+ {29, 30, 30, 30},
+ {30, 30, 30, 30},
+ {30, 30, 29, 30},
+ {30, 30, 30, 30},
+ },
+ {
+ {30, 30, 30, 30},
+ {30, 30, 30, 30},
+ {30, 30, 30, 30},
+ {30, 30, 30, 30},
+ },
+ {
+ {30, 30, 30, 30},
+ {30, 30, 31, 30},
+ {30, 30, 30, 30},
+ {31, 30, 30, 30},
+ },
+ {
+ {30, 30, 30, 30},
+ {31, 30, 31, 30},
+ {30, 30, 30, 30},
+ {31, 30, 31, 30},
+ },
+ {
+ {30, 30, 30, 31},
+ {31, 30, 31, 30},
+ {30, 31, 30, 30},
+ {31, 30, 31, 30},
+ },
+ {
+ {30, 31, 30, 31},
+ {31, 30, 31, 30},
+ {30, 31, 30, 31},
+ {31, 30, 31, 30},
+ },
+ {
+ {30, 31, 30, 31},
+ {31, 30, 31, 31},
+ {30, 31, 30, 31},
+ {31, 31, 31, 30},
+ },
+ {
+ {30, 31, 30, 31},
+ {31, 31, 31, 31},
+ {30, 31, 30, 31},
+ {31, 31, 31, 31},
+ },
+ {
+ {30, 31, 31, 31},
+ {31, 31, 31, 31},
+ {31, 31, 30, 31},
+ {31, 31, 31, 31},
+ },
+ {
+ {31, 31, 31, 31},
+ {31, 31, 31, 31},
+ {31, 31, 31, 31},
+ {31, 31, 31, 31},
+ },
+};
+
+static const uint8_t dither_g[256][4][4] =
+{
+ {
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ },
+ {
+ {0, 0, 0, 0},
+ {1, 0, 1, 0},
+ {0, 0, 0, 0},
+ {1, 0, 1, 0},
+ },
+ {
+ {0, 1, 0, 1},
+ {1, 0, 1, 0},
+ {0, 1, 0, 1},
+ {1, 0, 1, 0},
+ },
+ {
+ {0, 1, 0, 1},
+ {1, 1, 1, 1},
+ {0, 1, 0, 1},
+ {1, 1, 1, 1},
+ },
+ {
+ {1, 1, 1, 1},
+ {1, 1, 1, 1},
+ {1, 1, 1, 1},
+ {1, 1, 1, 1},
+ },
+ {
+ {1, 1, 1, 1},
+ {2, 1, 2, 1},
+ {1, 1, 1, 1},
+ {2, 1, 2, 1},
+ },
+ {
+ {1, 2, 1, 2},
+ {2, 1, 2, 1},
+ {1, 2, 1, 2},
+ {2, 1, 2, 1},
+ },
+ {
+ {1, 2, 1, 2},
+ {2, 2, 2, 2},
+ {1, 2, 1, 2},
+ {2, 2, 2, 2},
+ },
+ {
+ {2, 2, 2, 2},
+ {2, 2, 2, 2},
+ {2, 2, 2, 2},
+ {2, 2, 2, 2},
+ },
+ {
+ {2, 2, 2, 2},
+ {3, 2, 3, 2},
+ {2, 2, 2, 2},
+ {3, 2, 3, 2},
+ },
+ {
+ {2, 3, 2, 3},
+ {3, 2, 3, 2},
+ {2, 3, 2, 3},
+ {3, 2, 3, 2},
+ },
+ {
+ {2, 3, 2, 3},
+ {3, 3, 3, 3},
+ {2, 3, 2, 3},
+ {3, 3, 3, 3},
+ },
+ {
+ {3, 3, 3, 3},
+ {3, 3, 3, 3},
+ {3, 3, 3, 3},
+ {3, 3, 3, 3},
+ },
+ {
+ {3, 3, 3, 3},
+ {4, 3, 4, 3},
+ {3, 3, 3, 3},
+ {4, 3, 4, 3},
+ },
+ {
+ {3, 4, 3, 4},
+ {4, 3, 4, 3},
+ {3, 4, 3, 4},
+ {4, 3, 4, 3},
+ },
+ {
+ {3, 4, 3, 4},
+ {4, 4, 4, 4},
+ {3, 4, 3, 4},
+ {4, 4, 4, 4},
+ },
+ {
+ {3, 4, 4, 4},
+ {4, 4, 4, 4},
+ {4, 4, 4, 4},
+ {4, 4, 4, 4},
+ },
+ {
+ {4, 4, 4, 4},
+ {4, 4, 5, 4},
+ {4, 4, 4, 4},
+ {5, 4, 5, 4},
+ },
+ {
+ {4, 4, 4, 5},
+ {5, 4, 5, 4},
+ {4, 5, 4, 5},
+ {5, 4, 5, 4},
+ },
+ {
+ {4, 5, 4, 5},
+ {5, 4, 5, 5},
+ {4, 5, 4, 5},
+ {5, 5, 5, 5},
+ },
+ {
+ {4, 5, 5, 5},
+ {5, 5, 5, 5},
+ {5, 5, 5, 5},
+ {5, 5, 5, 5},
+ },
+ {
+ {5, 5, 5, 5},
+ {5, 5, 6, 5},
+ {5, 5, 5, 5},
+ {6, 5, 6, 5},
+ },
+ {
+ {5, 5, 5, 6},
+ {6, 5, 6, 5},
+ {5, 6, 5, 6},
+ {6, 5, 6, 5},
+ },
+ {
+ {5, 6, 5, 6},
+ {6, 5, 6, 6},
+ {5, 6, 5, 6},
+ {6, 6, 6, 6},
+ },
+ {
+ {5, 6, 6, 6},
+ {6, 6, 6, 6},
+ {6, 6, 6, 6},
+ {6, 6, 6, 6},
+ },
+ {
+ {6, 6, 6, 6},
+ {6, 6, 7, 6},
+ {6, 6, 6, 6},
+ {7, 6, 7, 6},
+ },
+ {
+ {6, 6, 6, 7},
+ {7, 6, 7, 6},
+ {6, 7, 6, 7},
+ {7, 6, 7, 6},
+ },
+ {
+ {6, 7, 6, 7},
+ {7, 6, 7, 7},
+ {6, 7, 6, 7},
+ {7, 7, 7, 7},
+ },
+ {
+ {6, 7, 7, 7},
+ {7, 7, 7, 7},
+ {7, 7, 7, 7},
+ {7, 7, 7, 7},
+ },
+ {
+ {7, 7, 7, 7},
+ {7, 7, 8, 7},
+ {7, 7, 7, 7},
+ {8, 7, 8, 7},
+ },
+ {
+ {7, 7, 7, 8},
+ {8, 7, 8, 7},
+ {7, 8, 7, 8},
+ {8, 7, 8, 7},
+ },
+ {
+ {7, 8, 7, 8},
+ {8, 7, 8, 8},
+ {7, 8, 7, 8},
+ {8, 8, 8, 8},
+ },
+ {
+ {7, 8, 8, 8},
+ {8, 8, 8, 8},
+ {8, 8, 7, 8},
+ {8, 8, 8, 8},
+ },
+ {
+ {8, 8, 8, 8},
+ {8, 8, 9, 8},
+ {8, 8, 8, 8},
+ {9, 8, 8, 8},
+ },
+ {
+ {8, 8, 8, 9},
+ {9, 8, 9, 8},
+ {8, 9, 8, 8},
+ {9, 8, 9, 8},
+ },
+ {
+ {8, 9, 8, 9},
+ {9, 8, 9, 9},
+ {8, 9, 8, 9},
+ {9, 9, 9, 8},
+ },
+ {
+ {8, 9, 9, 9},
+ {9, 9, 9, 9},
+ {9, 9, 8, 9},
+ {9, 9, 9, 9},
+ },
+ {
+ {9, 9, 9, 9},
+ {9, 9, 10, 9},
+ {9, 9, 9, 9},
+ {10, 9, 9, 9},
+ },
+ {
+ {9, 9, 9, 10},
+ {10, 9, 10, 9},
+ {9, 10, 9, 9},
+ {10, 9, 10, 9},
+ },
+ {
+ {9, 10, 9, 10},
+ {10, 9, 10, 10},
+ {9, 10, 9, 10},
+ {10, 10, 10, 9},
+ },
+ {
+ {9, 10, 10, 10},
+ {10, 10, 10, 10},
+ {10, 10, 9, 10},
+ {10, 10, 10, 10},
+ },
+ {
+ {10, 10, 10, 10},
+ {10, 10, 11, 10},
+ {10, 10, 10, 10},
+ {11, 10, 10, 10},
+ },
+ {
+ {10, 10, 10, 11},
+ {11, 10, 11, 10},
+ {10, 11, 10, 10},
+ {11, 10, 11, 10},
+ },
+ {
+ {10, 11, 10, 11},
+ {11, 10, 11, 11},
+ {10, 11, 10, 11},
+ {11, 11, 11, 10},
+ },
+ {
+ {10, 11, 11, 11},
+ {11, 11, 11, 11},
+ {11, 11, 10, 11},
+ {11, 11, 11, 11},
+ },
+ {
+ {11, 11, 11, 11},
+ {11, 11, 12, 11},
+ {11, 11, 11, 11},
+ {12, 11, 11, 11},
+ },
+ {
+ {11, 11, 11, 12},
+ {12, 11, 12, 11},
+ {11, 12, 11, 11},
+ {12, 11, 12, 11},
+ },
+ {
+ {11, 12, 11, 12},
+ {12, 11, 12, 12},
+ {11, 12, 11, 12},
+ {12, 12, 12, 11},
+ },
+ {
+ {11, 12, 11, 12},
+ {12, 12, 12, 12},
+ {12, 12, 11, 12},
+ {12, 12, 12, 12},
+ },
+ {
+ {12, 12, 12, 12},
+ {12, 12, 12, 12},
+ {12, 12, 12, 12},
+ {13, 12, 12, 12},
+ },
+ {
+ {12, 12, 12, 12},
+ {13, 12, 13, 12},
+ {12, 13, 12, 12},
+ {13, 12, 13, 12},
+ },
+ {
+ {12, 13, 12, 13},
+ {13, 12, 13, 12},
+ {12, 13, 12, 13},
+ {13, 13, 13, 12},
+ },
+ {
+ {12, 13, 12, 13},
+ {13, 13, 13, 13},
+ {13, 13, 12, 13},
+ {13, 13, 13, 13},
+ },
+ {
+ {13, 13, 13, 13},
+ {13, 13, 13, 13},
+ {13, 13, 13, 13},
+ {14, 13, 13, 13},
+ },
+ {
+ {13, 13, 13, 13},
+ {14, 13, 14, 13},
+ {13, 14, 13, 13},
+ {14, 13, 14, 13},
+ },
+ {
+ {13, 14, 13, 14},
+ {14, 13, 14, 13},
+ {13, 14, 13, 14},
+ {14, 14, 14, 13},
+ },
+ {
+ {13, 14, 13, 14},
+ {14, 14, 14, 14},
+ {14, 14, 13, 14},
+ {14, 14, 14, 14},
+ },
+ {
+ {14, 14, 14, 14},
+ {14, 14, 14, 14},
+ {14, 14, 14, 14},
+ {15, 14, 14, 14},
+ },
+ {
+ {14, 14, 14, 14},
+ {15, 14, 15, 14},
+ {14, 15, 14, 14},
+ {15, 14, 15, 14},
+ },
+ {
+ {14, 15, 14, 15},
+ {15, 14, 15, 14},
+ {14, 15, 14, 15},
+ {15, 15, 15, 14},
+ },
+ {
+ {14, 15, 14, 15},
+ {15, 15, 15, 15},
+ {15, 15, 14, 15},
+ {15, 15, 15, 15},
+ },
+ {
+ {15, 15, 15, 15},
+ {15, 15, 15, 15},
+ {15, 15, 15, 15},
+ {16, 15, 15, 15},
+ },
+ {
+ {15, 15, 15, 15},
+ {16, 15, 16, 15},
+ {15, 16, 15, 15},
+ {16, 15, 16, 15},
+ },
+ {
+ {15, 16, 15, 16},
+ {16, 15, 16, 15},
+ {15, 16, 15, 16},
+ {16, 16, 16, 15},
+ },
+ {
+ {15, 16, 15, 16},
+ {16, 16, 16, 16},
+ {16, 16, 15, 16},
+ {16, 16, 16, 16},
+ },
+ {
+ {16, 16, 16, 16},
+ {16, 16, 16, 16},
+ {16, 16, 16, 16},
+ {17, 16, 16, 16},
+ },
+ {
+ {16, 16, 16, 16},
+ {17, 16, 17, 16},
+ {16, 17, 16, 16},
+ {17, 16, 17, 16},
+ },
+ {
+ {16, 17, 16, 17},
+ {17, 16, 17, 16},
+ {16, 17, 16, 17},
+ {17, 17, 17, 16},
+ },
+ {
+ {16, 17, 16, 17},
+ {17, 17, 17, 17},
+ {17, 17, 16, 17},
+ {17, 17, 17, 17},
+ },
+ {
+ {17, 17, 17, 17},
+ {17, 17, 17, 17},
+ {17, 17, 17, 17},
+ {18, 17, 17, 17},
+ },
+ {
+ {17, 17, 17, 17},
+ {18, 17, 18, 17},
+ {17, 18, 17, 17},
+ {18, 17, 18, 17},
+ },
+ {
+ {17, 18, 17, 18},
+ {18, 17, 18, 17},
+ {17, 18, 17, 18},
+ {18, 18, 18, 17},
+ },
+ {
+ {17, 18, 17, 18},
+ {18, 18, 18, 18},
+ {18, 18, 17, 18},
+ {18, 18, 18, 18},
+ },
+ {
+ {18, 18, 18, 18},
+ {18, 18, 18, 18},
+ {18, 18, 18, 18},
+ {19, 18, 18, 18},
+ },
+ {
+ {18, 18, 18, 18},
+ {19, 18, 19, 18},
+ {18, 19, 18, 18},
+ {19, 18, 19, 18},
+ },
+ {
+ {18, 19, 18, 19},
+ {19, 18, 19, 18},
+ {18, 19, 18, 19},
+ {19, 19, 19, 18},
+ },
+ {
+ {18, 19, 18, 19},
+ {19, 19, 19, 19},
+ {19, 19, 18, 19},
+ {19, 19, 19, 19},
+ },
+ {
+ {19, 19, 19, 19},
+ {19, 19, 19, 19},
+ {19, 19, 19, 19},
+ {20, 19, 19, 19},
+ },
+ {
+ {19, 19, 19, 19},
+ {20, 19, 20, 19},
+ {19, 20, 19, 19},
+ {20, 19, 20, 19},
+ },
+ {
+ {19, 20, 19, 20},
+ {20, 19, 20, 19},
+ {19, 20, 19, 20},
+ {20, 20, 20, 19},
+ },
+ {
+ {19, 20, 19, 20},
+ {20, 20, 20, 20},
+ {19, 20, 19, 20},
+ {20, 20, 20, 20},
+ },
+ {
+ {20, 20, 20, 20},
+ {20, 20, 20, 20},
+ {20, 20, 20, 20},
+ {20, 20, 20, 20},
+ },
+ {
+ {20, 20, 20, 20},
+ {21, 20, 21, 20},
+ {20, 20, 20, 20},
+ {21, 20, 21, 20},
+ },
+ {
+ {20, 21, 20, 21},
+ {21, 20, 21, 20},
+ {20, 21, 20, 21},
+ {21, 20, 21, 20},
+ },
+ {
+ {20, 21, 20, 21},
+ {21, 21, 21, 21},
+ {20, 21, 20, 21},
+ {21, 21, 21, 21},
+ },
+ {
+ {21, 21, 21, 21},
+ {21, 21, 21, 21},
+ {21, 21, 21, 21},
+ {21, 21, 21, 21},
+ },
+ {
+ {21, 21, 21, 21},
+ {22, 21, 22, 21},
+ {21, 21, 21, 21},
+ {22, 21, 22, 21},
+ },
+ {
+ {21, 22, 21, 22},
+ {22, 21, 22, 21},
+ {21, 22, 21, 22},
+ {22, 21, 22, 21},
+ },
+ {
+ {21, 22, 21, 22},
+ {22, 22, 22, 22},
+ {21, 22, 21, 22},
+ {22, 22, 22, 22},
+ },
+ {
+ {22, 22, 22, 22},
+ {22, 22, 22, 22},
+ {22, 22, 22, 22},
+ {22, 22, 22, 22},
+ },
+ {
+ {22, 22, 22, 22},
+ {23, 22, 23, 22},
+ {22, 22, 22, 22},
+ {23, 22, 23, 22},
+ },
+ {
+ {22, 23, 22, 23},
+ {23, 22, 23, 22},
+ {22, 23, 22, 23},
+ {23, 22, 23, 22},
+ },
+ {
+ {22, 23, 22, 23},
+ {23, 23, 23, 23},
+ {22, 23, 22, 23},
+ {23, 23, 23, 23},
+ },
+ {
+ {23, 23, 23, 23},
+ {23, 23, 23, 23},
+ {23, 23, 23, 23},
+ {23, 23, 23, 23},
+ },
+ {
+ {23, 23, 23, 23},
+ {24, 23, 24, 23},
+ {23, 23, 23, 23},
+ {24, 23, 24, 23},
+ },
+ {
+ {23, 24, 23, 24},
+ {24, 23, 24, 23},
+ {23, 24, 23, 24},
+ {24, 23, 24, 23},
+ },
+ {
+ {23, 24, 23, 24},
+ {24, 23, 24, 24},
+ {23, 24, 23, 24},
+ {24, 24, 24, 24},
+ },
+ {
+ {23, 24, 24, 24},
+ {24, 24, 24, 24},
+ {24, 24, 24, 24},
+ {24, 24, 24, 24},
+ },
+ {
+ {24, 24, 24, 24},
+ {24, 24, 25, 24},
+ {24, 24, 24, 24},
+ {25, 24, 25, 24},
+ },
+ {
+ {24, 24, 24, 25},
+ {25, 24, 25, 24},
+ {24, 25, 24, 25},
+ {25, 24, 25, 24},
+ },
+ {
+ {24, 25, 24, 25},
+ {25, 24, 25, 25},
+ {24, 25, 24, 25},
+ {25, 25, 25, 25},
+ },
+ {
+ {24, 25, 25, 25},
+ {25, 25, 25, 25},
+ {25, 25, 25, 25},
+ {25, 25, 25, 25},
+ },
+ {
+ {25, 25, 25, 25},
+ {25, 25, 26, 25},
+ {25, 25, 25, 25},
+ {26, 25, 26, 25},
+ },
+ {
+ {25, 25, 25, 26},
+ {26, 25, 26, 25},
+ {25, 26, 25, 26},
+ {26, 25, 26, 25},
+ },
+ {
+ {25, 26, 25, 26},
+ {26, 25, 26, 26},
+ {25, 26, 25, 26},
+ {26, 26, 26, 26},
+ },
+ {
+ {25, 26, 26, 26},
+ {26, 26, 26, 26},
+ {26, 26, 26, 26},
+ {26, 26, 26, 26},
+ },
+ {
+ {26, 26, 26, 26},
+ {26, 26, 27, 26},
+ {26, 26, 26, 26},
+ {27, 26, 27, 26},
+ },
+ {
+ {26, 26, 26, 27},
+ {27, 26, 27, 26},
+ {26, 27, 26, 27},
+ {27, 26, 27, 26},
+ },
+ {
+ {26, 27, 26, 27},
+ {27, 26, 27, 27},
+ {26, 27, 26, 27},
+ {27, 27, 27, 27},
+ },
+ {
+ {26, 27, 27, 27},
+ {27, 27, 27, 27},
+ {27, 27, 27, 27},
+ {27, 27, 27, 27},
+ },
+ {
+ {27, 27, 27, 27},
+ {27, 27, 28, 27},
+ {27, 27, 27, 27},
+ {28, 27, 28, 27},
+ },
+ {
+ {27, 27, 27, 28},
+ {28, 27, 28, 27},
+ {27, 28, 27, 28},
+ {28, 27, 28, 27},
+ },
+ {
+ {27, 28, 27, 28},
+ {28, 27, 28, 28},
+ {27, 28, 27, 28},
+ {28, 28, 28, 27},
+ },
+ {
+ {27, 28, 28, 28},
+ {28, 28, 28, 28},
+ {28, 28, 27, 28},
+ {28, 28, 28, 28},
+ },
+ {
+ {28, 28, 28, 28},
+ {28, 28, 29, 28},
+ {28, 28, 28, 28},
+ {29, 28, 28, 28},
+ },
+ {
+ {28, 28, 28, 29},
+ {29, 28, 29, 28},
+ {28, 29, 28, 28},
+ {29, 28, 29, 28},
+ },
+ {
+ {28, 29, 28, 29},
+ {29, 28, 29, 29},
+ {28, 29, 28, 29},
+ {29, 29, 29, 28},
+ },
+ {
+ {28, 29, 29, 29},
+ {29, 29, 29, 29},
+ {29, 29, 28, 29},
+ {29, 29, 29, 29},
+ },
+ {
+ {29, 29, 29, 29},
+ {29, 29, 30, 29},
+ {29, 29, 29, 29},
+ {30, 29, 29, 29},
+ },
+ {
+ {29, 29, 29, 30},
+ {30, 29, 30, 29},
+ {29, 30, 29, 29},
+ {30, 29, 30, 29},
+ },
+ {
+ {29, 30, 29, 30},
+ {30, 29, 30, 30},
+ {29, 30, 29, 30},
+ {30, 30, 30, 29},
+ },
+ {
+ {29, 30, 30, 30},
+ {30, 30, 30, 30},
+ {30, 30, 29, 30},
+ {30, 30, 30, 30},
+ },
+ {
+ {30, 30, 30, 30},
+ {30, 30, 31, 30},
+ {30, 30, 30, 30},
+ {31, 30, 30, 30},
+ },
+ {
+ {30, 30, 30, 31},
+ {31, 30, 31, 30},
+ {30, 31, 30, 30},
+ {31, 30, 31, 30},
+ },
+ {
+ {30, 31, 30, 31},
+ {31, 30, 31, 31},
+ {30, 31, 30, 31},
+ {31, 31, 31, 30},
+ },
+ {
+ {30, 31, 31, 31},
+ {31, 31, 31, 31},
+ {31, 31, 30, 31},
+ {31, 31, 31, 31},
+ },
+ {
+ {31, 31, 31, 31},
+ {31, 31, 32, 31},
+ {31, 31, 31, 31},
+ {32, 31, 31, 31},
+ },
+ {
+ {31, 31, 31, 32},
+ {32, 31, 32, 31},
+ {31, 32, 31, 31},
+ {32, 31, 32, 31},
+ },
+ {
+ {31, 32, 31, 32},
+ {32, 31, 32, 32},
+ {31, 32, 31, 32},
+ {32, 32, 32, 31},
+ },
+ {
+ {31, 32, 32, 32},
+ {32, 32, 32, 32},
+ {32, 32, 31, 32},
+ {32, 32, 32, 32},
+ },
+ {
+ {32, 32, 32, 32},
+ {32, 32, 33, 32},
+ {32, 32, 32, 32},
+ {33, 32, 32, 32},
+ },
+ {
+ {32, 32, 32, 33},
+ {33, 32, 33, 32},
+ {32, 33, 32, 32},
+ {33, 32, 33, 32},
+ },
+ {
+ {32, 33, 32, 33},
+ {33, 32, 33, 33},
+ {32, 33, 32, 33},
+ {33, 33, 33, 32},
+ },
+ {
+ {32, 33, 33, 33},
+ {33, 33, 33, 33},
+ {33, 33, 32, 33},
+ {33, 33, 33, 33},
+ },
+ {
+ {33, 33, 33, 33},
+ {33, 33, 34, 33},
+ {33, 33, 33, 33},
+ {34, 33, 33, 33},
+ },
+ {
+ {33, 33, 33, 34},
+ {34, 33, 34, 33},
+ {33, 34, 33, 33},
+ {34, 33, 34, 33},
+ },
+ {
+ {33, 34, 33, 34},
+ {34, 33, 34, 34},
+ {33, 34, 33, 34},
+ {34, 34, 34, 33},
+ },
+ {
+ {33, 34, 34, 34},
+ {34, 34, 34, 34},
+ {34, 34, 33, 34},
+ {34, 34, 34, 34},
+ },
+ {
+ {34, 34, 34, 34},
+ {34, 34, 35, 34},
+ {34, 34, 34, 34},
+ {35, 34, 34, 34},
+ },
+ {
+ {34, 34, 34, 35},
+ {35, 34, 35, 34},
+ {34, 35, 34, 34},
+ {35, 34, 35, 34},
+ },
+ {
+ {34, 35, 34, 35},
+ {35, 34, 35, 35},
+ {34, 35, 34, 35},
+ {35, 35, 35, 34},
+ },
+ {
+ {34, 35, 35, 35},
+ {35, 35, 35, 35},
+ {35, 35, 34, 35},
+ {35, 35, 35, 35},
+ },
+ {
+ {35, 35, 35, 35},
+ {35, 35, 36, 35},
+ {35, 35, 35, 35},
+ {36, 35, 35, 35},
+ },
+ {
+ {35, 35, 35, 36},
+ {36, 35, 36, 35},
+ {35, 36, 35, 35},
+ {36, 35, 36, 35},
+ },
+ {
+ {35, 36, 35, 36},
+ {36, 35, 36, 35},
+ {35, 36, 35, 36},
+ {36, 36, 36, 35},
+ },
+ {
+ {35, 36, 35, 36},
+ {36, 36, 36, 36},
+ {36, 36, 35, 36},
+ {36, 36, 36, 36},
+ },
+ {
+ {36, 36, 36, 36},
+ {36, 36, 36, 36},
+ {36, 36, 36, 36},
+ {37, 36, 36, 36},
+ },
+ {
+ {36, 36, 36, 36},
+ {37, 36, 37, 36},
+ {36, 37, 36, 36},
+ {37, 36, 37, 36},
+ },
+ {
+ {36, 37, 36, 37},
+ {37, 36, 37, 36},
+ {36, 37, 36, 37},
+ {37, 37, 37, 36},
+ },
+ {
+ {36, 37, 36, 37},
+ {37, 37, 37, 37},
+ {37, 37, 36, 37},
+ {37, 37, 37, 37},
+ },
+ {
+ {37, 37, 37, 37},
+ {37, 37, 37, 37},
+ {37, 37, 37, 37},
+ {38, 37, 37, 37},
+ },
+ {
+ {37, 37, 37, 37},
+ {38, 37, 38, 37},
+ {37, 38, 37, 37},
+ {38, 37, 38, 37},
+ },
+ {
+ {37, 38, 37, 38},
+ {38, 37, 38, 37},
+ {37, 38, 37, 38},
+ {38, 38, 38, 37},
+ },
+ {
+ {37, 38, 37, 38},
+ {38, 38, 38, 38},
+ {38, 38, 37, 38},
+ {38, 38, 38, 38},
+ },
+ {
+ {38, 38, 38, 38},
+ {38, 38, 38, 38},
+ {38, 38, 38, 38},
+ {39, 38, 38, 38},
+ },
+ {
+ {38, 38, 38, 38},
+ {39, 38, 39, 38},
+ {38, 39, 38, 38},
+ {39, 38, 39, 38},
+ },
+ {
+ {38, 39, 38, 39},
+ {39, 38, 39, 38},
+ {38, 39, 38, 39},
+ {39, 39, 39, 38},
+ },
+ {
+ {38, 39, 38, 39},
+ {39, 39, 39, 39},
+ {39, 39, 38, 39},
+ {39, 39, 39, 39},
+ },
+ {
+ {39, 39, 39, 39},
+ {39, 39, 39, 39},
+ {39, 39, 39, 39},
+ {40, 39, 39, 39},
+ },
+ {
+ {39, 39, 39, 39},
+ {40, 39, 40, 39},
+ {39, 40, 39, 39},
+ {40, 39, 40, 39},
+ },
+ {
+ {39, 40, 39, 40},
+ {40, 39, 40, 39},
+ {39, 40, 39, 40},
+ {40, 39, 40, 39},
+ },
+ {
+ {39, 40, 39, 40},
+ {40, 40, 40, 40},
+ {39, 40, 39, 40},
+ {40, 40, 40, 40},
+ },
+ {
+ {40, 40, 40, 40},
+ {40, 40, 40, 40},
+ {40, 40, 40, 40},
+ {40, 40, 40, 40},
+ },
+ {
+ {40, 40, 40, 40},
+ {41, 40, 41, 40},
+ {40, 40, 40, 40},
+ {41, 40, 41, 40},
+ },
+ {
+ {40, 41, 40, 41},
+ {41, 40, 41, 40},
+ {40, 41, 40, 41},
+ {41, 40, 41, 40},
+ },
+ {
+ {40, 41, 40, 41},
+ {41, 41, 41, 41},
+ {40, 41, 40, 41},
+ {41, 41, 41, 41},
+ },
+ {
+ {41, 41, 41, 41},
+ {41, 41, 41, 41},
+ {41, 41, 41, 41},
+ {41, 41, 41, 41},
+ },
+ {
+ {41, 41, 41, 41},
+ {42, 41, 42, 41},
+ {41, 41, 41, 41},
+ {42, 41, 42, 41},
+ },
+ {
+ {41, 42, 41, 42},
+ {42, 41, 42, 41},
+ {41, 42, 41, 42},
+ {42, 41, 42, 41},
+ },
+ {
+ {41, 42, 41, 42},
+ {42, 42, 42, 42},
+ {41, 42, 41, 42},
+ {42, 42, 42, 42},
+ },
+ {
+ {42, 42, 42, 42},
+ {42, 42, 42, 42},
+ {42, 42, 42, 42},
+ {42, 42, 42, 42},
+ },
+ {
+ {42, 42, 42, 42},
+ {43, 42, 43, 42},
+ {42, 42, 42, 42},
+ {43, 42, 43, 42},
+ },
+ {
+ {42, 43, 42, 43},
+ {43, 42, 43, 42},
+ {42, 43, 42, 43},
+ {43, 42, 43, 42},
+ },
+ {
+ {42, 43, 42, 43},
+ {43, 43, 43, 43},
+ {42, 43, 42, 43},
+ {43, 43, 43, 43},
+ },
+ {
+ {43, 43, 43, 43},
+ {43, 43, 43, 43},
+ {43, 43, 43, 43},
+ {43, 43, 43, 43},
+ },
+ {
+ {43, 43, 43, 43},
+ {44, 43, 44, 43},
+ {43, 43, 43, 43},
+ {44, 43, 44, 43},
+ },
+ {
+ {43, 43, 43, 44},
+ {44, 43, 44, 43},
+ {43, 44, 43, 44},
+ {44, 43, 44, 43},
+ },
+ {
+ {43, 44, 43, 44},
+ {44, 43, 44, 44},
+ {43, 44, 43, 44},
+ {44, 44, 44, 44},
+ },
+ {
+ {43, 44, 44, 44},
+ {44, 44, 44, 44},
+ {44, 44, 44, 44},
+ {44, 44, 44, 44},
+ },
+ {
+ {44, 44, 44, 44},
+ {44, 44, 45, 44},
+ {44, 44, 44, 44},
+ {45, 44, 45, 44},
+ },
+ {
+ {44, 44, 44, 45},
+ {45, 44, 45, 44},
+ {44, 45, 44, 45},
+ {45, 44, 45, 44},
+ },
+ {
+ {44, 45, 44, 45},
+ {45, 44, 45, 45},
+ {44, 45, 44, 45},
+ {45, 45, 45, 45},
+ },
+ {
+ {44, 45, 45, 45},
+ {45, 45, 45, 45},
+ {45, 45, 45, 45},
+ {45, 45, 45, 45},
+ },
+ {
+ {45, 45, 45, 45},
+ {45, 45, 46, 45},
+ {45, 45, 45, 45},
+ {46, 45, 46, 45},
+ },
+ {
+ {45, 45, 45, 46},
+ {46, 45, 46, 45},
+ {45, 46, 45, 46},
+ {46, 45, 46, 45},
+ },
+ {
+ {45, 46, 45, 46},
+ {46, 45, 46, 46},
+ {45, 46, 45, 46},
+ {46, 46, 46, 46},
+ },
+ {
+ {45, 46, 46, 46},
+ {46, 46, 46, 46},
+ {46, 46, 46, 46},
+ {46, 46, 46, 46},
+ },
+ {
+ {46, 46, 46, 46},
+ {46, 46, 47, 46},
+ {46, 46, 46, 46},
+ {47, 46, 47, 46},
+ },
+ {
+ {46, 46, 46, 47},
+ {47, 46, 47, 46},
+ {46, 47, 46, 47},
+ {47, 46, 47, 46},
+ },
+ {
+ {46, 47, 46, 47},
+ {47, 46, 47, 47},
+ {46, 47, 46, 47},
+ {47, 47, 47, 47},
+ },
+ {
+ {46, 47, 47, 47},
+ {47, 47, 47, 47},
+ {47, 47, 47, 47},
+ {47, 47, 47, 47},
+ },
+ {
+ {47, 47, 47, 47},
+ {47, 47, 48, 47},
+ {47, 47, 47, 47},
+ {48, 47, 48, 47},
+ },
+ {
+ {47, 47, 47, 48},
+ {48, 47, 48, 47},
+ {47, 48, 47, 48},
+ {48, 47, 48, 47},
+ },
+ {
+ {47, 48, 47, 48},
+ {48, 47, 48, 48},
+ {47, 48, 47, 48},
+ {48, 48, 48, 48},
+ },
+ {
+ {47, 48, 48, 48},
+ {48, 48, 48, 48},
+ {48, 48, 48, 48},
+ {48, 48, 48, 48},
+ },
+ {
+ {48, 48, 48, 48},
+ {48, 48, 49, 48},
+ {48, 48, 48, 48},
+ {49, 48, 49, 48},
+ },
+ {
+ {48, 48, 48, 49},
+ {49, 48, 49, 48},
+ {48, 49, 48, 49},
+ {49, 48, 49, 48},
+ },
+ {
+ {48, 49, 48, 49},
+ {49, 48, 49, 49},
+ {48, 49, 48, 49},
+ {49, 49, 49, 49},
+ },
+ {
+ {48, 49, 49, 49},
+ {49, 49, 49, 49},
+ {49, 49, 49, 49},
+ {49, 49, 49, 49},
+ },
+ {
+ {49, 49, 49, 49},
+ {49, 49, 50, 49},
+ {49, 49, 49, 49},
+ {50, 49, 50, 49},
+ },
+ {
+ {49, 49, 49, 50},
+ {50, 49, 50, 49},
+ {49, 50, 49, 50},
+ {50, 49, 50, 49},
+ },
+ {
+ {49, 50, 49, 50},
+ {50, 49, 50, 50},
+ {49, 50, 49, 50},
+ {50, 50, 50, 50},
+ },
+ {
+ {49, 50, 50, 50},
+ {50, 50, 50, 50},
+ {50, 50, 50, 50},
+ {50, 50, 50, 50},
+ },
+ {
+ {50, 50, 50, 50},
+ {50, 50, 51, 50},
+ {50, 50, 50, 50},
+ {51, 50, 51, 50},
+ },
+ {
+ {50, 50, 50, 51},
+ {51, 50, 51, 50},
+ {50, 51, 50, 51},
+ {51, 50, 51, 50},
+ },
+ {
+ {50, 51, 50, 51},
+ {51, 50, 51, 51},
+ {50, 51, 50, 51},
+ {51, 51, 51, 51},
+ },
+ {
+ {50, 51, 51, 51},
+ {51, 51, 51, 51},
+ {51, 51, 51, 51},
+ {51, 51, 51, 51},
+ },
+ {
+ {51, 51, 51, 51},
+ {51, 51, 52, 51},
+ {51, 51, 51, 51},
+ {52, 51, 52, 51},
+ },
+ {
+ {51, 51, 51, 52},
+ {52, 51, 52, 51},
+ {51, 52, 51, 51},
+ {52, 51, 52, 51},
+ },
+ {
+ {51, 52, 51, 52},
+ {52, 51, 52, 52},
+ {51, 52, 51, 52},
+ {52, 52, 52, 51},
+ },
+ {
+ {51, 52, 52, 52},
+ {52, 52, 52, 52},
+ {52, 52, 51, 52},
+ {52, 52, 52, 52},
+ },
+ {
+ {52, 52, 52, 52},
+ {52, 52, 53, 52},
+ {52, 52, 52, 52},
+ {53, 52, 52, 52},
+ },
+ {
+ {52, 52, 52, 53},
+ {53, 52, 53, 52},
+ {52, 53, 52, 52},
+ {53, 52, 53, 52},
+ },
+ {
+ {52, 53, 52, 53},
+ {53, 52, 53, 53},
+ {52, 53, 52, 53},
+ {53, 53, 53, 52},
+ },
+ {
+ {52, 53, 53, 53},
+ {53, 53, 53, 53},
+ {53, 53, 52, 53},
+ {53, 53, 53, 53},
+ },
+ {
+ {53, 53, 53, 53},
+ {53, 53, 54, 53},
+ {53, 53, 53, 53},
+ {54, 53, 53, 53},
+ },
+ {
+ {53, 53, 53, 54},
+ {54, 53, 54, 53},
+ {53, 54, 53, 53},
+ {54, 53, 54, 53},
+ },
+ {
+ {53, 54, 53, 54},
+ {54, 53, 54, 54},
+ {53, 54, 53, 54},
+ {54, 54, 54, 53},
+ },
+ {
+ {53, 54, 54, 54},
+ {54, 54, 54, 54},
+ {54, 54, 53, 54},
+ {54, 54, 54, 54},
+ },
+ {
+ {54, 54, 54, 54},
+ {54, 54, 55, 54},
+ {54, 54, 54, 54},
+ {55, 54, 54, 54},
+ },
+ {
+ {54, 54, 54, 55},
+ {55, 54, 55, 54},
+ {54, 55, 54, 54},
+ {55, 54, 55, 54},
+ },
+ {
+ {54, 55, 54, 55},
+ {55, 54, 55, 55},
+ {54, 55, 54, 55},
+ {55, 55, 55, 54},
+ },
+ {
+ {54, 55, 55, 55},
+ {55, 55, 55, 55},
+ {55, 55, 54, 55},
+ {55, 55, 55, 55},
+ },
+ {
+ {55, 55, 55, 55},
+ {55, 55, 56, 55},
+ {55, 55, 55, 55},
+ {56, 55, 55, 55},
+ },
+ {
+ {55, 55, 55, 55},
+ {56, 55, 56, 55},
+ {55, 56, 55, 55},
+ {56, 55, 56, 55},
+ },
+ {
+ {55, 56, 55, 56},
+ {56, 55, 56, 55},
+ {55, 56, 55, 56},
+ {56, 56, 56, 55},
+ },
+ {
+ {55, 56, 55, 56},
+ {56, 56, 56, 56},
+ {56, 56, 55, 56},
+ {56, 56, 56, 56},
+ },
+ {
+ {56, 56, 56, 56},
+ {56, 56, 56, 56},
+ {56, 56, 56, 56},
+ {57, 56, 56, 56},
+ },
+ {
+ {56, 56, 56, 56},
+ {57, 56, 57, 56},
+ {56, 57, 56, 56},
+ {57, 56, 57, 56},
+ },
+ {
+ {56, 57, 56, 57},
+ {57, 56, 57, 56},
+ {56, 57, 56, 57},
+ {57, 57, 57, 56},
+ },
+ {
+ {56, 57, 56, 57},
+ {57, 57, 57, 57},
+ {57, 57, 56, 57},
+ {57, 57, 57, 57},
+ },
+ {
+ {57, 57, 57, 57},
+ {57, 57, 57, 57},
+ {57, 57, 57, 57},
+ {58, 57, 57, 57},
+ },
+ {
+ {57, 57, 57, 57},
+ {58, 57, 58, 57},
+ {57, 58, 57, 57},
+ {58, 57, 58, 57},
+ },
+ {
+ {57, 58, 57, 58},
+ {58, 57, 58, 57},
+ {57, 58, 57, 58},
+ {58, 58, 58, 57},
+ },
+ {
+ {57, 58, 57, 58},
+ {58, 58, 58, 58},
+ {58, 58, 57, 58},
+ {58, 58, 58, 58},
+ },
+ {
+ {58, 58, 58, 58},
+ {58, 58, 58, 58},
+ {58, 58, 58, 58},
+ {59, 58, 58, 58},
+ },
+ {
+ {58, 58, 58, 58},
+ {59, 58, 59, 58},
+ {58, 59, 58, 58},
+ {59, 58, 59, 58},
+ },
+ {
+ {58, 59, 58, 59},
+ {59, 58, 59, 58},
+ {58, 59, 58, 59},
+ {59, 59, 59, 58},
+ },
+ {
+ {58, 59, 58, 59},
+ {59, 59, 59, 59},
+ {59, 59, 58, 59},
+ {59, 59, 59, 59},
+ },
+ {
+ {59, 59, 59, 59},
+ {59, 59, 59, 59},
+ {59, 59, 59, 59},
+ {60, 59, 59, 59},
+ },
+ {
+ {59, 59, 59, 59},
+ {60, 59, 60, 59},
+ {59, 59, 59, 59},
+ {60, 59, 60, 59},
+ },
+ {
+ {59, 60, 59, 60},
+ {60, 59, 60, 59},
+ {59, 60, 59, 60},
+ {60, 59, 60, 59},
+ },
+ {
+ {59, 60, 59, 60},
+ {60, 60, 60, 60},
+ {59, 60, 59, 60},
+ {60, 60, 60, 60},
+ },
+ {
+ {60, 60, 60, 60},
+ {60, 60, 60, 60},
+ {60, 60, 60, 60},
+ {60, 60, 60, 60},
+ },
+ {
+ {60, 60, 60, 60},
+ {61, 60, 61, 60},
+ {60, 60, 60, 60},
+ {61, 60, 61, 60},
+ },
+ {
+ {60, 61, 60, 61},
+ {61, 60, 61, 60},
+ {60, 61, 60, 61},
+ {61, 60, 61, 60},
+ },
+ {
+ {60, 61, 60, 61},
+ {61, 61, 61, 61},
+ {60, 61, 60, 61},
+ {61, 61, 61, 61},
+ },
+ {
+ {61, 61, 61, 61},
+ {61, 61, 61, 61},
+ {61, 61, 61, 61},
+ {61, 61, 61, 61},
+ },
+ {
+ {61, 61, 61, 61},
+ {62, 61, 62, 61},
+ {61, 61, 61, 61},
+ {62, 61, 62, 61},
+ },
+ {
+ {61, 62, 61, 62},
+ {62, 61, 62, 61},
+ {61, 62, 61, 62},
+ {62, 61, 62, 61},
+ },
+ {
+ {61, 62, 61, 62},
+ {62, 62, 62, 62},
+ {61, 62, 61, 62},
+ {62, 62, 62, 62},
+ },
+ {
+ {62, 62, 62, 62},
+ {62, 62, 62, 62},
+ {62, 62, 62, 62},
+ {62, 62, 62, 62},
+ },
+ {
+ {62, 62, 62, 62},
+ {63, 62, 63, 62},
+ {62, 62, 62, 62},
+ {63, 62, 63, 62},
+ },
+ {
+ {62, 63, 62, 63},
+ {63, 62, 63, 62},
+ {62, 63, 62, 63},
+ {63, 62, 63, 62},
+ },
+ {
+ {62, 63, 62, 63},
+ {63, 63, 63, 63},
+ {62, 63, 62, 63},
+ {63, 63, 63, 63},
+ },
+ {
+ {63, 63, 63, 63},
+ {63, 63, 63, 63},
+ {63, 63, 63, 63},
+ {63, 63, 63, 63},
+ },
+};
+/*
+ * 2x2 dither lookup for the red and blue channels.
+ * Indexed as dither_rb2x2[value][y & 1][x & 1] by do_dither(): the first index
+ * is the 8-bit channel value (0-255), the other two select the pixel's position
+ * inside a 2x2 cell. Every entry is an output level in the range 0..31, which
+ * do_dither() places in the 5-bit red and blue fields of a 16-bit pixel.
+ */
+static const uint8_t dither_rb2x2[256][2][2] =
+{
+ {
+ {0, 0},
+ {0, 0},
+ },
+ {
+ {0, 0},
+ {1, 0},
+ },
+ {
+ {0, 0},
+ {1, 0},
+ },
+ {
+ {0, 1},
+ {1, 0},
+ },
+ {
+ {0, 1},
+ {1, 0},
+ },
+ {
+ {0, 1},
+ {1, 1},
+ },
+ {
+ {0, 1},
+ {1, 1},
+ },
+ {
+ {1, 1},
+ {1, 1},
+ },
+ {
+ {1, 1},
+ {1, 1},
+ },
+ {
+ {1, 1},
+ {2, 1},
+ },
+ {
+ {1, 1},
+ {2, 1},
+ },
+ {
+ {1, 2},
+ {2, 1},
+ },
+ {
+ {1, 2},
+ {2, 1},
+ },
+ {
+ {1, 2},
+ {2, 2},
+ },
+ {
+ {1, 2},
+ {2, 2},
+ },
+ {
+ {2, 2},
+ {2, 2},
+ },
+ {
+ {2, 2},
+ {2, 2},
+ },
+ {
+ {2, 2},
+ {2, 2},
+ },
+ {
+ {2, 2},
+ {3, 2},
+ },
+ {
+ {2, 2},
+ {3, 2},
+ },
+ {
+ {2, 3},
+ {3, 2},
+ },
+ {
+ {2, 3},
+ {3, 2},
+ },
+ {
+ {2, 3},
+ {3, 3},
+ },
+ {
+ {2, 3},
+ {3, 3},
+ },
+ {
+ {3, 3},
+ {3, 3},
+ },
+ {
+ {3, 3},
+ {3, 3},
+ },
+ {
+ {3, 3},
+ {4, 3},
+ },
+ {
+ {3, 3},
+ {4, 3},
+ },
+ {
+ {3, 4},
+ {4, 3},
+ },
+ {
+ {3, 4},
+ {4, 3},
+ },
+ {
+ {3, 4},
+ {4, 4},
+ },
+ {
+ {3, 4},
+ {4, 4},
+ },
+ {
+ {4, 4},
+ {4, 4},
+ },
+ {
+ {4, 4},
+ {4, 4},
+ },
+ {
+ {4, 4},
+ {5, 4},
+ },
+ {
+ {4, 4},
+ {5, 4},
+ },
+ {
+ {4, 5},
+ {5, 4},
+ },
+ {
+ {4, 5},
+ {5, 4},
+ },
+ {
+ {4, 5},
+ {5, 5},
+ },
+ {
+ {4, 5},
+ {5, 5},
+ },
+ {
+ {5, 5},
+ {5, 5},
+ },
+ {
+ {5, 5},
+ {5, 5},
+ },
+ {
+ {5, 5},
+ {6, 5},
+ },
+ {
+ {5, 5},
+ {6, 5},
+ },
+ {
+ {5, 6},
+ {6, 5},
+ },
+ {
+ {5, 6},
+ {6, 5},
+ },
+ {
+ {5, 6},
+ {6, 6},
+ },
+ {
+ {5, 6},
+ {6, 6},
+ },
+ {
+ {5, 6},
+ {6, 6},
+ },
+ {
+ {6, 6},
+ {6, 6},
+ },
+ {
+ {6, 6},
+ {6, 6},
+ },
+ {
+ {6, 6},
+ {7, 6},
+ },
+ {
+ {6, 6},
+ {7, 6},
+ },
+ {
+ {6, 7},
+ {7, 6},
+ },
+ {
+ {6, 7},
+ {7, 6},
+ },
+ {
+ {6, 7},
+ {7, 7},
+ },
+ {
+ {6, 7},
+ {7, 7},
+ },
+ {
+ {7, 7},
+ {7, 7},
+ },
+ {
+ {7, 7},
+ {7, 7},
+ },
+ {
+ {7, 7},
+ {8, 7},
+ },
+ {
+ {7, 7},
+ {8, 7},
+ },
+ {
+ {7, 8},
+ {8, 7},
+ },
+ {
+ {7, 8},
+ {8, 7},
+ },
+ {
+ {7, 8},
+ {8, 8},
+ },
+ {
+ {7, 8},
+ {8, 8},
+ },
+ {
+ {8, 8},
+ {8, 8},
+ },
+ {
+ {8, 8},
+ {8, 8},
+ },
+ {
+ {8, 8},
+ {9, 8},
+ },
+ {
+ {8, 8},
+ {9, 8},
+ },
+ {
+ {8, 9},
+ {9, 8},
+ },
+ {
+ {8, 9},
+ {9, 8},
+ },
+ {
+ {8, 9},
+ {9, 9},
+ },
+ {
+ {8, 9},
+ {9, 9},
+ },
+ {
+ {9, 9},
+ {9, 9},
+ },
+ {
+ {9, 9},
+ {9, 9},
+ },
+ {
+ {9, 9},
+ {10, 9},
+ },
+ {
+ {9, 9},
+ {10, 9},
+ },
+ {
+ {9, 10},
+ {10, 9},
+ },
+ {
+ {9, 10},
+ {10, 9},
+ },
+ {
+ {9, 10},
+ {10, 10},
+ },
+ {
+ {9, 10},
+ {10, 10},
+ },
+ {
+ {9, 10},
+ {10, 10},
+ },
+ {
+ {10, 10},
+ {10, 10},
+ },
+ {
+ {10, 10},
+ {10, 10},
+ },
+ {
+ {10, 10},
+ {11, 10},
+ },
+ {
+ {10, 10},
+ {11, 10},
+ },
+ {
+ {10, 11},
+ {11, 10},
+ },
+ {
+ {10, 11},
+ {11, 10},
+ },
+ {
+ {10, 11},
+ {11, 11},
+ },
+ {
+ {10, 11},
+ {11, 11},
+ },
+ {
+ {11, 11},
+ {11, 11},
+ },
+ {
+ {11, 11},
+ {11, 11},
+ },
+ {
+ {11, 11},
+ {12, 11},
+ },
+ {
+ {11, 11},
+ {12, 11},
+ },
+ {
+ {11, 12},
+ {12, 11},
+ },
+ {
+ {11, 12},
+ {12, 11},
+ },
+ {
+ {11, 12},
+ {12, 12},
+ },
+ {
+ {11, 12},
+ {12, 12},
+ },
+ {
+ {12, 12},
+ {12, 12},
+ },
+ {
+ {12, 12},
+ {12, 12},
+ },
+ {
+ {12, 12},
+ {13, 12},
+ },
+ {
+ {12, 12},
+ {13, 12},
+ },
+ {
+ {12, 13},
+ {13, 12},
+ },
+ {
+ {12, 13},
+ {13, 12},
+ },
+ {
+ {12, 13},
+ {13, 13},
+ },
+ {
+ {12, 13},
+ {13, 13},
+ },
+ {
+ {13, 13},
+ {13, 13},
+ },
+ {
+ {13, 13},
+ {13, 13},
+ },
+ {
+ {13, 13},
+ {14, 13},
+ },
+ {
+ {13, 13},
+ {14, 13},
+ },
+ {
+ {13, 14},
+ {14, 13},
+ },
+ {
+ {13, 14},
+ {14, 13},
+ },
+ {
+ {13, 14},
+ {14, 13},
+ },
+ {
+ {13, 14},
+ {14, 14},
+ },
+ {
+ {13, 14},
+ {14, 14},
+ },
+ {
+ {14, 14},
+ {14, 14},
+ },
+ {
+ {14, 14},
+ {14, 14},
+ },
+ {
+ {14, 14},
+ {15, 14},
+ },
+ {
+ {14, 14},
+ {15, 14},
+ },
+ {
+ {14, 15},
+ {15, 14},
+ },
+ {
+ {14, 15},
+ {15, 14},
+ },
+ {
+ {14, 15},
+ {15, 15},
+ },
+ {
+ {14, 15},
+ {15, 15},
+ },
+ {
+ {15, 15},
+ {15, 15},
+ },
+ {
+ {15, 15},
+ {15, 15},
+ },
+ {
+ {15, 15},
+ {16, 15},
+ },
+ {
+ {15, 15},
+ {16, 15},
+ },
+ {
+ {15, 16},
+ {16, 15},
+ },
+ {
+ {15, 16},
+ {16, 15},
+ },
+ {
+ {15, 16},
+ {16, 16},
+ },
+ {
+ {15, 16},
+ {16, 16},
+ },
+ {
+ {16, 16},
+ {16, 16},
+ },
+ {
+ {16, 16},
+ {16, 16},
+ },
+ {
+ {16, 16},
+ {17, 16},
+ },
+ {
+ {16, 16},
+ {17, 16},
+ },
+ {
+ {16, 17},
+ {17, 16},
+ },
+ {
+ {16, 17},
+ {17, 16},
+ },
+ {
+ {16, 17},
+ {17, 17},
+ },
+ {
+ {16, 17},
+ {17, 17},
+ },
+ {
+ {17, 17},
+ {17, 17},
+ },
+ {
+ {17, 17},
+ {17, 17},
+ },
+ {
+ {17, 17},
+ {18, 17},
+ },
+ {
+ {17, 17},
+ {18, 17},
+ },
+ {
+ {17, 18},
+ {18, 17},
+ },
+ {
+ {17, 18},
+ {18, 17},
+ },
+ {
+ {17, 18},
+ {18, 18},
+ },
+ {
+ {17, 18},
+ {18, 18},
+ },
+ {
+ {18, 18},
+ {18, 18},
+ },
+ {
+ {18, 18},
+ {18, 18},
+ },
+ {
+ {18, 18},
+ {19, 18},
+ },
+ {
+ {18, 18},
+ {19, 18},
+ },
+ {
+ {18, 19},
+ {19, 18},
+ },
+ {
+ {18, 19},
+ {19, 18},
+ },
+ {
+ {18, 19},
+ {19, 19},
+ },
+ {
+ {18, 19},
+ {19, 19},
+ },
+ {
+ {19, 19},
+ {19, 19},
+ },
+ {
+ {19, 19},
+ {19, 19},
+ },
+ {
+ {19, 19},
+ {20, 19},
+ },
+ {
+ {19, 19},
+ {20, 19},
+ },
+ {
+ {19, 20},
+ {20, 19},
+ },
+ {
+ {19, 20},
+ {20, 19},
+ },
+ {
+ {19, 20},
+ {20, 19},
+ },
+ {
+ {19, 20},
+ {20, 20},
+ },
+ {
+ {19, 20},
+ {20, 20},
+ },
+ {
+ {20, 20},
+ {20, 20},
+ },
+ {
+ {20, 20},
+ {20, 20},
+ },
+ {
+ {20, 20},
+ {21, 20},
+ },
+ {
+ {20, 20},
+ {21, 20},
+ },
+ {
+ {20, 21},
+ {21, 20},
+ },
+ {
+ {20, 21},
+ {21, 20},
+ },
+ {
+ {20, 21},
+ {21, 21},
+ },
+ {
+ {20, 21},
+ {21, 21},
+ },
+ {
+ {21, 21},
+ {21, 21},
+ },
+ {
+ {21, 21},
+ {21, 21},
+ },
+ {
+ {21, 21},
+ {22, 21},
+ },
+ {
+ {21, 21},
+ {22, 21},
+ },
+ {
+ {21, 22},
+ {22, 21},
+ },
+ {
+ {21, 22},
+ {22, 21},
+ },
+ {
+ {21, 22},
+ {22, 22},
+ },
+ {
+ {21, 22},
+ {22, 22},
+ },
+ {
+ {22, 22},
+ {22, 22},
+ },
+ {
+ {22, 22},
+ {22, 22},
+ },
+ {
+ {22, 22},
+ {23, 22},
+ },
+ {
+ {22, 22},
+ {23, 22},
+ },
+ {
+ {22, 23},
+ {23, 22},
+ },
+ {
+ {22, 23},
+ {23, 22},
+ },
+ {
+ {22, 23},
+ {23, 23},
+ },
+ {
+ {22, 23},
+ {23, 23},
+ },
+ {
+ {23, 23},
+ {23, 23},
+ },
+ {
+ {23, 23},
+ {23, 23},
+ },
+ {
+ {23, 23},
+ {24, 23},
+ },
+ {
+ {23, 23},
+ {24, 23},
+ },
+ {
+ {23, 23},
+ {24, 23},
+ },
+ {
+ {23, 24},
+ {24, 23},
+ },
+ {
+ {23, 24},
+ {24, 23},
+ },
+ {
+ {23, 24},
+ {24, 24},
+ },
+ {
+ {23, 24},
+ {24, 24},
+ },
+ {
+ {24, 24},
+ {24, 24},
+ },
+ {
+ {24, 24},
+ {24, 24},
+ },
+ {
+ {24, 24},
+ {25, 24},
+ },
+ {
+ {24, 24},
+ {25, 24},
+ },
+ {
+ {24, 25},
+ {25, 24},
+ },
+ {
+ {24, 25},
+ {25, 24},
+ },
+ {
+ {24, 25},
+ {25, 25},
+ },
+ {
+ {24, 25},
+ {25, 25},
+ },
+ {
+ {25, 25},
+ {25, 25},
+ },
+ {
+ {25, 25},
+ {25, 25},
+ },
+ {
+ {25, 25},
+ {26, 25},
+ },
+ {
+ {25, 25},
+ {26, 25},
+ },
+ {
+ {25, 26},
+ {26, 25},
+ },
+ {
+ {25, 26},
+ {26, 25},
+ },
+ {
+ {25, 26},
+ {26, 26},
+ },
+ {
+ {25, 26},
+ {26, 26},
+ },
+ {
+ {26, 26},
+ {26, 26},
+ },
+ {
+ {26, 26},
+ {26, 26},
+ },
+ {
+ {26, 26},
+ {27, 26},
+ },
+ {
+ {26, 26},
+ {27, 26},
+ },
+ {
+ {26, 27},
+ {27, 26},
+ },
+ {
+ {26, 27},
+ {27, 26},
+ },
+ {
+ {26, 27},
+ {27, 27},
+ },
+ {
+ {26, 27},
+ {27, 27},
+ },
+ {
+ {27, 27},
+ {27, 27},
+ },
+ {
+ {27, 27},
+ {27, 27},
+ },
+ {
+ {27, 27},
+ {28, 27},
+ },
+ {
+ {27, 27},
+ {28, 27},
+ },
+ {
+ {27, 27},
+ {28, 27},
+ },
+ {
+ {27, 28},
+ {28, 27},
+ },
+ {
+ {27, 28},
+ {28, 27},
+ },
+ {
+ {27, 28},
+ {28, 28},
+ },
+ {
+ {27, 28},
+ {28, 28},
+ },
+ {
+ {28, 28},
+ {28, 28},
+ },
+ {
+ {28, 28},
+ {28, 28},
+ },
+ {
+ {28, 28},
+ {29, 28},
+ },
+ {
+ {28, 28},
+ {29, 28},
+ },
+ {
+ {28, 29},
+ {29, 28},
+ },
+ {
+ {28, 29},
+ {29, 28},
+ },
+ {
+ {28, 29},
+ {29, 29},
+ },
+ {
+ {28, 29},
+ {29, 29},
+ },
+ {
+ {29, 29},
+ {29, 29},
+ },
+ {
+ {29, 29},
+ {29, 29},
+ },
+ {
+ {29, 29},
+ {30, 29},
+ },
+ {
+ {29, 29},
+ {30, 29},
+ },
+ {
+ {29, 30},
+ {30, 29},
+ },
+ {
+ {29, 30},
+ {30, 29},
+ },
+ {
+ {29, 30},
+ {30, 30},
+ },
+ {
+ {29, 30},
+ {30, 30},
+ },
+ {
+ {30, 30},
+ {30, 30},
+ },
+ {
+ {30, 30},
+ {30, 30},
+ },
+ {
+ {30, 30},
+ {31, 30},
+ },
+ {
+ {30, 30},
+ {31, 30},
+ },
+ {
+ {30, 31},
+ {31, 30},
+ },
+ {
+ {30, 31},
+ {31, 30},
+ },
+ {
+ {30, 31},
+ {31, 31},
+ },
+ {
+ {30, 31},
+ {31, 31},
+ },
+ {
+ {31, 31},
+ {31, 31},
+ },
+ {
+ {31, 31},
+ {31, 31},
+ },
+};
+/*
+ * 2x2 dither lookup for the green channel.
+ * Indexed as dither_g2x2[value][y & 1][x & 1] by do_dither(): the first index
+ * is the 8-bit channel value (0-255), the other two select the pixel's position
+ * inside a 2x2 cell. Every entry is an output level in the range 0..63, which
+ * do_dither() places in the 6-bit green field of a 16-bit pixel.
+ */
+static const uint8_t dither_g2x2[256][2][2] =
+{
+ {
+ {0, 0},
+ {0, 0},
+ },
+ {
+ {0, 0},
+ {1, 0},
+ },
+ {
+ {0, 1},
+ {1, 0},
+ },
+ {
+ {0, 1},
+ {1, 1},
+ },
+ {
+ {1, 1},
+ {1, 1},
+ },
+ {
+ {1, 1},
+ {2, 1},
+ },
+ {
+ {1, 2},
+ {2, 1},
+ },
+ {
+ {1, 2},
+ {2, 2},
+ },
+ {
+ {2, 2},
+ {2, 2},
+ },
+ {
+ {2, 2},
+ {3, 2},
+ },
+ {
+ {2, 3},
+ {3, 2},
+ },
+ {
+ {2, 3},
+ {3, 3},
+ },
+ {
+ {3, 3},
+ {3, 3},
+ },
+ {
+ {3, 3},
+ {4, 3},
+ },
+ {
+ {3, 4},
+ {4, 3},
+ },
+ {
+ {3, 4},
+ {4, 4},
+ },
+ {
+ {4, 4},
+ {4, 4},
+ },
+ {
+ {4, 4},
+ {5, 4},
+ },
+ {
+ {4, 5},
+ {5, 4},
+ },
+ {
+ {4, 5},
+ {5, 5},
+ },
+ {
+ {5, 5},
+ {5, 5},
+ },
+ {
+ {5, 5},
+ {6, 5},
+ },
+ {
+ {5, 6},
+ {6, 5},
+ },
+ {
+ {5, 6},
+ {6, 6},
+ },
+ {
+ {6, 6},
+ {6, 6},
+ },
+ {
+ {6, 6},
+ {7, 6},
+ },
+ {
+ {6, 7},
+ {7, 6},
+ },
+ {
+ {6, 7},
+ {7, 7},
+ },
+ {
+ {7, 7},
+ {7, 7},
+ },
+ {
+ {7, 7},
+ {8, 7},
+ },
+ {
+ {7, 8},
+ {8, 7},
+ },
+ {
+ {7, 8},
+ {8, 8},
+ },
+ {
+ {8, 8},
+ {8, 8},
+ },
+ {
+ {8, 8},
+ {9, 8},
+ },
+ {
+ {8, 9},
+ {9, 8},
+ },
+ {
+ {8, 9},
+ {9, 9},
+ },
+ {
+ {9, 9},
+ {9, 9},
+ },
+ {
+ {9, 9},
+ {10, 9},
+ },
+ {
+ {9, 10},
+ {10, 9},
+ },
+ {
+ {9, 10},
+ {10, 10},
+ },
+ {
+ {10, 10},
+ {10, 10},
+ },
+ {
+ {10, 10},
+ {11, 10},
+ },
+ {
+ {10, 11},
+ {11, 10},
+ },
+ {
+ {10, 11},
+ {11, 11},
+ },
+ {
+ {11, 11},
+ {11, 11},
+ },
+ {
+ {11, 11},
+ {12, 11},
+ },
+ {
+ {11, 12},
+ {12, 11},
+ },
+ {
+ {11, 12},
+ {12, 12},
+ },
+ {
+ {11, 12},
+ {12, 12},
+ },
+ {
+ {12, 12},
+ {12, 12},
+ },
+ {
+ {12, 12},
+ {13, 12},
+ },
+ {
+ {12, 13},
+ {13, 12},
+ },
+ {
+ {12, 13},
+ {13, 13},
+ },
+ {
+ {13, 13},
+ {13, 13},
+ },
+ {
+ {13, 13},
+ {14, 13},
+ },
+ {
+ {13, 14},
+ {14, 13},
+ },
+ {
+ {13, 14},
+ {14, 14},
+ },
+ {
+ {14, 14},
+ {14, 14},
+ },
+ {
+ {14, 14},
+ {15, 14},
+ },
+ {
+ {14, 15},
+ {15, 14},
+ },
+ {
+ {14, 15},
+ {15, 15},
+ },
+ {
+ {15, 15},
+ {15, 15},
+ },
+ {
+ {15, 15},
+ {16, 15},
+ },
+ {
+ {15, 16},
+ {16, 15},
+ },
+ {
+ {15, 16},
+ {16, 16},
+ },
+ {
+ {16, 16},
+ {16, 16},
+ },
+ {
+ {16, 16},
+ {17, 16},
+ },
+ {
+ {16, 17},
+ {17, 16},
+ },
+ {
+ {16, 17},
+ {17, 17},
+ },
+ {
+ {17, 17},
+ {17, 17},
+ },
+ {
+ {17, 17},
+ {18, 17},
+ },
+ {
+ {17, 18},
+ {18, 17},
+ },
+ {
+ {17, 18},
+ {18, 18},
+ },
+ {
+ {18, 18},
+ {18, 18},
+ },
+ {
+ {18, 18},
+ {19, 18},
+ },
+ {
+ {18, 19},
+ {19, 18},
+ },
+ {
+ {18, 19},
+ {19, 19},
+ },
+ {
+ {19, 19},
+ {19, 19},
+ },
+ {
+ {19, 19},
+ {20, 19},
+ },
+ {
+ {19, 20},
+ {20, 19},
+ },
+ {
+ {19, 20},
+ {20, 20},
+ },
+ {
+ {20, 20},
+ {20, 20},
+ },
+ {
+ {20, 20},
+ {21, 20},
+ },
+ {
+ {20, 21},
+ {21, 20},
+ },
+ {
+ {20, 21},
+ {21, 21},
+ },
+ {
+ {21, 21},
+ {21, 21},
+ },
+ {
+ {21, 21},
+ {22, 21},
+ },
+ {
+ {21, 22},
+ {22, 21},
+ },
+ {
+ {21, 22},
+ {22, 22},
+ },
+ {
+ {22, 22},
+ {22, 22},
+ },
+ {
+ {22, 22},
+ {23, 22},
+ },
+ {
+ {22, 23},
+ {23, 22},
+ },
+ {
+ {22, 23},
+ {23, 23},
+ },
+ {
+ {23, 23},
+ {23, 23},
+ },
+ {
+ {23, 23},
+ {24, 23},
+ },
+ {
+ {23, 24},
+ {24, 23},
+ },
+ {
+ {23, 24},
+ {24, 24},
+ },
+ {
+ {24, 24},
+ {24, 24},
+ },
+ {
+ {24, 24},
+ {25, 24},
+ },
+ {
+ {24, 25},
+ {25, 24},
+ },
+ {
+ {24, 25},
+ {25, 25},
+ },
+ {
+ {25, 25},
+ {25, 25},
+ },
+ {
+ {25, 25},
+ {26, 25},
+ },
+ {
+ {25, 26},
+ {26, 25},
+ },
+ {
+ {25, 26},
+ {26, 26},
+ },
+ {
+ {26, 26},
+ {26, 26},
+ },
+ {
+ {26, 26},
+ {27, 26},
+ },
+ {
+ {26, 27},
+ {27, 26},
+ },
+ {
+ {26, 27},
+ {27, 27},
+ },
+ {
+ {27, 27},
+ {27, 27},
+ },
+ {
+ {27, 27},
+ {28, 27},
+ },
+ {
+ {27, 28},
+ {28, 27},
+ },
+ {
+ {27, 28},
+ {28, 28},
+ },
+ {
+ {28, 28},
+ {28, 28},
+ },
+ {
+ {28, 28},
+ {29, 28},
+ },
+ {
+ {28, 29},
+ {29, 28},
+ },
+ {
+ {28, 29},
+ {29, 29},
+ },
+ {
+ {29, 29},
+ {29, 29},
+ },
+ {
+ {29, 29},
+ {30, 29},
+ },
+ {
+ {29, 30},
+ {30, 29},
+ },
+ {
+ {29, 30},
+ {30, 30},
+ },
+ {
+ {30, 30},
+ {30, 30},
+ },
+ {
+ {30, 30},
+ {31, 30},
+ },
+ {
+ {30, 31},
+ {31, 30},
+ },
+ {
+ {30, 31},
+ {31, 31},
+ },
+ {
+ {31, 31},
+ {31, 31},
+ },
+ {
+ {31, 31},
+ {32, 31},
+ },
+ {
+ {31, 32},
+ {32, 31},
+ },
+ {
+ {31, 32},
+ {32, 32},
+ },
+ {
+ {32, 32},
+ {32, 32},
+ },
+ {
+ {32, 32},
+ {33, 32},
+ },
+ {
+ {32, 33},
+ {33, 32},
+ },
+ {
+ {32, 33},
+ {33, 33},
+ },
+ {
+ {33, 33},
+ {33, 33},
+ },
+ {
+ {33, 33},
+ {34, 33},
+ },
+ {
+ {33, 34},
+ {34, 33},
+ },
+ {
+ {33, 34},
+ {34, 34},
+ },
+ {
+ {34, 34},
+ {34, 34},
+ },
+ {
+ {34, 34},
+ {35, 34},
+ },
+ {
+ {34, 35},
+ {35, 34},
+ },
+ {
+ {34, 35},
+ {35, 35},
+ },
+ {
+ {35, 35},
+ {35, 35},
+ },
+ {
+ {35, 35},
+ {36, 35},
+ },
+ {
+ {35, 36},
+ {36, 35},
+ },
+ {
+ {35, 36},
+ {36, 35},
+ },
+ {
+ {35, 36},
+ {36, 36},
+ },
+ {
+ {36, 36},
+ {36, 36},
+ },
+ {
+ {36, 36},
+ {37, 36},
+ },
+ {
+ {36, 37},
+ {37, 36},
+ },
+ {
+ {36, 37},
+ {37, 37},
+ },
+ {
+ {37, 37},
+ {37, 37},
+ },
+ {
+ {37, 37},
+ {38, 37},
+ },
+ {
+ {37, 38},
+ {38, 37},
+ },
+ {
+ {37, 38},
+ {38, 38},
+ },
+ {
+ {38, 38},
+ {38, 38},
+ },
+ {
+ {38, 38},
+ {39, 38},
+ },
+ {
+ {38, 39},
+ {39, 38},
+ },
+ {
+ {38, 39},
+ {39, 39},
+ },
+ {
+ {39, 39},
+ {39, 39},
+ },
+ {
+ {39, 39},
+ {40, 39},
+ },
+ {
+ {39, 40},
+ {40, 39},
+ },
+ {
+ {39, 40},
+ {40, 40},
+ },
+ {
+ {40, 40},
+ {40, 40},
+ },
+ {
+ {40, 40},
+ {41, 40},
+ },
+ {
+ {40, 41},
+ {41, 40},
+ },
+ {
+ {40, 41},
+ {41, 41},
+ },
+ {
+ {41, 41},
+ {41, 41},
+ },
+ {
+ {41, 41},
+ {42, 41},
+ },
+ {
+ {41, 42},
+ {42, 41},
+ },
+ {
+ {41, 42},
+ {42, 42},
+ },
+ {
+ {42, 42},
+ {42, 42},
+ },
+ {
+ {42, 42},
+ {43, 42},
+ },
+ {
+ {42, 43},
+ {43, 42},
+ },
+ {
+ {42, 43},
+ {43, 43},
+ },
+ {
+ {43, 43},
+ {43, 43},
+ },
+ {
+ {43, 43},
+ {44, 43},
+ },
+ {
+ {43, 44},
+ {44, 43},
+ },
+ {
+ {43, 44},
+ {44, 44},
+ },
+ {
+ {44, 44},
+ {44, 44},
+ },
+ {
+ {44, 44},
+ {45, 44},
+ },
+ {
+ {44, 45},
+ {45, 44},
+ },
+ {
+ {44, 45},
+ {45, 45},
+ },
+ {
+ {45, 45},
+ {45, 45},
+ },
+ {
+ {45, 45},
+ {46, 45},
+ },
+ {
+ {45, 46},
+ {46, 45},
+ },
+ {
+ {45, 46},
+ {46, 46},
+ },
+ {
+ {46, 46},
+ {46, 46},
+ },
+ {
+ {46, 46},
+ {47, 46},
+ },
+ {
+ {46, 47},
+ {47, 46},
+ },
+ {
+ {46, 47},
+ {47, 47},
+ },
+ {
+ {47, 47},
+ {47, 47},
+ },
+ {
+ {47, 47},
+ {48, 47},
+ },
+ {
+ {47, 48},
+ {48, 47},
+ },
+ {
+ {47, 48},
+ {48, 48},
+ },
+ {
+ {48, 48},
+ {48, 48},
+ },
+ {
+ {48, 48},
+ {49, 48},
+ },
+ {
+ {48, 49},
+ {49, 48},
+ },
+ {
+ {48, 49},
+ {49, 49},
+ },
+ {
+ {49, 49},
+ {49, 49},
+ },
+ {
+ {49, 49},
+ {50, 49},
+ },
+ {
+ {49, 50},
+ {50, 49},
+ },
+ {
+ {49, 50},
+ {50, 50},
+ },
+ {
+ {50, 50},
+ {50, 50},
+ },
+ {
+ {50, 50},
+ {51, 50},
+ },
+ {
+ {50, 51},
+ {51, 50},
+ },
+ {
+ {50, 51},
+ {51, 51},
+ },
+ {
+ {51, 51},
+ {51, 51},
+ },
+ {
+ {51, 51},
+ {52, 51},
+ },
+ {
+ {51, 52},
+ {52, 51},
+ },
+ {
+ {51, 52},
+ {52, 52},
+ },
+ {
+ {52, 52},
+ {52, 52},
+ },
+ {
+ {52, 52},
+ {53, 52},
+ },
+ {
+ {52, 53},
+ {53, 52},
+ },
+ {
+ {52, 53},
+ {53, 53},
+ },
+ {
+ {53, 53},
+ {53, 53},
+ },
+ {
+ {53, 53},
+ {54, 53},
+ },
+ {
+ {53, 54},
+ {54, 53},
+ },
+ {
+ {53, 54},
+ {54, 54},
+ },
+ {
+ {54, 54},
+ {54, 54},
+ },
+ {
+ {54, 54},
+ {55, 54},
+ },
+ {
+ {54, 55},
+ {55, 54},
+ },
+ {
+ {54, 55},
+ {55, 55},
+ },
+ {
+ {55, 55},
+ {55, 55},
+ },
+ {
+ {55, 55},
+ {56, 55},
+ },
+ {
+ {55, 55},
+ {56, 55},
+ },
+ {
+ {55, 56},
+ {56, 55},
+ },
+ {
+ {55, 56},
+ {56, 56},
+ },
+ {
+ {56, 56},
+ {56, 56},
+ },
+ {
+ {56, 56},
+ {57, 56},
+ },
+ {
+ {56, 57},
+ {57, 56},
+ },
+ {
+ {56, 57},
+ {57, 57},
+ },
+ {
+ {57, 57},
+ {57, 57},
+ },
+ {
+ {57, 57},
+ {58, 57},
+ },
+ {
+ {57, 58},
+ {58, 57},
+ },
+ {
+ {57, 58},
+ {58, 58},
+ },
+ {
+ {58, 58},
+ {58, 58},
+ },
+ {
+ {58, 58},
+ {59, 58},
+ },
+ {
+ {58, 59},
+ {59, 58},
+ },
+ {
+ {58, 59},
+ {59, 59},
+ },
+ {
+ {59, 59},
+ {59, 59},
+ },
+ {
+ {59, 59},
+ {60, 59},
+ },
+ {
+ {59, 60},
+ {60, 59},
+ },
+ {
+ {59, 60},
+ {60, 60},
+ },
+ {
+ {60, 60},
+ {60, 60},
+ },
+ {
+ {60, 60},
+ {61, 60},
+ },
+ {
+ {60, 61},
+ {61, 60},
+ },
+ {
+ {60, 61},
+ {61, 61},
+ },
+ {
+ {61, 61},
+ {61, 61},
+ },
+ {
+ {61, 61},
+ {62, 61},
+ },
+ {
+ {61, 62},
+ {62, 61},
+ },
+ {
+ {61, 62},
+ {62, 62},
+ },
+ {
+ {62, 62},
+ {62, 62},
+ },
+ {
+ {62, 62},
+ {63, 62},
+ },
+ {
+ {62, 63},
+ {63, 62},
+ },
+ {
+ {62, 63},
+ {63, 63},
+ },
+ {
+ {63, 63},
+ {63, 63},
+ },
+};
+
--- /dev/null
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_dither.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+#include "vid_voodoo_fb.h"
+
+/*
+ * 16-bit read from the linear frame buffer.
+ *
+ * addr is split into a byte offset within the line (x, bit 0 cleared) and a
+ * 10-bit line number (y). VOODOO_BANSHEE and later types take y from address
+ * bit 12 upwards, earlier types from bit 11 upwards.
+ * With SLI enabled, even lines are read from set->voodoos[0] and odd lines
+ * from set->voodoos[1], and y is halved.
+ * p is the voodoo_t instance. Returns 0xffff when the computed address lies
+ * above fb_mask.
+ */
+uint16_t voodoo_fb_readw(uint32_t addr, void *p)
+{
+        voodoo_t *voodoo = (voodoo_t *)p;
+        const int banshee_layout = (voodoo->type >= VOODOO_BANSHEE);
+        int x = addr & (banshee_layout ? 0xffe : 0x7fe);
+        int y = (addr >> (banshee_layout ? 12 : 11)) & 0x3ff;
+        uint32_t read_addr;
+
+        if (SLI_ENABLED)
+        {
+                /* Line parity selects the board, the rest is that board's line */
+                voodoo = voodoo->set->voodoos[y & 1];
+                y >>= 1;
+        }
+
+        read_addr = voodoo->fb_read_offset;
+        if (voodoo->col_tiled)
+                read_addr += (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width;
+        else
+                read_addr += x + (y * voodoo->row_width);
+
+        if (read_addr > voodoo->fb_mask)
+                return 0xffff;
+
+        return *(uint16_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]);
+}
+/*
+ * 32-bit read from the linear frame buffer.
+ *
+ * Address decoding matches the 16-bit read: x is the byte offset within the
+ * line (bit 0 cleared), y a 10-bit line number taken from address bit 12
+ * upwards on VOODOO_BANSHEE and later types and from bit 11 upwards otherwise.
+ * With SLI enabled, even lines are read from set->voodoos[0] and odd lines
+ * from set->voodoos[1], and y is halved.
+ * p is the voodoo_t instance. Returns 0xffffffff when the computed address
+ * lies above fb_mask.
+ *
+ * NOTE(review): only the start address is checked against fb_mask, so a
+ * 4-byte load starting within the last three bytes extends past fb_mask -
+ * confirm fb_mem is allocated with slack.
+ */
+uint32_t voodoo_fb_readl(uint32_t addr, void *p)
+{
+        voodoo_t *voodoo = (voodoo_t *)p;
+        const int banshee_layout = (voodoo->type >= VOODOO_BANSHEE);
+        int x = addr & (banshee_layout ? 0xffe : 0x7fe);
+        int y = (addr >> (banshee_layout ? 12 : 11)) & 0x3ff;
+        uint32_t read_addr;
+
+        if (SLI_ENABLED)
+        {
+                /* Line parity selects the board, the rest is that board's line */
+                voodoo = voodoo->set->voodoos[y & 1];
+                y >>= 1;
+        }
+
+        read_addr = voodoo->fb_read_offset;
+        if (voodoo->col_tiled)
+                read_addr += (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width;
+        else
+                read_addr += x + (y * voodoo->row_width);
+
+        if (read_addr > voodoo->fb_mask)
+                return 0xffffffff;
+
+        return *(uint32_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]);
+}
+
+/*
+ * Pack an 8-bit-per-channel colour into a 16-bit pixel: blue in bits 0-4,
+ * green in bits 5-10, red in bits 11-15.
+ *
+ * params - not referenced by name in the body; `dither` and `dither2x2` are
+ *          presumably macros that test it (TODO confirm against the headers).
+ * col    - source colour.
+ * x, y   - pixel position, used to pick the cell of the dither table.
+ *
+ * With dithering off the channels are simply truncated (>> 3, >> 2, >> 3);
+ * otherwise they are looked up in the 2x2 or 4x4 tables.
+ */
+static inline uint16_t do_dither(voodoo_params_t *params, rgba8_t col, int x, int y)
+{
+        int red, green, blue;
+
+        if (dither)
+        {
+                if (dither2x2)
+                {
+                        const int row = y & 1;
+                        const int column = x & 1;
+
+                        red   = dither_rb2x2[col.r][row][column];
+                        green = dither_g2x2[col.g][row][column];
+                        blue  = dither_rb2x2[col.b][row][column];
+                }
+                else
+                {
+                        const int row = y & 3;
+                        const int column = x & 3;
+
+                        red   = dither_rb[col.r][row][column];
+                        green = dither_g[col.g][row][column];
+                        blue  = dither_rb[col.b][row][column];
+                }
+        }
+        else
+        {
+                red   = col.r >> 3;
+                green = col.g >> 2;
+                blue  = col.b >> 3;
+        }
+
+        return (red << 11) | (green << 5) | blue;
+}
+/*
+ * 16-bit write to the linear frame buffer (LFB).
+ *
+ * addr - bus address; decoded below into a byte offset within the line (x)
+ *        and a line number (y).
+ * val  - 16-bit value, interpreted according to the format selected in
+ *        lfbMode (RGB565, RGB555, ARGB1555 or depth); any other format
+ *        calls fatal().
+ * p    - the voodoo_t instance, passed as void *.
+ *
+ * When lfbMode bit 8 (0x100) is set the pixel goes through depth test, chroma
+ * key, fog, alpha test and alpha blend before being stored, subject to the
+ * fbzMode write masks; otherwise the decoded colour or depth is stored as is.
+ */
+void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p)
+{
+ voodoo_t *voodoo = (voodoo_t *)p;
+ voodoo_params_t *params = &voodoo->params;
+ int x, y;
+ uint32_t write_addr, write_addr_aux;
+ rgba8_t colour_data;
+ uint16_t depth_data;
+ uint8_t alpha_data;
+ int write_mask = 0;
+
+ colour_data.r = colour_data.g = colour_data.b = colour_data.a = 0;
+ /* Depth and alpha default to the zaColor register (low 16 bits / top 8 bits) */
+ depth_data = voodoo->params.zaColor & 0xffff;
+ alpha_data = voodoo->params.zaColor >> 24;
+
+// while (!RB_EMPTY)
+// thread_reset_event(voodoo->not_full_event);
+
+// pclog("voodoo_fb_writew : %08X %04X\n", addr, val);
+
+ /* Decode val according to the LFB write format; write_mask records which component it supplied */
+ switch (voodoo->lfbMode & LFB_FORMAT_MASK)
+ {
+ case LFB_FORMAT_RGB565:
+ colour_data = rgb565[val];
+ alpha_data = 0xff;
+ write_mask = LFB_WRITE_COLOUR;
+ break;
+ case LFB_FORMAT_RGB555:
+ colour_data = argb1555[val]; // same lookup as ARGB1555, but alpha is forced to 0xff below
+ alpha_data = 0xff;
+ write_mask = LFB_WRITE_COLOUR;
+ break;
+ case LFB_FORMAT_ARGB1555:
+ colour_data = argb1555[val];
+ alpha_data = colour_data.a;
+ write_mask = LFB_WRITE_COLOUR;
+ break;
+ case LFB_FORMAT_DEPTH:
+ depth_data = val;
+ write_mask = LFB_WRITE_DEPTH;
+ break;
+
+ default:
+ fatal("voodoo_fb_writew : bad LFB format %08X\n", voodoo->lfbMode);
+ }
+ /* x = byte offset within the line (bit 0 cleared), y = 10-bit line number */
+ if (voodoo->type >= VOODOO_BANSHEE)
+ {
+ x = addr & 0xffe;
+ y = (addr >> 12) & 0x3ff;
+ }
+ else
+ {
+ x = addr & 0x7fe;
+ y = (addr >> 11) & 0x3ff;
+ }
+ /* SLI: drop odd lines when INITENABLE_SLI_MASTER_SLAVE is clear, even lines when it is set; halve y for the rest */
+ if (SLI_ENABLED)
+ {
+ if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (y & 1)) ||
+ ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(y & 1)))
+ return;
+ y >>= 1;
+ }
+
+ /* Mark the line dirty when the write offset equals the front buffer offset */
+ if (voodoo->fb_write_offset == voodoo->params.front_offset && y < 2048)
+ voodoo->dirty_line[y] = 1;
+ /* Byte addresses of the colour word and the aux (depth) word; the tiled form uses 128-byte by 32-line tiles */
+ if (voodoo->col_tiled)
+ write_addr = voodoo->fb_write_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width;
+ else
+ write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width);
+ if (voodoo->aux_tiled)
+ write_addr_aux = voodoo->params.aux_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width;
+ else
+ write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width);
+
+// pclog("fb_writew %08x %i %i %i %08x\n", addr, x, y, voodoo->row_width, write_addr);
+ /* lfbMode bit 8 set: run the pixel through the stages below before storing it */
+ if (voodoo->lfbMode & 0x100)
+ {
+ {
+ rgba8_t write_data = colour_data;
+ uint16_t new_depth = depth_data;
+
+ if (params->fbzMode & FBZ_DEPTH_ENABLE)
+ {
+ uint16_t old_depth = *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]);
+
+ DEPTH_TEST(new_depth); // macro defined elsewhere; presumably compares with old_depth and jumps to skip_pixel on failure - TODO confirm
+ }
+
+ if ((params->fbzMode & FBZ_CHROMAKEY) &&
+ write_data.r == params->chromaKey_r &&
+ write_data.g == params->chromaKey_g &&
+ write_data.b == params->chromaKey_b)
+ goto skip_pixel; // colour equals the chroma key: nothing is written
+
+ if (params->fogMode & FOG_ENABLE)
+ {
+ int32_t z = new_depth << 12;
+ int64_t w_depth = (int64_t)(int32_t)new_depth;
+ int32_t ia = alpha_data << 12;
+
+ APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth); // macro defined elsewhere
+ }
+
+ if (params->alphaMode & 1)
+ ALPHA_TEST(alpha_data); // macro defined elsewhere; presumably jumps to skip_pixel on failure - TODO confirm
+
+ if (params->alphaMode & (1 << 4))
+ {
+ uint16_t dat = *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]); // current destination pixel, unpacked below as RGB565
+ int dest_r, dest_g, dest_b, dest_a;
+
+ dest_r = (dat >> 8) & 0xf8;
+ dest_g = (dat >> 3) & 0xfc;
+ dest_b = (dat << 3) & 0xf8;
+ dest_r |= (dest_r >> 5); // copy the top bits into the low bits to fill the 8-bit range
+ dest_g |= (dest_g >> 6);
+ dest_b |= (dest_b >> 5);
+ dest_a = 0xff;
+
+ ALPHA_BLEND(write_data.r, write_data.g, write_data.b, alpha_data); // macro defined elsewhere; presumably reads dest_r/g/b/a - TODO confirm
+ }
+
+ if (params->fbzMode & FBZ_RGB_WMASK)
+ *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, write_data, x >> 1, y); // x is a byte offset and pixels are 16 bits wide, so x >> 1 is the pixel column
+ if (params->fbzMode & FBZ_DEPTH_WMASK)
+ *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = new_depth;
+
+skip_pixel:
+ x = x; // no-op so that the label is followed by a statement
+ }
+ }
+ else
+ {
+ if (write_mask & LFB_WRITE_COLOUR) // direct store: only the component supplied by the format is written
+ *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, colour_data, x >> 1, y);
+ if (write_mask & LFB_WRITE_DEPTH)
+ *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = depth_data;
+ }
+}
+
+/* Handle a 32-bit write to the linear frame buffer (LFB).
+ *
+ * addr - offset inside the LFB; the pixel x/y position is decoded from it.
+ * val  - data written; the lfbMode format field selects whether it carries
+ *        two 16-bit colour or depth values, or a single ARGB8888 pixel.
+ * p    - the voodoo_t instance.
+ *
+ * When lfbMode bit 8 (0x100) is set every pixel passes through depth test,
+ * chroma key, fog, alpha test and alpha blend before being stored; otherwise
+ * the decoded colour and/or depth is stored directly. */
+void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p)
+{
+ voodoo_t *voodoo = (voodoo_t *)p;
+ voodoo_params_t *params = &voodoo->params;
+ int x, y;
+ uint32_t write_addr, write_addr_aux;
+ rgba8_t colour_data[2];
+ uint16_t depth_data[2];
+ uint8_t alpha_data[2];
+ int write_mask = 0, count = 1; /* count = number of pixels carried by val */
+
+ depth_data[0] = depth_data[1] = voodoo->params.zaColor & 0xffff; /* default depth: low 16 bits of zaColor */
+ alpha_data[0] = alpha_data[1] = voodoo->params.zaColor >> 24; /* default alpha: top byte of zaColor */
+// while (!RB_EMPTY)
+// thread_reset_event(voodoo->not_full_event);
+
+// pclog("voodoo_fb_writel : %08X %08X\n", addr, val);
+
+ switch (voodoo->lfbMode & LFB_FORMAT_MASK) /* decode val according to the LFB write format */
+ {
+ case LFB_FORMAT_RGB565:
+ colour_data[0] = rgb565[val & 0xffff]; /* low half-word is the first pixel */
+ colour_data[1] = rgb565[val >> 16];
+ write_mask = LFB_WRITE_COLOUR;
+ count = 2;
+ break;
+ case LFB_FORMAT_RGB555: /* same lookup table as ARGB1555, but alpha stays at the zaColor default */
+ colour_data[0] = argb1555[val & 0xffff];
+ colour_data[1] = argb1555[val >> 16];
+ write_mask = LFB_WRITE_COLOUR;
+ count = 2;
+ break;
+ case LFB_FORMAT_ARGB1555:
+ colour_data[0] = argb1555[val & 0xffff];
+ alpha_data[0] = colour_data[0].a;
+ colour_data[1] = argb1555[val >> 16];
+ alpha_data[1] = colour_data[1].a;
+ write_mask = LFB_WRITE_COLOUR;
+ count = 2;
+ break;
+
+ case LFB_FORMAT_ARGB8888: /* one pixel per write; count stays 1 so colour_data[1] is never read */
+ colour_data[0].b = val & 0xff;
+ colour_data[0].g = (val >> 8) & 0xff;
+ colour_data[0].r = (val >> 16) & 0xff;
+ alpha_data[0] = (val >> 24) & 0xff;
+ write_mask = LFB_WRITE_COLOUR;
+ addr >>= 1; /* presumably rescales the 4-bytes-per-pixel address to the 2-bytes-per-pixel decode below - TODO confirm */
+ break;
+
+ case LFB_FORMAT_DEPTH: /* two 16-bit depth values, no colour */
+ depth_data[0] = val;
+ depth_data[1] = val >> 16;
+ write_mask = LFB_WRITE_DEPTH;
+ count = 2;
+ break;
+
+ default:
+ fatal("voodoo_fb_writel : bad LFB format %08X\n", voodoo->lfbMode);
+ }
+
+ if (voodoo->type >= VOODOO_BANSHEE) /* Banshee and later: 12 address bits per line */
+ {
+ x = addr & 0xffe;
+ y = (addr >> 12) & 0x3ff;
+ }
+ else /* older cards: 11 address bits per line */
+ {
+ x = addr & 0x7fe;
+ y = (addr >> 11) & 0x3ff;
+ }
+
+ if (SLI_ENABLED) /* each card only stores every other line; which parity depends on INITENABLE_SLI_MASTER_SLAVE */
+ {
+ if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (y & 1)) ||
+ ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(y & 1)))
+ return;
+ y >>= 1; /* convert the screen line to this card's own line index */
+ }
+
+ if (voodoo->fb_write_offset == voodoo->params.front_offset && y < 2048) /* writing the front buffer: flag the line as dirty */
+ voodoo->dirty_line[y] = 1;
+
+ if (voodoo->col_tiled) /* tiled layout: tiles are 128 bytes wide and 32 lines tall */
+ write_addr = voodoo->fb_write_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width;
+ else /* linear layout: x is used as a byte offset within the line */
+ write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width);
+ if (voodoo->aux_tiled) /* same address computation for the aux buffer, which receives the depth values */
+ write_addr_aux = voodoo->params.aux_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width;
+ else
+ write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width);
+
+// pclog("fb_writel %08x x=%i y=%i rw=%i %08x wo=%08x\n", addr, x, y, voodoo->row_width, write_addr, voodoo->fb_write_offset);
+
+ if (voodoo->lfbMode & 0x100) /* bit 8 set: route the write through the pixel pipeline */
+ {
+ int c;
+
+ for (c = 0; c < count; c++)
+ {
+ rgba8_t write_data = colour_data[c];
+ uint16_t new_depth = depth_data[c];
+
+ if (params->fbzMode & FBZ_DEPTH_ENABLE)
+ {
+ uint16_t old_depth = *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]); /* not referenced by name below; presumably read inside DEPTH_TEST */
+
+ DEPTH_TEST(new_depth); /* macro defined elsewhere; presumably jumps to skip_pixel on failure - TODO confirm */
+ }
+
+ if ((params->fbzMode & FBZ_CHROMAKEY) && /* drop pixels whose colour equals the chroma key */
+ write_data.r == params->chromaKey_r &&
+ write_data.g == params->chromaKey_g &&
+ write_data.b == params->chromaKey_b)
+ goto skip_pixel;
+
+ if (params->fogMode & FOG_ENABLE)
+ {
+ int32_t z = new_depth << 12;
+ int64_t w_depth = new_depth;
+ int32_t ia = alpha_data[c] << 12;
+
+ APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth);
+ }
+
+ if (params->alphaMode & 1) /* alphaMode bit 0 enables the alpha test */
+ ALPHA_TEST(alpha_data[c]);
+
+ if (params->alphaMode & (1 << 4)) /* alphaMode bit 4 enables alpha blending */
+ {
+ uint16_t dat = *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]); /* current destination pixel, RGB565 */
+ int dest_r, dest_g, dest_b, dest_a; /* not passed explicitly; presumably picked up by ALPHA_BLEND by name */
+
+ dest_r = (dat >> 8) & 0xf8;
+ dest_g = (dat >> 3) & 0xfc;
+ dest_b = (dat << 3) & 0xf8;
+ dest_r |= (dest_r >> 5); /* replicate the top bits to widen 5/6-bit channels to 8 bits */
+ dest_g |= (dest_g >> 6);
+ dest_b |= (dest_b >> 5);
+ dest_a = 0xff; /* the 16-bit destination carries no alpha */
+
+ ALPHA_BLEND(write_data.r, write_data.g, write_data.b, alpha_data[c]);
+ }
+
+ if (params->fbzMode & FBZ_RGB_WMASK) /* colour write enabled; x >> 1 turns the byte offset into a pixel column for the dither */
+ *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, write_data, (x >> 1) + c, y);
+ if (params->fbzMode & FBZ_DEPTH_WMASK) /* depth write enabled */
+ *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = new_depth;
+
+skip_pixel:
+ write_addr += 2; /* advance to the next 16-bit pixel even when this one was rejected */
+ write_addr_aux += 2;
+ }
+ }
+ else /* direct write: only the plane selected by the format is touched */
+ {
+ int c;
+
+ for (c = 0; c < count; c++)
+ {
+ if (write_mask & LFB_WRITE_COLOUR)
+ *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, colour_data[c], (x >> 1) + c, y);
+ if (write_mask & LFB_WRITE_DEPTH)
+ *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = depth_data[c];
+
+ write_addr += 2;
+ write_addr_aux += 2;
+ }
+ }
+}
--- /dev/null
+uint16_t voodoo_fb_readw(uint32_t addr, void *p); /* 16-bit frame buffer read; p is presumably the voodoo_t instance, as in the write handlers */
+uint32_t voodoo_fb_readl(uint32_t addr, void *p); /* 32-bit frame buffer read; p is presumably the voodoo_t instance, as in the write handlers */
+void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p); /* 16-bit frame buffer write: val is one colour or depth value, decoded per lfbMode */
+void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p); /* 32-bit frame buffer write: val is two 16-bit values or one ARGB8888 pixel, per lfbMode; p is the voodoo_t instance */
--- /dev/null
+#include <math.h>
+#include <stddef.h>
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_banshee_blitter.h"
+#include "vid_voodoo_fb.h"
+#include "vid_voodoo_fifo.h"
+#include "vid_voodoo_reg.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+#include "vid_voodoo_texture.h"
+
+/*Delay applied before the FIFO thread is actually woken (100 * TIMER_USEC)*/
+#define WAKE_DELAY (TIMER_USEC * 100)
+
+/*Schedule a deferred wake-up of the FIFO thread by arming voodoo->wake_timer.
+  Does nothing if the timer is already enabled.*/
+void voodoo_wake_fifo_thread(voodoo_t *voodoo)
+{
+        /*A wake-up is already pending, leave it alone*/
+        if (timer_is_enabled(&voodoo->wake_timer))
+                return;
+
+        /*Don't wake FIFO thread immediately - if we do that it will probably
+          process one word and go back to sleep, requiring it to be woken on
+          almost every write. Instead, wait a short while so that the CPU
+          emulation writes more data so we have more batched-up work.*/
+        timer_set_delay_u64(&voodoo->wake_timer, WAKE_DELAY);
+}
+
+/*Signal the wake_fifo_thread event right away, without going through the
+  wake timer.*/
+void voodoo_wake_fifo_thread_now(voodoo_t *voodoo)
+{
+        /*Wake up FIFO thread if moving from idle*/
+        thread_set_event(voodoo->wake_fifo_thread);
+}
+
+/*Callback taking the voodoo_t instance as an opaque pointer (presumably the
+  wake_timer handler - confirm at the registration site). Sets the
+  wake_fifo_thread event, exactly like voodoo_wake_fifo_thread_now().*/
+void voodoo_wake_timer(void *p)
+{
+        voodoo_wake_fifo_thread_now((voodoo_t *)p);
+}
+
+/*Append one entry (addr_type, val) to the FIFO ring buffer.
+  Blocks while the ring is full, waiting on fifo_not_full_event with a
+  timeout of 1 and waking the FIFO thread directly if the ring is still full
+  afterwards. Once the ring holds more than 0xe000 entries a deferred
+  wake-up of the FIFO thread is requested.*/
+void voodoo_queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val)
+{
+        fifo_entry_t *slot = &voodoo->fifo[voodoo->fifo_write_idx & FIFO_MASK];
+
+        while (FIFO_FULL)
+        {
+                thread_reset_event(voodoo->fifo_not_full_event);
+
+                /*Room appeared between the two checks - re-test the loop condition*/
+                if (!(FIFO_FULL))
+                        continue;
+
+                thread_wait_event(voodoo->fifo_not_full_event, 1); /*Wait for room in ringbuffer*/
+                if (FIFO_FULL)
+                        voodoo_wake_fifo_thread_now(voodoo);
+        }
+
+        slot->val = val;
+        slot->addr_type = addr_type;
+
+        /*Publish the entry*/
+        voodoo->fifo_write_idx++;
+
+        if (FIFO_ENTRIES > 0xe000)
+                voodoo_wake_fifo_thread(voodoo);
+}
+
+/*Block until the FIFO ring is empty and the render thread is idle.
+  voodoo->flush is held at 1 for the duration of the wait.*/
+void voodoo_flush(voodoo_t *voodoo)
+{
+        voodoo->flush = 1;
+
+        for (;;)
+        {
+                if (FIFO_EMPTY)
+                        break;
+
+                /*Keep prodding the FIFO thread until it has drained the ring*/
+                voodoo_wake_fifo_thread_now(voodoo);
+                thread_wait_event(voodoo->fifo_not_full_event, 1);
+        }
+
+        voodoo_wait_for_render_thread_idle(voodoo);
+        voodoo->flush = 0;
+}
+
+/*Request a deferred wake-up of this card's FIFO thread. When SLI is
+  enabled, the card is not a VOODOO_2 and it is the first card of the set,
+  the second card's FIFO thread is woken as well.*/
+void voodoo_wake_fifo_threads(voodoo_set_t *set, voodoo_t *voodoo)
+{
+        voodoo_wake_fifo_thread(voodoo);
+
+        if (!(SLI_ENABLED))
+                return;
+        if (voodoo->type == VOODOO_2)
+                return;
+        if (set->voodoos[0] != voodoo)
+                return;
+
+        voodoo_wake_fifo_thread(set->voodoos[1]);
+}
+
+/*Sleep on wake_fifo_thread until the pending buffer swap has completed.
+  If, after a wake-up, a flush is in progress with the swap still pending,
+  or the FIFO ring is full, the swap is performed here immediately instead.*/
+void voodoo_wait_for_swap_complete(voodoo_t *voodoo)
+{
+        while (voodoo->swap_pending)
+        {
+                int swap_now;
+
+                thread_wait_event(voodoo->wake_fifo_thread, -1);
+                thread_reset_event(voodoo->wake_fifo_thread);
+
+                thread_lock_mutex(voodoo->swap_mutex);
+                swap_now = (voodoo->swap_pending && voodoo->flush) || (FIFO_FULL);
+                if (swap_now)
+                {
+                        /*Main thread is waiting for FIFO to empty, so skip vsync wait and just swap*/
+                        memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line));
+                        voodoo->front_offset = voodoo->params.front_offset;
+                        if (voodoo->swap_count > 0)
+                                voodoo->swap_count--;
+                        voodoo->swap_pending = 0;
+                }
+                thread_unlock_mutex(voodoo->swap_mutex);
+
+                if (swap_now)
+                        break;
+        }
+}
+
+
+/*Fetch the next 32-bit word of the CMDFIFO from frame buffer memory and
+  advance the read pointer by 4.
+  Outside a subroutine (cmdfifo_in_sub == 0) the call first blocks on
+  wake_fifo_thread until the read and write depth counters differ, and then
+  increments the read depth counter. Inside a subroutine neither the wait
+  nor the depth accounting takes place.*/
+static uint32_t cmdfifo_get(voodoo_t *voodoo)
+{
+        uint32_t word;
+
+        if (!voodoo->cmdfifo_in_sub)
+        {
+                for (;;)
+                {
+                        if (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr)
+                                break;
+
+                        thread_wait_event(voodoo->wake_fifo_thread, -1);
+                        thread_reset_event(voodoo->wake_fifo_thread);
+                }
+        }
+
+        word = *(uint32_t *)&voodoo->fb_mem[voodoo->cmdfifo_rp & voodoo->fb_mask];
+
+        if (!voodoo->cmdfifo_in_sub)
+                voodoo->cmdfifo_depth_rd++;
+        voodoo->cmdfifo_rp += 4;
+
+//        pclog("  CMDFIFO get %08x\n", word);
+        return word;
+}
+
+/*Fetch the next CMDFIFO word and return its bit pattern reinterpreted as a
+  float (via a union, no numeric conversion).*/
+static inline float cmdfifo_get_f(voodoo_t *voodoo)
+{
+        union
+        {
+                uint32_t bits;
+                float value;
+        } pun;
+
+        pun.bits = cmdfifo_get(voodoo);
+        return pun.value;
+}
+/* Bits of a CMDFIFO packet type 3 header, as tested by voodoo_fifo_thread().
+ * Each MASK bit says which optional per-vertex words follow the two
+ * mandatory x/y floats. */
+enum
+{
+ CMDFIFO3_PC_MASK_RGB = (1 << 10), /* colour present: one packed word if CMDFIFO3_PC is set, else three floats R, G, B */
+ CMDFIFO3_PC_MASK_ALPHA = (1 << 11), /* float alpha present; only read when CMDFIFO3_PC is clear */
+ CMDFIFO3_PC_MASK_Z = (1 << 12), /* float sVz present */
+ CMDFIFO3_PC_MASK_Wb = (1 << 13), /* float sWb present */
+ CMDFIFO3_PC_MASK_W0 = (1 << 14), /* float sW0 present */
+ CMDFIFO3_PC_MASK_S0_T0 = (1 << 15), /* floats sS0 and sT0 present */
+ CMDFIFO3_PC_MASK_W1 = (1 << 16), /* float sW1 present */
+ CMDFIFO3_PC_MASK_S1_T1 = (1 << 17), /* floats sS1 and sT1 present */
+
+ CMDFIFO3_PC = (1 << 28) /* colour is a single packed word: blue, green, red, alpha from the low byte up */
+};
+/* Worker thread that consumes queued Voodoo writes; param is the voodoo_t
+ * instance. The loop never exits. Each iteration signals
+ * fifo_not_full_event, sleeps until wake_fifo_thread is set, then
+ *   1. drains the ring buffer filled by voodoo_queue_command(), dispatching
+ *      every entry according to its type bits, and
+ *   2. executes CMDFIFO packets fetched from frame buffer memory for as long
+ *      as the CMDFIFO is enabled and has unread words, or a JSR subroutine
+ *      is active.
+ * voodoo_busy is 1 while work is being processed and the time spent on each
+ * batch or packet is added to voodoo->time. */
+void voodoo_fifo_thread(void *param)
+{
+ voodoo_t *voodoo = (voodoo_t *)param;
+
+ while (1)
+ {
+ thread_set_event(voodoo->fifo_not_full_event); /* about to go idle: release anyone waiting on this event */
+ thread_wait_event(voodoo->wake_fifo_thread, -1);
+ thread_reset_event(voodoo->wake_fifo_thread);
+ voodoo->voodoo_busy = 1;
+ while (!FIFO_EMPTY) /* stage 1: ring buffer of individual writes */
+ {
+ uint64_t start_time = timer_read();
+ uint64_t end_time;
+ fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK];
+
+ switch (fifo->addr_type & FIFO_TYPE) /* each case consumes a whole run of consecutive entries of the same type */
+ {
+ case FIFO_WRITEL_REG:
+ while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_REG)
+ {
+ voodoo_reg_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo);
+ fifo->addr_type = FIFO_INVALID; /* mark the slot as consumed */
+ voodoo->fifo_read_idx++;
+ if (FIFO_EMPTY)
+ break;
+ fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK];
+ }
+ break;
+ case FIFO_WRITEW_FB:
+ voodoo_wait_for_render_thread_idle(voodoo); /* frame buffer writes wait for the render thread to go idle first */
+ while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEW_FB)
+ {
+ voodoo_fb_writew(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo);
+ fifo->addr_type = FIFO_INVALID;
+ voodoo->fifo_read_idx++;
+ if (FIFO_EMPTY)
+ break;
+ fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK];
+ }
+ break;
+ case FIFO_WRITEL_FB:
+ voodoo_wait_for_render_thread_idle(voodoo);
+ while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_FB)
+ {
+ voodoo_fb_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo);
+ fifo->addr_type = FIFO_INVALID;
+ voodoo->fifo_read_idx++;
+ if (FIFO_EMPTY)
+ break;
+ fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK];
+ }
+ break;
+ case FIFO_WRITEL_TEX:
+ while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_TEX)
+ {
+ if (!(fifo->addr_type & 0x400000)) /* entries with bit 22 set are consumed but not written */
+ voodoo_tex_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo);
+ fifo->addr_type = FIFO_INVALID;
+ voodoo->fifo_read_idx++;
+ if (FIFO_EMPTY)
+ break;
+ fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK];
+ }
+ break;
+ case FIFO_WRITEL_2DREG:
+ while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_2DREG)
+ {
+ voodoo_2d_reg_writel(voodoo, fifo->addr_type & FIFO_ADDR, fifo->val);
+ fifo->addr_type = FIFO_INVALID;
+ voodoo->fifo_read_idx++;
+ if (FIFO_EMPTY)
+ break;
+ fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK];
+ }
+ break;
+
+ default:
+ fatal("Unknown fifo entry %08x\n", fifo->addr_type);
+ }
+
+ if (FIFO_ENTRIES > 0xe000) /* ring still holds more than 0xe000 entries: signal the not-full event after this batch */
+ thread_set_event(voodoo->fifo_not_full_event);
+
+ end_time = timer_read();
+ voodoo->time += end_time - start_time;
+ }
+
+ while (voodoo->cmdfifo_enabled && (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr || voodoo->cmdfifo_in_sub)) /* stage 2: CMDFIFO packets */
+ {
+ uint64_t start_time = timer_read();
+ uint64_t end_time;
+ uint32_t header = cmdfifo_get(voodoo);
+ uint32_t addr;
+ uint32_t mask;
+ int smode;
+ int num;
+ int num_verticies;
+ int v_num;
+
+// pclog(" CMDFIFO header %08x at %08x\n", header, voodoo->cmdfifo_rp);
+
+ switch (header & 7) /* low three header bits select the packet type */
+ {
+ case 0: /* packet 0: control flow, function in header bits 3-5 */
+// pclog("CMDFIFO0\n");
+ switch ((header >> 3) & 7)
+ {
+ case 0: /*NOP*/
+ break;
+
+ case 1: /*JSR*/
+// pclog("JSR %08x\n", (header >> 4) & 0xfffffc);
+ voodoo->cmdfifo_ret_addr = voodoo->cmdfifo_rp; /* single return slot, so a nested JSR would overwrite it */
+ voodoo->cmdfifo_rp = (header >> 4) & 0xfffffc;
+ voodoo->cmdfifo_in_sub = 1;
+ break;
+
+ case 2: /*RET*/
+ voodoo->cmdfifo_rp = voodoo->cmdfifo_ret_addr;
+ voodoo->cmdfifo_in_sub = 0;
+ break;
+
+ case 3: /*JMP local frame buffer*/
+ voodoo->cmdfifo_rp = (header >> 4) & 0xfffffc;
+// pclog("JMP to %08x %04x\n", voodoo->cmdfifo_rp, header);
+ break;
+
+ default:
+ fatal("Bad CMDFIFO0 %08x\n", header);
+ }
+ break;
+
+ case 1: /* packet 1: num data words written to registers starting at addr */
+ num = header >> 16;
+ addr = (header & 0x7ff8) >> 1;
+// pclog("CMDFIFO1 addr=%08x\n",addr);
+ while (num--)
+ {
+ uint32_t val = cmdfifo_get(voodoo);
+ if ((addr & (1 << 13)) && voodoo->type >= VOODOO_BANSHEE) /* address bit 13 on Banshee and later selects the 2D register block */
+ {
+// if (voodoo->type != VOODOO_BANSHEE)
+// fatal("CMDFIFO1: Not Banshee\n");
+// pclog("CMDFIFO1: write %08x %08x\n", addr, val);
+ voodoo_2d_reg_writel(voodoo, addr, val);
+ }
+ else
+ {
+ if ((addr & 0x3ff) == SST_triangleCMD || (addr & 0x3ff) == SST_ftriangleCMD || /* count the command registers written through the CMDFIFO */
+ (addr & 0x3ff) == SST_fastfillCMD || (addr & 0x3ff) == SST_nopCMD)
+ voodoo->cmd_written_fifo++;
+
+ if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) /* swapbufferCMD is only counted on Banshee and later */
+ voodoo->cmd_written_fifo++;
+ voodoo_reg_writel(addr, val, voodoo);
+ }
+
+ if (header & (1 << 15)) /* header bit 15: step to the next register after every word */
+ addr += 4;
+ }
+ break;
+
+ case 2: /* packet 2: 2D registers selected by a bit mask, starting at register offset 8 */
+ if (voodoo->type < VOODOO_BANSHEE)
+ fatal("CMDFIFO2: Not Banshee\n");
+ mask = (header >> 3);
+ addr = 8;
+ while (mask)
+ {
+ if (mask & 1) /* a data word is present only for registers whose mask bit is set */
+ {
+ uint32_t val = cmdfifo_get(voodoo);
+
+ voodoo_2d_reg_writel(voodoo, addr, val);
+ }
+
+ addr += 4;
+ mask >>= 1;
+ }
+ break;
+
+ case 3: /* packet 3: vertex data fed to the triangle setup registers */
+ num = (header >> 29) & 7; /* decoded but not used by this packet type */
+ mask = header;//(header >> 10) & 0xff;
+ smode = (header >> 22) & 0xf;
+ voodoo_reg_writel(SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16), voodoo);
+ num_verticies = (header >> 6) & 0xf;
+ v_num = 0;
+ if (((header >> 3) & 7) == 2) /* command 2 skips the begin step: every vertex issues sDrawTriCMD */
+ v_num = 1;
+// pclog("CMDFIFO3: num=%i verts=%i mask=%02x\n", num, num_verticies, (header >> 10) & 0xff);
+// pclog("CMDFIFO3 %02x %i\n", (header >> 10), (header >> 3) & 7);
+
+ while (num_verticies--)
+ {
+ voodoo->verts[3].sVx = cmdfifo_get_f(voodoo); /* x and y are always present; the rest depends on the mask bits */
+ voodoo->verts[3].sVy = cmdfifo_get_f(voodoo);
+ if (mask & CMDFIFO3_PC_MASK_RGB)
+ {
+ if (header & CMDFIFO3_PC) /* packed colour: one word holding B, G, R, A bytes */
+ {
+ uint32_t val = cmdfifo_get(voodoo);
+ voodoo->verts[3].sBlue = (float)(val & 0xff);
+ voodoo->verts[3].sGreen = (float)((val >> 8) & 0xff);
+ voodoo->verts[3].sRed = (float)((val >> 16) & 0xff);
+ voodoo->verts[3].sAlpha = (float)((val >> 24) & 0xff);
+ }
+ else /* unpacked colour: three floats */
+ {
+ voodoo->verts[3].sRed = cmdfifo_get_f(voodoo);
+ voodoo->verts[3].sGreen = cmdfifo_get_f(voodoo);
+ voodoo->verts[3].sBlue = cmdfifo_get_f(voodoo);
+ }
+ }
+ if ((mask & CMDFIFO3_PC_MASK_ALPHA) && !(header & CMDFIFO3_PC)) /* a separate alpha float is read only when colour is not packed */
+ voodoo->verts[3].sAlpha = cmdfifo_get_f(voodoo);
+ if (mask & CMDFIFO3_PC_MASK_Z)
+ voodoo->verts[3].sVz = cmdfifo_get_f(voodoo);
+ if (mask & CMDFIFO3_PC_MASK_Wb)
+ voodoo->verts[3].sWb = cmdfifo_get_f(voodoo);
+ if (mask & CMDFIFO3_PC_MASK_W0)
+ voodoo->verts[3].sW0 = cmdfifo_get_f(voodoo);
+ if (mask & CMDFIFO3_PC_MASK_S0_T0)
+ {
+ voodoo->verts[3].sS0 = cmdfifo_get_f(voodoo);
+ voodoo->verts[3].sT0 = cmdfifo_get_f(voodoo);
+ }
+ if (mask & CMDFIFO3_PC_MASK_W1)
+ voodoo->verts[3].sW1 = cmdfifo_get_f(voodoo);
+ if (mask & CMDFIFO3_PC_MASK_S1_T1)
+ {
+ voodoo->verts[3].sS1 = cmdfifo_get_f(voodoo);
+ voodoo->verts[3].sT1 = cmdfifo_get_f(voodoo);
+ }
+ if (v_num) /* first vertex of a sequence begins a triangle, later ones draw */
+ voodoo_reg_writel(SST_sDrawTriCMD, 0, voodoo);
+ else
+ voodoo_reg_writel(SST_sBeginTriCMD, 0, voodoo);
+ v_num++;
+ if (v_num == 3 && ((header >> 3) & 7) == 0) /* command 0 restarts the sequence after every third vertex */
+ v_num = 0;
+ }
+ break;
+
+ case 4: /* packet 4: registers selected by a 14-bit mask from addr, followed by num words that are discarded */
+ num = (header >> 29) & 7;
+ mask = (header >> 15) & 0x3fff;
+ addr = (header & 0x7ff8) >> 1;
+// pclog("CMDFIFO4 addr=%08x\n",addr);
+ while (mask)
+ {
+ if (mask & 1)
+ {
+ uint32_t val = cmdfifo_get(voodoo);
+
+ if ((addr & (1 << 13)) && voodoo->type >= VOODOO_BANSHEE)
+ {
+ if (voodoo->type < VOODOO_BANSHEE) /* NOTE(review): unreachable, the enclosing condition already requires type >= VOODOO_BANSHEE; the message also says CMDFIFO1 inside the packet 4 handler */
+ fatal("CMDFIFO1: Not Banshee\n");
+// pclog("CMDFIFO1: write %08x %08x\n", addr, val);
+ voodoo_2d_reg_writel(voodoo, addr, val);
+ }
+ else
+ {
+ if ((addr & 0x3ff) == SST_triangleCMD || (addr & 0x3ff) == SST_ftriangleCMD || /* same command accounting as packet 1 */
+ (addr & 0x3ff) == SST_fastfillCMD || (addr & 0x3ff) == SST_nopCMD)
+ voodoo->cmd_written_fifo++;
+
+ if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD)
+ voodoo->cmd_written_fifo++;
+ voodoo_reg_writel(addr, val, voodoo);
+ }
+ }
+
+ addr += 4;
+ mask >>= 1;
+ }
+ while (num--) /* skip the trailing words */
+ cmdfifo_get(voodoo);
+ break;
+
+ case 5: /* packet 5: block of num words written to the memory space chosen by header bits 30-31 */
+// if (header & 0x3fc00000)
+// fatal("CMDFIFO packet 5 has byte disables set %08x\n", header);
+ num = (header >> 3) & 0x7ffff;
+ addr = cmdfifo_get(voodoo) & 0xffffff; /* second packet word is the destination address */
+ if (!num) /* a count of 0 is treated as 1 */
+ num = 1;
+// pclog("CMDFIFO5 addr=%08x num=%i\n", addr, num);
+ switch (header >> 30)
+ {
+ case 0: /*Linear framebuffer (Banshee)*/
+ if (voodoo->texture_present[0][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) /* flush cached textures at the start address, once per TMU; only the start address is checked */
+ {
+// pclog("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT);
+ flush_texture_cache(voodoo, addr & voodoo->texture_mask, 0);
+ }
+ if (voodoo->texture_present[1][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT])
+ {
+// pclog("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT);
+ flush_texture_cache(voodoo, addr & voodoo->texture_mask, 1);
+ }
+ while (num--)
+ {
+ uint32_t val = cmdfifo_get(voodoo);
+ if (addr <= voodoo->fb_mask) /* NOTE(review): a 4-byte store at an addr within 3 bytes of fb_mask could extend past it - confirm fb_mem has slack */
+ *(uint32_t *)&voodoo->fb_mem[addr] = val;
+ addr += 4;
+ }
+ break;
+ case 2: /*Framebuffer*/
+ while (num--)
+ {
+ uint32_t val = cmdfifo_get(voodoo);
+ voodoo_fb_writel(addr, val, voodoo);
+ addr += 4;
+ }
+ break;
+ case 3: /*Texture*/
+ while (num--)
+ {
+ uint32_t val = cmdfifo_get(voodoo);
+ voodoo_tex_writel(addr, val, voodoo);
+ addr += 4;
+ }
+ break;
+
+ default:
+ fatal("CMDFIFO packet 5 bad space %08x %08x\n", header, voodoo->cmdfifo_rp);
+ }
+ break;
+
+ default:
+ fatal("Bad CMDFIFO packet %08x %08x\n", header, voodoo->cmdfifo_rp);
+ }
+
+ end_time = timer_read();
+ voodoo->time += end_time - start_time;
+ }
+ voodoo->voodoo_busy = 0; /* all queued work done; loop back and sleep */
+ }
+}
--- /dev/null
+void voodoo_wake_fifo_thread(voodoo_t *voodoo); /* arm the wake timer (WAKE_DELAY) unless it is already enabled */
+void voodoo_wake_fifo_thread_now(voodoo_t *voodoo); /* set the wake_fifo_thread event immediately */
+void voodoo_wake_timer(void *p); /* callback form: p is the voodoo_t instance; sets the wake_fifo_thread event */
+void voodoo_queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val); /* append one entry to the FIFO ring, blocking while it is full */
+void voodoo_flush(voodoo_t *voodoo); /* block until the FIFO ring is empty and the render thread is idle */
+void voodoo_wake_fifo_threads(voodoo_set_t *set, voodoo_t *voodoo); /* deferred wake of this card and, in SLI on non-VOODOO_2 cards, of the set's second card */
+void voodoo_wait_for_swap_complete(voodoo_t *voodoo); /* wait until swap_pending clears, forcing the swap during a flush or when the FIFO is full */
+void voodoo_fifo_thread(void *param); /* FIFO worker thread entry point; param is the voodoo_t instance */
--- /dev/null
+#include <math.h>
+#include <stddef.h>
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_banshee.h"
+#include "vid_voodoo_blitter.h"
+#include "vid_voodoo_dither.h"
+#include "vid_voodoo_fifo.h"
+#include "vid_voodoo_reg.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+#include "vid_voodoo_setup.h"
+#include "vid_voodoo_texture.h"
+/* Bit flags for the chip-select field of a register address.
+ * voodoo_reg_writel() takes the field from address bits 10-13 and treats a
+ * value of 0 as 0xf, i.e. all chips selected. */
+enum
+{
+ CHIP_FBI = 0x1, /* write applies to the shared (non-TMU) parameters, e.g. params.startW */
+ CHIP_TREX0 = 0x2, /* write applies to params.tmu[0] */
+ CHIP_TREX1 = 0x4, /* write applies to params.tmu[1] */
+ CHIP_TREX2 = 0x8 /* not tested anywhere in the portion of the file visible here */
+};
+
+void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p)
+{
+ voodoo_t *voodoo = (voodoo_t *)p;
+ union
+ {
+ uint32_t i;
+ float f;
+ } tempif;
+ int ad21 = addr & (1 << 21);
+ int chip = (addr >> 10) & 0xf;
+ if (!chip)
+ chip = 0xf;
+
+ tempif.i = val;
+//pclog("voodoo_reg_write_l: addr=%08x val=%08x(%f) chip=%x\n", addr, val, tempif.f, chip);
+ addr &= 0x3fc;
+
+ if ((voodoo->fbiInit3 & FBIINIT3_REMAP) && addr < 0x100 && ad21)
+ addr |= 0x400;
+ switch (addr)
+ {
+ case SST_swapbufferCMD:
+ if (voodoo->type >= VOODOO_BANSHEE)
+ {
+// pclog("swapbufferCMD %08x %08x\n", val, voodoo->leftOverlayBuf);
+
+ voodoo_wait_for_render_thread_idle(voodoo);
+ if (!(val & 1))
+ {
+ banshee_set_overlay_addr(voodoo->p, voodoo->leftOverlayBuf);
+ thread_lock_mutex(voodoo->swap_mutex);
+ if (voodoo->swap_count > 0)
+ voodoo->swap_count--;
+ thread_unlock_mutex(voodoo->swap_mutex);
+ voodoo->frame_count++;
+ }
+ else if (TRIPLE_BUFFER)
+ {
+ if (voodoo->swap_pending)
+ voodoo_wait_for_swap_complete(voodoo);
+ voodoo->swap_interval = (val >> 1) & 0xff;
+ voodoo->swap_offset = voodoo->leftOverlayBuf;
+ voodoo->swap_pending = 1;
+ }
+ else
+ {
+ voodoo->swap_interval = (val >> 1) & 0xff;
+ voodoo->swap_offset = voodoo->leftOverlayBuf;
+ voodoo->swap_pending = 1;
+
+ voodoo_wait_for_swap_complete(voodoo);
+ }
+
+ voodoo->cmd_read++;
+ break;
+ }
+
+ if (TRIPLE_BUFFER)
+ {
+ voodoo->disp_buffer = (voodoo->disp_buffer + 1) % 3;
+ voodoo->draw_buffer = (voodoo->draw_buffer + 1) % 3;
+ }
+ else
+ {
+ voodoo->disp_buffer = !voodoo->disp_buffer;
+ voodoo->draw_buffer = !voodoo->draw_buffer;
+ }
+ voodoo_recalc(voodoo);
+
+ voodoo->params.swapbufferCMD = val;
+
+// pclog("Swap buffer %08x %d %p %i\n", val, voodoo->swap_count, &voodoo->swap_count, (voodoo == voodoo->set->voodoos[1]) ? 1 : 0);
+// voodoo->front_offset = params->front_offset;
+ voodoo_wait_for_render_thread_idle(voodoo);
+ if (!(val & 1))
+ {
+ memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line));
+ voodoo->front_offset = voodoo->params.front_offset;
+ thread_lock_mutex(voodoo->swap_mutex);
+ if (voodoo->swap_count > 0)
+ voodoo->swap_count--;
+ thread_unlock_mutex(voodoo->swap_mutex);
+ }
+ else if (TRIPLE_BUFFER)
+ {
+ if (voodoo->swap_pending)
+ voodoo_wait_for_swap_complete(voodoo);
+
+ voodoo->swap_interval = (val >> 1) & 0xff;
+ voodoo->swap_offset = voodoo->params.front_offset;
+ voodoo->swap_pending = 1;
+ }
+ else
+ {
+ voodoo->swap_interval = (val >> 1) & 0xff;
+ voodoo->swap_offset = voodoo->params.front_offset;
+ voodoo->swap_pending = 1;
+
+ voodoo_wait_for_swap_complete(voodoo);
+ }
+ voodoo->cmd_read++;
+ break;
+
+ case SST_vertexAx: case SST_remap_vertexAx:
+ voodoo->params.vertexAx = val & 0xffff;
+ break;
+ case SST_vertexAy: case SST_remap_vertexAy:
+ voodoo->params.vertexAy = val & 0xffff;
+ break;
+ case SST_vertexBx: case SST_remap_vertexBx:
+ voodoo->params.vertexBx = val & 0xffff;
+ break;
+ case SST_vertexBy: case SST_remap_vertexBy:
+ voodoo->params.vertexBy = val & 0xffff;
+ break;
+ case SST_vertexCx: case SST_remap_vertexCx:
+ voodoo->params.vertexCx = val & 0xffff;
+ break;
+ case SST_vertexCy: case SST_remap_vertexCy:
+ voodoo->params.vertexCy = val & 0xffff;
+ break;
+
+ case SST_startR: case SST_remap_startR:
+ voodoo->params.startR = val & 0xffffff;
+ break;
+ case SST_startG: case SST_remap_startG:
+ voodoo->params.startG = val & 0xffffff;
+ break;
+ case SST_startB: case SST_remap_startB:
+ voodoo->params.startB = val & 0xffffff;
+ break;
+ case SST_startZ: case SST_remap_startZ:
+ voodoo->params.startZ = val;
+ break;
+ case SST_startA: case SST_remap_startA:
+ voodoo->params.startA = val & 0xffffff;
+ break;
+ case SST_startS: case SST_remap_startS:
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].startS = ((int64_t)(int32_t)val) << 14;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].startS = ((int64_t)(int32_t)val) << 14;
+ break;
+ case SST_startT: case SST_remap_startT:
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].startT = ((int64_t)(int32_t)val) << 14;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].startT = ((int64_t)(int32_t)val) << 14;
+ break;
+ case SST_startW: case SST_remap_startW:
+ if (chip & CHIP_FBI)
+ voodoo->params.startW = (int64_t)(int32_t)val << 2;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].startW = (int64_t)(int32_t)val << 2;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].startW = (int64_t)(int32_t)val << 2;
+ break;
+
+ case SST_dRdX: case SST_remap_dRdX:
+ voodoo->params.dRdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0);
+ break;
+ case SST_dGdX: case SST_remap_dGdX:
+ voodoo->params.dGdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0);
+ break;
+ case SST_dBdX: case SST_remap_dBdX:
+ voodoo->params.dBdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0);
+ break;
+ case SST_dZdX: case SST_remap_dZdX:
+ voodoo->params.dZdX = val;
+ break;
+ case SST_dAdX: case SST_remap_dAdX:
+ voodoo->params.dAdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0);
+ break;
+ case SST_dSdX: case SST_remap_dSdX:
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dSdX = ((int64_t)(int32_t)val) << 14;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dSdX = ((int64_t)(int32_t)val) << 14;
+ break;
+ case SST_dTdX: case SST_remap_dTdX:
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dTdX = ((int64_t)(int32_t)val) << 14;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dTdX = ((int64_t)(int32_t)val) << 14;
+ break;
+ case SST_dWdX: case SST_remap_dWdX:
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dWdX = (int64_t)(int32_t)val << 2;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dWdX = (int64_t)(int32_t)val << 2;
+ if (chip & CHIP_FBI)
+ voodoo->params.dWdX = (int64_t)(int32_t)val << 2;
+ break;
+
+ case SST_dRdY: case SST_remap_dRdY:
+ voodoo->params.dRdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0);
+ break;
+ case SST_dGdY: case SST_remap_dGdY:
+ voodoo->params.dGdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0);
+ break;
+ case SST_dBdY: case SST_remap_dBdY:
+ voodoo->params.dBdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0);
+ break;
+ case SST_dZdY: case SST_remap_dZdY:
+ voodoo->params.dZdY = val;
+ break;
+ case SST_dAdY: case SST_remap_dAdY:
+ voodoo->params.dAdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0);
+ break;
+ case SST_dSdY: case SST_remap_dSdY:
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dSdY = ((int64_t)(int32_t)val) << 14;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dSdY = ((int64_t)(int32_t)val) << 14;
+ break;
+ case SST_dTdY: case SST_remap_dTdY:
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dTdY = ((int64_t)(int32_t)val) << 14;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dTdY = ((int64_t)(int32_t)val) << 14;
+ break;
+ case SST_dWdY: case SST_remap_dWdY:
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dWdY = (int64_t)(int32_t)val << 2;
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dWdY = (int64_t)(int32_t)val << 2;
+ if (chip & CHIP_FBI)
+ voodoo->params.dWdY = (int64_t)(int32_t)val << 2;
+ break;
+
+ case SST_triangleCMD: case SST_remap_triangleCMD:
+ voodoo->params.sign = val & (1 << 31);
+
+ if (voodoo->ncc_dirty[0])
+ voodoo_update_ncc(voodoo, 0);
+ if (voodoo->ncc_dirty[1])
+ voodoo_update_ncc(voodoo, 1);
+ voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0;
+
+ voodoo_queue_triangle(voodoo, &voodoo->params);
+
+ voodoo->cmd_read++;
+ break;
+
+ case SST_fvertexAx: case SST_remap_fvertexAx:
+ voodoo->fvertexAx.i = val;
+ voodoo->params.vertexAx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexAx.f * 16.0f) & 0xffff;
+ break;
+ case SST_fvertexAy: case SST_remap_fvertexAy:
+ voodoo->fvertexAy.i = val;
+ voodoo->params.vertexAy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexAy.f * 16.0f) & 0xffff;
+ break;
+ case SST_fvertexBx: case SST_remap_fvertexBx:
+ voodoo->fvertexBx.i = val;
+ voodoo->params.vertexBx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexBx.f * 16.0f) & 0xffff;
+ break;
+ case SST_fvertexBy: case SST_remap_fvertexBy:
+ voodoo->fvertexBy.i = val;
+ voodoo->params.vertexBy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexBy.f * 16.0f) & 0xffff;
+ break;
+ case SST_fvertexCx: case SST_remap_fvertexCx:
+ voodoo->fvertexCx.i = val;
+ voodoo->params.vertexCx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexCx.f * 16.0f) & 0xffff;
+ break;
+ case SST_fvertexCy: case SST_remap_fvertexCy:
+ voodoo->fvertexCy.i = val;
+ voodoo->params.vertexCy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexCy.f * 16.0f) & 0xffff;
+ break;
+
+ case SST_fstartR: case SST_remap_fstartR:
+ tempif.i = val;
+ voodoo->params.startR = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fstartG: case SST_remap_fstartG:
+ tempif.i = val;
+ voodoo->params.startG = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fstartB: case SST_remap_fstartB:
+ tempif.i = val;
+ voodoo->params.startB = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fstartZ: case SST_remap_fstartZ:
+ tempif.i = val;
+ voodoo->params.startZ = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fstartA: case SST_remap_fstartA:
+ tempif.i = val;
+ voodoo->params.startA = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fstartS: case SST_remap_fstartS:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].startS = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].startS = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+ case SST_fstartT: case SST_remap_fstartT:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].startT = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].startT = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+ case SST_fstartW: case SST_remap_fstartW:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].startW = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].startW = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_FBI)
+ voodoo->params.startW = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+
+ case SST_fdRdX: case SST_remap_fdRdX:
+ tempif.i = val;
+ voodoo->params.dRdX = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdGdX: case SST_remap_fdGdX:
+ tempif.i = val;
+ voodoo->params.dGdX = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdBdX: case SST_remap_fdBdX:
+ tempif.i = val;
+ voodoo->params.dBdX = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdZdX: case SST_remap_fdZdX:
+ tempif.i = val;
+ voodoo->params.dZdX = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdAdX: case SST_remap_fdAdX:
+ tempif.i = val;
+ voodoo->params.dAdX = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdSdX: case SST_remap_fdSdX:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dSdX = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dSdX = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+ case SST_fdTdX: case SST_remap_fdTdX:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dTdX = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dTdX = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+ case SST_fdWdX: case SST_remap_fdWdX:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dWdX = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dWdX = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_FBI)
+ voodoo->params.dWdX = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+
+ case SST_fdRdY: case SST_remap_fdRdY:
+ tempif.i = val;
+ voodoo->params.dRdY = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdGdY: case SST_remap_fdGdY:
+ tempif.i = val;
+ voodoo->params.dGdY = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdBdY: case SST_remap_fdBdY:
+ tempif.i = val;
+ voodoo->params.dBdY = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdZdY: case SST_remap_fdZdY:
+ tempif.i = val;
+ voodoo->params.dZdY = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdAdY: case SST_remap_fdAdY:
+ tempif.i = val;
+ voodoo->params.dAdY = (int32_t)(tempif.f * 4096.0f);
+ break;
+ case SST_fdSdY: case SST_remap_fdSdY:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dSdY = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dSdY = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+ case SST_fdTdY: case SST_remap_fdTdY:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dTdY = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dTdY = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+ case SST_fdWdY: case SST_remap_fdWdY:
+ tempif.i = val;
+ if (chip & CHIP_TREX0)
+ voodoo->params.tmu[0].dWdY = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_TREX1)
+ voodoo->params.tmu[1].dWdY = (int64_t)(tempif.f * 4294967296.0f);
+ if (chip & CHIP_FBI)
+ voodoo->params.dWdY = (int64_t)(tempif.f * 4294967296.0f);
+ break;
+
+ case SST_ftriangleCMD:
+ voodoo->params.sign = val & (1 << 31);
+
+ if (voodoo->ncc_dirty[0])
+ voodoo_update_ncc(voodoo, 0);
+ if (voodoo->ncc_dirty[1])
+ voodoo_update_ncc(voodoo, 1);
+ voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0;
+
+ voodoo_queue_triangle(voodoo, &voodoo->params);
+
+ voodoo->cmd_read++;
+ break;
+
+ case SST_fbzColorPath:
+ voodoo->params.fbzColorPath = val;
+ voodoo->rgb_sel = val & 3;
+ break;
+
+ case SST_fogMode:
+ voodoo->params.fogMode = val;
+ break;
+ case SST_alphaMode:
+ voodoo->params.alphaMode = val;
+ break;
+ case SST_fbzMode:
+ voodoo->params.fbzMode = val;
+ voodoo_recalc(voodoo);
+ break;
+ case SST_lfbMode:
+ voodoo->lfbMode = val;
+ voodoo_recalc(voodoo);
+ break;
+
+ case SST_clipLeftRight:
+ if (voodoo->type >= VOODOO_2)
+ {
+ voodoo->params.clipRight = val & 0xfff;
+ voodoo->params.clipLeft = (val >> 16) & 0xfff;
+ }
+ else
+ {
+ voodoo->params.clipRight = val & 0x3ff;
+ voodoo->params.clipLeft = (val >> 16) & 0x3ff;
+ }
+ break;
+ case SST_clipLowYHighY:
+ if (voodoo->type >= VOODOO_2)
+ {
+ voodoo->params.clipHighY = val & 0xfff;
+ voodoo->params.clipLowY = (val >> 16) & 0xfff;
+ }
+ else
+ {
+ voodoo->params.clipHighY = val & 0x3ff;
+ voodoo->params.clipLowY = (val >> 16) & 0x3ff;
+ }
+ break;
+
+ case SST_nopCMD:
+ voodoo->cmd_read++;
+ voodoo->fbiPixelsIn = 0;
+ voodoo->fbiChromaFail = 0;
+ voodoo->fbiZFuncFail = 0;
+ voodoo->fbiAFuncFail = 0;
+ voodoo->fbiPixelsOut = 0;
+ break;
+ case SST_fastfillCMD:
+ voodoo_wait_for_render_thread_idle(voodoo);
+ voodoo_fastfill(voodoo, &voodoo->params);
+ voodoo->cmd_read++;
+ break;
+
+ case SST_fogColor:
+ voodoo->params.fogColor.r = (val >> 16) & 0xff;
+ voodoo->params.fogColor.g = (val >> 8) & 0xff;
+ voodoo->params.fogColor.b = val & 0xff;
+ break;
+
+ case SST_zaColor:
+ voodoo->params.zaColor = val;
+ break;
+ case SST_chromaKey:
+ voodoo->params.chromaKey_r = (val >> 16) & 0xff;
+ voodoo->params.chromaKey_g = (val >> 8) & 0xff;
+ voodoo->params.chromaKey_b = val & 0xff;
+ voodoo->params.chromaKey = val & 0xffffff;
+ break;
+ case SST_stipple:
+ voodoo->params.stipple = val;
+ break;
+ case SST_color0:
+ voodoo->params.color0 = val;
+ break;
+ case SST_color1:
+ voodoo->params.color1 = val;
+ break;
+
+ case SST_fogTable00: case SST_fogTable01: case SST_fogTable02: case SST_fogTable03:
+ case SST_fogTable04: case SST_fogTable05: case SST_fogTable06: case SST_fogTable07:
+ case SST_fogTable08: case SST_fogTable09: case SST_fogTable0a: case SST_fogTable0b:
+ case SST_fogTable0c: case SST_fogTable0d: case SST_fogTable0e: case SST_fogTable0f:
+ case SST_fogTable10: case SST_fogTable11: case SST_fogTable12: case SST_fogTable13:
+ case SST_fogTable14: case SST_fogTable15: case SST_fogTable16: case SST_fogTable17:
+ case SST_fogTable18: case SST_fogTable19: case SST_fogTable1a: case SST_fogTable1b:
+ case SST_fogTable1c: case SST_fogTable1d: case SST_fogTable1e: case SST_fogTable1f:
+ addr = (addr - SST_fogTable00) >> 1;
+ voodoo->params.fogTable[addr].dfog = val & 0xff;
+ voodoo->params.fogTable[addr].fog = (val >> 8) & 0xff;
+ voodoo->params.fogTable[addr+1].dfog = (val >> 16) & 0xff;
+ voodoo->params.fogTable[addr+1].fog = (val >> 24) & 0xff;
+ break;
+
+ case SST_clipLeftRight1:
+ if (voodoo->type >= VOODOO_BANSHEE)
+ {
+ voodoo->params.clipRight1 = val & 0xfff;
+ voodoo->params.clipLeft1 = (val >> 16) & 0xfff;
+ }
+ break;
+ case SST_clipTopBottom1:
+ if (voodoo->type >= VOODOO_BANSHEE)
+ {
+ voodoo->params.clipHighY1 = val & 0xfff;
+ voodoo->params.clipLowY1 = (val >> 16) & 0xfff;
+ }
+ break;
+
+ case SST_colBufferAddr:
+ if (voodoo->type >= VOODOO_BANSHEE)
+ {
+ voodoo->params.draw_offset = val & 0xfffff0;
+ voodoo->fb_write_offset = voodoo->params.draw_offset;
+// pclog("colorBufferAddr=%06x\n", voodoo->params.draw_offset);
+ }
+ break;
+ case SST_colBufferStride:
+ if (voodoo->type >= VOODOO_BANSHEE)
+ {
+ voodoo->col_tiled = val & (1 << 15);
+ voodoo->params.col_tiled = voodoo->col_tiled;
+ if (voodoo->col_tiled)
+ {
+ voodoo->row_width = (val & 0x7f) * 128*32;
+// pclog("colBufferStride tiled = %i bytes, tiled %08x\n", voodoo->row_width, val);
+ }
+ else
+ {
+ voodoo->row_width = val & 0x3fff;
+// pclog("colBufferStride linear = %i bytes, linear\n", voodoo->row_width);
+ }
+ voodoo->params.row_width = voodoo->row_width;
+ }
+ break;
+ case SST_auxBufferAddr:
+ if (voodoo->type >= VOODOO_BANSHEE)
+ {
+ voodoo->params.aux_offset = val & 0xfffff0;
+// pclog("auxBufferAddr=%06x\n", voodoo->params.aux_offset);
+ }
+ break;
+ case SST_auxBufferStride:
+ if (voodoo->type >= VOODOO_BANSHEE)
+ {
+ voodoo->aux_tiled = val & (1 << 15);
+ voodoo->params.aux_tiled = voodoo->aux_tiled;
+ if (voodoo->aux_tiled)
+ {
+ voodoo->aux_row_width = (val & 0x7f) * 128*32;
+// pclog("auxBufferStride tiled = %i bytes, tiled\n", voodoo->aux_row_width);
+ }
+ else
+ {
+ voodoo->aux_row_width = val & 0x3fff;
+// pclog("auxBufferStride linear = %i bytes, linear\n", voodoo->aux_row_width);
+ }
+ voodoo->params.aux_row_width = voodoo->aux_row_width;
+ }
+ break;
+
+ case SST_clutData:
+ voodoo->clutData[(val >> 24) & 0x3f].b = val & 0xff;
+ voodoo->clutData[(val >> 24) & 0x3f].g = (val >> 8) & 0xff;
+ voodoo->clutData[(val >> 24) & 0x3f].r = (val >> 16) & 0xff;
+ if (val & 0x20000000)
+ {
+ voodoo->clutData[(val >> 24) & 0x3f].b = 255;
+ voodoo->clutData[(val >> 24) & 0x3f].g = 255;
+ voodoo->clutData[(val >> 24) & 0x3f].r = 255;
+ }
+ voodoo->clutData_dirty = 1;
+ break;
+
+ case SST_sSetupMode:
+ voodoo->sSetupMode = val;
+ break;
+ case SST_sVx:
+ tempif.i = val;
+ voodoo->verts[3].sVx = tempif.f;
+// pclog("sVx[%i]=%f\n", voodoo->vertex_num, tempif.f);
+ break;
+ case SST_sVy:
+ tempif.i = val;
+ voodoo->verts[3].sVy = tempif.f;
+// pclog("sVy[%i]=%f\n", voodoo->vertex_num, tempif.f);
+ break;
+ case SST_sARGB:
+ voodoo->verts[3].sBlue = (float)(val & 0xff);
+ voodoo->verts[3].sGreen = (float)((val >> 8) & 0xff);
+ voodoo->verts[3].sRed = (float)((val >> 16) & 0xff);
+ voodoo->verts[3].sAlpha = (float)((val >> 24) & 0xff);
+ break;
+ case SST_sRed:
+ tempif.i = val;
+ voodoo->verts[3].sRed = tempif.f;
+ break;
+ case SST_sGreen:
+ tempif.i = val;
+ voodoo->verts[3].sGreen = tempif.f;
+ break;
+ case SST_sBlue:
+ tempif.i = val;
+ voodoo->verts[3].sBlue = tempif.f;
+ break;
+ case SST_sAlpha:
+ tempif.i = val;
+ voodoo->verts[3].sAlpha = tempif.f;
+ break;
+ case SST_sVz:
+ tempif.i = val;
+ voodoo->verts[3].sVz = tempif.f;
+ break;
+ case SST_sWb:
+ tempif.i = val;
+ voodoo->verts[3].sWb = tempif.f;
+ break;
+ case SST_sW0:
+ tempif.i = val;
+ voodoo->verts[3].sW0 = tempif.f;
+ break;
+ case SST_sS0:
+ tempif.i = val;
+ voodoo->verts[3].sS0 = tempif.f;
+ break;
+ case SST_sT0:
+ tempif.i = val;
+ voodoo->verts[3].sT0 = tempif.f;
+ break;
+ case SST_sW1:
+ tempif.i = val;
+ voodoo->verts[3].sW1 = tempif.f;
+ break;
+ case SST_sS1:
+ tempif.i = val;
+ voodoo->verts[3].sS1 = tempif.f;
+ break;
+ case SST_sT1:
+ tempif.i = val;
+ voodoo->verts[3].sT1 = tempif.f;
+ break;
+
+ case SST_sBeginTriCMD:
+// pclog("sBeginTriCMD %i %f\n", voodoo->vertex_num, voodoo->verts[4].sVx);
+ voodoo->verts[0] = voodoo->verts[3];
+ voodoo->verts[1] = voodoo->verts[3];
+ voodoo->verts[2] = voodoo->verts[3];
+ voodoo->vertex_next_age = 0;
+ voodoo->vertex_ages[0] = voodoo->vertex_next_age++;
+
+ voodoo->num_verticies = 1;
+ voodoo->cull_pingpong = 0;
+ break;
+ case SST_sDrawTriCMD:
+// pclog("sDrawTriCMD %i %i\n", voodoo->num_verticies, voodoo->sSetupMode & SETUPMODE_STRIP_MODE);
+ /*I'm not sure this is the vertex selection algorithm actually used in the 3dfx
+ chips, but this works with a number of games that switch between strip and fan
+ mode in the middle of a run (eg Black & White, Viper Racing)*/
+ if (voodoo->vertex_next_age < 3)
+ {
+ /*Fewer than three vertices already written, store in next slot*/
+ int vertex_nr = voodoo->vertex_next_age;
+
+ voodoo->verts[vertex_nr] = voodoo->verts[3];
+ voodoo->vertex_ages[vertex_nr] = voodoo->vertex_next_age++;
+ }
+ else
+ {
+ int vertex_nr = 0;
+
+ if (!(voodoo->sSetupMode & SETUPMODE_STRIP_MODE))
+ {
+ /*Strip - find oldest vertex*/
+ if ((voodoo->vertex_ages[0] < voodoo->vertex_ages[1]) &&
+ (voodoo->vertex_ages[0] < voodoo->vertex_ages[2]))
+ vertex_nr = 0;
+ else if ((voodoo->vertex_ages[1] < voodoo->vertex_ages[0]) &&
+ (voodoo->vertex_ages[1] < voodoo->vertex_ages[2]))
+ vertex_nr = 1;
+ else
+ vertex_nr = 2;
+ }
+ else
+ {
+ /*Fan - find second oldest vertex (ie pivot around oldest)*/
+ if ((voodoo->vertex_ages[1] < voodoo->vertex_ages[0]) &&
+ (voodoo->vertex_ages[0] < voodoo->vertex_ages[2]))
+ vertex_nr = 0;
+ else if ((voodoo->vertex_ages[2] < voodoo->vertex_ages[0]) &&
+ (voodoo->vertex_ages[0] < voodoo->vertex_ages[1]))
+ vertex_nr = 0;
+ else if ((voodoo->vertex_ages[0] < voodoo->vertex_ages[1]) &&
+ (voodoo->vertex_ages[1] < voodoo->vertex_ages[2]))
+ vertex_nr = 1;
+ else if ((voodoo->vertex_ages[2] < voodoo->vertex_ages[1]) &&
+ (voodoo->vertex_ages[1] < voodoo->vertex_ages[0]))
+ vertex_nr = 1;
+ else
+ vertex_nr = 2;
+ }
+ voodoo->verts[vertex_nr] = voodoo->verts[3];
+ voodoo->vertex_ages[vertex_nr] = voodoo->vertex_next_age++;
+ }
+
+ voodoo->num_verticies++;
+ if (voodoo->num_verticies == 3)
+ {
+// pclog("triangle_setup\n");
+ voodoo_triangle_setup(voodoo);
+ voodoo->cull_pingpong = !voodoo->cull_pingpong;
+
+ voodoo->num_verticies = 2;
+ }
+ break;
+
+ case SST_bltSrcBaseAddr:
+ voodoo->bltSrcBaseAddr = val & 0x3fffff;
+ break;
+ case SST_bltDstBaseAddr:
+// pclog("Write bltDstBaseAddr %08x\n", val);
+ voodoo->bltDstBaseAddr = val & 0x3fffff;
+ break;
+ case SST_bltXYStrides:
+ voodoo->bltSrcXYStride = val & 0xfff;
+ voodoo->bltDstXYStride = (val >> 16) & 0xfff;
+// pclog("Write bltXYStrides %08x\n", val);
+ break;
+ case SST_bltSrcChromaRange:
+ voodoo->bltSrcChromaRange = val;
+ voodoo->bltSrcChromaMinB = val & 0x1f;
+ voodoo->bltSrcChromaMinG = (val >> 5) & 0x3f;
+ voodoo->bltSrcChromaMinR = (val >> 11) & 0x1f;
+ voodoo->bltSrcChromaMaxB = (val >> 16) & 0x1f;
+ voodoo->bltSrcChromaMaxG = (val >> 21) & 0x3f;
+ voodoo->bltSrcChromaMaxR = (val >> 27) & 0x1f;
+ break;
+ case SST_bltDstChromaRange:
+ voodoo->bltDstChromaRange = val;
+ voodoo->bltDstChromaMinB = val & 0x1f;
+ voodoo->bltDstChromaMinG = (val >> 5) & 0x3f;
+ voodoo->bltDstChromaMinR = (val >> 11) & 0x1f;
+ voodoo->bltDstChromaMaxB = (val >> 16) & 0x1f;
+ voodoo->bltDstChromaMaxG = (val >> 21) & 0x3f;
+ voodoo->bltDstChromaMaxR = (val >> 27) & 0x1f;
+ break;
+ case SST_bltClipX:
+ voodoo->bltClipRight = val & 0xfff;
+ voodoo->bltClipLeft = (val >> 16) & 0xfff;
+ break;
+ case SST_bltClipY:
+ voodoo->bltClipHighY = val & 0xfff;
+ voodoo->bltClipLowY = (val >> 16) & 0xfff;
+ break;
+
+ case SST_bltSrcXY:
+ voodoo->bltSrcX = val & 0x7ff;
+ voodoo->bltSrcY = (val >> 16) & 0x7ff;
+ break;
+ case SST_bltDstXY:
+// pclog("Write bltDstXY %08x\n", val);
+ voodoo->bltDstX = val & 0x7ff;
+ voodoo->bltDstY = (val >> 16) & 0x7ff;
+ if (val & (1 << 31))
+ voodoo_v2_blit_start(voodoo);
+ break;
+ case SST_bltSize:
+// pclog("Write bltSize %08x\n", val);
+ voodoo->bltSizeX = val & 0xfff;
+ if (voodoo->bltSizeX & 0x800)
+ voodoo->bltSizeX |= 0xfffff000;
+ voodoo->bltSizeY = (val >> 16) & 0xfff;
+ if (voodoo->bltSizeY & 0x800)
+ voodoo->bltSizeY |= 0xfffff000;
+ if (val & (1 << 31))
+ voodoo_v2_blit_start(voodoo);
+ break;
+ case SST_bltRop:
+ voodoo->bltRop[0] = val & 0xf;
+ voodoo->bltRop[1] = (val >> 4) & 0xf;
+ voodoo->bltRop[2] = (val >> 8) & 0xf;
+ voodoo->bltRop[3] = (val >> 12) & 0xf;
+ break;
+ case SST_bltColor:
+// pclog("Write bltColor %08x\n", val);
+ voodoo->bltColorFg = val & 0xffff;
+ voodoo->bltColorBg = (val >> 16) & 0xffff;
+ break;
+
+ case SST_bltCommand:
+ voodoo->bltCommand = val;
+// pclog("Write bltCommand %08x\n", val);
+ if (val & (1 << 31))
+ voodoo_v2_blit_start(voodoo);
+ break;
+ case SST_bltData:
+ voodoo_v2_blit_data(voodoo, val);
+ break;
+
+ case SST_textureMode:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->params.textureMode[0] = val;
+ voodoo->params.tformat[0] = (val >> 8) & 0xf;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->params.textureMode[1] = val;
+ voodoo->params.tformat[1] = (val >> 8) & 0xf;
+ }
+ break;
+ case SST_tLOD:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->params.tLOD[0] = val;
+ voodoo_recalc_tex(voodoo, 0);
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->params.tLOD[1] = val;
+ voodoo_recalc_tex(voodoo, 1);
+ }
+ break;
+ case SST_tDetail:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->params.detail_max[0] = val & 0xff;
+ voodoo->params.detail_bias[0] = (val >> 8) & 0x3f;
+ voodoo->params.detail_scale[0] = (val >> 14) & 7;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->params.detail_max[1] = val & 0xff;
+ voodoo->params.detail_bias[1] = (val >> 8) & 0x3f;
+ voodoo->params.detail_scale[1] = (val >> 14) & 7;
+ }
+ break;
+ case SST_texBaseAddr:
+ if (chip & CHIP_TREX0)
+ {
+ if (voodoo->type >= VOODOO_BANSHEE)
+ voodoo->params.texBaseAddr[0] = val & 0xfffff0;
+ else
+ voodoo->params.texBaseAddr[0] = (val & 0x7ffff) << 3;
+// pclog("texBaseAddr = %08x %08x\n", voodoo->params.texBaseAddr[0], val);
+ voodoo_recalc_tex(voodoo, 0);
+ }
+ if (chip & CHIP_TREX1)
+ {
+ if (voodoo->type >= VOODOO_BANSHEE)
+ voodoo->params.texBaseAddr[1] = val & 0xfffff0;
+ else
+ voodoo->params.texBaseAddr[1] = (val & 0x7ffff) << 3;
+ voodoo_recalc_tex(voodoo, 1);
+ }
+ break;
+ case SST_texBaseAddr1:
+ if (chip & CHIP_TREX0)
+ {
+ if (voodoo->type >= VOODOO_BANSHEE)
+ voodoo->params.texBaseAddr1[0] = val & 0xfffff0;
+ else
+ voodoo->params.texBaseAddr1[0] = (val & 0x7ffff) << 3;
+ voodoo_recalc_tex(voodoo, 0);
+ }
+ if (chip & CHIP_TREX1)
+ {
+ if (voodoo->type >= VOODOO_BANSHEE)
+ voodoo->params.texBaseAddr1[1] = val & 0xfffff0;
+ else
+ voodoo->params.texBaseAddr1[1] = (val & 0x7ffff) << 3;
+ voodoo_recalc_tex(voodoo, 1);
+ }
+ break;
+ case SST_texBaseAddr2:
+ if (chip & CHIP_TREX0)
+ {
+ if (voodoo->type >= VOODOO_BANSHEE)
+ voodoo->params.texBaseAddr2[0] = val & 0xfffff0;
+ else
+ voodoo->params.texBaseAddr2[0] = (val & 0x7ffff) << 3;
+ voodoo_recalc_tex(voodoo, 0);
+ }
+ if (chip & CHIP_TREX1)
+ {
+ if (voodoo->type >= VOODOO_BANSHEE)
+ voodoo->params.texBaseAddr2[1] = val & 0xfffff0;
+ else
+ voodoo->params.texBaseAddr2[1] = (val & 0x7ffff) << 3;
+ voodoo_recalc_tex(voodoo, 1);
+ }
+ break;
+ case SST_texBaseAddr38:
+ if (chip & CHIP_TREX0)
+ {
+ if (voodoo->type >= VOODOO_BANSHEE)
+ voodoo->params.texBaseAddr38[0] = val & 0xfffff0;
+ else
+ voodoo->params.texBaseAddr38[0] = (val & 0x7ffff) << 3;
+ voodoo_recalc_tex(voodoo, 0);
+ }
+ if (chip & CHIP_TREX1)
+ {
+ if (voodoo->type >= VOODOO_BANSHEE)
+ voodoo->params.texBaseAddr38[1] = val & 0xfffff0;
+ else
+ voodoo->params.texBaseAddr38[1] = (val & 0x7ffff) << 3;
+ voodoo_recalc_tex(voodoo, 1);
+ }
+ break;
+
+ case SST_trexInit1:
+ if (chip & CHIP_TREX0)
+ voodoo->trexInit1[0] = val;
+ if (chip & CHIP_TREX1)
+ voodoo->trexInit1[1] = val;
+ break;
+
+ case SST_nccTable0_Y0:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].y[0] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].y[0] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable0_Y1:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].y[1] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].y[1] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable0_Y2:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].y[2] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].y[2] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable0_Y3:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].y[3] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].y[3] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+
+ case SST_nccTable0_I0:
+ if (!(val & (1 << 31)))
+ {
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].i[0] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].i[0] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ }
+ case SST_nccTable0_I2:
+ if (!(val & (1 << 31)))
+ {
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].i[2] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].i[2] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ }
+ case SST_nccTable0_Q0:
+ if (!(val & (1 << 31)))
+ {
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].q[0] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].q[0] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ }
+ case SST_nccTable0_Q2:
+ if (!(val & (1 << 31)))
+ {
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].i[2] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].i[2] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ }
+ if (val & (1 << 31))
+ {
+ int p = (val >> 23) & 0xfe;
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->palette[0][p].u = val | 0xff000000;
+ voodoo->palette_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->palette[1][p].u = val | 0xff000000;
+ voodoo->palette_dirty[1] = 1;
+ }
+ }
+ break;
+
+ case SST_nccTable0_I1:
+ if (!(val & (1 << 31)))
+ {
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].i[1] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].i[1] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ }
+ case SST_nccTable0_I3:
+ if (!(val & (1 << 31)))
+ {
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].i[3] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].i[3] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ }
+ case SST_nccTable0_Q1:
+ if (!(val & (1 << 31)))
+ {
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].q[1] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].q[1] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ }
+ case SST_nccTable0_Q3:
+ if (!(val & (1 << 31)))
+ {
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][0].q[3] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][0].q[3] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ }
+ if (val & (1 << 31))
+ {
+ int p = ((val >> 23) & 0xfe) | 0x01;
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->palette[0][p].u = val | 0xff000000;
+ voodoo->palette_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->palette[1][p].u = val | 0xff000000;
+ voodoo->palette_dirty[1] = 1;
+ }
+ }
+ break;
+
+ case SST_nccTable1_Y0:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].y[0] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].y[0] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_Y1:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].y[1] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].y[1] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_Y2:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].y[2] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].y[2] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_Y3:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].y[3] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].y[3] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_I0:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].i[0] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].i[0] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_I1:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].i[1] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].i[1] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_I2:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].i[2] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].i[2] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_I3:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].i[3] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].i[3] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_Q0:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].q[0] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].q[0] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_Q1:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].q[1] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].q[1] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_Q2:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].q[2] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].q[2] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+ case SST_nccTable1_Q3:
+ if (chip & CHIP_TREX0)
+ {
+ voodoo->nccTable[0][1].q[3] = val;
+ voodoo->ncc_dirty[0] = 1;
+ }
+ if (chip & CHIP_TREX1)
+ {
+ voodoo->nccTable[1][1].q[3] = val;
+ voodoo->ncc_dirty[1] = 1;
+ }
+ break;
+
+ case SST_userIntrCMD:
+ fatal("userIntrCMD write %08x from FIFO\n", val);
+ break;
+
+
+ case SST_leftOverlayBuf:
+ voodoo->leftOverlayBuf = val;
+ break;
+ }
+}
--- /dev/null
+void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p); /* Handles a 32-bit write of
+        val to the Voodoo register selected by addr. p is an opaque context
+        pointer; NOTE(review): presumably the voodoo_t device state - confirm
+        against the definition. */
--- /dev/null
+enum /* Byte offsets of the Voodoo registers; the register write handler uses
+       * them as switch case labels. Several names share one offset (the
+       * entries marked Banshee reuse offsets of other registers), so the
+       * values are not unique. */
+{
+ SST_status = 0x000,
+ SST_intrCtrl = 0x004,
+
+ SST_vertexAx = 0x008,
+ SST_vertexAy = 0x00c,
+ SST_vertexBx = 0x010,
+ SST_vertexBy = 0x014,
+ SST_vertexCx = 0x018,
+ SST_vertexCy = 0x01c,
+
+ SST_startR = 0x0020,
+ SST_startG = 0x0024,
+ SST_startB = 0x0028,
+ SST_startZ = 0x002c,
+ SST_startA = 0x0030,
+ SST_startS = 0x0034,
+ SST_startT = 0x0038,
+ SST_startW = 0x003c,
+
+ SST_dRdX = 0x0040,
+ SST_dGdX = 0x0044,
+ SST_dBdX = 0x0048,
+ SST_dZdX = 0x004c,
+ SST_dAdX = 0x0050,
+ SST_dSdX = 0x0054,
+ SST_dTdX = 0x0058,
+ SST_dWdX = 0x005c,
+
+ SST_dRdY = 0x0060,
+ SST_dGdY = 0x0064,
+ SST_dBdY = 0x0068,
+ SST_dZdY = 0x006c,
+ SST_dAdY = 0x0070,
+ SST_dSdY = 0x0074,
+ SST_dTdY = 0x0078,
+ SST_dWdY = 0x007c,
+
+ SST_triangleCMD = 0x0080,
+
+ SST_fvertexAx = 0x088, /* f-prefixed registers sit 0x80 above their plain counterparts */
+ SST_fvertexAy = 0x08c,
+ SST_fvertexBx = 0x090,
+ SST_fvertexBy = 0x094,
+ SST_fvertexCx = 0x098,
+ SST_fvertexCy = 0x09c,
+
+ SST_fstartR = 0x00a0,
+ SST_fstartG = 0x00a4,
+ SST_fstartB = 0x00a8,
+ SST_fstartZ = 0x00ac,
+ SST_fstartA = 0x00b0,
+ SST_fstartS = 0x00b4,
+ SST_fstartT = 0x00b8,
+ SST_fstartW = 0x00bc,
+
+ SST_fdRdX = 0x00c0, /* the write handler reads the written word as a float and scales it to fixed point */
+ SST_fdGdX = 0x00c4,
+ SST_fdBdX = 0x00c8,
+ SST_fdZdX = 0x00cc,
+ SST_fdAdX = 0x00d0,
+ SST_fdSdX = 0x00d4,
+ SST_fdTdX = 0x00d8,
+ SST_fdWdX = 0x00dc,
+
+ SST_fdRdY = 0x00e0,
+ SST_fdGdY = 0x00e4,
+ SST_fdBdY = 0x00e8,
+ SST_fdZdY = 0x00ec,
+ SST_fdAdY = 0x00f0,
+ SST_fdSdY = 0x00f4,
+ SST_fdTdY = 0x00f8,
+ SST_fdWdY = 0x00fc,
+
+ SST_ftriangleCMD = 0x0100, /* a write queues a triangle; bit 31 of the value is kept as params.sign */
+
+ SST_fbzColorPath = 0x104,
+ SST_fogMode = 0x108,
+
+ SST_alphaMode = 0x10c,
+ SST_fbzMode = 0x110,
+ SST_lfbMode = 0x114,
+
+ SST_clipLeftRight = 0x118, /* low field = right/highY, field at bit 16 = left/lowY; 12 bits wide on Voodoo 2 and later, 10 bits otherwise */
+ SST_clipLowYHighY = 0x11c,
+
+ SST_nopCMD = 0x120, /* a write also zeroes the fbi pixel counters */
+ SST_fastfillCMD = 0x124,
+ SST_swapbufferCMD = 0x128,
+
+ SST_fogColor = 0x12c,
+ SST_zaColor = 0x130,
+ SST_chromaKey = 0x134,
+
+ SST_userIntrCMD = 0x13c,
+ SST_stipple = 0x140,
+ SST_color0 = 0x144,
+ SST_color1 = 0x148,
+
+ SST_fbiPixelsIn = 0x14c,
+ SST_fbiChromaFail = 0x150,
+ SST_fbiZFuncFail = 0x154,
+ SST_fbiAFuncFail = 0x158,
+ SST_fbiPixelsOut = 0x15c,
+
+ SST_fogTable00 = 0x160, /* each fog table register packs two table entries (dfog/fog byte pairs) */
+ SST_fogTable01 = 0x164,
+ SST_fogTable02 = 0x168,
+ SST_fogTable03 = 0x16c,
+ SST_fogTable04 = 0x170,
+ SST_fogTable05 = 0x174,
+ SST_fogTable06 = 0x178,
+ SST_fogTable07 = 0x17c,
+ SST_fogTable08 = 0x180,
+ SST_fogTable09 = 0x184,
+ SST_fogTable0a = 0x188,
+ SST_fogTable0b = 0x18c,
+ SST_fogTable0c = 0x190,
+ SST_fogTable0d = 0x194,
+ SST_fogTable0e = 0x198,
+ SST_fogTable0f = 0x19c,
+ SST_fogTable10 = 0x1a0,
+ SST_fogTable11 = 0x1a4,
+ SST_fogTable12 = 0x1a8,
+ SST_fogTable13 = 0x1ac,
+ SST_fogTable14 = 0x1b0,
+ SST_fogTable15 = 0x1b4,
+ SST_fogTable16 = 0x1b8,
+ SST_fogTable17 = 0x1bc,
+ SST_fogTable18 = 0x1c0,
+ SST_fogTable19 = 0x1c4,
+ SST_fogTable1a = 0x1c8,
+ SST_fogTable1b = 0x1cc,
+ SST_fogTable1c = 0x1d0,
+ SST_fogTable1d = 0x1d4,
+ SST_fogTable1e = 0x1d8,
+ SST_fogTable1f = 0x1dc,
+
+ SST_cmdFifoBaseAddr = 0x1e0,
+ SST_cmdFifoBump = 0x1e4,
+ SST_cmdFifoRdPtr = 0x1e8,
+ SST_cmdFifoAMin = 0x1ec,
+ SST_cmdFifoAMax = 0x1f0,
+ SST_cmdFifoDepth = 0x1f4,
+ SST_cmdFifoHoles = 0x1f8,
+
+ SST_colBufferAddr = 0x1ec, /*Banshee; same offset as SST_cmdFifoAMin*/
+ SST_colBufferStride = 0x1f0, /*Banshee; same offset as SST_cmdFifoAMax*/
+ SST_auxBufferAddr = 0x1f4, /*Banshee; same offset as SST_cmdFifoDepth*/
+ SST_auxBufferStride = 0x1f8, /*Banshee; same offset as SST_cmdFifoHoles*/
+
+ SST_clipLeftRight1 = 0x200, /*Banshee; same offset as SST_fbiInit4*/
+ SST_clipTopBottom1 = 0x204, /*Banshee; same offset as SST_vRetrace*/
+
+ SST_fbiInit4 = 0x200,
+ SST_vRetrace = 0x204,
+ SST_backPorch = 0x208,
+ SST_videoDimensions = 0x20c,
+ SST_fbiInit0 = 0x210,
+ SST_fbiInit1 = 0x214,
+ SST_fbiInit2 = 0x218,
+ SST_fbiInit3 = 0x21c,
+ SST_hSync = 0x220,
+ SST_vSync = 0x224,
+ SST_clutData = 0x228,
+ SST_dacData = 0x22c,
+
+ SST_scrFilter = 0x230,
+
+ SST_hvRetrace = 0x240,
+ SST_fbiInit5 = 0x244,
+ SST_fbiInit6 = 0x248,
+ SST_fbiInit7 = 0x24c,
+
+ SST_swapPending = 0x24c, /*Banshee; same offset as SST_fbiInit7*/
+ SST_leftOverlayBuf = 0x250, /*Banshee*/
+
+ SST_sSetupMode = 0x260, /* setup registers: the s* vertex attributes are written into a staging vertex (verts[3]) */
+ SST_sVx = 0x264,
+ SST_sVy = 0x268,
+ SST_sARGB = 0x26c,
+ SST_sRed = 0x270,
+ SST_sGreen = 0x274,
+ SST_sBlue = 0x278,
+ SST_sAlpha = 0x27c,
+ SST_sVz = 0x280,
+ SST_sWb = 0x284,
+ SST_sW0 = 0x288,
+ SST_sS0 = 0x28c,
+ SST_sT0 = 0x290,
+ SST_sW1 = 0x294,
+ SST_sS1 = 0x298,
+ SST_sT1 = 0x29c,
+
+ SST_sDrawTriCMD = 0x2a0, /* copies the staging vertex into one of the three triangle slots */
+ SST_sBeginTriCMD = 0x2a4, /* copies the staging vertex into all three slots and restarts vertex ageing */
+
+ SST_bltSrcBaseAddr = 0x2c0, /* blitter registers */
+ SST_bltDstBaseAddr = 0x2c4,
+ SST_bltXYStrides = 0x2c8,
+ SST_bltSrcChromaRange = 0x2cc,
+ SST_bltDstChromaRange = 0x2d0,
+ SST_bltClipX = 0x2d4,
+ SST_bltClipY = 0x2d8,
+
+ SST_bltSrcXY = 0x2e0,
+ SST_bltDstXY = 0x2e4, /* bit 31 of a write to bltDstXY, bltSize or bltCommand starts the blit */
+ SST_bltSize = 0x2e8,
+ SST_bltRop = 0x2ec,
+ SST_bltColor = 0x2f0,
+
+ SST_bltCommand = 0x2f8,
+ SST_bltData = 0x2fc,
+
+ SST_textureMode = 0x300, /* texture registers; the write handler applies them to TREX 0 and/or TREX 1 per the chip mask */
+ SST_tLOD = 0x304,
+ SST_tDetail = 0x308,
+ SST_texBaseAddr = 0x30c,
+ SST_texBaseAddr1 = 0x310,
+ SST_texBaseAddr2 = 0x314,
+ SST_texBaseAddr38 = 0x318,
+
+ SST_trexInit1 = 0x320,
+
+ SST_nccTable0_Y0 = 0x324,
+ SST_nccTable0_Y1 = 0x328,
+ SST_nccTable0_Y2 = 0x32c,
+ SST_nccTable0_Y3 = 0x330,
+ SST_nccTable0_I0 = 0x334, /* writes to the nccTable0 I/Q registers with bit 31 set are stored as palette entries instead */
+ SST_nccTable0_I1 = 0x338,
+ SST_nccTable0_I2 = 0x33c,
+ SST_nccTable0_I3 = 0x340,
+ SST_nccTable0_Q0 = 0x344,
+ SST_nccTable0_Q1 = 0x348,
+ SST_nccTable0_Q2 = 0x34c,
+ SST_nccTable0_Q3 = 0x350,
+
+ SST_nccTable1_Y0 = 0x354,
+ SST_nccTable1_Y1 = 0x358,
+ SST_nccTable1_Y2 = 0x35c,
+ SST_nccTable1_Y3 = 0x360,
+ SST_nccTable1_I0 = 0x364,
+ SST_nccTable1_I1 = 0x368,
+ SST_nccTable1_I2 = 0x36c,
+ SST_nccTable1_I3 = 0x370,
+ SST_nccTable1_Q0 = 0x374,
+ SST_nccTable1_Q1 = 0x378,
+ SST_nccTable1_Q2 = 0x37c,
+ SST_nccTable1_Q3 = 0x380,
+
+ SST_remap_status = 0x000 | 0x400, /* remapped aliases: bit 0x400 is set on every SST_remap_* offset */
+
+ SST_remap_vertexAx = 0x008 | 0x400,
+ SST_remap_vertexAy = 0x00c | 0x400,
+ SST_remap_vertexBx = 0x010 | 0x400,
+ SST_remap_vertexBy = 0x014 | 0x400,
+ SST_remap_vertexCx = 0x018 | 0x400,
+ SST_remap_vertexCy = 0x01c | 0x400,
+
+ SST_remap_startR = 0x0020 | 0x400, /* remapped layout groups start, dX, dY of one parameter at consecutive offsets (stride 0xc per parameter) */
+ SST_remap_startG = 0x002c | 0x400,
+ SST_remap_startB = 0x0038 | 0x400,
+ SST_remap_startZ = 0x0044 | 0x400,
+ SST_remap_startA = 0x0050 | 0x400,
+ SST_remap_startS = 0x005c | 0x400,
+ SST_remap_startT = 0x0068 | 0x400,
+ SST_remap_startW = 0x0074 | 0x400,
+
+ SST_remap_dRdX = 0x0024 | 0x400,
+ SST_remap_dGdX = 0x0030 | 0x400,
+ SST_remap_dBdX = 0x003c | 0x400,
+ SST_remap_dZdX = 0x0048 | 0x400,
+ SST_remap_dAdX = 0x0054 | 0x400,
+ SST_remap_dSdX = 0x0060 | 0x400,
+ SST_remap_dTdX = 0x006c | 0x400,
+ SST_remap_dWdX = 0x0078 | 0x400,
+
+ SST_remap_dRdY = 0x0028 | 0x400,
+ SST_remap_dGdY = 0x0034 | 0x400,
+ SST_remap_dBdY = 0x0040 | 0x400,
+ SST_remap_dZdY = 0x004c | 0x400,
+ SST_remap_dAdY = 0x0058 | 0x400,
+ SST_remap_dSdY = 0x0064 | 0x400,
+ SST_remap_dTdY = 0x0070 | 0x400,
+ SST_remap_dWdY = 0x007c | 0x400,
+
+ SST_remap_triangleCMD = 0x0080 | 0x400,
+
+ SST_remap_fvertexAx = 0x088 | 0x400,
+ SST_remap_fvertexAy = 0x08c | 0x400,
+ SST_remap_fvertexBx = 0x090 | 0x400,
+ SST_remap_fvertexBy = 0x094 | 0x400,
+ SST_remap_fvertexCx = 0x098 | 0x400,
+ SST_remap_fvertexCy = 0x09c | 0x400,
+
+ SST_remap_fstartR = 0x00a0 | 0x400,
+ SST_remap_fstartG = 0x00ac | 0x400,
+ SST_remap_fstartB = 0x00b8 | 0x400,
+ SST_remap_fstartZ = 0x00c4 | 0x400,
+ SST_remap_fstartA = 0x00d0 | 0x400,
+ SST_remap_fstartS = 0x00dc | 0x400,
+ SST_remap_fstartT = 0x00e8 | 0x400,
+ SST_remap_fstartW = 0x00f4 | 0x400,
+
+ SST_remap_fdRdX = 0x00a4 | 0x400, /* the write handler shares one case between each fd?d? register and its SST_remap_ alias */
+ SST_remap_fdGdX = 0x00b0 | 0x400,
+ SST_remap_fdBdX = 0x00bc | 0x400,
+ SST_remap_fdZdX = 0x00c8 | 0x400,
+ SST_remap_fdAdX = 0x00d4 | 0x400,
+ SST_remap_fdSdX = 0x00e0 | 0x400,
+ SST_remap_fdTdX = 0x00ec | 0x400,
+ SST_remap_fdWdX = 0x00f8 | 0x400,
+
+ SST_remap_fdRdY = 0x00a8 | 0x400,
+ SST_remap_fdGdY = 0x00b4 | 0x400,
+ SST_remap_fdBdY = 0x00c0 | 0x400,
+ SST_remap_fdZdY = 0x00cc | 0x400,
+ SST_remap_fdAdY = 0x00d8 | 0x400,
+ SST_remap_fdSdY = 0x00e4 | 0x400,
+ SST_remap_fdTdY = 0x00f0 | 0x400,
+ SST_remap_fdWdY = 0x00fc | 0x400,
+};
+
+/*
+ * LFB write-buffer select: a two-bit field at bits 5:4 of lfbMode.
+ * voodoo_recalc() masks lfbMode with LFB_WRITE_MASK and points LFB writes at
+ * the front or back buffer; any other value falls back to the front buffer.
+ */
+enum {
+ LFB_WRITE_FRONT = (0 << 4),
+ LFB_WRITE_BACK = (1 << 4),
+ LFB_WRITE_MASK = (3 << 4)
+};
+
+/*
+ * LFB read-buffer select: a two-bit field at bits 7:6 of lfbMode.
+ * voodoo_recalc() masks lfbMode with LFB_READ_MASK and reads from the front,
+ * back or aux buffer accordingly.
+ */
+enum {
+ LFB_READ_FRONT = (0 << 6),
+ LFB_READ_BACK = (1 << 6),
+ LFB_READ_AUX = (2 << 6),
+ LFB_READ_MASK = (3 << 6)
+};
+
+/*
+ * LFB pixel format codes. LFB_FORMAT_MASK has the same value as
+ * LFB_FORMAT_DEPTH (15), i.e. the format occupies a four-bit field.
+ * NOTE(review): format meanings are taken from the constant names only.
+ */
+enum {
+ LFB_FORMAT_RGB565 = 0x0,
+ LFB_FORMAT_RGB555 = 0x1,
+ LFB_FORMAT_ARGB1555 = 0x2,
+ LFB_FORMAT_ARGB8888 = 0x5,
+ LFB_FORMAT_DEPTH = 0xf,
+ LFB_FORMAT_MASK = 0xf
+};
+
+/*
+ * Two single-bit flags that can be OR-ed together.
+ * NOTE(review): presumably they mark which planes an LFB write updates; the
+ * users are outside this view.
+ */
+enum {
+ LFB_WRITE_COLOUR = (1 << 0),
+ LFB_WRITE_DEPTH = (1 << 1)
+};
+
+/*
+ * Bit flags and fields named after the fbzMode register.
+ * NOTE(review): the code testing these bits is not visible here; meanings are
+ * taken from the constant names only.
+ */
+enum {
+ FBZ_CHROMAKEY = 0x00000002, /* bit 1 */
+ FBZ_W_BUFFER = 0x00000008, /* bit 3 */
+ FBZ_DEPTH_ENABLE = 0x00000010, /* bit 4 */
+
+ FBZ_DITHER = 0x00000100, /* bit 8 */
+ FBZ_RGB_WMASK = 0x00000200, /* bit 9 */
+ FBZ_DEPTH_WMASK = 0x00000400, /* bit 10 */
+ FBZ_DITHER_2x2 = 0x00000800, /* bit 11 */
+
+ /* Two-bit draw-buffer select at bits 15:14 */
+ FBZ_DRAW_FRONT = (0 << 14),
+ FBZ_DRAW_BACK = (1 << 14),
+ FBZ_DRAW_MASK = (3 << 14),
+
+ FBZ_DEPTH_BIAS = 0x00010000, /* bit 16 */
+
+ FBZ_DEPTH_SOURCE = 0x00100000, /* bit 20 */
+
+ FBZ_PARAM_ADJUST = 0x04000000 /* bit 26 */
+};
+
+/*
+ * Texture format codes in the range 0-14 (7 is not assigned).
+ * NOTE(review): the register write handler extracts a four-bit tformat from
+ * bits 11:8 of textureMode; presumably these are its values - confirm against
+ * the texture code.
+ */
+enum {
+ TEX_RGB332 = 0,
+ TEX_Y4I2Q2 = 1,
+ TEX_A8 = 2,
+ TEX_I8 = 3,
+ TEX_AI8 = 4,
+ TEX_PAL8 = 5,
+ TEX_APAL8 = 6,
+ /* 7 is skipped */
+ TEX_ARGB8332 = 8,
+ TEX_A8Y4I2Q2 = 9,
+ TEX_R5G6B5 = 10,
+ TEX_ARGB1555 = 11,
+ TEX_ARGB4444 = 12,
+ TEX_A8I8 = 13,
+ TEX_APAL88 = 14
+};
+
+/*
+ * Single-bit flags named after the textureMode register.
+ * NOTE(review): meanings come from the names; the code testing them is not
+ * visible here.
+ */
+enum {
+ TEXTUREMODE_NCC_SEL = 0x00000020, /* bit 5 */
+ TEXTUREMODE_TCLAMPS = 0x00000040, /* bit 6 */
+ TEXTUREMODE_TCLAMPT = 0x00000080, /* bit 7 */
+ TEXTUREMODE_TRILINEAR = 0x40000000 /* bit 30 */
+};
+
+/* Flag bits named after the fbiInit0 register (bits 0 and 1). */
+enum {
+ FBIINIT0_VGA_PASS = 0x1,
+ FBIINIT0_GRAPHICS_RESET = 0x2
+};
+
+/* Flag bits named after the fbiInit1 register. */
+enum {
+ FBIINIT1_MULTI_SST = 0x000004, /* bit 2; Voodoo Graphics only */
+ FBIINIT1_VIDEO_RESET = 0x000100, /* bit 8 */
+ FBIINIT1_SLI_ENABLE = 0x800000 /* bit 23 */
+};
+
+/* Mask covering the two-bit swap-algorithm field at bits 10:9 (named after fbiInit2). */
+enum {
+ FBIINIT2_SWAP_ALGORITHM_MASK = 0x600
+};
+
+/*
+ * The four values of the two-bit swap-algorithm field at bits 10:9,
+ * i.e. field values 0-3 already shifted into position.
+ */
+enum {
+ FBIINIT2_SWAP_ALGORITHM_DAC_VSYNC = 0x000, /* field value 0 */
+ FBIINIT2_SWAP_ALGORITHM_DAC_DATA = 0x200, /* field value 1 */
+ FBIINIT2_SWAP_ALGORITHM_PCI_FIFO_STALL = 0x400, /* field value 2 */
+ FBIINIT2_SWAP_ALGORITHM_SLI_SYNC = 0x600 /* field value 3 */
+};
+
+enum
+{
+ FBIINIT3_REMAP = 1
+};
+
+enum
+{
+ FBIINIT5_MULTI_CVG = (1 << 14)
+};
+
+enum
+{
+ FBIINIT7_CMDFIFO_ENABLE = (1 << 8)
+};
+
+/*Selector values for the colour/alpha combine units and the texture combine
+  units. NOTE(review): the pairing of each enum with a register field below
+  is inferred from the names of the accessor macros in this header (cc_*,
+  cca_*, tc_*, tca_*) - confirm against the pixel pipeline code.*/
+
+/*Colour combine local colour source*/
+enum
+{
+ CC_LOCALSELECT_ITER_RGB = 0,
+ CC_LOCALSELECT_TEX = 1,
+ CC_LOCALSELECT_COLOR1 = 2,
+ CC_LOCALSELECT_LFB = 3
+};
+
+/*Alpha combine local alpha source (presumably the cca_localselect field)*/
+enum
+{
+ CCA_LOCALSELECT_ITER_A = 0,
+ CCA_LOCALSELECT_COLOR0 = 1,
+ CCA_LOCALSELECT_ITER_Z = 2
+};
+
+/*'Other' colour source (presumably the _rgb_sel field)*/
+enum
+{
+ C_SEL_ITER_RGB = 0,
+ C_SEL_TEX = 1,
+ C_SEL_COLOR1 = 2,
+ C_SEL_LFB = 3
+};
+
+/*'Other' alpha source (presumably the a_sel field)*/
+enum
+{
+ A_SEL_ITER_A = 0,
+ A_SEL_TEX = 1,
+ A_SEL_COLOR1 = 2,
+ A_SEL_LFB = 3
+};
+
+/*Colour combine blend factor select (presumably the cc_mselect field)*/
+enum
+{
+ CC_MSELECT_ZERO = 0,
+ CC_MSELECT_CLOCAL = 1,
+ CC_MSELECT_AOTHER = 2,
+ CC_MSELECT_ALOCAL = 3,
+ CC_MSELECT_TEX = 4,
+ CC_MSELECT_TEXRGB = 5
+};
+
+/*Alpha combine blend factor select (presumably the cca_mselect field)*/
+enum
+{
+ CCA_MSELECT_ZERO = 0,
+ CCA_MSELECT_ALOCAL = 1,
+ CCA_MSELECT_AOTHER = 2,
+ CCA_MSELECT_ALOCAL2 = 3,
+ CCA_MSELECT_TEX = 4
+};
+
+/*Texture colour combine blend factor select; these are the case labels used
+  when switching on tc_mselect / tc_mselect_1*/
+enum
+{
+ TC_MSELECT_ZERO = 0,
+ TC_MSELECT_CLOCAL = 1,
+ TC_MSELECT_AOTHER = 2,
+ TC_MSELECT_ALOCAL = 3,
+ TC_MSELECT_DETAIL = 4,
+ TC_MSELECT_LOD_FRAC = 5
+};
+
+/*Texture alpha combine blend factor select; these are the case labels used
+  when switching on tca_mselect / tca_mselect_1*/
+enum
+{
+ TCA_MSELECT_ZERO = 0,
+ TCA_MSELECT_CLOCAL = 1,
+ TCA_MSELECT_AOTHER = 2,
+ TCA_MSELECT_ALOCAL = 3,
+ TCA_MSELECT_DETAIL = 4,
+ TCA_MSELECT_LOD_FRAC = 5
+};
+
+/*Colour combine add select (presumably the two-bit cc_add field)*/
+enum
+{
+ CC_ADD_CLOCAL = 1,
+ CC_ADD_ALOCAL = 2
+};
+
+/*Alpha combine add select (presumably the two-bit cca_add field)*/
+enum
+{
+ CCA_ADD_CLOCAL = 1,
+ CCA_ADD_ALOCAL = 2
+};
+
+/*Alpha blend factor codes. NOTE(review): presumably the values of the 4-bit
+  src_afunc / dest_afunc fields of alphaMode - confirm against the blender.*/
+enum
+{
+ AFUNC_AZERO = 0x0,
+ AFUNC_ASRC_ALPHA = 0x1,
+ AFUNC_A_COLOR = 0x2,
+ AFUNC_ADST_ALPHA = 0x3,
+ AFUNC_AONE = 0x4,
+ AFUNC_AOMSRC_ALPHA = 0x5,
+ AFUNC_AOM_COLOR = 0x6,
+ AFUNC_AOMDST_ALPHA = 0x7,
+ AFUNC_ASATURATE = 0xf
+};
+
+/*Shares the numeric value 0xf with AFUNC_ASATURATE above.
+  NOTE(review): presumably 0xf has this meaning for one of the two blend
+  factors and ASATURATE for the other - confirm which.*/
+enum
+{
+ AFUNC_ACOLORBEFOREFOG = 0xf
+};
+
+/*Comparison codes. NOTE(review): presumably the values of the 3-bit
+  alpha_func field of alphaMode - confirm against the alpha test.*/
+enum
+{
+ AFUNC_NEVER = 0,
+ AFUNC_LESSTHAN = 1,
+ AFUNC_EQUAL = 2,
+ AFUNC_LESSTHANEQUAL = 3,
+ AFUNC_GREATERTHAN = 4,
+ AFUNC_NOTEQUAL = 5,
+ AFUNC_GREATERTHANEQUAL = 6,
+ AFUNC_ALWAYS = 7
+};
+
+/*Comparison codes with the same numbering as the AFUNC comparison codes.
+  NOTE(review): presumably the values of the 3-bit depth_op field of fbzMode -
+  confirm against DEPTH_TEST.*/
+enum
+{
+ DEPTHOP_NEVER = 0,
+ DEPTHOP_LESSTHAN = 1,
+ DEPTHOP_EQUAL = 2,
+ DEPTHOP_LESSTHANEQUAL = 3,
+ DEPTHOP_GREATERTHAN = 4,
+ DEPTHOP_NOTEQUAL = 5,
+ DEPTHOP_GREATERTHANEQUAL = 6,
+ DEPTHOP_ALWAYS = 7
+};
+
+/*Fog control bits. Note that FOG_W (0x18) has both the FOG_ALPHA (0x08) and
+  FOG_Z (0x10) bits set, so it must be matched on both bits, not tested as a
+  single flag. NOTE(review): presumably bits of the fogMode register - confirm.*/
+enum
+{
+ FOG_ENABLE = 0x01,
+ FOG_ADD = 0x02,
+ FOG_MULT = 0x04,
+ FOG_ALPHA = 0x08,
+ FOG_Z = 0x10,
+ FOG_W = 0x18,
+ FOG_CONSTANT = 0x20
+};
+
+/*Bits of the per-TMU tLOD register, tested as (params->tLOD[n] & LOD_xxx)*/
+enum
+{
+ LOD_ODD = (1 << 18),
+ LOD_SPLIT = (1 << 19),
+ LOD_S_IS_WIDER = (1 << 20),
+ LOD_TMULTIBASEADDR = (1 << 24),
+ LOD_TMIRROR_S = (1 << 28), /*Mirror the S coordinate*/
+ LOD_TMIRROR_T = (1 << 29) /*Mirror the T coordinate*/
+};
+
+/*Command identifiers, numbered consecutively from CMD_INVALID = 0.
+  NOTE(review): the consumer is not part of this section - presumably the
+  render thread command queue.*/
+enum
+{
+ CMD_INVALID = 0,
+ CMD_DRAWTRIANGLE,
+ CMD_FASTFILL,
+ CMD_SWAPBUF
+};
+
+/*Bits of the fbzColorPath register not covered by the accessor macros*/
+enum
+{
+ FBZCP_TEXTURE_ENABLED = (1 << 27)
+};
+
+/*Blitter command bits. NOTE(review): presumably select tiled source and
+  destination addressing - confirm against the blitter code.*/
+enum
+{
+ BLTCMD_SRC_TILED = (1 << 14),
+ BLTCMD_DST_TILED = (1 << 15)
+};
+
+/*Bits of the initEnable register. The rasteriser uses this bit to decide
+  which scanline parity a card skips when SLI is enabled.*/
+enum
+{
+ INITENABLE_SLI_MASTER_SLAVE = (1 << 11)
+};
+
+/*Setup mode bits. NOTE(review): presumably bits 0-7 flag which vertex
+  parameters are supplied to the triangle setup unit and bits 16-19 control
+  strip and culling behaviour - confirm against the setup code.*/
+enum
+{
+ SETUPMODE_RGB = (1 << 0),
+ SETUPMODE_ALPHA = (1 << 1),
+ SETUPMODE_Z = (1 << 2),
+ SETUPMODE_Wb = (1 << 3),
+ SETUPMODE_W0 = (1 << 4),
+ SETUPMODE_S0_T0 = (1 << 5),
+ SETUPMODE_W1 = (1 << 6),
+ SETUPMODE_S1_T1 = (1 << 7),
+
+ SETUPMODE_STRIP_MODE = (1 << 16),
+ SETUPMODE_CULLING_ENABLE = (1 << 17),
+ SETUPMODE_CULLING_SIGN = (1 << 18),
+ SETUPMODE_DISABLE_PINGPONG = (1 << 19)
+};
+
+/*Patterns matched against textureMode[0] by the rasteriser. When
+  (textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH or
+  (textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL it counts
+  one texel per pixel, otherwise two.*/
+#define TEXTUREMODE_MASK 0x3ffff000
+#define TEXTUREMODE_PASSTHROUGH 0
+
+#define TEXTUREMODE_LOCAL_MASK 0x00643000
+#define TEXTUREMODE_LOCAL 0x00241000
+
+
+/*Both macros expand to an expression on a variable named 'voodoo', which
+  must be in scope wherever they are used.
+  SLI_ENABLED is non-zero (the masked bit, not 1) when fbiInit1 has SLI on.
+  TRIPLE_BUFFER is true when bit 4 of fbiInit2 is set, or when bits 9-10 of
+  fbiInit5 read as binary 10.*/
+#define SLI_ENABLED (voodoo->fbiInit1 & FBIINIT1_SLI_ENABLE)
+#define TRIPLE_BUFFER ((voodoo->fbiInit2 & 0x10) || (voodoo->fbiInit5 & 0x600) == 0x400)
+
+
+/*Register field accessors. Every macro expands to an expression on a
+  variable named 'params', which must be in scope wherever it is used.
+  Single-bit accessors yield the masked bit itself (zero or non-zero, not
+  0/1); multi-bit accessors yield the field shifted down to bit 0.*/
+
+/*fbzColorPath: colour combine (cc_*) and alpha combine (cca_*) controls.
+  cc_add is the two-bit field in bits 14-15; cc_add_alocal is bit 15 of the
+  same field.*/
+#define _rgb_sel ( params->fbzColorPath & 3)
+#define a_sel ( (params->fbzColorPath >> 2) & 3)
+#define cc_localselect ( params->fbzColorPath & (1 << 4))
+#define cca_localselect ( (params->fbzColorPath >> 5) & 3)
+#define cc_localselect_override ( params->fbzColorPath & (1 << 7))
+#define cc_zero_other ( params->fbzColorPath & (1 << 8))
+#define cc_sub_clocal ( params->fbzColorPath & (1 << 9))
+#define cc_mselect ( (params->fbzColorPath >> 10) & 7)
+#define cc_reverse_blend ( params->fbzColorPath & (1 << 13))
+#define cc_add ( (params->fbzColorPath >> 14) & 3)
+#define cc_add_alocal ( params->fbzColorPath & (1 << 15))
+#define cc_invert_output ( params->fbzColorPath & (1 << 16))
+#define cca_zero_other ( params->fbzColorPath & (1 << 17))
+#define cca_sub_clocal ( params->fbzColorPath & (1 << 18))
+#define cca_mselect ( (params->fbzColorPath >> 19) & 7)
+#define cca_reverse_blend ( params->fbzColorPath & (1 << 22))
+#define cca_add ( (params->fbzColorPath >> 23) & 3)
+#define cca_invert_output ( params->fbzColorPath & (1 << 25))
+/*textureMode[0]: TMU0 texture colour (tc_*) and alpha (tca_*) combine controls*/
+#define tc_zero_other (params->textureMode[0] & (1 << 12))
+#define tc_sub_clocal (params->textureMode[0] & (1 << 13))
+#define tc_mselect ((params->textureMode[0] >> 14) & 7)
+#define tc_reverse_blend (params->textureMode[0] & (1 << 17))
+#define tc_add_clocal (params->textureMode[0] & (1 << 18))
+#define tc_add_alocal (params->textureMode[0] & (1 << 19))
+#define tc_invert_output (params->textureMode[0] & (1 << 20))
+#define tca_zero_other (params->textureMode[0] & (1 << 21))
+#define tca_sub_clocal (params->textureMode[0] & (1 << 22))
+#define tca_mselect ((params->textureMode[0] >> 23) & 7)
+#define tca_reverse_blend (params->textureMode[0] & (1 << 26))
+#define tca_add_clocal (params->textureMode[0] & (1 << 27))
+#define tca_add_alocal (params->textureMode[0] & (1 << 28))
+#define tca_invert_output (params->textureMode[0] & (1 << 29))
+
+/*textureMode[1]: the same bit positions for TMU1. There are no _1 variants
+  of zero_other or invert_output.*/
+#define tc_sub_clocal_1 (params->textureMode[1] & (1 << 13))
+#define tc_mselect_1 ((params->textureMode[1] >> 14) & 7)
+#define tc_reverse_blend_1 (params->textureMode[1] & (1 << 17))
+#define tc_add_clocal_1 (params->textureMode[1] & (1 << 18))
+#define tc_add_alocal_1 (params->textureMode[1] & (1 << 19))
+#define tca_sub_clocal_1 (params->textureMode[1] & (1 << 22))
+#define tca_mselect_1 ((params->textureMode[1] >> 23) & 7)
+#define tca_reverse_blend_1 (params->textureMode[1] & (1 << 26))
+#define tca_add_clocal_1 (params->textureMode[1] & (1 << 27))
+#define tca_add_alocal_1 (params->textureMode[1] & (1 << 28))
+
+/*alphaMode fields (a_ref is the top byte) and fbzMode fields*/
+#define src_afunc ( (params->alphaMode >> 8) & 0xf)
+#define dest_afunc ( (params->alphaMode >> 12) & 0xf)
+#define alpha_func ( (params->alphaMode >> 1) & 7)
+#define a_ref ( params->alphaMode >> 24)
+#define depth_op ( (params->fbzMode >> 5) & 7)
+#define dither ( params->fbzMode & FBZ_DITHER)
+#define dither2x2 (params->fbzMode & FBZ_DITHER_2x2)
--- /dev/null
+#include <math.h>
+#include <stddef.h>
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_dither.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+#include "vid_voodoo_texture.h"
+
+/*Working state of the software rasteriser while it draws one triangle.
+  NOTE(review): this file includes generated-code headers and <stddef.h>;
+  presumably the recompiler addresses these fields by offset, so confirm
+  before reordering or resizing any member.*/
+typedef struct voodoo_state_t
+{
+ /*Span edges; advanced by dx1 / dx2 for every scanline skipped*/
+ int xstart, xend, xdir;
+ /*Values of the iterated parameters at the start of the current scanline;
+   advanced by the matching d?dY gradient from params*/
+ uint32_t base_r, base_g, base_b, base_a, base_z;
+ struct
+ {
+ int64_t base_s, base_t, base_w;
+ int lod; /*Base LOD for this TMU, before the per-pixel W term is added*/
+ } tmu[2];
+ int64_t base_w;
+ /*LOD of the TMU fetched most recently; voodoo_tmu_fetch() leaves the
+   integer mip level here*/
+ int lod;
+ int lod_min[2], lod_max[2]; /*Clamp range applied to the computed LOD*/
+ int dx1, dx2;
+ int y, yend, ydir; /*y is the scanline currently being drawn*/
+ int32_t dxAB, dxAC, dxBC; /*Edge slopes used to find each scanline's x extents*/
+ /*Texel colour per TMU, one value in 0-255 per channel after a fetch*/
+ int tex_b[2], tex_g[2], tex_r[2], tex_a[2];
+ int tex_s, tex_t; /*Texture coordinates of the TMU being fetched*/
+ int clamp_s[2], clamp_t[2]; /*Non-zero to clamp instead of wrap; taken from textureMode*/
+
+ int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy;
+
+ /*Start of the texel data for each TMU and mip level*/
+ uint32_t *tex[2][LOD_MAX+1];
+ int tformat; /*Copy of params->tformat[0]*/
+
+ /*Per-TMU tables indexed by mip level, pointing at the arrays in params*/
+ int *tex_w_mask[2];
+ int *tex_h_mask[2];
+ int *tex_shift[2];
+ int *tex_lod[2];
+
+ /*Colour and auxiliary buffer rows for the current scanline*/
+ uint16_t *fb_mem, *aux_mem;
+
+ /*Iterated parameters at the current pixel; loaded from the base_* values
+   at the start of each scanline*/
+ int32_t ib, ig, ir, ia;
+ int32_t z;
+
+ /*NOTE(review): not referenced by the C code in this section - presumably
+   scratch for the generated code*/
+ int32_t new_depth;
+
+ int64_t tmu0_s, tmu0_t;
+ int64_t tmu0_w;
+ int64_t tmu1_s, tmu1_t;
+ int64_t tmu1_w;
+ int64_t w;
+
+ int pixel_count, texel_count; /*Zeroed before each span is drawn*/
+ int x, x2, x_tiled; /*x and x2 are the first and last pixel of the span*/
+
+ /*NOTE(review): the members below are not referenced by the C code in this
+   section - presumably scratch for the generated code*/
+ uint32_t w_depth;
+
+ float log_temp;
+ uint32_t ebp_store;
+ uint32_t texBaseAddr;
+
+ int lod_frac[2]; /*Low 8 bits of the clamped LOD for each TMU*/
+} voodoo_state_t;
+
+/*When non-zero the rasteriser logs per-scanline and per-pixel traces through
+  pclog(). Initialised to 0; nothing in this section sets it.*/
+static int voodoo_output = 0;
+
+/*Fractional part of a base 2 logarithm in 8-bit fixed point: logtable[f] is
+  approximately 256 * log2(1 + f / 256). Read-only, hence const.*/
+static const uint8_t logtable[256] =
+{
+    0x00,0x01,0x02,0x04,0x05,0x07,0x08,0x09,0x0b,0x0c,0x0e,0x0f,0x10,0x12,0x13,0x15,
+    0x16,0x17,0x19,0x1a,0x1b,0x1d,0x1e,0x1f,0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x2a,
+    0x2b,0x2c,0x2e,0x2f,0x30,0x31,0x33,0x34,0x35,0x36,0x38,0x39,0x3a,0x3b,0x3d,0x3e,
+    0x3f,0x40,0x41,0x43,0x44,0x45,0x46,0x47,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x50,0x51,
+    0x52,0x53,0x54,0x55,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x60,0x61,0x62,0x63,
+    0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
+    0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x83,0x84,0x85,
+    0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94,
+    0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xa1,0xa2,0xa2,0xa3,
+    0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
+    0xb3,0xb4,0xb5,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbc,0xbd,0xbe,0xbf,0xc0,
+    0xc1,0xc2,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xcd,
+    0xce,0xcf,0xd0,0xd1,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd6,0xd7,0xd8,0xd9,0xda,0xda,
+    0xdb,0xdc,0xdd,0xde,0xde,0xdf,0xe0,0xe1,0xe1,0xe2,0xe3,0xe4,0xe5,0xe5,0xe6,0xe7,
+    0xe8,0xe8,0xe9,0xea,0xeb,0xeb,0xec,0xed,0xee,0xef,0xef,0xf0,0xf1,0xf2,0xf2,0xf3,
+    0xf4,0xf5,0xf5,0xf6,0xf7,0xf7,0xf8,0xf9,0xfa,0xfa,0xfb,0xfc,0xfd,0xfd,0xfe,0xff
+};
+
+/*Approximate log2(val) as a fixed point number with 8 fractional bits.
+
+  The integer part is the position of the highest set bit; the fraction is
+  looked up in logtable from the 8 bits immediately below that bit.
+
+  Returns 0x80000000 converted to int (the most negative int on the usual
+  two's complement targets) when val is zero or has bit 63 set. Callers must
+  not do further signed arithmetic on that sentinel without guarding against
+  overflow.*/
+static inline int fastlog(uint64_t val)
+{
+    uint64_t oldval = val;
+    int exponent = 63; /*Not called 'exp': that would shadow exp() from <math.h>*/
+    int frac;
+
+    if (!val || (val & (1ULL << 63)))
+        return 0x80000000;
+
+    /*Binary search for the highest set bit: whenever the top half of the
+      remaining window is empty, shift the value up and lower the exponent*/
+    if (!(val & 0xffffffff00000000ULL))
+    {
+        exponent -= 32;
+        val <<= 32;
+    }
+    if (!(val & 0xffff000000000000ULL))
+    {
+        exponent -= 16;
+        val <<= 16;
+    }
+    if (!(val & 0xff00000000000000ULL))
+    {
+        exponent -= 8;
+        val <<= 8;
+    }
+    if (!(val & 0xf000000000000000ULL))
+    {
+        exponent -= 4;
+        val <<= 4;
+    }
+    if (!(val & 0xc000000000000000ULL))
+    {
+        exponent -= 2;
+        val <<= 2;
+    }
+    if (!(val & 0x8000000000000000ULL))
+        exponent -= 1; /*val itself is not needed after this point*/
+
+    /*Line the 8 bits below the leading one up with bits 0-7*/
+    if (exponent >= 8)
+        frac = (oldval >> (exponent - 8)) & 0xff;
+    else
+        frac = (oldval << (8 - exponent)) & 0xff;
+
+    return (exponent << 8) | logtable[frac];
+}
+
+/*Count the leading zero bits of a 16-bit value by testing its top 8, 4, 2
+  and 1 bits in turn. The count is capped at 15, so 0 and 1 both give 15.*/
+static inline int voodoo_fls(uint16_t val)
+{
+    unsigned int bits = val;
+    int zeros = 0;
+    int width;
+
+    for (width = 8; width != 0; width >>= 1)
+    {
+        /*Are the top 'width' bits of the 16-bit window all clear?*/
+        if (((bits & 0xffff) >> (16 - width)) == 0)
+        {
+            zeros += width;
+            bits <<= width;
+        }
+    }
+
+    return zeros;
+}
+
+/*Addressing parameters for one texel fetch at a single mip level*/
+typedef struct voodoo_texture_state_t
+{
+    int s;         /*Texel column; wrapped or clamped in place by tex_read()*/
+    int t;         /*Texel row; wrapped or clamped in place by tex_read()*/
+    int w_mask;    /*Highest valid column, also used as the wrap mask*/
+    int h_mask;    /*Highest valid row, also used as the wrap mask*/
+    int tex_shift; /*Row stride as a shift: index = s + (t << tex_shift)*/
+} voodoo_texture_state_t;
+
+/*Fetch a single texel for TMU 'tmu' from mip level state->lod.
+
+  texture_state->s and ->t are brought into range in place first: each is
+  clamped to 0..mask when the matching state->clamp_s/clamp_t[tmu] flag is
+  set, and wrapped with the mask otherwise. The texel is unpacked into
+  state->tex_b/g/r/a[tmu] from bits 0-7, 8-15, 16-23 and 24-31.*/
+static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int tmu)
+{
+    const int max_s = texture_state->w_mask;
+    const int max_t = texture_state->h_mask;
+    uint32_t texel;
+
+    if ((texture_state->s & ~max_s) != 0)
+    {
+        if (!state->clamp_s[tmu])
+            texture_state->s &= max_s;
+        else
+        {
+            if (texture_state->s < 0)
+                texture_state->s = 0;
+            if (texture_state->s > max_s)
+                texture_state->s = max_s;
+        }
+    }
+
+    if ((texture_state->t & ~max_t) != 0)
+    {
+        if (!state->clamp_t[tmu])
+            texture_state->t &= max_t;
+        else
+        {
+            if (texture_state->t < 0)
+                texture_state->t = 0;
+            if (texture_state->t > max_t)
+                texture_state->t = max_t;
+        }
+    }
+
+    texel = state->tex[tmu][state->lod][texture_state->s + (texture_state->t << texture_state->tex_shift)];
+
+    state->tex_a[tmu] = (texel >> 24) & 0xff;
+    state->tex_r[tmu] = (texel >> 16) & 0xff;
+    state->tex_g[tmu] = (texel >> 8) & 0xff;
+    state->tex_b[tmu] = texel & 0xff;
+}
+
+/*Expand one nibble of x to a full byte by replicating it into both halves:
+  LOW4 uses bits 0-3, HIGH4 uses bits 4-7. The argument is parenthesised so
+  that expressions such as LOW4(a | b) expand correctly; it is evaluated
+  twice, so it must not have side effects.*/
+#define LOW4(x)  (((x) & 0x0f) | (((x) & 0x0f) << 4))
+#define HIGH4(x) (((x) & 0xf0) | (((x) & 0xf0) >> 4))
+
+/*Fetch the 2x2 block of texels whose top left corner is (s, t) from mip
+  level state->lod of TMU 'tmu' and blend them with the weights d[0..3]
+  (top left, top right, bottom left, bottom right). Each channel is the
+  weighted sum shifted right by 8, stored in state->tex_r/g/b/a[tmu].
+
+  Corners that fall outside the texture are clamped or wrapped one by one,
+  according to state->clamp_s/clamp_t[tmu]. 'x' is not used.*/
+static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d, int tmu, int x)
+{
+    uint32_t *texels = state->tex[tmu][state->lod];
+    const int shift = texture_state->tex_shift;
+    const int max_s = texture_state->w_mask;
+    const int max_t = texture_state->h_mask;
+    rgba_u dat[4];
+
+    if (!((s | (s + 1)) & ~max_s) && !((t | (t + 1)) & ~max_t))
+    {
+        /*All four corners are inside the texture*/
+        dat[0].u = texels[s + (t << shift)];
+        dat[1].u = texels[s + 1 + (t << shift)];
+        dat[2].u = texels[s + ((t + 1) << shift)];
+        dat[3].u = texels[s + 1 + ((t + 1) << shift)];
+    }
+    else
+    {
+        int corner;
+
+        for (corner = 0; corner < 4; corner++)
+        {
+            int cs = s + (corner & 1);
+            int ct = t + (corner >> 1);
+
+            if (cs & ~max_s)
+            {
+                if (!state->clamp_s[tmu])
+                    cs &= max_s;
+                else
+                {
+                    if (cs < 0)
+                        cs = 0;
+                    if (cs > max_s)
+                        cs = max_s;
+                }
+            }
+            if (ct & ~max_t)
+            {
+                if (!state->clamp_t[tmu])
+                    ct &= max_t;
+                else
+                {
+                    if (ct < 0)
+                        ct = 0;
+                    if (ct > max_t)
+                        ct = max_t;
+                }
+            }
+
+            dat[corner].u = texels[cs + (ct << shift)];
+        }
+    }
+
+    state->tex_r[tmu] = (dat[0].rgba.r * d[0] + dat[1].rgba.r * d[1] + dat[2].rgba.r * d[2] + dat[3].rgba.r * d[3]) >> 8;
+    state->tex_g[tmu] = (dat[0].rgba.g * d[0] + dat[1].rgba.g * d[1] + dat[2].rgba.g * d[2] + dat[3].rgba.g * d[3]) >> 8;
+    state->tex_b[tmu] = (dat[0].rgba.b * d[0] + dat[1].rgba.b * d[1] + dat[2].rgba.b * d[2] + dat[3].rgba.b * d[3]) >> 8;
+    state->tex_a[tmu] = (dat[0].rgba.a * d[0] + dat[1].rgba.a * d[1] + dat[2].rgba.a * d[2] + dat[3].rgba.a * d[3]) >> 8;
+}
+
+/*Sample the texture of one TMU at the coordinates in state->tex_s and
+  state->tex_t, using the mip level already selected in state->lod. The
+  result is left in state->tex_r/g/b/a[tmu].
+
+  A coordinate is inverted first when its LOD_TMIRROR_S / LOD_TMIRROR_T bit
+  is set in tLOD[tmu] and bit 12 of the coordinate is set. If
+  voodoo->bilinear_enabled is set and bit 1 or bit 2 of textureMode[tmu] is
+  set, four texels are blended by tex_read_4(); otherwise one texel is
+  fetched by tex_read(). state->tex_s and state->tex_t are modified.*/
+static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x)
+{
+    voodoo_texture_state_t texture_state;
+    const int level = state->lod;
+    const int tex_lod = state->tex_lod[tmu][level];
+
+    texture_state.tex_shift = 8 - tex_lod;
+    texture_state.w_mask = state->tex_w_mask[tmu][level];
+    texture_state.h_mask = state->tex_h_mask[tmu][level];
+
+    if ((params->tLOD[tmu] & LOD_TMIRROR_S) && (state->tex_s & 0x1000))
+        state->tex_s = ~state->tex_s;
+    if ((params->tLOD[tmu] & LOD_TMIRROR_T) && (state->tex_t & 0x1000))
+        state->tex_t = ~state->tex_t;
+
+    if (!voodoo->bilinear_enabled || !(params->textureMode[tmu] & 6))
+    {
+        /*Point sampling: discard the 4 fractional bits together with the
+          scale of this mip level*/
+        texture_state.s = state->tex_s >> (4 + tex_lod);
+        texture_state.t = state->tex_t >> (4 + tex_lod);
+        tex_read(state, &texture_state, tmu);
+    }
+    else
+    {
+        int weights[4];
+        int frac_s, frac_t;
+        int s, t;
+
+        /*Move back by half a texel at this mip level so that the 2x2
+          footprint is centred on the sample point*/
+        state->tex_s -= 1 << (3+tex_lod);
+        state->tex_t -= 1 << (3+tex_lod);
+
+        s = state->tex_s >> tex_lod;
+        t = state->tex_t >> tex_lod;
+
+        /*The low 4 bits are the position between texels; the four weights
+          always add up to 256*/
+        frac_s = s & 0xf;
+        frac_t = t & 0xf;
+
+        weights[0] = (16 - frac_s) * (16 - frac_t);
+        weights[1] = frac_s * (16 - frac_t);
+        weights[2] = (16 - frac_s) * frac_t;
+        weights[3] = frac_s * frac_t;
+
+        tex_read_4(state, &texture_state, s >> 4, t >> 4, weights, tmu, x);
+    }
+}
+
+/*Compute the texture coordinates and LOD of one TMU for the current pixel,
+  then fetch the texel through voodoo_get_texture().
+
+  tmu selects the iterators: state->tmu1_s/t/w when non-zero, otherwise
+  state->tmu0_s/t/w. When bit 0 of textureMode[tmu] is set the coordinates
+  are divided by W (by multiplying with 2^48 / W) and the log2 of that
+  reciprocal is added to the TMU's base LOD; otherwise the coordinates are
+  only scaled down and the base LOD is used unchanged.
+
+  On return state->tex_s/tex_t hold the coordinates as adjusted by the fetch,
+  state->lod the clamped integer mip level, state->lod_frac[tmu] the low
+  8 bits of the clamped LOD and state->tex_r/g/b/a[tmu] the texel.*/
+static inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x)
+{
+    const int64_t iter_s = tmu ? state->tmu1_s : state->tmu0_s;
+    const int64_t iter_t = tmu ? state->tmu1_t : state->tmu0_t;
+    const int64_t iter_w = tmu ? state->tmu1_w : state->tmu0_w;
+
+    if (params->textureMode[tmu] & 1)
+    {
+        int64_t recip_w = 0;
+
+        /*The division is unsigned, so a negative W also gives 0*/
+        if (iter_w)
+            recip_w = (int64_t)((1ULL << 48) / iter_w);
+
+        state->tex_s = (int32_t)(((((iter_s + (1 << 13)) >> 14) * recip_w) + (1 << 29)) >> 30);
+        state->tex_t = (int32_t)(((((iter_t + (1 << 13)) >> 14) * recip_w) + (1 << 29)) >> 30);
+
+        /*fastlog() returns the most negative int when recip_w is 0, so the
+          sum is formed in unsigned arithmetic: signed overflow would be
+          undefined, whereas this keeps the wrap-around result and leaves
+          the clamp below to bring it back into range*/
+        state->lod = (int)((uint32_t)state->tmu[tmu].lod + (uint32_t)fastlog(recip_w) - (uint32_t)(19 << 8));
+    }
+    else
+    {
+        state->tex_s = (int32_t)(iter_s >> (14+14));
+        state->tex_t = (int32_t)(iter_t >> (14+14));
+        state->lod = state->tmu[tmu].lod;
+    }
+
+    if (state->lod < state->lod_min[tmu])
+        state->lod = state->lod_min[tmu];
+    else if (state->lod > state->lod_max[tmu])
+        state->lod = state->lod_max[tmu];
+
+    /*Split the 8.8 fixed point LOD into fraction and integer mip level*/
+    state->lod_frac[tmu] = state->lod & 0xff;
+    state->lod >>= 8;
+
+    voodoo_get_texture(voodoo, params, state, tmu, x);
+}
+
+
+/*Perform texture fetch and blending for both TMUs.
+
+  TMU1 is fetched first. Its own combine stage runs only when the matching
+  sub_clocal bit of textureMode[1] is set; the result replaces
+  state->tex_r/g/b/a[1]. TMU0 is then fetched and combined with the TMU1
+  result, which acts as the 'other' input:
+      out = ((other - local) * factor) + add
+  where each term is enabled by the tc_* / tca_* bits of textureMode[0]. The
+  final colour is passed through CLAMP, optionally inverted, and left in
+  state->tex_r/g/b/a[0].
+
+  Blend factors are 0-255 and are applied as (factor + 1) / 256.*/
+static inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int x)
+{
+ int r,g,b,a;
+ int c_reverse, a_reverse;
+ int factor_r = 0, factor_g = 0, factor_b = 0, factor_a = 0;
+
+ voodoo_tmu_fetch(voodoo, params, state, 1, x);
+
+ /*With trilinear enabled the blend direction depends on whether the mip
+   level just fetched is odd.
+   NOTE(review): tc_reverse_blend / tca_reverse_blend read textureMode[0]
+   although this is the TMU1 stage and the trilinear bit is taken from
+   textureMode[1]; the tc_reverse_blend_1 / tca_reverse_blend_1 macros exist
+   but are not used here - confirm this is intended.*/
+ if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && (state->lod & 1))
+ {
+ c_reverse = tc_reverse_blend;
+ a_reverse = tca_reverse_blend;
+ }
+ else
+ {
+ c_reverse = !tc_reverse_blend;
+ a_reverse = !tca_reverse_blend;
+ }
+ /*TMU1 colour combine. There is no 'other' input at this stage, so the
+   AOTHER factor is 0 and the product starts from minus the local colour.*/
+ if (tc_sub_clocal_1)
+ {
+ switch (tc_mselect_1)
+ {
+ case TC_MSELECT_ZERO:
+ factor_r = factor_g = factor_b = 0;
+ break;
+ case TC_MSELECT_CLOCAL:
+ factor_r = state->tex_r[1];
+ factor_g = state->tex_g[1];
+ factor_b = state->tex_b[1];
+ break;
+ case TC_MSELECT_AOTHER:
+ factor_r = factor_g = factor_b = 0;
+ break;
+ case TC_MSELECT_ALOCAL:
+ factor_r = factor_g = factor_b = state->tex_a[1];
+ break;
+ case TC_MSELECT_DETAIL:
+ factor_r = (params->detail_bias[1] - state->lod) << params->detail_scale[1];
+ if (factor_r > params->detail_max[1])
+ factor_r = params->detail_max[1];
+ factor_g = factor_b = factor_r;
+ break;
+ case TC_MSELECT_LOD_FRAC:
+ factor_r = factor_g = factor_b = state->lod_frac[1];
+ break;
+ }
+ if (!c_reverse)
+ {
+ r = (-state->tex_r[1] * (factor_r + 1)) >> 8;
+ g = (-state->tex_g[1] * (factor_g + 1)) >> 8;
+ b = (-state->tex_b[1] * (factor_b + 1)) >> 8;
+ }
+ else
+ {
+ r = (-state->tex_r[1] * ((factor_r^0xff) + 1)) >> 8;
+ g = (-state->tex_g[1] * ((factor_g^0xff) + 1)) >> 8;
+ b = (-state->tex_b[1] * ((factor_b^0xff) + 1)) >> 8;
+ }
+ if (tc_add_clocal_1)
+ {
+ r += state->tex_r[1];
+ g += state->tex_g[1];
+ b += state->tex_b[1];
+ }
+ else if (tc_add_alocal_1)
+ {
+ r += state->tex_a[1];
+ g += state->tex_a[1];
+ b += state->tex_a[1];
+ }
+ state->tex_r[1] = CLAMP(r);
+ state->tex_g[1] = CLAMP(g);
+ state->tex_b[1] = CLAMP(b);
+ }
+ /*TMU1 alpha combine*/
+ if (tca_sub_clocal_1)
+ {
+ switch (tca_mselect_1)
+ {
+ case TCA_MSELECT_ZERO:
+ factor_a = 0;
+ break;
+ case TCA_MSELECT_CLOCAL:
+ factor_a = state->tex_a[1];
+ break;
+ case TCA_MSELECT_AOTHER:
+ factor_a = 0;
+ break;
+ case TCA_MSELECT_ALOCAL:
+ factor_a = state->tex_a[1];
+ break;
+ case TCA_MSELECT_DETAIL:
+ factor_a = (params->detail_bias[1] - state->lod) << params->detail_scale[1];
+ if (factor_a > params->detail_max[1])
+ factor_a = params->detail_max[1];
+ break;
+ case TCA_MSELECT_LOD_FRAC:
+ factor_a = state->lod_frac[1];
+ break;
+ }
+ /*NOTE(review): the polarity here is the opposite of the colour path
+   above and of the TMU0 alpha path below, where a set reverse flag
+   selects the inverted factor - confirm this is intended.*/
+ if (!a_reverse)
+ a = (-state->tex_a[1] * ((factor_a ^ 0xff) + 1)) >> 8;
+ else
+ a = (-state->tex_a[1] * (factor_a + 1)) >> 8;
+ if (tca_add_clocal_1 || tca_add_alocal_1)
+ a += state->tex_a[1];
+ state->tex_a[1] = CLAMP(a);
+ }
+
+
+ /*From here on state->lod and the tex_s/tex_t coordinates belong to TMU0*/
+ voodoo_tmu_fetch(voodoo, params, state, 0, x);
+
+ if ((params->textureMode[0] & TEXTUREMODE_TRILINEAR) && (state->lod & 1))
+ {
+ c_reverse = tc_reverse_blend;
+ a_reverse = tca_reverse_blend;
+ }
+ else
+ {
+ c_reverse = !tc_reverse_blend;
+ a_reverse = !tca_reverse_blend;
+ }
+
+ /*TMU0 colour combine: start from the TMU1 result (or zero), subtract the
+   local texel, scale by the selected factor, then add*/
+ if (!tc_zero_other)
+ {
+ r = state->tex_r[1];
+ g = state->tex_g[1];
+ b = state->tex_b[1];
+ }
+ else
+ r = g = b = 0;
+ if (tc_sub_clocal)
+ {
+ r -= state->tex_r[0];
+ g -= state->tex_g[0];
+ b -= state->tex_b[0];
+ }
+ switch (tc_mselect)
+ {
+ case TC_MSELECT_ZERO:
+ factor_r = factor_g = factor_b = 0;
+ break;
+ case TC_MSELECT_CLOCAL:
+ factor_r = state->tex_r[0];
+ factor_g = state->tex_g[0];
+ factor_b = state->tex_b[0];
+ break;
+ case TC_MSELECT_AOTHER:
+ factor_r = factor_g = factor_b = state->tex_a[1];
+ break;
+ case TC_MSELECT_ALOCAL:
+ factor_r = factor_g = factor_b = state->tex_a[0];
+ break;
+ case TC_MSELECT_DETAIL:
+ factor_r = (params->detail_bias[0] - state->lod) << params->detail_scale[0];
+ if (factor_r > params->detail_max[0])
+ factor_r = params->detail_max[0];
+ factor_g = factor_b = factor_r;
+ break;
+ case TC_MSELECT_LOD_FRAC:
+ factor_r = factor_g = factor_b = state->lod_frac[0];
+ break;
+ }
+ if (!c_reverse)
+ {
+ r = (r * (factor_r + 1)) >> 8;
+ g = (g * (factor_g + 1)) >> 8;
+ b = (b * (factor_b + 1)) >> 8;
+ }
+ else
+ {
+ r = (r * ((factor_r^0xff) + 1)) >> 8;
+ g = (g * ((factor_g^0xff) + 1)) >> 8;
+ b = (b * ((factor_b^0xff) + 1)) >> 8;
+ }
+ if (tc_add_clocal)
+ {
+ r += state->tex_r[0];
+ g += state->tex_g[0];
+ b += state->tex_b[0];
+ }
+ else if (tc_add_alocal)
+ {
+ r += state->tex_a[0];
+ g += state->tex_a[0];
+ b += state->tex_a[0];
+ }
+
+ /*TMU0 alpha combine, same structure as the colour path*/
+ if (!tca_zero_other)
+ a = state->tex_a[1];
+ else
+ a = 0;
+ if (tca_sub_clocal)
+ a -= state->tex_a[0];
+ switch (tca_mselect)
+ {
+ case TCA_MSELECT_ZERO:
+ factor_a = 0;
+ break;
+ case TCA_MSELECT_CLOCAL:
+ factor_a = state->tex_a[0];
+ break;
+ case TCA_MSELECT_AOTHER:
+ factor_a = state->tex_a[1];
+ break;
+ case TCA_MSELECT_ALOCAL:
+ factor_a = state->tex_a[0];
+ break;
+ case TCA_MSELECT_DETAIL:
+ factor_a = (params->detail_bias[0] - state->lod) << params->detail_scale[0];
+ if (factor_a > params->detail_max[0])
+ factor_a = params->detail_max[0];
+ break;
+ case TCA_MSELECT_LOD_FRAC:
+ factor_a = state->lod_frac[0];
+ break;
+ }
+ if (a_reverse)
+ a = (a * ((factor_a ^ 0xff) + 1)) >> 8;
+ else
+ a = (a * (factor_a + 1)) >> 8;
+ if (tca_add_clocal || tca_add_alocal)
+ a += state->tex_a[0];
+
+
+ /*CLAMP is defined elsewhere; presumably it limits the value to 0-255*/
+ state->tex_r[0] = CLAMP(r);
+ state->tex_g[0] = CLAMP(g);
+ state->tex_b[0] = CLAMP(b);
+ state->tex_a[0] = CLAMP(a);
+
+ if (tc_invert_output)
+ {
+ state->tex_r[0] ^= 0xff;
+ state->tex_g[0] ^= 0xff;
+ state->tex_b[0] ^= 0xff;
+ }
+ if (tca_invert_output)
+ state->tex_a[0] ^= 0xff;
+}
+
+/*Pull in the span code generator that matches the host CPU: the x86-64
+  generator when __amd64__ is defined, the 32-bit x86 generator on the other
+  x86 / Windows targets listed below, and no generator at all elsewhere, in
+  which case only the voodoo_recomp variable is defined.*/
+#if defined(__amd64__)
+#include "vid_voodoo_codegen_x86-64.h"
+#elif defined(i386) || defined(__i386) || defined(__i386__) || defined(_X86_) || defined(WIN32) || defined(_WIN32)
+#include "vid_voodoo_codegen_x86.h"
+#else
+int voodoo_recomp = 0;
+#endif
+
+static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int ystart, int yend, int odd_even)
+{
+/* int rgb_sel = params->fbzColorPath & 3;
+ int a_sel = (params->fbzColorPath >> 2) & 3;
+ int cc_localselect = params->fbzColorPath & (1 << 4);
+ int cca_localselect = (params->fbzColorPath >> 5) & 3;
+ int cc_localselect_override = params->fbzColorPath & (1 << 7);
+ int cc_zero_other = params->fbzColorPath & (1 << 8);
+ int cc_sub_clocal = params->fbzColorPath & (1 << 9);
+ int cc_mselect = (params->fbzColorPath >> 10) & 7;
+ int cc_reverse_blend = params->fbzColorPath & (1 << 13);
+ int cc_add = (params->fbzColorPath >> 14) & 3;
+ int cc_add_alocal = params->fbzColorPath & (1 << 15);
+ int cc_invert_output = params->fbzColorPath & (1 << 16);
+ int cca_zero_other = params->fbzColorPath & (1 << 17);
+ int cca_sub_clocal = params->fbzColorPath & (1 << 18);
+ int cca_mselect = (params->fbzColorPath >> 19) & 7;
+ int cca_reverse_blend = params->fbzColorPath & (1 << 22);
+ int cca_add = (params->fbzColorPath >> 23) & 3;
+ int cca_invert_output = params->fbzColorPath & (1 << 25);
+ int src_afunc = (params->alphaMode >> 8) & 0xf;
+ int dest_afunc = (params->alphaMode >> 12) & 0xf;
+ int alpha_func = (params->alphaMode >> 1) & 7;
+ int a_ref = params->alphaMode >> 24;
+ int depth_op = (params->fbzMode >> 5) & 7;
+ int dither = params->fbzMode & FBZ_DITHER;*/
+ int texels;
+ int c;
+#ifndef NO_CODEGEN
+ uint8_t (*voodoo_draw)(voodoo_state_t *state, voodoo_params_t *params, int x, int real_y);
+#endif
+ int y_diff = SLI_ENABLED ? 2 : 1;
+
+ if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH ||
+ (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL)
+ texels = 1;
+ else
+ texels = 2;
+
+ state->clamp_s[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPS;
+ state->clamp_t[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPT;
+ state->clamp_s[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPS;
+ state->clamp_t[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPT;
+// int last_x;
+// pclog("voodoo_triangle : bottom-half %X %X %X %X %X %i %i %i %i\n", xstart, xend, dx1, dx2, dx2 * 36, xdir, y, yend, ydir);
+
+ for (c = 0; c <= LOD_MAX; c++)
+ {
+ state->tex[0][c] = &voodoo->texture_cache[0][params->tex_entry[0]].data[texture_offset[c]];
+ state->tex[1][c] = &voodoo->texture_cache[1][params->tex_entry[1]].data[texture_offset[c]];
+ }
+
+ state->tformat = params->tformat[0];
+
+ state->tex_w_mask[0] = params->tex_w_mask[0];
+ state->tex_h_mask[0] = params->tex_h_mask[0];
+ state->tex_shift[0] = params->tex_shift[0];
+ state->tex_lod[0] = params->tex_lod[0];
+ state->tex_w_mask[1] = params->tex_w_mask[1];
+ state->tex_h_mask[1] = params->tex_h_mask[1];
+ state->tex_shift[1] = params->tex_shift[1];
+ state->tex_lod[1] = params->tex_lod[1];
+
+ if ((params->fbzMode & 1) && (ystart < params->clipLowY))
+ {
+ int dy = params->clipLowY - ystart;
+
+ state->base_r += params->dRdY*dy;
+ state->base_g += params->dGdY*dy;
+ state->base_b += params->dBdY*dy;
+ state->base_a += params->dAdY*dy;
+ state->base_z += params->dZdY*dy;
+ state->tmu[0].base_s += params->tmu[0].dSdY*dy;
+ state->tmu[0].base_t += params->tmu[0].dTdY*dy;
+ state->tmu[0].base_w += params->tmu[0].dWdY*dy;
+ state->tmu[1].base_s += params->tmu[1].dSdY*dy;
+ state->tmu[1].base_t += params->tmu[1].dTdY*dy;
+ state->tmu[1].base_w += params->tmu[1].dWdY*dy;
+ state->base_w += params->dWdY*dy;
+ state->xstart += state->dx1*dy;
+ state->xend += state->dx2*dy;
+
+ ystart = params->clipLowY;
+ }
+
+ if ((params->fbzMode & 1) && (yend >= params->clipHighY))
+ yend = params->clipHighY;
+
+ state->y = ystart;
+// yend--;
+
+ if (SLI_ENABLED)
+ {
+ int test_y;
+
+ if (params->fbzMode & (1 << 17))
+ test_y = (voodoo->v_disp-1) - state->y;
+ else
+ test_y = state->y;
+
+ if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (test_y & 1)) ||
+ ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(test_y & 1)))
+ {
+ state->y++;
+
+ state->base_r += params->dRdY;
+ state->base_g += params->dGdY;
+ state->base_b += params->dBdY;
+ state->base_a += params->dAdY;
+ state->base_z += params->dZdY;
+ state->tmu[0].base_s += params->tmu[0].dSdY;
+ state->tmu[0].base_t += params->tmu[0].dTdY;
+ state->tmu[0].base_w += params->tmu[0].dWdY;
+ state->tmu[1].base_s += params->tmu[1].dSdY;
+ state->tmu[1].base_t += params->tmu[1].dTdY;
+ state->tmu[1].base_w += params->tmu[1].dWdY;
+ state->base_w += params->dWdY;
+ state->xstart += state->dx1;
+ state->xend += state->dx2;
+ }
+ }
+#ifndef NO_CODEGEN
+ if (voodoo->use_recompiler)
+ voodoo_draw = voodoo_get_block(voodoo, params, state, odd_even);
+ else
+ voodoo_draw = NULL;
+#endif
+
+ if (voodoo_output)
+ pclog("dxAB=%08x dxBC=%08x dxAC=%08x\n", state->dxAB, state->dxBC, state->dxAC);
+// pclog("Start %i %i\n", ystart, voodoo->fbzMode & (1 << 17));
+
+ for (; state->y < yend; state->y += y_diff)
+ {
+ int x, x2;
+ int real_y = (state->y << 4) + 8;
+ int start_x, start_x2;
+ int dx;
+ uint16_t *fb_mem, *aux_mem;
+
+ state->ir = state->base_r;
+ state->ig = state->base_g;
+ state->ib = state->base_b;
+ state->ia = state->base_a;
+ state->z = state->base_z;
+ state->tmu0_s = state->tmu[0].base_s;
+ state->tmu0_t = state->tmu[0].base_t;
+ state->tmu0_w = state->tmu[0].base_w;
+ state->tmu1_s = state->tmu[1].base_s;
+ state->tmu1_t = state->tmu[1].base_t;
+ state->tmu1_w = state->tmu[1].base_w;
+ state->w = state->base_w;
+
+ x = (state->vertexAx << 12) + ((state->dxAC * (real_y - state->vertexAy)) >> 4);
+
+ if (real_y < state->vertexBy)
+ x2 = (state->vertexAx << 12) + ((state->dxAB * (real_y - state->vertexAy)) >> 4);
+ else
+ x2 = (state->vertexBx << 12) + ((state->dxBC * (real_y - state->vertexBy)) >> 4);
+
+ if (params->fbzMode & (1 << 17))
+ real_y = (voodoo->v_disp-1) - (real_y >> 4);
+ else
+ real_y >>= 4;
+
+ if (SLI_ENABLED)
+ {
+ if (((real_y >> 1) & voodoo->odd_even_mask) != odd_even)
+ goto next_line;
+ }
+ else
+ {
+ if ((real_y & voodoo->odd_even_mask) != odd_even)
+ goto next_line;
+ }
+
+ start_x = x;
+
+ if (state->xdir > 0)
+ x2 -= (1 << 16);
+ else
+ x -= (1 << 16);
+ dx = ((x + 0x7000) >> 16) - (((state->vertexAx << 12) + 0x7000) >> 16);
+ start_x2 = x + 0x7000;
+ x = (x + 0x7000) >> 16;
+ x2 = (x2 + 0x7000) >> 16;
+
+ if (voodoo_output)
+ pclog("%03i:%03i : Ax=%08x start_x=%08x dSdX=%016llx dx=%08x s=%08x -> ", x, state->y, state->vertexAx << 8, start_x, params->tmu[0].dTdX, dx, state->tmu0_t);
+
+ state->ir += (params->dRdX * dx);
+ state->ig += (params->dGdX * dx);
+ state->ib += (params->dBdX * dx);
+ state->ia += (params->dAdX * dx);
+ state->z += (params->dZdX * dx);
+ state->tmu0_s += (params->tmu[0].dSdX * dx);
+ state->tmu0_t += (params->tmu[0].dTdX * dx);
+ state->tmu0_w += (params->tmu[0].dWdX * dx);
+ state->tmu1_s += (params->tmu[1].dSdX * dx);
+ state->tmu1_t += (params->tmu[1].dTdX * dx);
+ state->tmu1_w += (params->tmu[1].dWdX * dx);
+ state->w += (params->dWdX * dx);
+
+ if (voodoo_output)
+ pclog("%08llx %lli %lli\n", state->tmu0_t, state->tmu0_t >> (18+state->lod), (state->tmu0_t + (1 << (17+state->lod))) >> (18+state->lod));
+
+ if (params->fbzMode & 1)
+ {
+ if (state->xdir > 0)
+ {
+ if (x < params->clipLeft)
+ {
+ int dx = params->clipLeft - x;
+
+ state->ir += params->dRdX*dx;
+ state->ig += params->dGdX*dx;
+ state->ib += params->dBdX*dx;
+ state->ia += params->dAdX*dx;
+ state->z += params->dZdX*dx;
+ state->tmu0_s += params->tmu[0].dSdX*dx;
+ state->tmu0_t += params->tmu[0].dTdX*dx;
+ state->tmu0_w += params->tmu[0].dWdX*dx;
+ state->tmu1_s += params->tmu[1].dSdX*dx;
+ state->tmu1_t += params->tmu[1].dTdX*dx;
+ state->tmu1_w += params->tmu[1].dWdX*dx;
+ state->w += params->dWdX*dx;
+
+ x = params->clipLeft;
+ }
+ if (x2 >= params->clipRight)
+ x2 = params->clipRight-1;
+ }
+ else
+ {
+ if (x >= params->clipRight)
+ {
+ int dx = (params->clipRight-1) - x;
+
+ state->ir += params->dRdX*dx;
+ state->ig += params->dGdX*dx;
+ state->ib += params->dBdX*dx;
+ state->ia += params->dAdX*dx;
+ state->z += params->dZdX*dx;
+ state->tmu0_s += params->tmu[0].dSdX*dx;
+ state->tmu0_t += params->tmu[0].dTdX*dx;
+ state->tmu0_w += params->tmu[0].dWdX*dx;
+ state->tmu1_s += params->tmu[1].dSdX*dx;
+ state->tmu1_t += params->tmu[1].dTdX*dx;
+ state->tmu1_w += params->tmu[1].dWdX*dx;
+ state->w += params->dWdX*dx;
+
+ x = params->clipRight-1;
+ }
+ if (x2 < params->clipLeft)
+ x2 = params->clipLeft;
+ }
+ }
+
+ if (x2 < x && state->xdir > 0)
+ goto next_line;
+ if (x2 > x && state->xdir < 0)
+ goto next_line;
+
+ if (SLI_ENABLED)
+ {
+ state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + ((real_y >> 1) * params->row_width)];
+ state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + ((real_y >> 1) * params->row_width)) & voodoo->fb_mask];
+ }
+ else
+ {
+ if (params->col_tiled)
+ state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y >> 5) * params->row_width + (real_y & 31) * 128];
+ else
+ state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y * params->row_width)];
+ if (params->aux_tiled)
+ state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y >> 5) * params->aux_row_width + (real_y & 31) * 128) & voodoo->fb_mask];
+ else
+ state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y * params->row_width)) & voodoo->fb_mask];
+ }
+
+ if (voodoo_output)
+ pclog("%03i: x=%08x x2=%08x xstart=%08x xend=%08x dx=%08x start_x2=%08x\n", state->y, x, x2, state->xstart, state->xend, dx, start_x2);
+
+ state->pixel_count = 0;
+ state->texel_count = 0;
+ state->x = x;
+ state->x2 = x2;
+#ifndef NO_CODEGEN
+ if (voodoo->use_recompiler)
+ {
+ voodoo_draw(state, params, x, real_y);
+ }
+ else
+#endif
+ do
+ {
+ int x_tiled = (x & 63) | ((x >> 6) * 128*32/2);
+ start_x = x;
+ state->x = x;
+ voodoo->pixel_count[odd_even]++;
+ voodoo->texel_count[odd_even] += texels;
+ voodoo->fbiPixelsIn++;
+
+ if (voodoo_output)
+ pclog(" X=%03i T=%08x\n", x, state->tmu0_t);
+// if (voodoo->fbzMode & FBZ_RGB_WMASK)
+ {
+ int update = 1;
+ uint8_t cother_r, cother_g, cother_b, aother;
+ uint8_t clocal_r, clocal_g, clocal_b, alocal;
+ int src_r = 0, src_g = 0, src_b = 0, src_a = 0;
+ int msel_r, msel_g, msel_b, msel_a;
+ uint8_t dest_r, dest_g, dest_b, dest_a;
+ uint16_t dat;
+ int sel;
+ int32_t new_depth, w_depth;
+
+ if (state->w & 0xffff00000000)
+ w_depth = 0;
+ else if (!(state->w & 0xffff0000))
+ w_depth = 0xf001;
+ else
+ {
+ int exp = voodoo_fls((uint16_t)((uint32_t)state->w >> 16));
+ int mant = ((~(uint32_t)state->w >> (19 - exp))) & 0xfff;
+ w_depth = (exp << 12) + mant + 1;
+ if (w_depth > 0xffff)
+ w_depth = 0xffff;
+ }
+
+// w_depth = CLAMP16(w_depth);
+
+ if (params->fbzMode & FBZ_W_BUFFER)
+ new_depth = w_depth;
+ else
+ new_depth = CLAMP16(state->z >> 12);
+
+ if (params->fbzMode & FBZ_DEPTH_BIAS)
+ new_depth = CLAMP16(new_depth + (int16_t)params->zaColor);
+
+ if (params->fbzMode & FBZ_DEPTH_ENABLE)
+ {
+ uint16_t old_depth = voodoo->params.aux_tiled ? aux_mem[x_tiled] : aux_mem[x];
+
+ DEPTH_TEST((params->fbzMode & FBZ_DEPTH_SOURCE) ? (params->zaColor & 0xffff) : new_depth);
+ }
+
+ dat = voodoo->params.col_tiled ? fb_mem[x_tiled] : fb_mem[x];
+ dest_r = (dat >> 8) & 0xf8;
+ dest_g = (dat >> 3) & 0xfc;
+ dest_b = (dat << 3) & 0xf8;
+ dest_r |= (dest_r >> 5);
+ dest_g |= (dest_g >> 6);
+ dest_b |= (dest_b >> 5);
+ dest_a = 0xff;
+
+ if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED)
+ {
+ if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus)
+ {
+ /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/
+ voodoo_tmu_fetch(voodoo, params, state, 0, x);
+ }
+ else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH)
+ {
+ /*TMU0 in pass-through mode, only sample TMU1*/
+ voodoo_tmu_fetch(voodoo, params, state, 1, x);
+
+ state->tex_r[0] = state->tex_r[1];
+ state->tex_g[0] = state->tex_g[1];
+ state->tex_b[0] = state->tex_b[1];
+ state->tex_a[0] = state->tex_a[1];
+ }
+ else
+ {
+ voodoo_tmu_fetch_and_blend(voodoo, params, state, x);
+ }
+
+ if ((params->fbzMode & FBZ_CHROMAKEY) &&
+ state->tex_r[0] == params->chromaKey_r &&
+ state->tex_g[0] == params->chromaKey_g &&
+ state->tex_b[0] == params->chromaKey_b)
+ {
+ voodoo->fbiChromaFail++;
+ goto skip_pixel;
+ }
+ }
+
+ if (voodoo->trexInit1[0] & (1 << 18))
+ {
+ state->tex_r[0] = state->tex_g[0] = 0;
+ state->tex_b[0] = voodoo->tmuConfig;
+ }
+
+ if (cc_localselect_override)
+ sel = (state->tex_a[0] & 0x80) ? 1 : 0;
+ else
+ sel = cc_localselect;
+
+ if (sel)
+ {
+ clocal_r = (params->color0 >> 16) & 0xff;
+ clocal_g = (params->color0 >> 8) & 0xff;
+ clocal_b = params->color0 & 0xff;
+ }
+ else
+ {
+ clocal_r = CLAMP(state->ir >> 12);
+ clocal_g = CLAMP(state->ig >> 12);
+ clocal_b = CLAMP(state->ib >> 12);
+ }
+
+ switch (_rgb_sel)
+ {
+ case CC_LOCALSELECT_ITER_RGB: /*Iterated RGB*/
+ cother_r = CLAMP(state->ir >> 12);
+ cother_g = CLAMP(state->ig >> 12);
+ cother_b = CLAMP(state->ib >> 12);
+ break;
+
+ case CC_LOCALSELECT_TEX: /*TREX Color Output*/
+ cother_r = state->tex_r[0];
+ cother_g = state->tex_g[0];
+ cother_b = state->tex_b[0];
+ break;
+
+ case CC_LOCALSELECT_COLOR1: /*Color1 RGB*/
+ cother_r = (params->color1 >> 16) & 0xff;
+ cother_g = (params->color1 >> 8) & 0xff;
+ cother_b = params->color1 & 0xff;
+ break;
+
+ case CC_LOCALSELECT_LFB: /*Linear Frame Buffer*/
+ cother_r = src_r;
+ cother_g = src_g;
+ cother_b = src_b;
+ break;
+ }
+
+ switch (cca_localselect)
+ {
+ case CCA_LOCALSELECT_ITER_A:
+ alocal = CLAMP(state->ia >> 12);
+ break;
+
+ case CCA_LOCALSELECT_COLOR0:
+ alocal = (params->color0 >> 24) & 0xff;
+ break;
+
+ case CCA_LOCALSELECT_ITER_Z:
+ alocal = CLAMP(state->z >> 20);
+ break;
+
+ default:
+ fatal("Bad cca_localselect %i\n", cca_localselect);
+ alocal = 0xff;
+ break;
+ }
+
+ switch (a_sel)
+ {
+ case A_SEL_ITER_A:
+ aother = CLAMP(state->ia >> 12);
+ break;
+ case A_SEL_TEX:
+ aother = state->tex_a[0];
+ break;
+ case A_SEL_COLOR1:
+ aother = (params->color1 >> 24) & 0xff;
+ break;
+ default:
+ fatal("Bad a_sel %i\n", a_sel);
+ aother = 0;
+ break;
+ }
+
+ if (cc_zero_other)
+ {
+ src_r = 0;
+ src_g = 0;
+ src_b = 0;
+ }
+ else
+ {
+ src_r = cother_r;
+ src_g = cother_g;
+ src_b = cother_b;
+ }
+
+ if (cca_zero_other)
+ src_a = 0;
+ else
+ src_a = aother;
+
+ if (cc_sub_clocal)
+ {
+ src_r -= clocal_r;
+ src_g -= clocal_g;
+ src_b -= clocal_b;
+ }
+
+ if (cca_sub_clocal)
+ src_a -= alocal;
+
+ switch (cc_mselect)
+ {
+ case CC_MSELECT_ZERO:
+ msel_r = 0;
+ msel_g = 0;
+ msel_b = 0;
+ break;
+ case CC_MSELECT_CLOCAL:
+ msel_r = clocal_r;
+ msel_g = clocal_g;
+ msel_b = clocal_b;
+ break;
+ case CC_MSELECT_AOTHER:
+ msel_r = aother;
+ msel_g = aother;
+ msel_b = aother;
+ break;
+ case CC_MSELECT_ALOCAL:
+ msel_r = alocal;
+ msel_g = alocal;
+ msel_b = alocal;
+ break;
+ case CC_MSELECT_TEX:
+ msel_r = state->tex_a[0];
+ msel_g = state->tex_a[0];
+ msel_b = state->tex_a[0];
+ break;
+ case CC_MSELECT_TEXRGB:
+ msel_r = state->tex_r[0];
+ msel_g = state->tex_g[0];
+ msel_b = state->tex_b[0];
+ break;
+
+ default:
+ fatal("Bad cc_mselect %i\n", cc_mselect);
+ msel_r = 0;
+ msel_g = 0;
+ msel_b = 0;
+ break;
+ }
+
+ switch (cca_mselect)
+ {
+ case CCA_MSELECT_ZERO:
+ msel_a = 0;
+ break;
+ case CCA_MSELECT_ALOCAL:
+ msel_a = alocal;
+ break;
+ case CCA_MSELECT_AOTHER:
+ msel_a = aother;
+ break;
+ case CCA_MSELECT_ALOCAL2:
+ msel_a = alocal;
+ break;
+ case CCA_MSELECT_TEX:
+ msel_a = state->tex_a[0];
+ break;
+
+ default:
+ fatal("Bad cca_mselect %i\n", cca_mselect);
+ msel_a = 0;
+ break;
+ }
+
+ if (!cc_reverse_blend)
+ {
+ msel_r ^= 0xff;
+ msel_g ^= 0xff;
+ msel_b ^= 0xff;
+ }
+ msel_r++;
+ msel_g++;
+ msel_b++;
+
+ if (!cca_reverse_blend)
+ msel_a ^= 0xff;
+ msel_a++;
+
+ src_r = (src_r * msel_r) >> 8;
+ src_g = (src_g * msel_g) >> 8;
+ src_b = (src_b * msel_b) >> 8;
+ src_a = (src_a * msel_a) >> 8;
+
+ switch (cc_add)
+ {
+ case CC_ADD_CLOCAL:
+ src_r += clocal_r;
+ src_g += clocal_g;
+ src_b += clocal_b;
+ break;
+ case CC_ADD_ALOCAL:
+ src_r += alocal;
+ src_g += alocal;
+ src_b += alocal;
+ break;
+ case 0:
+ break;
+ default:
+ fatal("Bad cc_add %i\n", cc_add);
+ }
+
+ if (cca_add)
+ src_a += alocal;
+
+ src_r = CLAMP(src_r);
+ src_g = CLAMP(src_g);
+ src_b = CLAMP(src_b);
+ src_a = CLAMP(src_a);
+
+ if (cc_invert_output)
+ {
+ src_r ^= 0xff;
+ src_g ^= 0xff;
+ src_b ^= 0xff;
+ }
+ if (cca_invert_output)
+ src_a ^= 0xff;
+
+ if (params->fogMode & FOG_ENABLE)
+ APPLY_FOG(src_r, src_g, src_b, state->z, state->ia, state->w);
+
+ if (params->alphaMode & 1)
+ ALPHA_TEST(src_a);
+
+ if (params->alphaMode & (1 << 4))
+ ALPHA_BLEND(src_r, src_g, src_b, src_a);
+
+ if (update)
+ {
+ if (dither)
+ {
+ if (dither2x2)
+ {
+ src_r = dither_rb2x2[src_r][real_y & 1][x & 1];
+ src_g = dither_g2x2[src_g][real_y & 1][x & 1];
+ src_b = dither_rb2x2[src_b][real_y & 1][x & 1];
+ }
+ else
+ {
+ src_r = dither_rb[src_r][real_y & 3][x & 3];
+ src_g = dither_g[src_g][real_y & 3][x & 3];
+ src_b = dither_rb[src_b][real_y & 3][x & 3];
+ }
+ }
+ else
+ {
+ src_r >>= 3;
+ src_g >>= 2;
+ src_b >>= 3;
+ }
+
+ if (params->fbzMode & FBZ_RGB_WMASK)
+ {
+ if (voodoo->params.col_tiled)
+ fb_mem[x_tiled] = src_b | (src_g << 5) | (src_r << 11);
+ else
+ fb_mem[x] = src_b | (src_g << 5) | (src_r << 11);
+ }
+ if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE))
+ {
+ if (voodoo->params.aux_tiled)
+ aux_mem[x_tiled] = new_depth;
+ else
+ aux_mem[x] = new_depth;
+ }
+ }
+ }
+ voodoo_output &= ~2;
+ voodoo->fbiPixelsOut++;
+skip_pixel:
+ if (state->xdir > 0)
+ {
+ state->ir += params->dRdX;
+ state->ig += params->dGdX;
+ state->ib += params->dBdX;
+ state->ia += params->dAdX;
+ state->z += params->dZdX;
+ state->tmu0_s += params->tmu[0].dSdX;
+ state->tmu0_t += params->tmu[0].dTdX;
+ state->tmu0_w += params->tmu[0].dWdX;
+ state->tmu1_s += params->tmu[1].dSdX;
+ state->tmu1_t += params->tmu[1].dTdX;
+ state->tmu1_w += params->tmu[1].dWdX;
+ state->w += params->dWdX;
+ }
+ else
+ {
+ state->ir -= params->dRdX;
+ state->ig -= params->dGdX;
+ state->ib -= params->dBdX;
+ state->ia -= params->dAdX;
+ state->z -= params->dZdX;
+ state->tmu0_s -= params->tmu[0].dSdX;
+ state->tmu0_t -= params->tmu[0].dTdX;
+ state->tmu0_w -= params->tmu[0].dWdX;
+ state->tmu1_s -= params->tmu[1].dSdX;
+ state->tmu1_t -= params->tmu[1].dTdX;
+ state->tmu1_w -= params->tmu[1].dWdX;
+ state->w -= params->dWdX;
+ }
+
+ x += state->xdir;
+ } while (start_x != x2);
+
+ voodoo->pixel_count[odd_even] += state->pixel_count;
+ voodoo->texel_count[odd_even] += state->texel_count;
+ voodoo->fbiPixelsIn += state->pixel_count;
+
+ if (voodoo->params.draw_offset == voodoo->params.front_offset && (real_y >> 1) < 2048)
+ voodoo->dirty_line[real_y >> 1] = 1;
+
+next_line:
+ if (SLI_ENABLED)
+ {
+ state->base_r += params->dRdY;
+ state->base_g += params->dGdY;
+ state->base_b += params->dBdY;
+ state->base_a += params->dAdY;
+ state->base_z += params->dZdY;
+ state->tmu[0].base_s += params->tmu[0].dSdY;
+ state->tmu[0].base_t += params->tmu[0].dTdY;
+ state->tmu[0].base_w += params->tmu[0].dWdY;
+ state->tmu[1].base_s += params->tmu[1].dSdY;
+ state->tmu[1].base_t += params->tmu[1].dTdY;
+ state->tmu[1].base_w += params->tmu[1].dWdY;
+ state->base_w += params->dWdY;
+ state->xstart += state->dx1;
+ state->xend += state->dx2;
+ }
+ state->base_r += params->dRdY;
+ state->base_g += params->dGdY;
+ state->base_b += params->dBdY;
+ state->base_a += params->dAdY;
+ state->base_z += params->dZdY;
+ state->tmu[0].base_s += params->tmu[0].dSdY;
+ state->tmu[0].base_t += params->tmu[0].dTdY;
+ state->tmu[0].base_w += params->tmu[0].dWdY;
+ state->tmu[1].base_s += params->tmu[1].dSdY;
+ state->tmu[1].base_t += params->tmu[1].dTdY;
+ state->tmu[1].base_w += params->tmu[1].dWdY;
+ state->base_w += params->dWdY;
+ state->xstart += state->dx1;
+ state->xend += state->dx2;
+ }
+
+ voodoo->texture_cache[0][params->tex_entry[0]].refcount_r[odd_even]++;
+ voodoo->texture_cache[1][params->tex_entry[1]].refcount_r[odd_even]++;
+}
+
+/*Compute the per-triangle base LOD value for one TMU.
+
+  The larger of the two squared texture-gradient magnitudes (along X and
+  along Y, each built from dS and dT shifted right by 14) is divided by 2^36,
+  passed through log2(), scaled by 256 and shifted right by 2. The 6-bit
+  field at bits 17:12 of tLOD[tmu] is then sign-extended, shifted left by 6
+  and added.
+
+  params : triangle parameters (only tmu[tmu] gradients and tLOD[tmu] are read)
+  tmu    : TMU index, 0 or 1
+  returns: the value to store in state.tmu[tmu].lod*/
+static int voodoo_tmu_base_lod(const voodoo_params_t *params, int tmu)
+{
+        uint64_t tempdx, tempdy;
+        uint64_t tempLOD;
+        int LOD;
+        int lodbias;
+
+        tempdx = (params->tmu[tmu].dSdX >> 14) * (params->tmu[tmu].dSdX >> 14) + (params->tmu[tmu].dTdX >> 14) * (params->tmu[tmu].dTdX >> 14);
+        tempdy = (params->tmu[tmu].dSdY >> 14) * (params->tmu[tmu].dSdY >> 14) + (params->tmu[tmu].dTdY >> 14) * (params->tmu[tmu].dTdY >> 14);
+
+        tempLOD = (tempdx > tempdy) ? tempdx : tempdy;
+
+        if (tempLOD)
+                LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256);
+        else
+        {
+                /*log2(0) is -infinity and converting that to int is undefined
+                  behaviour. Use the most negative 32-bit int explicitly; this
+                  is the value x86 float->int conversions produce for an
+                  out-of-range input, so existing results are unchanged there.
+                  (Written without INT_MIN to avoid needing <limits.h>.)*/
+                LOD = -2147483647 - 1;
+        }
+        LOD >>= 2;
+
+        lodbias = (params->tLOD[tmu] >> 12) & 0x3f;
+        if (lodbias & 0x20)
+                lodbias |= ~0x3f; /*sign-extend the 6-bit bias*/
+
+        return LOD + (lodbias << 6);
+}
+
+/*Prepare the rasteriser state for one queued triangle and draw it.
+
+  Copies the start values of every iterated parameter into a local
+  voodoo_state_t, optionally applies the sub-pixel adjustment, sign-extends
+  the 16-bit vertex coordinates, derives the three edge slopes, LOD clamps and
+  base LOD for both TMUs, then hands over to voodoo_half_triangle().
+
+  voodoo   : device instance (tri_count is incremented)
+  params   : parameters of the triangle to draw
+  odd_even : passed straight through to voodoo_half_triangle()*/
+void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_even)
+{
+        voodoo_state_t state;
+        int vertexAy_adjusted;
+        int vertexCy_adjusted;
+        int dx, dy;
+
+        voodoo->tri_count++;
+
+        /*Offset, in 1/16ths, from vertex A's fractional position to .5*/
+        dx = 8 - (params->vertexAx & 0xf);
+        if ((params->vertexAx & 0xf) > 8)
+                dx += 16;
+        dy = 8 - (params->vertexAy & 0xf);
+        if ((params->vertexAy & 0xf) > 8)
+                dy += 16;
+
+/*        pclog("voodoo_triangle %i %i %i : vA %f, %f vB %f, %f vC %f, %f f %i,%i %08x %08x %08x,%08x tex=%i,%i fogMode=%08x\n", odd_even, voodoo->params_read_idx[odd_even], voodoo->params_read_idx[odd_even] & PARAM_MASK, (float)params->vertexAx / 16.0, (float)params->vertexAy / 16.0,
+ (float)params->vertexBx / 16.0, (float)params->vertexBy / 16.0,
+ (float)params->vertexCx / 16.0, (float)params->vertexCy / 16.0,
+ (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[0] : 0,
+ (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[1] : 0, params->fbzColorPath, params->alphaMode, params->textureMode[0],params->textureMode[1], params->tex_entry[0],params->tex_entry[1], params->fogMode);*/
+
+        state.base_r = params->startR;
+        state.base_g = params->startG;
+        state.base_b = params->startB;
+        state.base_a = params->startA;
+        state.base_z = params->startZ;
+        state.tmu[0].base_s = params->tmu[0].startS;
+        state.tmu[0].base_t = params->tmu[0].startT;
+        state.tmu[0].base_w = params->tmu[0].startW;
+        state.tmu[1].base_s = params->tmu[1].startS;
+        state.tmu[1].base_t = params->tmu[1].startT;
+        state.tmu[1].base_w = params->tmu[1].startW;
+        state.base_w = params->startW;
+
+        if (params->fbzColorPath & FBZ_PARAM_ADJUST)
+        {
+                /*Move every start value by the (dx, dy) sub-pixel offset*/
+                state.base_r += (dx*params->dRdX + dy*params->dRdY) >> 4;
+                state.base_g += (dx*params->dGdX + dy*params->dGdY) >> 4;
+                state.base_b += (dx*params->dBdX + dy*params->dBdY) >> 4;
+                state.base_a += (dx*params->dAdX + dy*params->dAdY) >> 4;
+                state.base_z += (dx*params->dZdX + dy*params->dZdY) >> 4;
+                state.tmu[0].base_s += (dx*params->tmu[0].dSdX + dy*params->tmu[0].dSdY) >> 4;
+                state.tmu[0].base_t += (dx*params->tmu[0].dTdX + dy*params->tmu[0].dTdY) >> 4;
+                state.tmu[0].base_w += (dx*params->tmu[0].dWdX + dy*params->tmu[0].dWdY) >> 4;
+                state.tmu[1].base_s += (dx*params->tmu[1].dSdX + dy*params->tmu[1].dSdY) >> 4;
+                state.tmu[1].base_t += (dx*params->tmu[1].dTdX + dy*params->tmu[1].dTdY) >> 4;
+                state.tmu[1].base_w += (dx*params->tmu[1].dWdX + dy*params->tmu[1].dWdY) >> 4;
+                state.base_w += (dx*params->dWdX + dy*params->dWdY) >> 4;
+        }
+
+        tris++;
+
+        /*Keep the low 16 bits of each vertex coordinate and sign-extend them*/
+        state.vertexAy = params->vertexAy & ~0xffff0000;
+        if (state.vertexAy & 0x8000)
+                state.vertexAy |= 0xffff0000;
+        state.vertexBy = params->vertexBy & ~0xffff0000;
+        if (state.vertexBy & 0x8000)
+                state.vertexBy |= 0xffff0000;
+        state.vertexCy = params->vertexCy & ~0xffff0000;
+        if (state.vertexCy & 0x8000)
+                state.vertexCy |= 0xffff0000;
+
+        state.vertexAx = params->vertexAx & ~0xffff0000;
+        if (state.vertexAx & 0x8000)
+                state.vertexAx |= 0xffff0000;
+        state.vertexBx = params->vertexBx & ~0xffff0000;
+        if (state.vertexBx & 0x8000)
+                state.vertexBx |= 0xffff0000;
+        state.vertexCx = params->vertexCx & ~0xffff0000;
+        if (state.vertexCx & 0x8000)
+                state.vertexCx |= 0xffff0000;
+
+        vertexAy_adjusted = (state.vertexAy+7) >> 4;
+        vertexCy_adjusted = (state.vertexCy+7) >> 4;
+
+        /*Edge slopes (x step per y step); 0 when the edge has no height*/
+        if (state.vertexBy - state.vertexAy)
+                state.dxAB = (int)((((int64_t)state.vertexBx << 12) - ((int64_t)state.vertexAx << 12)) << 4) / (int)(state.vertexBy - state.vertexAy);
+        else
+                state.dxAB = 0;
+        if (state.vertexCy - state.vertexAy)
+                state.dxAC = (int)((((int64_t)state.vertexCx << 12) - ((int64_t)state.vertexAx << 12)) << 4) / (int)(state.vertexCy - state.vertexAy);
+        else
+                state.dxAC = 0;
+        if (state.vertexCy - state.vertexBy)
+                state.dxBC = (int)((((int64_t)state.vertexCx << 12) - ((int64_t)state.vertexBx << 12)) << 4) / (int)(state.vertexCy - state.vertexBy);
+        else
+                state.dxBC = 0;
+
+        /*LOD limits from tLOD bits 5:0 (min) and 11:6 (max), max capped at 0x800*/
+        state.lod_min[0] = (params->tLOD[0] & 0x3f) << 6;
+        state.lod_max[0] = ((params->tLOD[0] >> 6) & 0x3f) << 6;
+        if (state.lod_max[0] > 0x800)
+                state.lod_max[0] = 0x800;
+        state.lod_min[1] = (params->tLOD[1] & 0x3f) << 6;
+        state.lod_max[1] = ((params->tLOD[1] >> 6) & 0x3f) << 6;
+        if (state.lod_max[1] > 0x800)
+                state.lod_max[1] = 0x800;
+
+        state.xstart = state.xend = state.vertexAx << 8;
+        state.xdir = params->sign ? -1 : 1;
+
+        state.y = (state.vertexAy + 8) >> 4;
+        state.ydir = 1;
+
+        state.tmu[0].lod = voodoo_tmu_base_lod(params, 0);
+        state.tmu[1].lod = voodoo_tmu_base_lod(params, 1);
+
+        voodoo_half_triangle(voodoo, params, &state, vertexAy_adjusted, vertexCy_adjusted, odd_even);
+}
+
+
+/*Body shared by all render threads.
+
+  Never returns. Each pass signals render_not_full_event, blocks on
+  wake_render_thread, then draws queued triangles until PARAM_EMPTY()
+  is true for this thread, accumulating the time spent in render_time.
+
+  param    : the voodoo_t instance, passed as void *
+  odd_even : index of this thread's events, read index and counters*/
+static void render_thread(void *param, int odd_even)
+{
+        voodoo_t *voodoo = (voodoo_t *)param;
+
+        for (;;)
+        {
+                /*Announce room in the queue, then sleep until woken*/
+                thread_set_event(voodoo->render_not_full_event[odd_even]);
+                thread_wait_event(voodoo->wake_render_thread[odd_even], -1);
+                thread_reset_event(voodoo->wake_render_thread[odd_even]);
+                voodoo->render_voodoo_busy[odd_even] = 1;
+
+                while (!PARAM_EMPTY(odd_even))
+                {
+                        uint64_t began = timer_read();
+
+                        voodoo_triangle(voodoo, &voodoo->params_buffer[voodoo->params_read_idx[odd_even] & PARAM_MASK], odd_even);
+                        voodoo->params_read_idx[odd_even]++;
+
+                        /*Signal the producer while the queue is still near capacity*/
+                        if (PARAM_ENTRIES(odd_even) > (PARAM_SIZE - 10))
+                                thread_set_event(voodoo->render_not_full_event[odd_even]);
+
+                        voodoo->render_time[odd_even] += timer_read() - began;
+                }
+
+                voodoo->render_voodoo_busy[odd_even] = 0;
+        }
+}
+
+/*Thread entry point: runs render_thread() with index 0*/
+void voodoo_render_thread_1(void *param)
+{
+        const int thread_index = 0;
+
+        render_thread(param, thread_index);
+}
+/*Thread entry point: runs render_thread() with index 1*/
+void voodoo_render_thread_2(void *param)
+{
+        const int thread_index = 1;
+
+        render_thread(param, thread_index);
+}
+/*Thread entry point: runs render_thread() with index 2*/
+void voodoo_render_thread_3(void *param)
+{
+        const int thread_index = 2;
+
+        render_thread(param, thread_index);
+}
+/*Thread entry point: runs render_thread() with index 3*/
+void voodoo_render_thread_4(void *param)
+{
+        const int thread_index = 3;
+
+        render_thread(param, thread_index);
+}
+
+/*Append one triangle's parameters to the shared parameter ring buffer.
+
+  Blocks while PARAM_FULL() is true for any active render thread, then copies
+  *params into the slot at params_write_idx, advances the write index and
+  wakes the render threads if any of them has fewer than 4 entries pending.
+
+  voodoo : device instance owning the ring buffer and the events
+  params : parameters to queue; voodoo_use_texture() is called on it before
+           the copy, so anything that call stores in *params is queued too*/
+void voodoo_queue_triangle(voodoo_t *voodoo, voodoo_params_t *params)
+{
+ voodoo_params_t *params_new = &voodoo->params_buffer[voodoo->params_write_idx & PARAM_MASK];
+
+ /*Only threads 0..render_threads-1 are considered; 3 threads is treated like 2*/
+ while (PARAM_FULL(0) || (voodoo->render_threads >= 2 && PARAM_FULL(1)) ||
+ (voodoo->render_threads == 4 && (PARAM_FULL(2) || PARAM_FULL(3))))
+ {
+ /*Reset first, then re-check PARAM_FULL before each wait, so a signal
+ raised between the loop test and the reset does not cause a stall*/
+ thread_reset_event(voodoo->render_not_full_event[0]);
+ if (voodoo->render_threads >= 2)
+ thread_reset_event(voodoo->render_not_full_event[1]);
+ if (voodoo->render_threads == 4)
+ {
+ thread_reset_event(voodoo->render_not_full_event[2]);
+ thread_reset_event(voodoo->render_not_full_event[3]);
+ }
+ if (PARAM_FULL(0))
+ thread_wait_event(voodoo->render_not_full_event[0], -1); /*Wait for room in ringbuffer*/
+ if (voodoo->render_threads >= 2 && PARAM_FULL(1))
+ thread_wait_event(voodoo->render_not_full_event[1], -1); /*Wait for room in ringbuffer*/
+ if (voodoo->render_threads == 4 && PARAM_FULL(2))
+ thread_wait_event(voodoo->render_not_full_event[2], -1); /*Wait for room in ringbuffer*/
+ if (voodoo->render_threads == 4 && PARAM_FULL(3))
+ thread_wait_event(voodoo->render_not_full_event[3], -1); /*Wait for room in ringbuffer*/
+ }
+
+ /*TMU 1 is only prepared when the device has two TMUs*/
+ voodoo_use_texture(voodoo, params, 0);
+ if (voodoo->dual_tmus)
+ voodoo_use_texture(voodoo, params, 1);
+
+ memcpy(params_new, params, sizeof(voodoo_params_t));
+
+ /*Publish the slot only after it has been completely filled in*/
+ voodoo->params_write_idx++;
+
+ /*Wake the workers only when some thread's queue is short (fewer than 4 entries)*/
+ if (PARAM_ENTRIES(0) < 4 || (voodoo->render_threads >= 2 && PARAM_ENTRIES(1) < 4) ||
+ (voodoo->render_threads == 4 && (PARAM_ENTRIES(2) < 4 || PARAM_ENTRIES(3) < 4)))
+ voodoo_wake_render_thread(voodoo);
+}
--- /dev/null
+/*Define NO_CODEGEN when none of the 32-bit x86 / Windows macros and not
+  __amd64__ is defined. NO_CODEGEN removes the voodoo_codegen_*() prototypes
+  below and the use_recompiler path in the renderer.
+  NOTE(review): `defined _WIN32` is listed twice; the second test is redundant.
+  NOTE(review): WIN32/_WIN32 are treated as implying an x86 target - confirm
+  this is intended for non-x86 Windows builds.*/
+#if !(defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined WIN32 || defined _WIN32 || defined _WIN32) && !(defined __amd64__)
+#define NO_CODEGEN
+#endif
+
+#ifndef NO_CODEGEN
+void voodoo_codegen_init(voodoo_t *voodoo);
+void voodoo_codegen_close(voodoo_t *voodoo);
+#endif
+
+/*Depth-compare one pixel and skip it on failure.
+
+  comp_depth : depth value to compare against the stored depth. It is
+               parenthesised at every use, so callers may pass any expression
+               (including an unbracketed `cond ? a : b`) and the whole value
+               is compared, not just its last operand.
+
+  Uses from the expansion site: `depth_op` (selects the comparison),
+  `old_depth` (value already in the depth buffer), `voodoo` (fbiZFuncFail is
+  incremented on failure) and a `skip_pixel` label that is jumped to when the
+  test fails. On success execution simply continues after the macro.*/
+#define DEPTH_TEST(comp_depth) \
+        do \
+        { \
+                switch (depth_op) \
+                { \
+                        case DEPTHOP_NEVER: \
+                        voodoo->fbiZFuncFail++; \
+                        goto skip_pixel; \
+                        case DEPTHOP_LESSTHAN: \
+                        if (!((comp_depth) < old_depth)) \
+                        { \
+                                voodoo->fbiZFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case DEPTHOP_EQUAL: \
+                        if (!((comp_depth) == old_depth)) \
+                        { \
+                                voodoo->fbiZFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case DEPTHOP_LESSTHANEQUAL: \
+                        if (!((comp_depth) <= old_depth)) \
+                        { \
+                                voodoo->fbiZFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case DEPTHOP_GREATERTHAN: \
+                        if (!((comp_depth) > old_depth)) \
+                        { \
+                                voodoo->fbiZFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case DEPTHOP_NOTEQUAL: \
+                        if (!((comp_depth) != old_depth)) \
+                        { \
+                                voodoo->fbiZFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case DEPTHOP_GREATERTHANEQUAL: \
+                        if (!((comp_depth) >= old_depth)) \
+                        { \
+                                voodoo->fbiZFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case DEPTHOP_ALWAYS: \
+                        break; \
+                        default: \
+                        break; \
+                } \
+        } while (0)
+
+/*Apply fog to a colour in place and clamp the result with CLAMP().
+
+  src_r, src_g, src_b : colour components; read and overwritten
+  z, ia, w            : iterated values used as the fog factor source in
+                        FOG_Z, FOG_ALPHA and FOG_W mode respectively
+
+  With FOG_CONSTANT set, params->fogColor is simply added. Otherwise a fog
+  colour is built (fogColor, or 0 when FOG_ADD is set; minus the source colour
+  unless FOG_MULT is set), scaled by (fog_a + 1) / 256, and either replaces
+  the source (FOG_MULT) or is added to it.
+
+  Uses from the expansion site: `params`, and `w_depth`, which is NOT a macro
+  parameter - in the table-fog case (neither FOG_Z nor FOG_ALPHA) the fog
+  table index and interpolation fraction are taken from it.
+  NOTE(review): z, ia and w are used unparenthesised inside shift
+  expressions; callers should pass simple lvalues or bracketed expressions.*/
+#define APPLY_FOG(src_r, src_g, src_b, z, ia, w) \
+ do \
+ { \
+ if (params->fogMode & FOG_CONSTANT) \
+ { \
+ src_r += params->fogColor.r; \
+ src_g += params->fogColor.g; \
+ src_b += params->fogColor.b; \
+ } \
+ else \
+ { \
+ int fog_r, fog_g, fog_b, fog_a = 0; \
+ int fog_idx; \
+ \
+ if (!(params->fogMode & FOG_ADD)) \
+ { \
+ fog_r = params->fogColor.r; \
+ fog_g = params->fogColor.g; \
+ fog_b = params->fogColor.b; \
+ } \
+ else \
+ fog_r = fog_g = fog_b = 0; \
+ \
+ if (!(params->fogMode & FOG_MULT)) \
+ { \
+ fog_r -= src_r; \
+ fog_g -= src_g; \
+ fog_b -= src_b; \
+ } \
+ \
+ switch (params->fogMode & (FOG_Z|FOG_ALPHA)) \
+ { \
+ case 0: \
+ fog_idx = (w_depth >> 10) & 0x3f; \
+ \
+ fog_a = params->fogTable[fog_idx].fog; \
+ fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10; \
+ break; \
+ case FOG_Z: \
+ fog_a = (z >> 20) & 0xff; \
+ break; \
+ case FOG_ALPHA: \
+ fog_a = CLAMP(ia >> 12); \
+ break; \
+ case FOG_W: \
+ fog_a = CLAMP((w >> 32) & 0xff); \
+ break; \
+ } \
+ fog_a++; \
+ \
+ fog_r = (fog_r * fog_a) >> 8; \
+ fog_g = (fog_g * fog_a) >> 8; \
+ fog_b = (fog_b * fog_a) >> 8; \
+ \
+ if (params->fogMode & FOG_MULT) \
+ { \
+ src_r = fog_r; \
+ src_g = fog_g; \
+ src_b = fog_b; \
+ } \
+ else \
+ { \
+ src_r += fog_r; \
+ src_g += fog_g; \
+ src_b += fog_b; \
+ } \
+ } \
+ \
+ src_r = CLAMP(src_r); \
+ src_g = CLAMP(src_g); \
+ src_b = CLAMP(src_b); \
+ } while (0)
+
+/*Alpha-compare one pixel and skip it on failure.
+
+  src_a : alpha value to compare against the reference. It is parenthesised
+          at every use so that an expression argument is compared as a whole
+          rather than being split by operator precedence.
+
+  Uses from the expansion site: `alpha_func` (selects the comparison),
+  `a_ref` (reference alpha), `voodoo` (fbiAFuncFail is incremented on
+  failure) and a `skip_pixel` label that is jumped to when the test fails.
+  On success execution continues after the macro.*/
+#define ALPHA_TEST(src_a) \
+        do \
+        { \
+                switch (alpha_func) \
+                { \
+                        case AFUNC_NEVER: \
+                        voodoo->fbiAFuncFail++; \
+                        goto skip_pixel; \
+                        case AFUNC_LESSTHAN: \
+                        if (!((src_a) < a_ref)) \
+                        { \
+                                voodoo->fbiAFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case AFUNC_EQUAL: \
+                        if (!((src_a) == a_ref)) \
+                        { \
+                                voodoo->fbiAFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case AFUNC_LESSTHANEQUAL: \
+                        if (!((src_a) <= a_ref)) \
+                        { \
+                                voodoo->fbiAFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case AFUNC_GREATERTHAN: \
+                        if (!((src_a) > a_ref)) \
+                        { \
+                                voodoo->fbiAFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case AFUNC_NOTEQUAL: \
+                        if (!((src_a) != a_ref)) \
+                        { \
+                                voodoo->fbiAFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case AFUNC_GREATERTHANEQUAL: \
+                        if (!((src_a) >= a_ref)) \
+                        { \
+                                voodoo->fbiAFuncFail++; \
+                                goto skip_pixel; \
+                        } \
+                        break; \
+                        case AFUNC_ALWAYS: \
+                        break; \
+                        default: \
+                        break; \
+                } \
+        } while (0)
+
+/*Blend a source colour with the destination pixel, in place.
+
+  src_r, src_g, src_b : source colour; read and overwritten with the result
+  src_a               : source alpha; read only
+
+  The destination colour is scaled by the factor chosen by `dest_afunc`, the
+  source colour by the factor chosen by `src_afunc`, the two are summed and
+  the sum is clamped with CLAMP(). Factors are applied as (value * f) / 255.
+  A dest_afunc value with no matching case leaves the destination
+  contribution at 0; a src_afunc value with no matching case leaves the
+  source colour unscaled.
+
+  Uses from the expansion site: `dest_afunc`, `src_afunc`, `dest_r`,
+  `dest_g`, `dest_b`, `dest_a`, plus MIN(), CLAMP() and fatal().
+
+  NOTE(review): the AFUNC_ASATURATE case computes MIN(src_a, 1-dest_a) while
+  every other "one minus" factor here is written 255-x. `1-dest_a` may have
+  been intended as `255-dest_a` - confirm against the hardware documentation
+  and any other implementation of this blend before changing it.*/
+#define ALPHA_BLEND(src_r, src_g, src_b, src_a) \
+ do \
+ { \
+ int _a; \
+ int newdest_r = 0, newdest_g = 0, newdest_b = 0; \
+ \
+ switch (dest_afunc) \
+ { \
+ case AFUNC_AZERO: \
+ newdest_r = newdest_g = newdest_b = 0; \
+ break; \
+ case AFUNC_ASRC_ALPHA: \
+ newdest_r = (dest_r * src_a) / 255; \
+ newdest_g = (dest_g * src_a) / 255; \
+ newdest_b = (dest_b * src_a) / 255; \
+ break; \
+ case AFUNC_A_COLOR: \
+ newdest_r = (dest_r * src_r) / 255; \
+ newdest_g = (dest_g * src_g) / 255; \
+ newdest_b = (dest_b * src_b) / 255; \
+ break; \
+ case AFUNC_ADST_ALPHA: \
+ newdest_r = (dest_r * dest_a) / 255; \
+ newdest_g = (dest_g * dest_a) / 255; \
+ newdest_b = (dest_b * dest_a) / 255; \
+ break; \
+ case AFUNC_AONE: \
+ newdest_r = dest_r; \
+ newdest_g = dest_g; \
+ newdest_b = dest_b; \
+ break; \
+ case AFUNC_AOMSRC_ALPHA: \
+ newdest_r = (dest_r * (255-src_a)) / 255; \
+ newdest_g = (dest_g * (255-src_a)) / 255; \
+ newdest_b = (dest_b * (255-src_a)) / 255; \
+ break; \
+ case AFUNC_AOM_COLOR: \
+ newdest_r = (dest_r * (255-src_r)) / 255; \
+ newdest_g = (dest_g * (255-src_g)) / 255; \
+ newdest_b = (dest_b * (255-src_b)) / 255; \
+ break; \
+ case AFUNC_AOMDST_ALPHA: \
+ newdest_r = (dest_r * (255-dest_a)) / 255; \
+ newdest_g = (dest_g * (255-dest_a)) / 255; \
+ newdest_b = (dest_b * (255-dest_a)) / 255; \
+ break; \
+ case AFUNC_ASATURATE: \
+ _a = MIN(src_a, 1-dest_a); \
+ newdest_r = (dest_r * _a) / 255; \
+ newdest_g = (dest_g * _a) / 255; \
+ newdest_b = (dest_b * _a) / 255; \
+ break; \
+ } \
+ \
+ switch (src_afunc) \
+ { \
+ case AFUNC_AZERO: \
+ src_r = src_g = src_b = 0; \
+ break; \
+ case AFUNC_ASRC_ALPHA: \
+ src_r = (src_r * src_a) / 255; \
+ src_g = (src_g * src_a) / 255; \
+ src_b = (src_b * src_a) / 255; \
+ break; \
+ case AFUNC_A_COLOR: \
+ src_r = (src_r * dest_r) / 255; \
+ src_g = (src_g * dest_g) / 255; \
+ src_b = (src_b * dest_b) / 255; \
+ break; \
+ case AFUNC_ADST_ALPHA: \
+ src_r = (src_r * dest_a) / 255; \
+ src_g = (src_g * dest_a) / 255; \
+ src_b = (src_b * dest_a) / 255; \
+ break; \
+ case AFUNC_AONE: \
+ break; \
+ case AFUNC_AOMSRC_ALPHA: \
+ src_r = (src_r * (255-src_a)) / 255; \
+ src_g = (src_g * (255-src_a)) / 255; \
+ src_b = (src_b * (255-src_a)) / 255; \
+ break; \
+ case AFUNC_AOM_COLOR: \
+ src_r = (src_r * (255-dest_r)) / 255; \
+ src_g = (src_g * (255-dest_g)) / 255; \
+ src_b = (src_b * (255-dest_b)) / 255; \
+ break; \
+ case AFUNC_AOMDST_ALPHA: \
+ src_r = (src_r * (255-dest_a)) / 255; \
+ src_g = (src_g * (255-dest_a)) / 255; \
+ src_b = (src_b * (255-dest_a)) / 255; \
+ break; \
+ case AFUNC_ACOLORBEFOREFOG: \
+ fatal("AFUNC_ACOLORBEFOREFOG\n"); \
+ break; \
+ } \
+ \
+ src_r += newdest_r; \
+ src_g += newdest_g; \
+ src_b += newdest_b; \
+ \
+ src_r = CLAMP(src_r); \
+ src_g = CLAMP(src_g); \
+ src_b = CLAMP(src_b); \
+ } while(0)
+
+
+
+void voodoo_render_thread_1(void *param);
+void voodoo_render_thread_2(void *param);
+void voodoo_render_thread_3(void *param);
+void voodoo_render_thread_4(void *param);
+void voodoo_queue_triangle(voodoo_t *voodoo, voodoo_params_t *params);
+
+extern int voodoo_recomp;
+extern int tris;
+
+/*Signal the wake event of every active render thread.
+  Thread 0 is always signalled, thread 1 when render_threads >= 2, and
+  threads 2 and 3 only when render_threads is exactly 4.*/
+static inline void voodoo_wake_render_thread(voodoo_t *voodoo)
+{
+        thread_set_event(voodoo->wake_render_thread[0]); /*Wake up render thread if moving from idle*/
+
+        if (voodoo->render_threads < 2)
+                return;
+        thread_set_event(voodoo->wake_render_thread[1]);
+
+        if (voodoo->render_threads != 4)
+                return;
+        thread_set_event(voodoo->wake_render_thread[2]);
+        thread_set_event(voodoo->wake_render_thread[3]);
+}
+
+/*Block until every active render thread has an empty queue and is not busy.
+
+  While any active thread still has PARAM_EMPTY() false or its
+  render_voodoo_busy flag set, the threads are woken again and the caller
+  waits on that thread's render_not_full_event with a timeout argument of 1,
+  so the conditions are re-polled rather than relying on a single signal.
+  As elsewhere, threads 2 and 3 are only considered when render_threads == 4.*/
+static inline void voodoo_wait_for_render_thread_idle(voodoo_t *voodoo)
+{
+ while (!PARAM_EMPTY(0) || (voodoo->render_threads >= 2 && !PARAM_EMPTY(1)) ||
+ (voodoo->render_threads == 4 && (!PARAM_EMPTY(2) || !PARAM_EMPTY(3))) ||
+ voodoo->render_voodoo_busy[0] || (voodoo->render_threads >= 2 && voodoo->render_voodoo_busy[1]) ||
+ (voodoo->render_threads == 4 && (voodoo->render_voodoo_busy[2] || voodoo->render_voodoo_busy[3])))
+ {
+ /*Make sure sleeping workers notice any remaining queued work*/
+ voodoo_wake_render_thread(voodoo);
+ if (!PARAM_EMPTY(0) || voodoo->render_voodoo_busy[0])
+ thread_wait_event(voodoo->render_not_full_event[0], 1);
+ if (voodoo->render_threads >= 2 && (!PARAM_EMPTY(1) || voodoo->render_voodoo_busy[1]))
+ thread_wait_event(voodoo->render_not_full_event[1], 1);
+ if (voodoo->render_threads == 4 && (!PARAM_EMPTY(2) || voodoo->render_voodoo_busy[2]))
+ thread_wait_event(voodoo->render_not_full_event[2], 1);
+ if (voodoo->render_threads == 4 && (!PARAM_EMPTY(3) || voodoo->render_voodoo_busy[3]))
+ thread_wait_event(voodoo->render_not_full_event[3], 1);
+ }
+}
--- /dev/null
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+#include "vid_voodoo_setup.h"
+
+void voodoo_triangle_setup(voodoo_t *voodoo)
+{
+ float dxAB, dxBC, dyAB, dyBC;
+ float area;
+ int va = 0, vb = 1, vc = 2;
+ vert_t verts[3];
+
+ verts[0] = voodoo->verts[0];
+ verts[1] = voodoo->verts[1];
+ verts[2] = voodoo->verts[2];
+
+ if (verts[0].sVy < verts[1].sVy)
+ {
+ if (verts[1].sVy < verts[2].sVy)
+ {
+ /* V1>V0, V2>V1, V2>V1>V0*/
+ va = 0; /*OK*/
+ vb = 1;
+ vc = 2;
+ }
+ else
+ {
+ /* V1>V0, V1>V2*/
+ if (verts[0].sVy < verts[2].sVy)
+ {
+ /* V1>V0, V1>V2, V2>V0, V1>V2>V0*/
+ va = 0;
+ vb = 2;
+ vc = 1;
+ }
+ else
+ {
+ /* V1>V0, V1>V2, V0>V2, V1>V0>V2*/
+ va = 2;
+ vb = 0;
+ vc = 1;
+ }
+ }
+ }
+ else
+ {
+ if (verts[1].sVy < verts[2].sVy)
+ {
+ /* V0>V1, V2>V1*/
+ if (verts[0].sVy < verts[2].sVy)
+ {
+ /* V0>V1, V2>V1, V2>V0, V2>V0>V1*/
+ va = 1;
+ vb = 0;
+ vc = 2;
+ }
+ else
+ {
+ /* V0>V1, V2>V1, V0>V2, V0>V2>V1*/
+ va = 1;
+ vb = 2;
+ vc = 0;
+ }
+ }
+ else
+ {
+ /*V0>V1>V2*/
+ va = 2;
+ vb = 1;
+ vc = 0;
+ }
+ }
+
+ dxAB = verts[0].sVx - verts[1].sVx;
+ dxBC = verts[1].sVx - verts[2].sVx;
+ dyAB = verts[0].sVy - verts[1].sVy;
+ dyBC = verts[1].sVy - verts[2].sVy;
+
+ area = dxAB * dyBC - dxBC * dyAB;
+
+ if (area == 0.0)
+ return;
+
+ if (voodoo->sSetupMode & SETUPMODE_CULLING_ENABLE)
+ {
+ int cull_sign = voodoo->sSetupMode & SETUPMODE_CULLING_SIGN;
+ int sign = (area < 0.0);
+
+ if ((voodoo->sSetupMode & (SETUPMODE_CULLING_ENABLE | SETUPMODE_DISABLE_PINGPONG))
+ == SETUPMODE_CULLING_ENABLE && voodoo->cull_pingpong)
+ cull_sign = !cull_sign;
+
+ if (cull_sign && sign)
+ return;
+ if (!cull_sign && !sign)
+ return;
+ }
+
+
+ dxAB = verts[va].sVx - verts[vb].sVx;
+ dxBC = verts[vb].sVx - verts[vc].sVx;
+ dyAB = verts[va].sVy - verts[vb].sVy;
+ dyBC = verts[vb].sVy - verts[vc].sVy;
+
+ area = dxAB * dyBC - dxBC * dyAB;
+
+ dxAB /= area;
+ dxBC /= area;
+ dyAB /= area;
+ dyBC /= area;
+
+
+
+ voodoo->params.vertexAx = (int32_t)(int16_t)((int32_t)(verts[va].sVx * 16.0f) & 0xffff);
+ voodoo->params.vertexAy = (int32_t)(int16_t)((int32_t)(verts[va].sVy * 16.0f) & 0xffff);
+ voodoo->params.vertexBx = (int32_t)(int16_t)((int32_t)(verts[vb].sVx * 16.0f) & 0xffff);
+ voodoo->params.vertexBy = (int32_t)(int16_t)((int32_t)(verts[vb].sVy * 16.0f) & 0xffff);
+ voodoo->params.vertexCx = (int32_t)(int16_t)((int32_t)(verts[vc].sVx * 16.0f) & 0xffff);
+ voodoo->params.vertexCy = (int32_t)(int16_t)((int32_t)(verts[vc].sVy * 16.0f) & 0xffff);
+
+ if (voodoo->params.vertexAy > voodoo->params.vertexBy || voodoo->params.vertexBy > voodoo->params.vertexCy)
+ {
+ pclog("triangle_setup wrong order %d %d %d\n", voodoo->params.vertexAy, voodoo->params.vertexBy, voodoo->params.vertexCy);
+ return;
+ }
+
+ if (voodoo->sSetupMode & SETUPMODE_RGB)
+ {
+ voodoo->params.startR = (int32_t)(verts[va].sRed * 4096.0f);
+ voodoo->params.dRdX = (int32_t)(((verts[va].sRed - verts[vb].sRed) * dyBC - (verts[vb].sRed - verts[vc].sRed) * dyAB) * 4096.0f);
+ voodoo->params.dRdY = (int32_t)(((verts[vb].sRed - verts[vc].sRed) * dxAB - (verts[va].sRed - verts[vb].sRed) * dxBC) * 4096.0f);
+ voodoo->params.startG = (int32_t)(verts[va].sGreen * 4096.0f);
+ voodoo->params.dGdX = (int32_t)(((verts[va].sGreen - verts[vb].sGreen) * dyBC - (verts[vb].sGreen - verts[vc].sGreen) * dyAB) * 4096.0f);
+ voodoo->params.dGdY = (int32_t)(((verts[vb].sGreen - verts[vc].sGreen) * dxAB - (verts[va].sGreen - verts[vb].sGreen) * dxBC) * 4096.0f);
+ voodoo->params.startB = (int32_t)(verts[va].sBlue * 4096.0f);
+ voodoo->params.dBdX = (int32_t)(((verts[va].sBlue - verts[vb].sBlue) * dyBC - (verts[vb].sBlue - verts[vc].sBlue) * dyAB) * 4096.0f);
+ voodoo->params.dBdY = (int32_t)(((verts[vb].sBlue - verts[vc].sBlue) * dxAB - (verts[va].sBlue - verts[vb].sBlue) * dxBC) * 4096.0f);
+ }
+ if (voodoo->sSetupMode & SETUPMODE_ALPHA)
+ {
+ voodoo->params.startA = (int32_t)(verts[va].sAlpha * 4096.0f);
+ voodoo->params.dAdX = (int32_t)(((verts[va].sAlpha - verts[vb].sAlpha) * dyBC - (verts[vb].sAlpha - verts[vc].sAlpha) * dyAB) * 4096.0f);
+ voodoo->params.dAdY = (int32_t)(((verts[vb].sAlpha - verts[vc].sAlpha) * dxAB - (verts[va].sAlpha - verts[vb].sAlpha) * dxBC) * 4096.0f);
+ }
+ if (voodoo->sSetupMode & SETUPMODE_Z)
+ {
+ voodoo->params.startZ = (int32_t)(verts[va].sVz * 4096.0f);
+ voodoo->params.dZdX = (int32_t)(((verts[va].sVz - verts[vb].sVz) * dyBC - (verts[vb].sVz - verts[vc].sVz) * dyAB) * 4096.0f);
+ voodoo->params.dZdY = (int32_t)(((verts[vb].sVz - verts[vc].sVz) * dxAB - (verts[va].sVz - verts[vb].sVz) * dxBC) * 4096.0f);
+ }
+ if (voodoo->sSetupMode & SETUPMODE_Wb)
+ {
+ voodoo->params.startW = (int64_t)(verts[va].sWb * 4294967296.0f);
+ voodoo->params.dWdX = (int64_t)(((verts[va].sWb - verts[vb].sWb) * dyBC - (verts[vb].sWb - verts[vc].sWb) * dyAB) * 4294967296.0f);
+ voodoo->params.dWdY = (int64_t)(((verts[vb].sWb - verts[vc].sWb) * dxAB - (verts[va].sWb - verts[vb].sWb) * dxBC) * 4294967296.0f);
+ voodoo->params.tmu[0].startW = voodoo->params.tmu[1].startW = voodoo->params.startW;
+ voodoo->params.tmu[0].dWdX = voodoo->params.tmu[1].dWdX = voodoo->params.dWdX;
+ voodoo->params.tmu[0].dWdY = voodoo->params.tmu[1].dWdY = voodoo->params.dWdY;
+ }
+ if (voodoo->sSetupMode & SETUPMODE_W0)
+ {
+ voodoo->params.tmu[0].startW = (int64_t)(verts[va].sW0 * 4294967296.0f);
+ voodoo->params.tmu[0].dWdX = (int64_t)(((verts[va].sW0 - verts[vb].sW0) * dyBC - (verts[vb].sW0 - verts[vc].sW0) * dyAB) * 4294967296.0f);
+ voodoo->params.tmu[0].dWdY = (int64_t)(((verts[vb].sW0 - verts[vc].sW0) * dxAB - (verts[va].sW0 - verts[vb].sW0) * dxBC) * 4294967296.0f);
+ voodoo->params.tmu[1].startW = voodoo->params.tmu[0].startW;
+ voodoo->params.tmu[1].dWdX = voodoo->params.tmu[0].dWdX;
+ voodoo->params.tmu[1].dWdY = voodoo->params.tmu[0].dWdY;
+ }
+ if (voodoo->sSetupMode & SETUPMODE_S0_T0)
+ {
+ voodoo->params.tmu[0].startS = (int64_t)(verts[va].sS0 * 4294967296.0f);
+ voodoo->params.tmu[0].dSdX = (int64_t)(((verts[va].sS0 - verts[vb].sS0) * dyBC - (verts[vb].sS0 - verts[vc].sS0) * dyAB) * 4294967296.0f);
+ voodoo->params.tmu[0].dSdY = (int64_t)(((verts[vb].sS0 - verts[vc].sS0) * dxAB - (verts[va].sS0 - verts[vb].sS0) * dxBC) * 4294967296.0f);
+ voodoo->params.tmu[0].startT = (int64_t)(verts[va].sT0 * 4294967296.0f);
+ voodoo->params.tmu[0].dTdX = (int64_t)(((verts[va].sT0 - verts[vb].sT0) * dyBC - (verts[vb].sT0 - verts[vc].sT0) * dyAB) * 4294967296.0f);
+ voodoo->params.tmu[0].dTdY = (int64_t)(((verts[vb].sT0 - verts[vc].sT0) * dxAB - (verts[va].sT0 - verts[vb].sT0) * dxBC) * 4294967296.0f);
+ voodoo->params.tmu[1].startS = voodoo->params.tmu[0].startS;
+ voodoo->params.tmu[1].dSdX = voodoo->params.tmu[0].dSdX;
+ voodoo->params.tmu[1].dSdY = voodoo->params.tmu[0].dSdY;
+ voodoo->params.tmu[1].startT = voodoo->params.tmu[0].startT;
+ voodoo->params.tmu[1].dTdX = voodoo->params.tmu[0].dTdX;
+ voodoo->params.tmu[1].dTdY = voodoo->params.tmu[0].dTdY;
+ }
+ if (voodoo->sSetupMode & SETUPMODE_W1)
+ {
+ voodoo->params.tmu[1].startW = (int64_t)(verts[va].sW1 * 4294967296.0f);
+ voodoo->params.tmu[1].dWdX = (int64_t)(((verts[va].sW1 - verts[vb].sW1) * dyBC - (verts[vb].sW1 - verts[vc].sW1) * dyAB) * 4294967296.0f);
+ voodoo->params.tmu[1].dWdY = (int64_t)(((verts[vb].sW1 - verts[vc].sW1) * dxAB - (verts[va].sW1 - verts[vb].sW1) * dxBC) * 4294967296.0f);
+ }
+ if (voodoo->sSetupMode & SETUPMODE_S1_T1)
+ {
+ voodoo->params.tmu[1].startS = (int64_t)(verts[va].sS1 * 4294967296.0f);
+ voodoo->params.tmu[1].dSdX = (int64_t)(((verts[va].sS1 - verts[vb].sS1) * dyBC - (verts[vb].sS1 - verts[vc].sS1) * dyAB) * 4294967296.0f);
+ voodoo->params.tmu[1].dSdY = (int64_t)(((verts[vb].sS1 - verts[vc].sS1) * dxAB - (verts[va].sS1 - verts[vb].sS1) * dxBC) * 4294967296.0f);
+ voodoo->params.tmu[1].startT = (int64_t)(verts[va].sT1 * 4294967296.0f);
+ voodoo->params.tmu[1].dTdX = (int64_t)(((verts[va].sT1 - verts[vb].sT1) * dyBC - (verts[vb].sT1 - verts[vc].sT1) * dyAB) * 4294967296.0f);
+ voodoo->params.tmu[1].dTdY = (int64_t)(((verts[vb].sT1 - verts[vc].sT1) * dxAB - (verts[va].sT1 - verts[vb].sT1) * dxBC) * 4294967296.0f);
+ }
+
+ voodoo->params.sign = (area < 0.0);
+
+ if (voodoo->ncc_dirty[0])
+ voodoo_update_ncc(voodoo, 0);
+ if (voodoo->ncc_dirty[1])
+ voodoo_update_ncc(voodoo, 1);
+ voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0;
+
+ voodoo_queue_triangle(voodoo, &voodoo->params);
+}
--- /dev/null
+void voodoo_triangle_setup(voodoo_t *voodoo);
--- /dev/null
+#include <math.h>
+#include <stddef.h>
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_dither.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+#include "vid_voodoo_texture.h"
+
+/*
+ * Rebuild the per-LOD texture tables of texture unit `tmu`.
+ *
+ * Inputs are the unit's tLOD, textureMode, tformat and texBaseAddr* values
+ * held in voodoo->params.  For every LOD slot 0..LOD_MAX+1 this fills in
+ * tex_base/tex_end (byte range of the level), tex_w_mask/tex_w_nmask/
+ * tex_h_mask, tex_shift and tex_lod (the source level backing the slot),
+ * and finally stores the level 0 width in tex_width.
+ */
+void voodoo_recalc_tex(voodoo_t *voodoo, int tmu)
+{
+        const uint32_t tlod = voodoo->params.tLOD[tmu];
+        /*Formats with bit 3 set take two bytes per texel, all others one*/
+        const int bytes_per_texel = (voodoo->params.tformat[tmu] & 8) ? 2 : 1;
+        const int trilinear = (voodoo->params.textureMode[tmu] & TEXTUREMODE_TRILINEAR) ? 1 : 0;
+        const int odd = (tlod & LOD_ODD) ? 1 : 0;
+        const int aspect = (tlod >> 21) & 3;
+        int full_w = 256, full_h = 256;
+        int row_shift = 8;
+        uint32_t level_offset[LOD_MAX+3];
+        int level_w[LOD_MAX+3], level_h[LOD_MAX+3], level_shift[LOD_MAX+3];
+        uint32_t running = 0;
+        uint32_t base = voodoo->params.texBaseAddr[tmu];
+        int src;
+        int lod;
+
+        /*The aspect ratio shrinks whichever axis is the shorter one*/
+        if (tlod & LOD_S_IS_WIDER)
+                full_h >>= aspect;
+        else
+        {
+                full_w >>= aspect;
+                row_shift -= aspect;
+        }
+
+        /*Pass 1 : size and byte offset of every source level*/
+        for (lod = 0; lod <= LOD_MAX + 2; lod++)
+        {
+                const int w = full_w >> lod;
+                const int h = full_h >> lod;
+                const int sh = row_shift - lod;
+
+                level_offset[lod] = running;
+                level_w[lod] = w ? w : 1;
+                level_h[lod] = h ? h : 1;
+                level_shift[lod] = (sh < 0) ? 0 : sh;
+
+                /*With LOD_SPLIT only levels whose parity matches LOD_ODD take up
+                  space. The unclamped dimensions are used on purpose, so levels
+                  that have shrunk to nothing add zero bytes*/
+                if (!(tlod & LOD_SPLIT) || (lod & 1) == odd)
+                        running += w * h * bytes_per_texel;
+        }
+
+        /*Trilinear with LOD_ODD starts from source level 1 (skips level 0)*/
+        src = (trilinear && odd) ? 1 : 0;
+
+        /*Pass 2 : bind each LOD slot to a source level*/
+        for (lod = 0; lod <= LOD_MAX+1; lod++)
+        {
+                if (tlod & LOD_TMULTIBASEADDR)
+                {
+                        /*Levels 0, 1 and 2 have their own base register, the rest share one*/
+                        if (src == 0)
+                                base = voodoo->params.texBaseAddr[tmu];
+                        else if (src == 1)
+                                base = voodoo->params.texBaseAddr1[tmu];
+                        else if (src == 2)
+                                base = voodoo->params.texBaseAddr2[tmu];
+                        else
+                                base = voodoo->params.texBaseAddr38[tmu];
+                }
+
+                voodoo->params.tex_base[tmu][lod] = base + level_offset[src];
+                voodoo->params.tex_end[tmu][lod] = base + level_offset[src] + (level_w[src] * level_h[src] * bytes_per_texel);
+                voodoo->params.tex_w_mask[tmu][lod] = level_w[src] - 1;
+                voodoo->params.tex_w_nmask[tmu][lod] = ~(level_w[src] - 1);
+                voodoo->params.tex_h_mask[tmu][lod] = level_h[src] - 1;
+                voodoo->params.tex_shift[tmu][lod] = level_shift[src];
+                voodoo->params.tex_lod[tmu][lod] = src;
+
+                if (trilinear)
+                {
+                        /*Advance two source levels, but only on slots of matching parity*/
+                        if ((lod & 1) == odd && (!odd || lod != 0))
+                                src += 2;
+                }
+                else if (!odd || lod != 0)
+                        src++;
+        }
+
+        voodoo->params.tex_width[tmu] = full_w;
+}
+
+/*Pack four 8-bit components into one 32-bit value: a in bits 31-24, r in 23-16, g in 15-8, b in 7-0.
+  Each component is converted to uint32_t before shifting so that an alpha of 0x80 or more is not
+  shifted into the sign bit of an int*/
+#define makergba(r, g, b, a) ((uint32_t)(b) | ((uint32_t)(g) << 8) | ((uint32_t)(r) << 16) | ((uint32_t)(a) << 24))
+/*
+ * Make sure the texture selected by `params` for texture unit `tmu` is held,
+ * decoded, in voodoo->texture_cache[tmu], and store its slot number in
+ * params->tex_entry[tmu].
+ *
+ * A slot matches when its base address, its tLOD (masked with 0xf00fff) and
+ * its palette checksum all equal the requested ones.  The checksum is the XOR
+ * of the 256 palette entries for TEX_PAL8 / TEX_APAL8 / TEX_APAL88 and 0 for
+ * every other format.
+ *
+ * On a hit the slot's refcount is incremented and the function returns.
+ * On a miss a slot whose refcount equals its refcount_r counter(s) is taken,
+ * waiting for the render threads to go idle when none is available, and every
+ * LOD from lod_min to lod_max (both capped at 8) is converted from
+ * voodoo->tex_mem[tmu] into 32-bit values built with makergba().  The source
+ * address ranges of the texture are recorded in addr_start[]/addr_end[], the
+ * texture_present[] entries they cover are set, and the refcount is
+ * incremented.
+ *
+ * NOTE(review): the body reads some state through `params` and some through
+ * `voodoo->params`; presumably both describe the same texture state when this
+ * is called - confirm against the callers.
+ */
+void voodoo_use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu)
+{
+ int c, d;
+ int lod;
+ int lod_min, lod_max;
+ uint32_t addr = 0, addr_end;
+ uint32_t palette_checksum;
+ /*Minimum and maximum LOD are the 4-bit fields at bits 2 and 8 of tLOD*/
+ lod_min = (params->tLOD[tmu] >> 2) & 15;
+ lod_max = (params->tLOD[tmu] >> 8) & 15;
+ /*Palettised formats fold the palette into the cache key; the checksum is only recomputed while the palette is flagged dirty*/
+ if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL8 || params->tformat[tmu] == TEX_APAL88)
+ {
+ if (voodoo->palette_dirty[tmu])
+ {
+ palette_checksum = 0;
+
+ for (c = 0; c < 256; c++)
+ palette_checksum ^= voodoo->palette[tmu][c].u;
+
+ voodoo->palette_checksum[tmu] = palette_checksum;
+ voodoo->palette_dirty[tmu] = 0;
+ }
+ else
+ palette_checksum = voodoo->palette_checksum[tmu];
+ }
+ else
+ palette_checksum = 0;
+ /*Base address used as cache key: texBaseAddr1 when split, odd and multi-base are all set, texBaseAddr otherwise*/
+ if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD) && (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR))
+ addr = params->texBaseAddr1[tmu];
+ else
+ addr = params->texBaseAddr[tmu];
+
+ /*Try to find texture in cache*/
+ for (c = 0; c < TEX_CACHE_MAX; c++)
+ {
+ if (voodoo->texture_cache[tmu][c].base == addr &&
+ voodoo->texture_cache[tmu][c].tLOD == (params->tLOD[tmu] & 0xf00fff) &&
+ voodoo->texture_cache[tmu][c].palette_checksum == palette_checksum)
+ {
+ params->tex_entry[tmu] = c;
+ voodoo->texture_cache[tmu][c].refcount++;
+ return;
+ }
+ }
+
+ /*Texture not found, search for unused texture*/
+ do
+ {
+ for (c = 0; c < TEX_CACHE_MAX; c++)
+ {
+ voodoo->texture_last_removed++; /*Round-robin over the slots; the mask below assumes TEX_CACHE_MAX is a power of two*/
+ voodoo->texture_last_removed &= (TEX_CACHE_MAX-1);
+ if (voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[0] &&
+ (voodoo->render_threads == 1 || voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[1]))
+ break; /*Reusable: refcount equals refcount_r[0], and refcount_r[1] too unless there is a single render thread*/
+ }
+ if (c == TEX_CACHE_MAX)
+ voodoo_wait_for_render_thread_idle(voodoo); /*Every slot still referenced: wait, then scan again*/
+ } while (c == TEX_CACHE_MAX);
+ if (c == TEX_CACHE_MAX) /*NOTE(review): cannot be true here - the loop above only exits once c != TEX_CACHE_MAX*/
+ fatal("Texture cache full!\n");
+
+ c = voodoo->texture_last_removed;
+
+ /*Tag the chosen slot with the new key*/
+ if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD) && (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR))
+ voodoo->texture_cache[tmu][c].base = params->texBaseAddr1[tmu];
+ else
+ voodoo->texture_cache[tmu][c].base = params->texBaseAddr[tmu];
+ voodoo->texture_cache[tmu][c].tLOD = params->tLOD[tmu] & 0xf00fff;
+ /*Same LOD range as computed at the top of the function, this time clamped to 8*/
+ lod_min = (params->tLOD[tmu] >> 2) & 15;
+ lod_max = (params->tLOD[tmu] >> 8) & 15;
+// pclog(" add new texture to %i tformat=%i %08x LOD=%i-%i tmu=%i\n", c, voodoo->params.tformat[tmu], params->texBaseAddr[tmu], lod_min, lod_max, tmu);
+ lod_min = MIN(lod_min, 8);
+ lod_max = MIN(lod_max, 8);
+ for (lod = lod_min; lod <= lod_max; lod++)
+ {
+ uint32_t *base = &voodoo->texture_cache[tmu][c].data[texture_offset[lod]]; /*Destination of this LOD inside the slot*/
+ uint32_t tex_addr = params->tex_base[tmu][lod] & voodoo->texture_mask; /*Source address of the current row*/
+ int x, y;
+ int shift = 8 - params->tex_lod[tmu][lod]; /*Destination rows are 1 << shift entries apart*/
+ rgba_u *pal;
+
+ //pclog(" LOD %i : %08x - %08x %i %i,%i\n", lod, params->tex_base[tmu][lod] & voodoo->texture_mask, addr, voodoo->params.tformat[tmu], voodoo->params.tex_w_mask[tmu][lod],voodoo->params.tex_h_mask[tmu][lod]);
+
+ /*Every case converts (tex_h_mask+1) rows of (tex_w_mask+1) texels; formats read as uint8_t step the source by 1 << tex_shift bytes per row, formats read as uint16_t by twice that*/
+ switch (params->tformat[tmu])
+ {
+ case TEX_RGB332:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask];
+
+ base[x] = makergba(rgb332[dat].r, rgb332[dat].g, rgb332[dat].b, 0xff);
+ }
+ tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]);
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_Y4I2Q2:
+ pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0];
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask];
+
+ base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff);
+ }
+ tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]);
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_A8:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask];
+
+ base[x] = makergba(dat, dat, dat, dat);
+ }
+ tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]);
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_I8:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask];
+
+ base[x] = makergba(dat, dat, dat, 0xff);
+ }
+ tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]);
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_AI8:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask];
+ /*Low nibble replicated into r, g and b; high nibble replicated into alpha*/
+ base[x] = makergba((dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0xf0) | ((dat >> 4) & 0x0f));
+ }
+ tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]);
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_PAL8:
+ pal = voodoo->palette[tmu];
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask];
+
+ base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff);
+ }
+ tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]);
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_APAL8:
+ pal = voodoo->palette[tmu];
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask];
+ /*The palette entry's r/g/b bytes are re-sliced into the four output components below*/
+ int r = ((pal[dat].rgba.r & 3) << 6) | ((pal[dat].rgba.g & 0xf0) >> 2) | (pal[dat].rgba.r & 3);
+ int g = ((pal[dat].rgba.g & 0xf) << 4) | ((pal[dat].rgba.b & 0xc0) >> 4) | ((pal[dat].rgba.g & 0xf) >> 2);
+ int b = ((pal[dat].rgba.b & 0x3f) << 2) | ((pal[dat].rgba.b & 0x30) >> 4);
+ int a = (pal[dat].rgba.r & 0xfc) | ((pal[dat].rgba.r & 0xc0) >> 6);
+
+ base[x] = makergba(r, g, b, a);
+ }
+ tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]);
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_ARGB8332:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask];
+
+ base[x] = makergba(rgb332[dat & 0xff].r, rgb332[dat & 0xff].g, rgb332[dat & 0xff].b, dat >> 8);
+ }
+ tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1));
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_A8Y4I2Q2:
+ pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0];
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask];
+
+ base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8);
+ }
+ tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1));
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_R5G6B5:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask];
+
+ base[x] = makergba(rgb565[dat].r, rgb565[dat].g, rgb565[dat].b, 0xff);
+ }
+ tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1));
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_ARGB1555:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask];
+
+ base[x] = makergba(argb1555[dat].r, argb1555[dat].g, argb1555[dat].b, argb1555[dat].a);
+ }
+ tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1));
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_ARGB4444:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask];
+
+ base[x] = makergba(argb4444[dat].r, argb4444[dat].g, argb4444[dat].b, argb4444[dat].a);
+ }
+ tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1));
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_A8I8:
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask];
+
+ base[x] = makergba(dat & 0xff, dat & 0xff, dat & 0xff, dat >> 8);
+ }
+ tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1));
+ base += (1 << shift);
+ }
+ break;
+
+ case TEX_APAL88:
+ pal = voodoo->palette[tmu];
+ for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++)
+ {
+ for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++)
+ {
+ uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask];
+
+ base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8);
+ }
+ tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1));
+ base += (1 << shift);
+ }
+ break;
+
+ default:
+ fatal("Unknown texture format %i\n", params->tformat[tmu]);
+ }
+ }
+
+ voodoo->texture_cache[tmu][c].is16 = voodoo->params.tformat[tmu] & 8;
+
+ if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL8 || params->tformat[tmu] == TEX_APAL88)
+ voodoo->texture_cache[tmu][c].palette_checksum = palette_checksum;
+ else
+ voodoo->texture_cache[tmu][c].palette_checksum = 0;
+ /*Record the source address range of LODs 0, 1 and 2 individually and of LODs 3 and up as a single range; a start/end pair of 0 marks an unused range*/
+ if (lod_min == 0)
+ {
+ voodoo->texture_cache[tmu][c].addr_start[0] = voodoo->params.tex_base[tmu][0];
+ voodoo->texture_cache[tmu][c].addr_end[0] = voodoo->params.tex_end[tmu][0];
+ }
+ else
+ voodoo->texture_cache[tmu][c].addr_start[0] = voodoo->texture_cache[tmu][c].addr_end[0] = 0;
+
+ if (lod_min <= 1 && lod_max >= 1)
+ {
+ voodoo->texture_cache[tmu][c].addr_start[1] = voodoo->params.tex_base[tmu][1];
+ voodoo->texture_cache[tmu][c].addr_end[1] = voodoo->params.tex_end[tmu][1];
+ }
+ else
+ voodoo->texture_cache[tmu][c].addr_start[1] = voodoo->texture_cache[tmu][c].addr_end[1] = 0;
+
+ if (lod_min <= 2 && lod_max >= 2)
+ {
+ voodoo->texture_cache[tmu][c].addr_start[2] = voodoo->params.tex_base[tmu][2];
+ voodoo->texture_cache[tmu][c].addr_end[2] = voodoo->params.tex_end[tmu][2];
+ }
+ else
+ voodoo->texture_cache[tmu][c].addr_start[2] = voodoo->texture_cache[tmu][c].addr_end[2] = 0;
+
+ if (lod_max >= 3)
+ {
+ voodoo->texture_cache[tmu][c].addr_start[3] = voodoo->params.tex_base[tmu][(lod_min > 3) ? lod_min : 3];
+ voodoo->texture_cache[tmu][c].addr_end[3] = voodoo->params.tex_end[tmu][(lod_max < 8) ? lod_max : 8];
+ }
+ else
+ voodoo->texture_cache[tmu][c].addr_start[3] = voodoo->texture_cache[tmu][c].addr_end[3] = 0;
+
+ /*Flag every (1 << TEX_DIRTY_SHIFT)-byte chunk touched by those ranges in texture_present[], which voodoo_tex_writel() consults before writing texture memory*/
+ for (d = 0; d < 4; d++)
+ {
+ addr = voodoo->texture_cache[tmu][c].addr_start[d];
+ addr_end = voodoo->texture_cache[tmu][c].addr_end[d];
+
+ if (addr_end != 0)
+ {
+ for (; addr <= addr_end; addr += (1 << TEX_DIRTY_SHIFT))
+ voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1;
+ }
+ }
+
+ params->tex_entry[tmu] = c;
+ voodoo->texture_cache[tmu][c].refcount++; /*The new slot is handed out with one reference taken*/
+}
+
+/*
+ * Invalidate every cached texture of unit `tmu` whose source data covers
+ * `dirty_addr`, and rebuild texture_present[tmu] from the address ranges
+ * that were not hit.
+ *
+ * A slot is invalidated by setting its base to -1.  When an invalidated slot
+ * has a refcount that differs from its refcount_r counter(s), the function
+ * waits for the render threads to go idle before returning.
+ */
+void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu)
+{
+        int must_wait = 0;
+        int slot, range;
+
+        /*Start from an empty map; ranges that survive are re-marked below*/
+        memset(voodoo->texture_present[tmu], 0, sizeof(voodoo->texture_present[0]));
+
+        for (slot = 0; slot < TEX_CACHE_MAX; slot++)
+        {
+                /*Slots that were already invalid on entry are ignored*/
+                if (voodoo->texture_cache[tmu][slot].base == -1)
+                        continue;
+
+                for (range = 0; range < 4; range++)
+                {
+                        int first = voodoo->texture_cache[tmu][slot].addr_start[range];
+                        int last = voodoo->texture_cache[tmu][slot].addr_end[range];
+                        int lo, hi;
+
+                        /*An end address of 0 marks an unused range*/
+                        if (last == 0)
+                                continue;
+
+                        /*Widen the range to 0x400-byte boundaries inside texture memory*/
+                        lo = first & voodoo->texture_mask & ~0x3ff;
+                        hi = ((last & voodoo->texture_mask) + 0x3ff) & ~0x3ff;
+                        if (hi < lo)
+                                hi = voodoo->texture_mask+1;
+
+                        if (dirty_addr < lo || dirty_addr >= hi)
+                        {
+                                /*Range not written to : keep its chunks marked as present*/
+                                while (first <= last)
+                                {
+                                        voodoo->texture_present[tmu][(first & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1;
+                                        first += (1 << TEX_DIRTY_SHIFT);
+                                }
+                                continue;
+                        }
+
+                        /*Range was written to : drop the slot, remembering whether it is still referenced*/
+                        if (voodoo->texture_cache[tmu][slot].refcount != voodoo->texture_cache[tmu][slot].refcount_r[0] ||
+                            (voodoo->render_threads == 2 && voodoo->texture_cache[tmu][slot].refcount != voodoo->texture_cache[tmu][slot].refcount_r[1]))
+                                must_wait = 1;
+
+                        voodoo->texture_cache[tmu][slot].base = -1;
+                }
+        }
+
+        if (must_wait)
+                voodoo_wait_for_render_thread_idle(voodoo);
+}
+
+/*
+ * Handle a 32-bit write to the texture memory aperture.
+ *
+ * addr : offset inside the aperture.  Bit 22 set means the write is ignored,
+ *        bit 21 selects texture unit 1 (ignored unless dual_tmus is set).
+ * val  : the 32 bits to store.
+ * p    : the voodoo_t instance.
+ *
+ * On cards older than VOODOO_BANSHEE the address encodes LOD, T and S, and is
+ * translated into a texture memory address through tex_base/tex_shift; writes
+ * to a LOD above LOD_MAX are dropped.  On later cards the low 21 bits are a
+ * linear offset from tex_base[tmu][0].
+ *
+ * If the target chunk is flagged in texture_present[], the texture cache is
+ * flushed for that address before the value is stored in tex_mem[tmu].
+ */
+void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p)
+{
+        voodoo_t *voodoo = (voodoo_t *)p;
+        int lod, s, t;
+        int tmu;
+
+        if (addr & 0x400000)
+                return; /*TREX != 0*/
+
+        tmu = (addr & 0x200000) ? 1 : 0;
+
+        if (tmu && !voodoo->dual_tmus)
+                return;
+
+        if (voodoo->type < VOODOO_BANSHEE)
+        {
+                /*The original tests for voodoo->type >= VOODOO_BANSHEE inside this
+                  branch could never be true, so only the remaining decode is kept*/
+                lod = (addr >> 17) & 0xf;
+                t = (addr >> 9) & 0xff;
+
+                if (voodoo->params.tformat[tmu] & 8)
+                        s = (addr >> 1) & 0xfe;
+                else if (voodoo->params.textureMode[tmu] & (1u << 31)) /*1u : shifting a signed 1 by 31 is undefined*/
+                        s = addr & 0xfc;
+                else
+                        s = (addr >> 1) & 0xfc;
+
+                if (lod > LOD_MAX)
+                        return;
+
+                /*Formats with bit 3 set use two bytes per texel*/
+                if (voodoo->params.tformat[tmu] & 8)
+                        addr = voodoo->params.tex_base[tmu][lod] + s*2 + (t << voodoo->params.tex_shift[tmu][lod])*2;
+                else
+                        addr = voodoo->params.tex_base[tmu][lod] + s + (t << voodoo->params.tex_shift[tmu][lod]);
+        }
+        else
+                addr = (addr & 0x1ffffc) + voodoo->params.tex_base[tmu][0];
+
+        /*Drop cached textures decoded from the chunk about to be overwritten*/
+        if (voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT])
+                flush_texture_cache(voodoo, addr & voodoo->texture_mask, tmu);
+
+        /*On VOODOO_3 the other unit's cache is checked as well - presumably
+          because both units see the same texture memory there; confirm*/
+        if (voodoo->type == VOODOO_3 && voodoo->texture_present[tmu^1][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT])
+                flush_texture_cache(voodoo, addr & voodoo->texture_mask, tmu^1);
+
+        *(uint32_t *)(&voodoo->tex_mem[tmu][addr & voodoo->texture_mask]) = val;
+}
--- /dev/null
+/*
+ * Index of the first entry of each LOD inside a texture cache slot's data[]
+ * array.  Every level is allotted a square of (256 >> lod) entries per side,
+ * so each value is the running total of the squares before it.
+ */
+static const uint32_t texture_offset[LOD_MAX+3] =
+{
+        0x00000,        /* LOD 0  : 256x256 */
+        0x10000,        /* LOD 1  : 128x128 */
+        0x14000,        /* LOD 2  : 64x64 */
+        0x15000,        /* LOD 3  : 32x32 */
+        0x15400,        /* LOD 4  : 16x16 */
+        0x15500,        /* LOD 5  : 8x8 */
+        0x15540,        /* LOD 6  : 4x4 */
+        0x15550,        /* LOD 7  : 2x2 */
+        0x15554,        /* LOD 8  : 1x1 */
+        0x15555,        /* LOD 9  : one entry */
+        0x15556         /* LOD 10 : one entry */
+};
+
+void voodoo_recalc_tex(voodoo_t *voodoo, int tmu);
+void voodoo_use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu);
+void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p);
+void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu);