From 02dbc440b54d0be9d45b6cd9dba99d2d363b9215 Mon Sep 17 00:00:00 2001 From: Toni Wilen Date: Sat, 19 Dec 2020 22:31:24 +0200 Subject: [PATCH] PCem Voodoo emulation --- pcem/vid_voodoo.cpp | 1470 ++++++++ pcem/vid_voodoo.h | 1 + pcem/vid_voodoo_banshee.cpp | 2881 +++++++++++++++ pcem/vid_voodoo_banshee.h | 6 + pcem/vid_voodoo_banshee_blitter.cpp | 1450 ++++++++ pcem/vid_voodoo_banshee_blitter.h | 1 + pcem/vid_voodoo_blitter.cpp | 507 +++ pcem/vid_voodoo_blitter.h | 3 + pcem/vid_voodoo_codegen_x86-64.h | 3467 ++++++++++++++++++ pcem/vid_voodoo_codegen_x86.h | 3412 ++++++++++++++++++ pcem/vid_voodoo_common.h | 509 +++ pcem/vid_voodoo_display.cpp | 609 ++++ pcem/vid_voodoo_display.h | 6 + pcem/vid_voodoo_dither.h | 5136 +++++++++++++++++++++++++++ pcem/vid_voodoo_fb.cpp | 447 +++ pcem/vid_voodoo_fb.h | 4 + pcem/vid_voodoo_fifo.cpp | 503 +++ pcem/vid_voodoo_fifo.h | 8 + pcem/vid_voodoo_reg.cpp | 1321 +++++++ pcem/vid_voodoo_reg.h | 1 + pcem/vid_voodoo_regs.h | 691 ++++ pcem/vid_voodoo_render.cpp | 1640 +++++++++ pcem/vid_voodoo_render.h | 338 ++ pcem/vid_voodoo_setup.cpp | 216 ++ pcem/vid_voodoo_setup.h | 1 + pcem/vid_voodoo_texture.cpp | 583 +++ pcem/vid_voodoo_texture.h | 19 + 27 files changed, 25230 insertions(+) create mode 100644 pcem/vid_voodoo.cpp create mode 100644 pcem/vid_voodoo.h create mode 100644 pcem/vid_voodoo_banshee.cpp create mode 100644 pcem/vid_voodoo_banshee.h create mode 100644 pcem/vid_voodoo_banshee_blitter.cpp create mode 100644 pcem/vid_voodoo_banshee_blitter.h create mode 100644 pcem/vid_voodoo_blitter.cpp create mode 100644 pcem/vid_voodoo_blitter.h create mode 100644 pcem/vid_voodoo_codegen_x86-64.h create mode 100644 pcem/vid_voodoo_codegen_x86.h create mode 100644 pcem/vid_voodoo_common.h create mode 100644 pcem/vid_voodoo_display.cpp create mode 100644 pcem/vid_voodoo_display.h create mode 100644 pcem/vid_voodoo_dither.h create mode 100644 pcem/vid_voodoo_fb.cpp create mode 100644 pcem/vid_voodoo_fb.h create mode 100644 
pcem/vid_voodoo_fifo.cpp create mode 100644 pcem/vid_voodoo_fifo.h create mode 100644 pcem/vid_voodoo_reg.cpp create mode 100644 pcem/vid_voodoo_reg.h create mode 100644 pcem/vid_voodoo_regs.h create mode 100644 pcem/vid_voodoo_render.cpp create mode 100644 pcem/vid_voodoo_render.h create mode 100644 pcem/vid_voodoo_setup.cpp create mode 100644 pcem/vid_voodoo_setup.h create mode 100644 pcem/vid_voodoo_texture.cpp create mode 100644 pcem/vid_voodoo_texture.h diff --git a/pcem/vid_voodoo.cpp b/pcem/vid_voodoo.cpp new file mode 100644 index 00000000..ed4553c6 --- /dev/null +++ b/pcem/vid_voodoo.cpp @@ -0,0 +1,1470 @@ +#include +#include +#include +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "pci.h" +#include "thread.h" +#include "timer.h" +#include "video.h" +#include "vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_blitter.h" +#include "vid_voodoo_display.h" +#include "vid_voodoo_dither.h" +#include "vid_voodoo_fb.h" +#include "vid_voodoo_fifo.h" +#include "vid_voodoo_reg.h" +#include "vid_voodoo_regs.h" +#include "vid_voodoo_render.h" +#include "vid_voodoo_texture.h" + + +rgba8_t rgb332[0x100], ai44[0x100], rgb565[0x10000], argb1555[0x10000], argb4444[0x10000], ai88[0x10000]; + + +int tris = 0; + +static uint64_t status_time = 0; + + +void voodoo_recalc(voodoo_t *voodoo) +{ + uint32_t buffer_offset = ((voodoo->fbiInit2 >> 11) & 511) * 4096; + + if (voodoo->type >= VOODOO_BANSHEE) + return; + + voodoo->params.front_offset = voodoo->disp_buffer*buffer_offset; + voodoo->back_offset = voodoo->draw_buffer*buffer_offset; + + voodoo->buffer_cutoff = TRIPLE_BUFFER ? 
(buffer_offset * 4) : (buffer_offset * 3); + if (TRIPLE_BUFFER) + voodoo->params.aux_offset = buffer_offset * 3; + else + voodoo->params.aux_offset = buffer_offset * 2; + + switch (voodoo->lfbMode & LFB_WRITE_MASK) + { + case LFB_WRITE_FRONT: + voodoo->fb_write_offset = voodoo->params.front_offset; + voodoo->fb_write_buffer = voodoo->disp_buffer; + break; + case LFB_WRITE_BACK: + voodoo->fb_write_offset = voodoo->back_offset; + voodoo->fb_write_buffer = voodoo->draw_buffer; + break; + + default: + /*BreakNeck sets invalid LFB write buffer select*/ + voodoo->fb_write_offset = voodoo->params.front_offset; + break; + } + + switch (voodoo->lfbMode & LFB_READ_MASK) + { + case LFB_READ_FRONT: + voodoo->fb_read_offset = voodoo->params.front_offset; + break; + case LFB_READ_BACK: + voodoo->fb_read_offset = voodoo->back_offset; + break; + case LFB_READ_AUX: + voodoo->fb_read_offset = voodoo->params.aux_offset; + break; + + default: + fatal("voodoo_recalc : unknown lfb source\n"); + } + + switch (voodoo->params.fbzMode & FBZ_DRAW_MASK) + { + case FBZ_DRAW_FRONT: + voodoo->params.draw_offset = voodoo->params.front_offset; + voodoo->fb_draw_buffer = voodoo->disp_buffer; + break; + case FBZ_DRAW_BACK: + voodoo->params.draw_offset = voodoo->back_offset; + voodoo->fb_draw_buffer = voodoo->draw_buffer; + break; + + default: + fatal("voodoo_recalc : unknown draw buffer\n"); + } + + voodoo->block_width = ((voodoo->fbiInit1 >> 4) & 15) * 2; + if (voodoo->fbiInit6 & (1 << 30)) + voodoo->block_width += 1; + if (voodoo->fbiInit1 & (1 << 24)) + voodoo->block_width += 32; + voodoo->row_width = voodoo->block_width * 32 * 2; + voodoo->params.row_width = voodoo->row_width; + voodoo->aux_row_width = voodoo->row_width; + voodoo->params.aux_row_width = voodoo->aux_row_width; + +/* pclog("voodoo_recalc : front_offset %08X back_offset %08X aux_offset %08X draw_offset %08x\n", voodoo->params.front_offset, voodoo->back_offset, voodoo->params.aux_offset, voodoo->params.draw_offset); + pclog(" 
fb_read_offset %08X fb_write_offset %08X row_width %i %08x %08x\n", voodoo->fb_read_offset, voodoo->fb_write_offset, voodoo->row_width, voodoo->lfbMode, voodoo->params.fbzMode);*/ +} + + +static uint16_t voodoo_readw(uint32_t addr, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + + addr &= 0xffffff; + + cycles -= voodoo->read_time; + + if ((addr & 0xc00000) == 0x400000) /*Framebuffer*/ + { + if (SLI_ENABLED) + { + voodoo_set_t *set = voodoo->set; + int y = (addr >> 11) & 0x3ff; + + if (y & 1) + voodoo = set->voodoos[1]; + else + voodoo = set->voodoos[0]; + } + + voodoo->flush = 1; + while (!FIFO_EMPTY) + { + voodoo_wake_fifo_thread_now(voodoo); + thread_wait_event(voodoo->fifo_not_full_event, 1); + } + voodoo_wait_for_render_thread_idle(voodoo); + voodoo->flush = 0; + + return voodoo_fb_readw(addr, voodoo); + } + + return 0xffff; +} + + +/*32-bit read handler. addr bit 0x800000 selects the texture window (nothing is read there), bit 0x400000 selects the framebuffer (the FIFO is drained and the render threads are idle before the read), anything else is a register read selected by addr & 0x3fc.*/ +static uint32_t voodoo_readl(uint32_t addr, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + uint32_t temp = 0xffffffff; /*The texture branch below assigns nothing, so start from all ones (same value the bad-register default and voodoo_readw's fallthrough return) instead of returning an indeterminate value*/ + int fifo_size; + voodoo->rd_count++; + addr &= 0xffffff; + + cycles -= voodoo->read_time; + + if (addr & 0x800000) /*Texture*/ + { + } + else if (addr & 0x400000) /*Framebuffer*/ + { + if (SLI_ENABLED) + { + voodoo_set_t *set = voodoo->set; + int y = (addr >> 11) & 0x3ff; + + if (y & 1) + voodoo = set->voodoos[1]; + else + voodoo = set->voodoos[0]; + } + + voodoo->flush = 1; + while (!FIFO_EMPTY) + { + voodoo_wake_fifo_thread_now(voodoo); + thread_wait_event(voodoo->fifo_not_full_event, 1); + } + voodoo_wait_for_render_thread_idle(voodoo); + voodoo->flush = 0; + + temp = voodoo_fb_readl(addr, voodoo); + } + else switch (addr & 0x3fc) + { + case SST_status: + { + int fifo_entries = FIFO_ENTRIES; + int swap_count = voodoo->swap_count; + int written = voodoo->cmd_written + voodoo->cmd_written_fifo; + int busy = (written - voodoo->cmd_read) || (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr); + + if (SLI_ENABLED && voodoo->type != VOODOO_2) + { + voodoo_t *voodoo_other = (voodoo == voodoo->set->voodoos[0]) ?
voodoo->set->voodoos[1] : voodoo->set->voodoos[0]; + int other_written = voodoo_other->cmd_written + voodoo_other->cmd_written_fifo; + + if (voodoo_other->swap_count > swap_count) + swap_count = voodoo_other->swap_count; + if ((voodoo_other->fifo_write_idx - voodoo_other->fifo_read_idx) > fifo_entries) + fifo_entries = voodoo_other->fifo_write_idx - voodoo_other->fifo_read_idx; + if ((other_written - voodoo_other->cmd_read) || + (voodoo_other->cmdfifo_depth_rd != voodoo_other->cmdfifo_depth_wr)) + busy = 1; + if (!voodoo_other->voodoo_busy) + voodoo_wake_fifo_thread(voodoo_other); + } + + fifo_size = 0xffff - fifo_entries; + temp = fifo_size << 12; + if (fifo_size < 0x40) + temp |= fifo_size; + else + temp |= 0x3f; + if (swap_count < 7) + temp |= (swap_count << 28); + else + temp |= (7 << 28); + if (!voodoo->v_retrace) + temp |= 0x40; + + if (busy) + temp |= 0x380; /*Busy*/ + + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_thread(voodoo); + } + break; + + case SST_fbzColorPath: + voodoo_flush(voodoo); + temp = voodoo->params.fbzColorPath; + break; + case SST_fogMode: + voodoo_flush(voodoo); + temp = voodoo->params.fogMode; + break; + case SST_alphaMode: + voodoo_flush(voodoo); + temp = voodoo->params.alphaMode; + break; + case SST_fbzMode: + voodoo_flush(voodoo); + temp = voodoo->params.fbzMode; + break; + case SST_lfbMode: + voodoo_flush(voodoo); + temp = voodoo->lfbMode; + break; + case SST_clipLeftRight: + voodoo_flush(voodoo); + temp = voodoo->params.clipRight | (voodoo->params.clipLeft << 16); + break; + case SST_clipLowYHighY: + voodoo_flush(voodoo); + temp = voodoo->params.clipHighY | (voodoo->params.clipLowY << 16); + break; + + case SST_stipple: + voodoo_flush(voodoo); + temp = voodoo->params.stipple; + break; + case SST_color0: + voodoo_flush(voodoo); + temp = voodoo->params.color0; + break; + case SST_color1: + voodoo_flush(voodoo); + temp = voodoo->params.color1; + break; + + case SST_fbiPixelsIn: + temp = voodoo->fbiPixelsIn & 0xffffff; + break; + case 
SST_fbiChromaFail: + temp = voodoo->fbiChromaFail & 0xffffff; + break; + case SST_fbiZFuncFail: + temp = voodoo->fbiZFuncFail & 0xffffff; + break; + case SST_fbiAFuncFail: + temp = voodoo->fbiAFuncFail & 0xffffff; + break; + case SST_fbiPixelsOut: + temp = voodoo->fbiPixelsOut & 0xffffff; + break; + + case SST_fbiInit4: + temp = voodoo->fbiInit4; + break; + case SST_fbiInit0: + temp = voodoo->fbiInit0; + break; + case SST_fbiInit1: + temp = voodoo->fbiInit1; + break; + case SST_fbiInit2: + if (voodoo->initEnable & 0x04) + temp = voodoo->dac_readdata; + else + temp = voodoo->fbiInit2; + break; + case SST_fbiInit3: + temp = voodoo->fbiInit3 | (1 << 10) | (2 << 8); + break; + + case SST_vRetrace: + temp = voodoo->line & 0x1fff; + break; + case SST_hvRetrace: + { + uint32_t line_time = (uint32_t)(voodoo->line_time >> 32); + uint32_t diff = (timer_get_ts_int(&voodoo->timer) > (tsc & 0xffffffff)) ? (timer_get_ts_int(&voodoo->timer) - (tsc & 0xffffffff)) : 0; + uint32_t pre_div = diff * voodoo->h_total; + /*line_time is 0 whenever the upper 32 bits of voodoo->line_time are 0; dividing by it would trap, so treat that case as zero elapsed columns*/ + uint32_t post_div = line_time ? (pre_div / line_time) : 0; + uint32_t h_pos = (voodoo->h_total - 1) - post_div; + + if (h_pos >= voodoo->h_total) + h_pos = 0; + + temp = voodoo->line & 0x1fff; + temp |= (h_pos << 16); + } + break; + + case SST_fbiInit5: + temp = voodoo->fbiInit5 & ~0x1ff; + break; + case SST_fbiInit6: + temp = voodoo->fbiInit6; + break; + case SST_fbiInit7: + temp = voodoo->fbiInit7 & ~0xff; + break; + + case SST_cmdFifoBaseAddr: + temp = voodoo->cmdfifo_base >> 12; + temp |= (voodoo->cmdfifo_end >> 12) << 16; + break; + + case SST_cmdFifoRdPtr: + temp = voodoo->cmdfifo_rp; + break; + case SST_cmdFifoAMin: + temp = voodoo->cmdfifo_amin; + break; + case SST_cmdFifoAMax: + temp = voodoo->cmdfifo_amax; + break; + case SST_cmdFifoDepth: + temp = voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd; + break; + + default: + pclog("voodoo_readl : bad addr %08X\n", addr); + temp = 0xffffffff; + } + + return temp; +} + +static void voodoo_writew(uint32_t addr, uint16_t val, void
*p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + voodoo->wr_count++; + addr &= 0xffffff; + + cycles -= voodoo->write_time; + + if ((addr & 0xc00000) == 0x400000) /*Framebuffer*/ + voodoo_queue_command(voodoo, addr | FIFO_WRITEW_FB, val); +} + +static void voodoo_writel(uint32_t addr, uint32_t val, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + + voodoo->wr_count++; + + addr &= 0xffffff; + + if (addr == voodoo->last_write_addr+4) + cycles -= voodoo->burst_time; + else + cycles -= voodoo->write_time; + voodoo->last_write_addr = addr; + + if (addr & 0x800000) /*Texture*/ + { + voodoo->tex_count++; + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_TEX, val); + } + else if (addr & 0x400000) /*Framebuffer*/ + { + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_FB, val); + } + else if ((addr & 0x200000) && (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE)) + { +// pclog("Write CMDFIFO %08x(%08x) %08x %08x\n", addr, voodoo->cmdfifo_base + (addr & 0x3fffc), val, (voodoo->cmdfifo_base + (addr & 0x3fffc)) & voodoo->fb_mask); + *(uint32_t *)&voodoo->fb_mem[(voodoo->cmdfifo_base + (addr & 0x3fffc)) & voodoo->fb_mask] = val; + voodoo->cmdfifo_depth_wr++; + if ((voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd) < 20) + voodoo_wake_fifo_thread(voodoo); + } + else switch (addr & 0x3fc) + { + case SST_intrCtrl: + fatal("intrCtrl write %08x\n", val); + break; + + case SST_userIntrCMD: + fatal("userIntrCMD write %08x\n", val); + break; + + case SST_swapbufferCMD: + voodoo->cmd_written++; + thread_lock_mutex(voodoo->swap_mutex); + voodoo->swap_count++; + thread_unlock_mutex(voodoo->swap_mutex); + if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) + return; + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_triangleCMD: + if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) + return; + voodoo->cmd_written++; + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + if 
(!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_ftriangleCMD: + if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) + return; + voodoo->cmd_written++; + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_fastfillCMD: + if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) + return; + voodoo->cmd_written++; + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_nopCMD: + if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) + return; + voodoo->cmd_written++; + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + + case SST_fbiInit4: + if (voodoo->initEnable & 0x01) + { + voodoo->fbiInit4 = val; + voodoo->read_time = pci_nonburst_time + pci_burst_time * ((voodoo->fbiInit4 & 1) ? 2 : 1); +// pclog("fbiInit4 write %08x - read_time=%i\n", val, voodoo->read_time); + } + break; + case SST_backPorch: + voodoo->backPorch = val; + break; + case SST_videoDimensions: + voodoo->videoDimensions = val; + voodoo->h_disp = (val & 0xfff) + 1; + voodoo->v_disp = (val >> 16) & 0xfff; + break; + case SST_fbiInit0: + if (voodoo->initEnable & 0x01) + { + voodoo->fbiInit0 = val; + if (voodoo->set->nr_cards == 2) + svga_set_override(voodoo->svga, (voodoo->set->voodoos[0]->fbiInit0 | voodoo->set->voodoos[1]->fbiInit0) & 1); + else + svga_set_override(voodoo->svga, val & 1); + if (val & FBIINIT0_GRAPHICS_RESET) + { + /*Reset display/draw buffer selection. 
This may not actually + happen here on a real Voodoo*/ + voodoo->disp_buffer = 0; + voodoo->draw_buffer = 1; + voodoo_recalc(voodoo); + voodoo->front_offset = voodoo->params.front_offset; + } + } + break; + case SST_fbiInit1: + if (voodoo->initEnable & 0x01) + { + if ((voodoo->fbiInit1 & FBIINIT1_VIDEO_RESET) && !(val & FBIINIT1_VIDEO_RESET)) + { + voodoo->line = 0; + thread_lock_mutex(voodoo->swap_mutex); + voodoo->swap_count = 0; + thread_unlock_mutex(voodoo->swap_mutex); + voodoo->retrace_count = 0; + } + voodoo->fbiInit1 = (val & ~5) | (voodoo->fbiInit1 & 5); + voodoo->write_time = pci_nonburst_time + pci_burst_time * ((voodoo->fbiInit1 & 2) ? 1 : 0); + voodoo->burst_time = pci_burst_time * ((voodoo->fbiInit1 & 2) ? 2 : 1); +// pclog("fbiInit1 write %08x - write_time=%i burst_time=%i\n", val, voodoo->write_time, voodoo->burst_time); + } + break; + case SST_fbiInit2: + if (voodoo->initEnable & 0x01) + { + voodoo->fbiInit2 = val; + voodoo_recalc(voodoo); + } + break; + case SST_fbiInit3: + if (voodoo->initEnable & 0x01) + voodoo->fbiInit3 = val; + break; + + case SST_hSync: + voodoo->hSync = val; + voodoo->h_total = (val & 0xffff) + (val >> 16); + voodoo_pixelclock_update(voodoo); + break; + case SST_vSync: + voodoo->vSync = val; + voodoo->v_total = (val & 0xffff) + (val >> 16); + break; + + case SST_clutData: + voodoo->clutData[(val >> 24) & 0x3f].b = val & 0xff; + voodoo->clutData[(val >> 24) & 0x3f].g = (val >> 8) & 0xff; + voodoo->clutData[(val >> 24) & 0x3f].r = (val >> 16) & 0xff; + if (val & 0x20000000) + { + voodoo->clutData[(val >> 24) & 0x3f].b = 255; + voodoo->clutData[(val >> 24) & 0x3f].g = 255; + voodoo->clutData[(val >> 24) & 0x3f].r = 255; + } + voodoo->clutData_dirty = 1; + break; + + case SST_dacData: + voodoo->dac_reg = (val >> 8) & 7; + voodoo->dac_readdata = 0xff; + if (val & 0x800) + { +// pclog(" dacData read %i %02X\n", voodoo->dac_reg, voodoo->dac_data[7]); + if (voodoo->dac_reg == 5) + { + switch (voodoo->dac_data[7]) + { + case 0x01: 
voodoo->dac_readdata = 0x55; break; + case 0x07: voodoo->dac_readdata = 0x71; break; + case 0x0b: voodoo->dac_readdata = 0x79; break; + } + } + else + voodoo->dac_readdata = voodoo->dac_data[voodoo->dac_reg & 7]; /*Index by the selected register; dac_readdata was just preset to 0xff, so indexing with it always picked entry 7*/ + } + else + { + if (voodoo->dac_reg == 5) + { + /*Only the low 8 bits of val are data (bits 8-10 were decoded into dac_reg), so mask before merging into the low or high byte of the PLL register*/ + if (!voodoo->dac_reg_ff) + voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf] = (voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf] & 0xff00) | (val & 0xff); + else + voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf] = (voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf] & 0xff) | ((val & 0xff) << 8); +// pclog("Write PLL reg %x %04x\n", voodoo->dac_data[4] & 0xf, voodoo->dac_pll_regs[voodoo->dac_data[4] & 0xf]); + voodoo->dac_reg_ff = !voodoo->dac_reg_ff; + if (!voodoo->dac_reg_ff) + voodoo->dac_data[4]++; + + } + else + { + voodoo->dac_data[voodoo->dac_reg] = val & 0xff; + voodoo->dac_reg_ff = 0; + } + voodoo_pixelclock_update(voodoo); + } + break; + + case SST_scrFilter: + if (voodoo->initEnable & 0x01) + { + voodoo->scrfilterEnabled = 1; + voodoo->scrfilterThreshold = val; /* update the threshold values and generate a new lookup table if necessary */ + + if (val < 1) + voodoo->scrfilterEnabled = 0; + voodoo_threshold_check(voodoo); + pclog("Voodoo Filter: %06x\n", val); + } + break; + + case SST_fbiInit5: + if (voodoo->initEnable & 0x01) + voodoo->fbiInit5 = (val & ~0x41e6) | (voodoo->fbiInit5 & 0x41e6); + break; + case SST_fbiInit6: + if (voodoo->initEnable & 0x01) + voodoo->fbiInit6 = val; + break; + case SST_fbiInit7: + if (voodoo->initEnable & 0x01) + { + voodoo->fbiInit7 = val; + voodoo->cmdfifo_enabled = val & 0x100; + } + break; + + case SST_cmdFifoBaseAddr: + voodoo->cmdfifo_base = (val & 0x3ff) << 12; + voodoo->cmdfifo_end = ((val >> 16) & 0x3ff) << 12; +// pclog("CMDFIFO base=%08x end=%08x\n", voodoo->cmdfifo_base, voodoo->cmdfifo_end); + break; + + case SST_cmdFifoRdPtr: + voodoo->cmdfifo_rp = val; + break; + case SST_cmdFifoAMin: + voodoo->cmdfifo_amin = val; + break; + case SST_cmdFifoAMax: + voodoo->cmdfifo_amax =
val; + break; + case SST_cmdFifoDepth: + voodoo->cmdfifo_depth_rd = 0; + voodoo->cmdfifo_depth_wr = val & 0xffff; + break; + + default: + if (voodoo->fbiInit7 & FBIINIT7_CMDFIFO_ENABLE) + { + pclog("Unknown register write in CMDFIFO mode %08x %08x\n", addr, val); + } + else + { + voodoo_queue_command(voodoo, addr | FIFO_WRITEL_REG, val); + } + break; + } +} + +static uint16_t voodoo_snoop_readw(uint32_t addr, void *p) +{ + voodoo_set_t *set = (voodoo_set_t *)p; + + return voodoo_readw(addr, set->voodoos[0]); +} +static uint32_t voodoo_snoop_readl(uint32_t addr, void *p) +{ + voodoo_set_t *set = (voodoo_set_t *)p; + + return voodoo_readl(addr, set->voodoos[0]); +} + +static void voodoo_snoop_writew(uint32_t addr, uint16_t val, void *p) +{ + voodoo_set_t *set = (voodoo_set_t *)p; + + voodoo_writew(addr, val, set->voodoos[0]); + voodoo_writew(addr, val, set->voodoos[1]); +} +static void voodoo_snoop_writel(uint32_t addr, uint32_t val, void *p) +{ + voodoo_set_t *set = (voodoo_set_t *)p; + + voodoo_writel(addr, val, set->voodoos[0]); + voodoo_writel(addr, val, set->voodoos[1]); +} + +static void voodoo_recalcmapping(voodoo_set_t *set) +{ + if (set->nr_cards == 2) + { + if (set->voodoos[0]->pci_enable && set->voodoos[0]->memBaseAddr) + { + if (set->voodoos[0]->type == VOODOO_2 && set->voodoos[1]->initEnable & (1 << 23)) + { + pclog("voodoo_recalcmapping (pri) with snoop : memBaseAddr %08X\n", set->voodoos[0]->memBaseAddr); + mem_mapping_disable(&set->voodoos[0]->mapping); + mem_mapping_set_addr(&set->snoop_mapping, set->voodoos[0]->memBaseAddr, 0x01000000); + } + else if (set->voodoos[1]->pci_enable && (set->voodoos[0]->memBaseAddr == set->voodoos[1]->memBaseAddr)) + { + pclog("voodoo_recalcmapping (pri) (sec) same addr : memBaseAddr %08X\n", set->voodoos[0]->memBaseAddr); + mem_mapping_disable(&set->voodoos[0]->mapping); + mem_mapping_disable(&set->voodoos[1]->mapping); + mem_mapping_set_addr(&set->snoop_mapping, set->voodoos[0]->memBaseAddr, 0x01000000); + return; + } 
+ else + { + pclog("voodoo_recalcmapping (pri) : memBaseAddr %08X\n", set->voodoos[0]->memBaseAddr); + mem_mapping_disable(&set->snoop_mapping); + mem_mapping_set_addr(&set->voodoos[0]->mapping, set->voodoos[0]->memBaseAddr, 0x01000000); + } + } + else + { + pclog("voodoo_recalcmapping (pri) : disabled\n"); + mem_mapping_disable(&set->voodoos[0]->mapping); + } + + if (set->voodoos[1]->pci_enable && set->voodoos[1]->memBaseAddr) + { + pclog("voodoo_recalcmapping (sec) : memBaseAddr %08X\n", set->voodoos[1]->memBaseAddr); + mem_mapping_set_addr(&set->voodoos[1]->mapping, set->voodoos[1]->memBaseAddr, 0x01000000); + } + else + { + pclog("voodoo_recalcmapping (sec) : disabled\n"); + mem_mapping_disable(&set->voodoos[1]->mapping); + } + } + else + { + voodoo_t *voodoo = set->voodoos[0]; + + if (voodoo->pci_enable && voodoo->memBaseAddr) + { + pclog("voodoo_recalcmapping : memBaseAddr %08X\n", voodoo->memBaseAddr); + mem_mapping_set_addr(&voodoo->mapping, voodoo->memBaseAddr, 0x01000000); + } + else + { + pclog("voodoo_recalcmapping : disabled\n"); + mem_mapping_disable(&voodoo->mapping); + } + } +} + +uint8_t voodoo_pci_read(int func, int addr, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + + if (func) + return 0; + +// pclog("Voodoo PCI read %08X PC=%08x\n", addr, cpu_state.pc); + + switch (addr) + { + case 0x00: return 0x1a; /*3dfx*/ + case 0x01: return 0x12; + + case 0x02: + if (voodoo->type == VOODOO_2) + return 0x02; /*Voodoo 2*/ + else + return 0x01; /*SST-1 (Voodoo Graphics)*/ + case 0x03: return 0x00; + + case 0x04: return voodoo->pci_enable ? 
0x02 : 0x00; /*Respond to memory accesses*/ + + case 0x08: return 2; /*Revision ID*/ + case 0x09: return 0; /*Programming interface*/ + case 0x0a: return 0; + case 0x0b: return 0x04; + + case 0x10: return 0x00; /*memBaseAddr*/ + case 0x11: return 0x00; + case 0x12: return 0x00; + case 0x13: return voodoo->memBaseAddr >> 24; + + case 0x40: + return voodoo->initEnable & 0xff; + case 0x41: + if (voodoo->type == VOODOO_2) + return 0x50 | ((voodoo->initEnable >> 8) & 0x0f); + return (voodoo->initEnable >> 8) & 0x0f; + case 0x42: + return (voodoo->initEnable >> 16) & 0xff; + case 0x43: + return (voodoo->initEnable >> 24) & 0xff; + } + return 0; +} + +void voodoo_pci_write(int func, int addr, uint8_t val, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + + if (func) + return; + +// pclog("Voodoo PCI write %04X %02X PC=%08x\n", addr, val, cpu_state.pc); + + switch (addr) + { + case 0x04: + voodoo->pci_enable = val & 2; + voodoo_recalcmapping(voodoo->set); + break; + + case 0x13: + voodoo->memBaseAddr = val << 24; + voodoo_recalcmapping(voodoo->set); + break; + + case 0x40: + voodoo->initEnable = (voodoo->initEnable & ~0x000000ff) | val; + break; + case 0x41: + voodoo->initEnable = (voodoo->initEnable & ~0x0000ff00) | (val << 8); + break; + case 0x42: + voodoo->initEnable = (voodoo->initEnable & ~0x00ff0000) | (val << 16); + voodoo_recalcmapping(voodoo->set); + break; + case 0x43: + voodoo->initEnable = (voodoo->initEnable & ~0xff000000) | (val << 24); + voodoo_recalcmapping(voodoo->set); + break; + } +} + + +static void voodoo_add_status_info(char *s, int max_len, void *p) +{ + voodoo_set_t *voodoo_set = (voodoo_set_t *)p; + voodoo_t *voodoo = voodoo_set->voodoos[0]; + voodoo_t *voodoo_slave = voodoo_set->voodoos[1]; + char temps[512], temps2[256]; + int pixel_count_current[4]; + int pixel_count_total; + int texel_count_current[4]; + int texel_count_total; + int render_time[4]; + uint64_t new_time = timer_read(); + uint64_t status_diff = new_time - status_time; + 
status_time = new_time; + int c; + + if (!status_diff) + status_diff = 1; + + for (c = 0; c < 4; c++) + { + pixel_count_current[c] = voodoo->pixel_count[c]; + texel_count_current[c] = voodoo->texel_count[c]; + render_time[c] = voodoo->render_time[c]; + } + if (voodoo_set->nr_cards == 2) + { + for (c = 0; c < 4; c++) + { + pixel_count_current[c] += voodoo_slave->pixel_count[c]; + texel_count_current[c] += voodoo_slave->texel_count[c]; + render_time[c] = (render_time[c] + voodoo_slave->render_time[c]) / 2; + } + } + pixel_count_total = (pixel_count_current[0] + pixel_count_current[1] + pixel_count_current[2] + pixel_count_current[3]) - + (voodoo->pixel_count_old[0] + voodoo->pixel_count_old[1] + voodoo->pixel_count_old[2] + voodoo->pixel_count_old[3]); + texel_count_total = (texel_count_current[0] + texel_count_current[1] + texel_count_current[2] + texel_count_current[3]) - + (voodoo->texel_count_old[0] + voodoo->texel_count_old[1] + voodoo->texel_count_old[2] + voodoo->texel_count_old[3]); + /*All formatting below is bounded: snprintf never writes past the destination, and each strncat is given the space still free in temps (its third argument is a count of characters to append, not the buffer size)*/ + snprintf(temps, sizeof(temps), "%f Mpixels/sec (%f)\n%f Mtexels/sec (%f)\n%f ktris/sec\n%f%% CPU (%f%% real)\n%d frames/sec (%i)\n%f%% CPU (%f%% real)\n"/*%d reads/sec\n%d write/sec\n%d tex/sec\n*/, + (double)pixel_count_total/1000000.0, + ((double)pixel_count_total/1000000.0) / ((double)render_time[0] / status_diff), + (double)texel_count_total/1000000.0, + ((double)texel_count_total/1000000.0) / ((double)render_time[0] / status_diff), + (double)voodoo->tri_count/1000.0, ((double)voodoo->time * 100.0) / timer_freq, ((double)voodoo->time * 100.0) / status_diff, voodoo->frame_count, voodoo_recomp, + ((double)voodoo->render_time[0] * 100.0) / timer_freq, ((double)voodoo->render_time[0] * 100.0) / status_diff); + if (voodoo->render_threads >= 2) + { + snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n", + ((double)voodoo->render_time[1] * 100.0) / timer_freq, ((double)voodoo->render_time[1] * 100.0) / status_diff); + strncat(temps, temps2, sizeof(temps) - strlen(temps) - 1); + } + if (voodoo->render_threads == 4) + { +
snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n%f%% CPU (%f%% real)\n", + ((double)voodoo->render_time[2] * 100.0) / timer_freq, ((double)voodoo->render_time[2] * 100.0) / status_diff, + ((double)voodoo->render_time[3] * 100.0) / timer_freq, ((double)voodoo->render_time[3] * 100.0) / status_diff); + strncat(temps, temps2, sizeof(temps) - strlen(temps) - 1); + } + if (voodoo_set->nr_cards == 2) + { + snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n", + ((double)voodoo_slave->render_time[0] * 100.0) / timer_freq, ((double)voodoo_slave->render_time[0] * 100.0) / status_diff); + strncat(temps, temps2, sizeof(temps) - strlen(temps) - 1); + + if (voodoo_slave->render_threads >= 2) + { + snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n", + ((double)voodoo_slave->render_time[1] * 100.0) / timer_freq, ((double)voodoo_slave->render_time[1] * 100.0) / status_diff); + strncat(temps, temps2, sizeof(temps) - strlen(temps) - 1); + } + if (voodoo_slave->render_threads == 4) + { + snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n%f%% CPU (%f%% real)\n", + ((double)voodoo_slave->render_time[2] * 100.0) / timer_freq, ((double)voodoo_slave->render_time[2] * 100.0) / status_diff, + ((double)voodoo_slave->render_time[3] * 100.0) / timer_freq, ((double)voodoo_slave->render_time[3] * 100.0) / status_diff); + strncat(temps, temps2, sizeof(temps) - strlen(temps) - 1); + } + } + /*NOTE(review): assumes max_len is the total capacity of s - confirm against the caller. Treated that way, only the space left after the existing text may be appended; this can only append less than the old strncat(s, temps, max_len), never more*/ + if (max_len > 0 && strlen(s) + 1 < (size_t)max_len) + strncat(s, temps, (size_t)max_len - strlen(s) - 1); + + for (c = 0; c < 4; c++) + { + voodoo->pixel_count_old[c] = pixel_count_current[c]; + voodoo->texel_count_old[c] = texel_count_current[c]; + voodoo->render_time[c] = 0; + } + voodoo->tri_count = voodoo->frame_count = 0; + voodoo->rd_count = voodoo->wr_count = voodoo->tex_count = 0; + voodoo->time = 0; + if (voodoo_set->nr_cards == 2) + { + for (c = 0; c < 4; c++) + { + voodoo_slave->pixel_count_old[c] = pixel_count_current[c]; + voodoo_slave->texel_count_old[c] = texel_count_current[c]; + voodoo_slave->render_time[c] = 0; + } + voodoo_slave->tri_count = voodoo_slave->frame_count = 0; + voodoo_slave->rd_count = voodoo_slave->wr_count = voodoo_slave->tex_count = 0; + voodoo_slave->time = 0; + }
+ voodoo_recomp = 0; +} + +static void voodoo_speed_changed(void *p) +{ + voodoo_set_t *voodoo_set = (voodoo_set_t *)p; + + voodoo_pixelclock_update(voodoo_set->voodoos[0]); + voodoo_set->voodoos[0]->read_time = pci_nonburst_time + pci_burst_time * ((voodoo_set->voodoos[0]->fbiInit4 & 1) ? 2 : 1); + voodoo_set->voodoos[0]->write_time = pci_nonburst_time + pci_burst_time * ((voodoo_set->voodoos[0]->fbiInit1 & 2) ? 1 : 0); + voodoo_set->voodoos[0]->burst_time = pci_burst_time * ((voodoo_set->voodoos[0]->fbiInit1 & 2) ? 2 : 1); + if (voodoo_set->nr_cards == 2) + { + voodoo_pixelclock_update(voodoo_set->voodoos[1]); + voodoo_set->voodoos[1]->read_time = pci_nonburst_time + pci_burst_time * ((voodoo_set->voodoos[1]->fbiInit4 & 1) ? 2 : 1); + voodoo_set->voodoos[1]->write_time = pci_nonburst_time + pci_burst_time * ((voodoo_set->voodoos[1]->fbiInit1 & 2) ? 1 : 0); + voodoo_set->voodoos[1]->burst_time = pci_burst_time * ((voodoo_set->voodoos[1]->fbiInit1 & 2) ? 2 : 1); + } +// pclog("Voodoo read_time=%i write_time=%i burst_time=%i %08x %08x\n", voodoo->read_time, voodoo->write_time, voodoo->burst_time, voodoo->fbiInit1, voodoo->fbiInit4); +} + +void *voodoo_card_init() +{ + int c; + voodoo_t *voodoo = malloc(sizeof(voodoo_t)); + memset(voodoo, 0, sizeof(voodoo_t)); + + voodoo->bilinear_enabled = device_get_config_int("bilinear"); + voodoo->scrfilter = device_get_config_int("dacfilter"); + voodoo->texture_size = device_get_config_int("texture_memory"); + voodoo->texture_mask = (voodoo->texture_size << 20) - 1; + voodoo->fb_size = device_get_config_int("framebuffer_memory"); + voodoo->fb_mask = (voodoo->fb_size << 20) - 1; + voodoo->render_threads = device_get_config_int("render_threads"); + voodoo->odd_even_mask = voodoo->render_threads - 1; +#ifndef NO_CODEGEN + voodoo->use_recompiler = device_get_config_int("recompiler"); +#endif + voodoo->type = device_get_config_int("type"); + switch (voodoo->type) + { + case VOODOO_1: + voodoo->dual_tmus = 0; + break; + case 
VOODOO_SB50: + voodoo->dual_tmus = 1; + break; + case VOODOO_2: + voodoo->dual_tmus = 1; + break; + } + + if (voodoo->type == VOODOO_2) /*generate filter lookup tables*/ + voodoo_generate_filter_v2(voodoo); + else + voodoo_generate_filter_v1(voodoo); + + pci_add(voodoo_pci_read, voodoo_pci_write, voodoo); + + mem_mapping_add(&voodoo->mapping, 0, 0, NULL, voodoo_readw, voodoo_readl, NULL, voodoo_writew, voodoo_writel, NULL, MEM_MAPPING_EXTERNAL, voodoo); + + voodoo->fb_mem = malloc(4 * 1024 * 1024); + voodoo->tex_mem[0] = malloc(voodoo->texture_size * 1024 * 1024); + if (voodoo->dual_tmus) + voodoo->tex_mem[1] = malloc(voodoo->texture_size * 1024 * 1024); + voodoo->tex_mem_w[0] = (uint16_t *)voodoo->tex_mem[0]; + voodoo->tex_mem_w[1] = (uint16_t *)voodoo->tex_mem[1]; + + for (c = 0; c < TEX_CACHE_MAX; c++) + { + voodoo->texture_cache[0][c].data = malloc((256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4); + voodoo->texture_cache[0][c].base = -1; /*invalid*/ + voodoo->texture_cache[0][c].refcount = 0; + if (voodoo->dual_tmus) + { + voodoo->texture_cache[1][c].data = malloc((256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4); + voodoo->texture_cache[1][c].base = -1; /*invalid*/ + voodoo->texture_cache[1][c].refcount = 0; + } + } + + timer_add(&voodoo->timer, voodoo_callback, voodoo, 1); + + voodoo->svga = svga_get_pri(); + voodoo->fbiInit0 = 0; + + voodoo->wake_fifo_thread = thread_create_event(); + voodoo->wake_render_thread[0] = thread_create_event(); + voodoo->wake_render_thread[1] = thread_create_event(); + voodoo->wake_render_thread[2] = thread_create_event(); + voodoo->wake_render_thread[3] = thread_create_event(); + voodoo->wake_main_thread = thread_create_event(); + voodoo->fifo_not_full_event = thread_create_event(); + voodoo->render_not_full_event[0] = thread_create_event(); + voodoo->render_not_full_event[1] = thread_create_event(); + voodoo->render_not_full_event[2] = thread_create_event(); + 
voodoo->render_not_full_event[3] = thread_create_event(); + voodoo->fifo_thread = thread_create(voodoo_fifo_thread, voodoo); + voodoo->render_thread[0] = thread_create(voodoo_render_thread_1, voodoo); + if (voodoo->render_threads >= 2) + voodoo->render_thread[1] = thread_create(voodoo_render_thread_2, voodoo); + if (voodoo->render_threads == 4) + { + voodoo->render_thread[2] = thread_create(voodoo_render_thread_3, voodoo); + voodoo->render_thread[3] = thread_create(voodoo_render_thread_4, voodoo); + } + voodoo->swap_mutex = thread_create_mutex(); + timer_add(&voodoo->wake_timer, voodoo_wake_timer, (void *)voodoo, 0); + + for (c = 0; c < 0x100; c++) + { + rgb332[c].r = c & 0xe0; + rgb332[c].g = (c << 3) & 0xe0; + rgb332[c].b = (c << 6) & 0xc0; + rgb332[c].r = rgb332[c].r | (rgb332[c].r >> 3) | (rgb332[c].r >> 6); + rgb332[c].g = rgb332[c].g | (rgb332[c].g >> 3) | (rgb332[c].g >> 6); + rgb332[c].b = rgb332[c].b | (rgb332[c].b >> 2); + rgb332[c].b = rgb332[c].b | (rgb332[c].b >> 4); + rgb332[c].a = 0xff; + + ai44[c].a = (c & 0xf0) | ((c & 0xf0) >> 4); + ai44[c].r = (c & 0x0f) | ((c & 0x0f) << 4); + ai44[c].g = ai44[c].b = ai44[c].r; + } + + for (c = 0; c < 0x10000; c++) + { + rgb565[c].r = (c >> 8) & 0xf8; + rgb565[c].g = (c >> 3) & 0xfc; + rgb565[c].b = (c << 3) & 0xf8; + rgb565[c].r |= (rgb565[c].r >> 5); + rgb565[c].g |= (rgb565[c].g >> 6); + rgb565[c].b |= (rgb565[c].b >> 5); + rgb565[c].a = 0xff; + + argb1555[c].r = (c >> 7) & 0xf8; + argb1555[c].g = (c >> 2) & 0xf8; + argb1555[c].b = (c << 3) & 0xf8; + argb1555[c].r |= (argb1555[c].r >> 5); + argb1555[c].g |= (argb1555[c].g >> 5); + argb1555[c].b |= (argb1555[c].b >> 5); + argb1555[c].a = (c & 0x8000) ? 
0xff : 0; + + argb4444[c].a = (c >> 8) & 0xf0; + argb4444[c].r = (c >> 4) & 0xf0; + argb4444[c].g = c & 0xf0; + argb4444[c].b = (c << 4) & 0xf0; + argb4444[c].a |= (argb4444[c].a >> 4); + argb4444[c].r |= (argb4444[c].r >> 4); + argb4444[c].g |= (argb4444[c].g >> 4); + argb4444[c].b |= (argb4444[c].b >> 4); + + ai88[c].a = (c >> 8); + ai88[c].r = c & 0xff; + ai88[c].g = c & 0xff; + ai88[c].b = c & 0xff; + } +#ifndef NO_CODEGEN + voodoo_codegen_init(voodoo); +#endif + + voodoo->disp_buffer = 0; + voodoo->draw_buffer = 1; + + return voodoo; +} + +void *voodoo_2d3d_card_init(int type) +{ + int c; + voodoo_t *voodoo = malloc(sizeof(voodoo_t)); + memset(voodoo, 0, sizeof(voodoo_t)); + + voodoo->bilinear_enabled = device_get_config_int("bilinear"); + voodoo->scrfilter = device_get_config_int("dacfilter"); + voodoo->render_threads = device_get_config_int("render_threads"); + voodoo->odd_even_mask = voodoo->render_threads - 1; +#ifndef NO_CODEGEN + voodoo->use_recompiler = device_get_config_int("recompiler"); +#endif + voodoo->type = type; + voodoo->dual_tmus = (type == VOODOO_3) ? 
1 : 0; + + /*generate filter lookup tables*/ + voodoo_generate_filter_v2(voodoo); + + for (c = 0; c < TEX_CACHE_MAX; c++) + { + voodoo->texture_cache[0][c].data = malloc((256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4); + voodoo->texture_cache[0][c].base = -1; /*invalid*/ + voodoo->texture_cache[0][c].refcount = 0; + if (voodoo->dual_tmus) + { + voodoo->texture_cache[1][c].data = malloc((256*256 + 256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2) * 4); + voodoo->texture_cache[1][c].base = -1; /*invalid*/ + voodoo->texture_cache[1][c].refcount = 0; + } + } + + timer_add(&voodoo->timer, voodoo_callback, voodoo, 1); + + voodoo->fbiInit0 = 0; + + voodoo->wake_fifo_thread = thread_create_event(); + voodoo->wake_render_thread[0] = thread_create_event(); + voodoo->wake_render_thread[1] = thread_create_event(); + voodoo->wake_render_thread[2] = thread_create_event(); + voodoo->wake_render_thread[3] = thread_create_event(); + voodoo->wake_main_thread = thread_create_event(); + voodoo->fifo_not_full_event = thread_create_event(); + voodoo->render_not_full_event[0] = thread_create_event(); + voodoo->render_not_full_event[1] = thread_create_event(); + voodoo->render_not_full_event[2] = thread_create_event(); + voodoo->render_not_full_event[3] = thread_create_event(); + voodoo->fifo_thread = thread_create(voodoo_fifo_thread, voodoo); + voodoo->render_thread[0] = thread_create(voodoo_render_thread_1, voodoo); + if (voodoo->render_threads >= 2) + voodoo->render_thread[1] = thread_create(voodoo_render_thread_2, voodoo); + if (voodoo->render_threads == 4) + { + voodoo->render_thread[2] = thread_create(voodoo_render_thread_3, voodoo); + voodoo->render_thread[3] = thread_create(voodoo_render_thread_4, voodoo); + } + voodoo->swap_mutex = thread_create_mutex(); + timer_add(&voodoo->wake_timer, voodoo_wake_timer, (void *)voodoo, 0); + + for (c = 0; c < 0x100; c++) + { + rgb332[c].r = c & 0xe0; + rgb332[c].g = (c << 3) & 0xe0; + rgb332[c].b = (c << 
6) & 0xc0; + rgb332[c].r = rgb332[c].r | (rgb332[c].r >> 3) | (rgb332[c].r >> 6); + rgb332[c].g = rgb332[c].g | (rgb332[c].g >> 3) | (rgb332[c].g >> 6); + rgb332[c].b = rgb332[c].b | (rgb332[c].b >> 2); + rgb332[c].b = rgb332[c].b | (rgb332[c].b >> 4); + rgb332[c].a = 0xff; + + ai44[c].a = (c & 0xf0) | ((c & 0xf0) >> 4); + ai44[c].r = (c & 0x0f) | ((c & 0x0f) << 4); + ai44[c].g = ai44[c].b = ai44[c].r; + } + + for (c = 0; c < 0x10000; c++) + { + rgb565[c].r = (c >> 8) & 0xf8; + rgb565[c].g = (c >> 3) & 0xfc; + rgb565[c].b = (c << 3) & 0xf8; + rgb565[c].r |= (rgb565[c].r >> 5); + rgb565[c].g |= (rgb565[c].g >> 6); + rgb565[c].b |= (rgb565[c].b >> 5); + rgb565[c].a = 0xff; + + argb1555[c].r = (c >> 7) & 0xf8; + argb1555[c].g = (c >> 2) & 0xf8; + argb1555[c].b = (c << 3) & 0xf8; + argb1555[c].r |= (argb1555[c].r >> 5); + argb1555[c].g |= (argb1555[c].g >> 5); + argb1555[c].b |= (argb1555[c].b >> 5); + argb1555[c].a = (c & 0x8000) ? 0xff : 0; + + argb4444[c].a = (c >> 8) & 0xf0; + argb4444[c].r = (c >> 4) & 0xf0; + argb4444[c].g = c & 0xf0; + argb4444[c].b = (c << 4) & 0xf0; + argb4444[c].a |= (argb4444[c].a >> 4); + argb4444[c].r |= (argb4444[c].r >> 4); + argb4444[c].g |= (argb4444[c].g >> 4); + argb4444[c].b |= (argb4444[c].b >> 4); + + ai88[c].a = (c >> 8); + ai88[c].r = c & 0xff; + ai88[c].g = c & 0xff; + ai88[c].b = c & 0xff; + } +#ifndef NO_CODEGEN + voodoo_codegen_init(voodoo); +#endif + + voodoo->disp_buffer = 0; + voodoo->draw_buffer = 1; + + return voodoo; +} + +void *voodoo_init() +{ + voodoo_set_t *voodoo_set = malloc(sizeof(voodoo_set_t)); + uint32_t tmuConfig = 1; + int type; + memset(voodoo_set, 0, sizeof(voodoo_set_t)); + + type = device_get_config_int("type"); + + voodoo_set->nr_cards = device_get_config_int("sli") ? 
2 : 1; + voodoo_set->voodoos[0] = voodoo_card_init(); + voodoo_set->voodoos[0]->set = voodoo_set; + if (voodoo_set->nr_cards == 2) + { + voodoo_set->voodoos[1] = voodoo_card_init(); + + voodoo_set->voodoos[1]->set = voodoo_set; + + if (type == VOODOO_2) + { + voodoo_set->voodoos[0]->fbiInit5 |= FBIINIT5_MULTI_CVG; + voodoo_set->voodoos[1]->fbiInit5 |= FBIINIT5_MULTI_CVG; + } + else + { + voodoo_set->voodoos[0]->fbiInit1 |= FBIINIT1_MULTI_SST; + voodoo_set->voodoos[1]->fbiInit1 |= FBIINIT1_MULTI_SST; + } + } + + switch (type) + { + case VOODOO_1: + if (voodoo_set->nr_cards == 2) + tmuConfig = 1 | (3 << 3); + else + tmuConfig = 1; + break; + case VOODOO_SB50: + if (voodoo_set->nr_cards == 2) + tmuConfig = 1 | (3 << 3) | (3 << 6) | (2 << 9); + else + tmuConfig = 1 | (3 << 6); + break; + case VOODOO_2: + tmuConfig = 1 | (3 << 6); + break; + } + + voodoo_set->voodoos[0]->tmuConfig = tmuConfig; + if (voodoo_set->nr_cards == 2) + voodoo_set->voodoos[1]->tmuConfig = tmuConfig; + + mem_mapping_add(&voodoo_set->snoop_mapping, 0, 0, NULL, voodoo_snoop_readw, voodoo_snoop_readl, NULL, voodoo_snoop_writew, voodoo_snoop_writel, NULL, MEM_MAPPING_EXTERNAL, voodoo_set); + + return voodoo_set; +} + +void voodoo_card_close(voodoo_t *voodoo) +{ +#ifndef RELEASE_BUILD + FILE *f; +#endif + int c; + +#ifndef RELEASE_BUILD + if (voodoo->tex_mem[0]) + { + f = romfopen("texram.dmp", "wb"); + fwrite(voodoo->tex_mem[0], voodoo->texture_size*1024*1024, 1, f); + fclose(f); + if (voodoo->dual_tmus) + { + f = romfopen("texram2.dmp", "wb"); + fwrite(voodoo->tex_mem[1], voodoo->texture_size*1024*1024, 1, f); + fclose(f); + } + } +#endif + + thread_kill(voodoo->fifo_thread); + thread_kill(voodoo->render_thread[0]); + if (voodoo->render_threads >= 2) + thread_kill(voodoo->render_thread[1]); + if (voodoo->render_threads == 4) + { + thread_kill(voodoo->render_thread[2]); + thread_kill(voodoo->render_thread[3]); + } + thread_destroy_event(voodoo->fifo_not_full_event); + 
thread_destroy_event(voodoo->wake_main_thread); + thread_destroy_event(voodoo->wake_fifo_thread); + thread_destroy_event(voodoo->wake_render_thread[0]); + thread_destroy_event(voodoo->wake_render_thread[1]); + thread_destroy_event(voodoo->render_not_full_event[0]); + thread_destroy_event(voodoo->render_not_full_event[1]); + + for (c = 0; c < TEX_CACHE_MAX; c++) + { + if (voodoo->dual_tmus) + free(voodoo->texture_cache[1][c].data); + free(voodoo->texture_cache[0][c].data); + } +#ifndef NO_CODEGEN + voodoo_codegen_close(voodoo); +#endif + if (voodoo->type < VOODOO_BANSHEE && voodoo->fb_mem) + { + free(voodoo->fb_mem); + if (voodoo->dual_tmus) + free(voodoo->tex_mem[1]); + free(voodoo->tex_mem[0]); + } + free(voodoo); +} + +void voodoo_close(void *p) +{ + voodoo_set_t *voodoo_set = (voodoo_set_t *)p; + + if (voodoo_set->nr_cards == 2) + voodoo_card_close(voodoo_set->voodoos[1]); + voodoo_card_close(voodoo_set->voodoos[0]); + + free(voodoo_set); +} + +static device_config_t voodoo_config[] = +{ + { + .name = "type", + .description = "Voodoo type", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "Voodoo Graphics", + .value = VOODOO_1 + }, + { + .description = "Obsidian SB50 + Amethyst (2 TMUs)", + .value = VOODOO_SB50 + }, + { + .description = "Voodoo 2", + .value = VOODOO_2 + }, + { + .description = "" + } + }, + .default_int = 0 + }, + { + .name = "framebuffer_memory", + .description = "Framebuffer memory size", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "2 MB", + .value = 2 + }, + { + .description = "4 MB", + .value = 4 + }, + { + .description = "" + } + }, + .default_int = 2 + }, + { + .name = "texture_memory", + .description = "Texture memory size", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "2 MB", + .value = 2 + }, + { + .description = "4 MB", + .value = 4 + }, + { + .description = "" + } + }, + .default_int = 2 + }, + { + .name = "bilinear", + .description = "Bilinear filtering", + .type = 
CONFIG_BINARY, + .default_int = 1 + }, + { + .name = "dacfilter", + .description = "Screen Filter", + .type = CONFIG_BINARY, + .default_int = 0 + }, + { + .name = "render_threads", + .description = "Render threads", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "1", + .value = 1 + }, + { + .description = "2", + .value = 2 + }, + { + .description = "4", + .value = 4 + }, + { + .description = "" + } + }, + .default_int = 2 + }, + { + .name = "sli", + .description = "SLI", + .type = CONFIG_BINARY, + .default_int = 0 + }, +#ifndef NO_CODEGEN + { + .name = "recompiler", + .description = "Recompiler", + .type = CONFIG_BINARY, + .default_int = 1 + }, +#endif + { + .type = -1 + } +}; + +device_t voodoo_device = +{ + "3DFX Voodoo Graphics", + DEVICE_PCI, + voodoo_init, + voodoo_close, + NULL, + voodoo_speed_changed, + NULL, + voodoo_add_status_info, + voodoo_config +}; diff --git a/pcem/vid_voodoo.h b/pcem/vid_voodoo.h new file mode 100644 index 00000000..04797832 --- /dev/null +++ b/pcem/vid_voodoo.h @@ -0,0 +1 @@ +extern device_t voodoo_device; diff --git a/pcem/vid_voodoo_banshee.cpp b/pcem/vid_voodoo_banshee.cpp new file mode 100644 index 00000000..2ac803e5 --- /dev/null +++ b/pcem/vid_voodoo_banshee.cpp @@ -0,0 +1,2881 @@ +#include +#include "ibm.h" +#include "device.h" +#include "io.h" +#include "mem.h" +#include "pci.h" +#include "rom.h" +#include "thread.h" +#include "video.h" +#include "vid_ddc.h" +#include "vid_svga.h" +#include "vid_svga_render.h" +#include "vid_voodoo_banshee.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_display.h" +#include "vid_voodoo_fifo.h" +#include "vid_voodoo_regs.h" +#include "vid_voodoo_render.h" +#include "x86.h" + +#ifdef CLAMP +#undef CLAMP +#endif + +static uint8_t vb_filter_v1_rb[256][256]; +static uint8_t vb_filter_v1_g [256][256]; + +static uint8_t vb_filter_bx_rb[256][256]; +static uint8_t vb_filter_bx_g [256][256]; + +enum +{ + TYPE_BANSHEE = 0, + TYPE_V3_2000, + TYPE_V3_3000 +}; + +typedef 
struct banshee_t +{ + svga_t svga; + + rom_t bios_rom; + + uint8_t pci_regs[256]; + + uint32_t memBaseAddr0; + uint32_t memBaseAddr1; + uint32_t ioBaseAddr; + + uint32_t agpInit0; + uint32_t dramInit0, dramInit1; + uint32_t lfbMemoryConfig; + uint32_t miscInit0, miscInit1; + uint32_t pciInit0; + uint32_t vgaInit0, vgaInit1; + + uint32_t command_2d; + uint32_t srcBaseAddr_2d; + + uint32_t pllCtrl0, pllCtrl1, pllCtrl2; + + uint32_t dacMode; + int dacAddr; + + uint32_t vidDesktopOverlayStride; + uint32_t vidDesktopStartAddr; + uint32_t vidProcCfg; + uint32_t vidScreenSize; + uint32_t vidSerialParallelPort; + + int overlay_pix_fmt; + + uint32_t hwCurPatAddr, hwCurLoc, hwCurC0, hwCurC1; + + uint32_t intrCtrl; + + uint32_t overlay_buffer[2][4096]; + + mem_mapping_t linear_mapping; + + mem_mapping_t reg_mapping_low; /*0000000-07fffff*/ + mem_mapping_t reg_mapping_high; /*0c00000-1ffffff - Windows 2000 puts the BIOS ROM in between these two areas*/ + + voodoo_t *voodoo; + + uint32_t desktop_addr; + int desktop_y; + uint32_t desktop_stride_tiled; + + int type; +} banshee_t; + +enum +{ + Init_status = 0x00, + Init_pciInit0 = 0x04, + Init_lfbMemoryConfig = 0x0c, + Init_miscInit0 = 0x10, + Init_miscInit1 = 0x14, + Init_dramInit0 = 0x18, + Init_dramInit1 = 0x1c, + Init_agpInit0 = 0x20, + Init_vgaInit0 = 0x28, + Init_vgaInit1 = 0x2c, + Init_2dCommand = 0x30, + Init_2dSrcBaseAddr = 0x34, + Init_strapInfo = 0x38, + + PLL_pllCtrl0 = 0x40, + PLL_pllCtrl1 = 0x44, + PLL_pllCtrl2 = 0x48, + + DAC_dacMode = 0x4c, + DAC_dacAddr = 0x50, + DAC_dacData = 0x54, + + Video_vidProcCfg = 0x5c, + Video_maxRgbDelta = 0x58, + Video_hwCurPatAddr = 0x60, + Video_hwCurLoc = 0x64, + Video_hwCurC0 = 0x68, + Video_hwCurC1 = 0x6c, + Video_vidSerialParallelPort = 0x78, + Video_vidScreenSize = 0x98, + Video_vidOverlayStartCoords = 0x9c, + Video_vidOverlayEndScreenCoords = 0xa0, + Video_vidOverlayDudx = 0xa4, + Video_vidOverlayDudxOffsetSrcWidth = 0xa8, + Video_vidOverlayDvdy = 0xac, + 
Video_vidOverlayDvdyOffset = 0xe0, + Video_vidDesktopStartAddr = 0xe4, + Video_vidDesktopOverlayStride = 0xe8 +}; + +enum +{ + cmdBaseAddr0 = 0x20, + cmdBaseSize0 = 0x24, + cmdBump0 = 0x28, + cmdRdPtrL0 = 0x2c, + cmdRdPtrH0 = 0x30, + cmdAMin0 = 0x34, + cmdAMax0 = 0x3c, + cmdFifoDepth0 = 0x44, + cmdHoleCnt0 = 0x48 +}; + +#define VGAINIT0_EXTENDED_SHIFT_OUT (1 << 12) + +#define VIDPROCCFG_CURSOR_MODE (1 << 1) +#define VIDPROCCFG_HALF_MODE (1 << 4) +#define VIDPROCCFG_OVERLAY_ENABLE (1 << 8) +#define VIDPROCCFG_OVERLAY_CLUT_BYPASS (1 << 11) +#define VIDPROCCFG_OVERLAY_CLUT_SEL (1 << 13) +#define VIDPROCCFG_H_SCALE_ENABLE (1 << 14) +#define VIDPROCCFG_V_SCALE_ENABLE (1 << 15) +#define VIDPROCCFG_FILTER_MODE_MASK (3 << 16) +#define VIDPROCCFG_FILTER_MODE_POINT (0 << 16) +#define VIDPROCCFG_FILTER_MODE_DITHER_2X2 (1 << 16) +#define VIDPROCCFG_FILTER_MODE_DITHER_4X4 (2 << 16) +#define VIDPROCCFG_FILTER_MODE_BILINEAR (3 << 16) +#define VIDPROCCFG_DESKTOP_PIX_FORMAT ((banshee->vidProcCfg >> 18) & 7) +#define VIDPROCCFG_OVERLAY_PIX_FORMAT ((banshee->vidProcCfg >> 21) & 7) +#define VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT (21) +#define VIDPROCCFG_OVERLAY_PIX_FORMAT_MASK (7 << VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT) +#define VIDPROCCFG_DESKTOP_TILE (1 << 24) +#define VIDPROCCFG_OVERLAY_TILE (1 << 25) +#define VIDPROCCFG_2X_MODE (1 << 26) +#define VIDPROCCFG_HWCURSOR_ENA (1 << 27) + +#define OVERLAY_FMT_565 (1) +#define OVERLAY_FMT_YUYV422 (5) +#define OVERLAY_FMT_UYVY422 (6) +#define OVERLAY_FMT_565_DITHER (7) + +#define OVERLAY_START_X_MASK (0xfff) +#define OVERLAY_START_Y_SHIFT (12) +#define OVERLAY_START_Y_MASK (0xfff << OVERLAY_START_Y_SHIFT) + +#define OVERLAY_END_X_MASK (0xfff) +#define OVERLAY_END_Y_SHIFT (12) +#define OVERLAY_END_Y_MASK (0xfff << OVERLAY_END_Y_SHIFT) + +#define OVERLAY_SRC_WIDTH_SHIFT (19) +#define OVERLAY_SRC_WIDTH_MASK (0x1fff << OVERLAY_SRC_WIDTH_SHIFT) + +#define VID_STRIDE_OVERLAY_SHIFT (16) +#define VID_STRIDE_OVERLAY_MASK (0x7fff << 
VID_STRIDE_OVERLAY_SHIFT) + +#define VID_DUDX_MASK (0xffffff) +#define VID_DVDY_MASK (0xffffff) + +#define PIX_FORMAT_8 0 +#define PIX_FORMAT_RGB565 1 +#define PIX_FORMAT_RGB24 2 +#define PIX_FORMAT_RGB32 3 + +#define VIDSERIAL_DDC_DCK_W (1 << 19) +#define VIDSERIAL_DDC_DDA_W (1 << 20) +#define VIDSERIAL_DDC_DCK_R (1 << 21) +#define VIDSERIAL_DDC_DDA_R (1 << 22) +#define VIDSERIAL_I2C_SCK_W (1 << 24) +#define VIDSERIAL_I2C_SDA_W (1 << 25) +#define VIDSERIAL_I2C_SCK_R (1 << 26) +#define VIDSERIAL_I2C_SDA_R (1 << 27) + +static uint32_t banshee_status(banshee_t *banshee); + +static void banshee_out(uint16_t addr, uint8_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + svga_t *svga = &banshee->svga; + uint8_t old; + +// /*if (addr != 0x3c9) */pclog("banshee_out : %04X %02X %04X:%04X\n", addr, val, CS,cpu_state.pc); + + if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1)) + addr ^= 0x60; + + switch (addr) + { + case 0x3D4: + svga->crtcreg = val & 0x3f; + return; + case 0x3D5: + if ((svga->crtcreg < 7) && (svga->crtc[0x11] & 0x80)) + return; + if ((svga->crtcreg == 7) && (svga->crtc[0x11] & 0x80)) + val = (svga->crtc[7] & ~0x10) | (val & 0x10); + old = svga->crtc[svga->crtcreg]; + svga->crtc[svga->crtcreg] = val; + if (old != val) + { + if (svga->crtcreg < 0xe || svga->crtcreg > 0x10) + { + svga->fullchange = changeframecount; + svga_recalctimings(svga); + } + } + break; + } + svga_out(addr, val, svga); +} + +static uint8_t banshee_in(uint16_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + svga_t *svga = &banshee->svga; + uint8_t temp; + +// if (addr != 0x3da) pclog("banshee_in : %04X ", addr); + + if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1)) + addr ^= 0x60; + + switch (addr) + { + case 0x3c2: + if ((svga->vgapal[0].r + svga->vgapal[0].g + svga->vgapal[0].b) >= 0x40) + temp = 0; + else + temp = 0x10; + break; + case 0x3D4: + temp = svga->crtcreg; + break; + case 0x3D5: + temp = 
svga->crtc[svga->crtcreg]; + break; + default: + temp = svga_in(addr, svga); + break; + } +// if (addr != 0x3da) pclog("%02X %04X:%04X %i\n", temp, CS,cpu_state.pc, ins); + return temp; +} + +static void banshee_updatemapping(banshee_t *banshee) +{ + svga_t *svga = &banshee->svga; + + if (!(banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM)) + { +// pclog("Update mapping - PCI disabled\n"); + mem_mapping_disable(&svga->mapping); + mem_mapping_disable(&banshee->linear_mapping); + mem_mapping_disable(&banshee->reg_mapping_low); + mem_mapping_disable(&banshee->reg_mapping_high); + return; + } + + pclog("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc); + switch (svga->gdcreg[6] & 0xc) /*Banked framebuffer*/ + { + case 0x0: /*128k at A0000*/ + mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000); + svga->banked_mask = 0xffff; + break; + case 0x4: /*64k at A0000*/ + mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000); + svga->banked_mask = 0xffff; + break; + case 0x8: /*32k at B0000*/ + mem_mapping_set_addr(&svga->mapping, 0xb0000, 0x08000); + svga->banked_mask = 0x7fff; + break; + case 0xC: /*32k at B8000*/ + mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000); + svga->banked_mask = 0x7fff; + break; + } + + pclog("Linear framebuffer %08X ", banshee->memBaseAddr1); + mem_mapping_set_addr(&banshee->linear_mapping, banshee->memBaseAddr1, 32 << 20); + pclog("registers %08X\n", banshee->memBaseAddr0); + mem_mapping_set_addr(&banshee->reg_mapping_low, banshee->memBaseAddr0, 8 << 20); + mem_mapping_set_addr(&banshee->reg_mapping_high, banshee->memBaseAddr0 + 0xc00000, 20 << 20); +} + +static void banshee_render_16bpp_tiled(svga_t *svga) +{ + banshee_t *banshee = (banshee_t *)svga->p; + int x; + int offset = 32; + uint32_t *p = &((uint32_t *)buffer32->line[svga->displine])[offset]; + uint32_t addr; + int drawn = 0; + + if (banshee->vidProcCfg & VIDPROCCFG_HALF_MODE) + addr = banshee->desktop_addr + ((banshee->desktop_y >> 1) & 31) * 128 + ((banshee->desktop_y >> 
6) * banshee->desktop_stride_tiled); + else + addr = banshee->desktop_addr + (banshee->desktop_y & 31) * 128 + ((banshee->desktop_y >> 5) * banshee->desktop_stride_tiled); + + for (x = 0; x <= svga->hdisp; x += 64) + { + if (svga->hwcursor_on || svga->overlay_on) + svga->changedvram[addr >> 12] = 2; + if (svga->changedvram[addr >> 12] || svga->fullchange) + { + uint16_t *vram_p = (uint16_t *)&svga->vram[addr & svga->vram_display_mask]; + int xx; + + for (xx = 0; xx < 64; xx++) + *p++ = video_16to32[*vram_p++]; + + drawn = 1; + } + else + p += 64; + addr += 128*32; + } + + if (drawn) + { + if (svga->firstline_draw == 2000) + svga->firstline_draw = svga->displine; + svga->lastline_draw = svga->displine; + } + + banshee->desktop_y++; +} + +static void banshee_recalctimings(svga_t *svga) +{ + banshee_t *banshee = (banshee_t *)svga->p; + voodoo_t *voodoo = banshee->voodoo; + +/*7 R/W Horizontal Retrace End bit 5. - + 6 R/W Horizontal Retrace Start bit 8 0x4 + 5 R/W Horizontal Blank End bit 6. - + 4 R/W Horizontal Blank Start bit 8. 0x3 + 3 R/W Reserved. - + 2 R/W Horizontal Display Enable End bit 8. 0x1 + 1 R/W Reserved. - + 0 R/W Horizontal Total bit 8. 0x0*/ + if (svga->crtc[0x1a] & 0x01) svga->htotal += 0x100; + if (svga->crtc[0x1a] & 0x04) svga->hdisp += 0x100; +/*6 R/W Vertical Retrace Start bit 10 0x10 + 5 R/W Reserved. - + 4 R/W Vertical Blank Start bit 10. 0x15 + 3 R/W Reserved. - + 2 R/W Vertical Display Enable End bit 10 0x12 + 1 R/W Reserved. - + 0 R/W Vertical Total bit 10. 
0x6*/ + if (svga->crtc[0x1b] & 0x01) svga->vtotal += 0x400; + if (svga->crtc[0x1b] & 0x04) svga->dispend += 0x400; + if (svga->crtc[0x1b] & 0x10) svga->vblankstart += 0x400; + if (svga->crtc[0x1b] & 0x40) svga->vsyncstart += 0x400; +// pclog("svga->hdisp=%i\n", svga->hdisp); + + if (banshee->vgaInit0 & VGAINIT0_EXTENDED_SHIFT_OUT) + { + switch (VIDPROCCFG_DESKTOP_PIX_FORMAT) + { + case PIX_FORMAT_8: + svga->render = svga_render_8bpp_highres; + svga->bpp = 8; + break; + case PIX_FORMAT_RGB565: + svga->render = (banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE) ? banshee_render_16bpp_tiled : svga_render_16bpp_highres; + svga->bpp = 16; + break; + case PIX_FORMAT_RGB24: + svga->render = svga_render_24bpp_highres; + svga->bpp = 24; + break; + case PIX_FORMAT_RGB32: + svga->render = svga_render_32bpp_highres; + svga->bpp = 32; + break; + +#ifndef RELEASE_BUILD + default: + fatal("Unknown pixel format %08x\n", banshee->vgaInit0); +#endif + } + svga->rowcount = 0; + if (!(banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE) && (banshee->vidProcCfg & VIDPROCCFG_HALF_MODE)) + svga->linedbl = 1; + else + svga->linedbl = 0; + if (banshee->vidProcCfg & VIDPROCCFG_DESKTOP_TILE) + svga->rowoffset = ((banshee->vidDesktopOverlayStride & 0x3fff) * 128) >> 3; + else + svga->rowoffset = (banshee->vidDesktopOverlayStride & 0x3fff) >> 3; + svga->ma_latch = banshee->vidDesktopStartAddr >> 2; + banshee->desktop_stride_tiled = (banshee->vidDesktopOverlayStride & 0x3fff) * 128 * 32; +// pclog("Extended shift out %i rowoffset=%i %02x\n", VIDPROCCFG_DESKTOP_PIX_FORMAT, svga->rowoffset, svga->crtc[1]); + + svga->char_width = 8; + svga->split = 99999; + + if (banshee->vidProcCfg & VIDPROCCFG_2X_MODE) + { + svga->hdisp *= 2; + svga->htotal *= 2; + } + + svga->overlay.ena = banshee->vidProcCfg & VIDPROCCFG_OVERLAY_ENABLE; + + svga->overlay.x = voodoo->overlay.start_x; + svga->overlay.y = voodoo->overlay.start_y; + svga->overlay.xsize = voodoo->overlay.size_x; + svga->overlay.ysize = 
voodoo->overlay.size_y; + svga->overlay.pitch = (banshee->vidDesktopOverlayStride & VID_STRIDE_OVERLAY_MASK) >> VID_STRIDE_OVERLAY_SHIFT; + if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) + svga->overlay.pitch *= 128*32; + if (svga->overlay.xsize <= 0 || svga->overlay.ysize <= 0) + svga->overlay.ena = 0; + if (svga->overlay.ena) + { +/* pclog("Overlay enabled : start=%i,%i end=%i,%i size=%i,%i pitch=%x\n", + voodoo->overlay.start_x, voodoo->overlay.start_y, + voodoo->overlay.end_x, voodoo->overlay.end_y, + voodoo->overlay.size_x, voodoo->overlay.size_y, + svga->overlay.pitch);*/ + if (!voodoo->overlay.start_x && !voodoo->overlay.start_y && + svga->hdisp == voodoo->overlay.size_x && svga->dispend == voodoo->overlay.size_y) + { + /*Overlay is full screen, so don't bother rendering the desktop + behind it*/ + svga->render = svga_render_null; + svga->bpp = 0; + } + } + + svga->video_res_override = 1; + svga->video_res_x = svga->hdisp; + svga->video_res_y = svga->dispend; + svga->video_bpp = svga->bpp; + } + else + { +// pclog("Normal shift out\n"); + svga->bpp = 8; + svga->video_res_override = 0; + } + + if (((svga->miscout >> 2) & 3) == 3) + { + int k = banshee->pllCtrl0 & 3; + int m = (banshee->pllCtrl0 >> 2) & 0x3f; + int n = (banshee->pllCtrl0 >> 8) & 0xff; + double freq = (((double)n + 2) / (((double)m + 2) * (double)(1 << k))) * 14318184.0; + + svga->clock = (cpuclock * (float)(1ull << 32)) / freq; +// svga->clock = cpuclock / freq; + +// pclog("svga->clock = %g %g m=%i k=%i n=%i\n", freq, freq / 1000000.0, m, k, n); + } +} + +static void banshee_ext_out(uint16_t addr, uint8_t val, void *p) +{ +// banshee_t *banshee = (banshee_t *)p; +// svga_t *svga = &banshee->svga; + +// pclog("banshee_ext_out: addr=%04x val=%02x\n", addr, val); + + switch (addr & 0xff) + { + case 0xb0: case 0xb1: case 0xb2: case 0xb3: + case 0xb4: case 0xb5: case 0xb6: case 0xb7: + case 0xb8: case 0xb9: case 0xba: case 0xbb: + case 0xbc: case 0xbd: case 0xbe: case 0xbf: + case 0xc0: case 
0xc1: case 0xc2: case 0xc3: + case 0xc4: case 0xc5: case 0xc6: case 0xc7: + case 0xc8: case 0xc9: case 0xca: case 0xcb: + case 0xcc: case 0xcd: case 0xce: case 0xcf: + case 0xd0: case 0xd1: case 0xd2: case 0xd3: + case 0xd4: case 0xd5: case 0xd6: case 0xd7: + case 0xd8: case 0xd9: case 0xda: case 0xdb: + case 0xdc: case 0xdd: case 0xde: case 0xdf: + banshee_out((addr & 0xff)+0x300, val, p); + break; + + default: + pclog("bad banshee_ext_out: addr=%04x val=%02x\n", addr, val); + } +} +static void banshee_ext_outl(uint16_t addr, uint32_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + +// pclog("banshee_ext_outl: addr=%04x val=%08x %04x(%08x):%08x\n", addr, val, CS,cs,cpu_state.pc); + + switch (addr & 0xff) + { + case Init_pciInit0: + banshee->pciInit0 = val; + voodoo->read_time = pci_nonburst_time + pci_burst_time * ((val & 0x100) ? 2 : 1); + voodoo->burst_time = pci_burst_time * ((val & 0x200) ? 1 : 0); + voodoo->write_time = pci_nonburst_time + voodoo->burst_time; + break; + + case Init_lfbMemoryConfig: + banshee->lfbMemoryConfig = val; +// pclog("lfbMemoryConfig=%08x\n", val); + voodoo->tile_base = (val & 0x1fff) << 12; + voodoo->tile_stride = 1024 << ((val >> 13) & 7); + voodoo->tile_stride_shift = 10 + ((val >> 13) & 7); + voodoo->tile_x = ((val >> 16) & 0x7f) * 128; + voodoo->tile_x_real = ((val >> 16) & 0x7f) * 128*32; + break; + + case Init_miscInit0: + banshee->miscInit0 = val; + break; + case Init_miscInit1: + banshee->miscInit1 = val; + break; + case Init_dramInit0: + banshee->dramInit0 = val; + break; + case Init_dramInit1: + banshee->dramInit1 = val; + break; + case Init_agpInit0: + banshee->agpInit0 = val; + break; + + case Init_2dCommand: + banshee->command_2d = val; + break; + case Init_2dSrcBaseAddr: + banshee->srcBaseAddr_2d = val; + break; + case Init_vgaInit0: + banshee->vgaInit0 = val; + break; + case Init_vgaInit1: + banshee->vgaInit1 = val; + svga->write_bank = 
(val & 0x3ff) << 15; + svga->read_bank = ((val >> 10) & 0x3ff) << 15; + break; + + case PLL_pllCtrl0: + banshee->pllCtrl0 = val; + break; + case PLL_pllCtrl1: + banshee->pllCtrl1 = val; + break; + case PLL_pllCtrl2: + banshee->pllCtrl2 = val; + break; + + case DAC_dacMode: + banshee->dacMode = val; + break; + case DAC_dacAddr: + banshee->dacAddr = val & 0x1ff; + break; + case DAC_dacData: + svga->pallook[banshee->dacAddr] = val & 0xffffff; + svga->fullchange = changeframecount; + break; + + case Video_vidProcCfg: + banshee->vidProcCfg = val; +// pclog("vidProcCfg=%08x\n", val); + banshee->overlay_pix_fmt = (val & VIDPROCCFG_OVERLAY_PIX_FORMAT_MASK) >> VIDPROCCFG_OVERLAY_PIX_FORMAT_SHIFT; + svga->hwcursor.ena = val & VIDPROCCFG_HWCURSOR_ENA; + svga->fullchange = changeframecount; + svga_recalctimings(svga); + break; + + case Video_maxRgbDelta: + banshee->voodoo->scrfilterThreshold = val; + if (val > 0x00) + banshee->voodoo->scrfilterEnabled = 1; + else + banshee->voodoo->scrfilterEnabled = 0; + voodoo_threshold_check(banshee->voodoo); + pclog("Banshee Filter: %06x\n", val); + + break; + + case Video_hwCurPatAddr: + banshee->hwCurPatAddr = val; + svga->hwcursor.addr = (val & 0xfffff0) + (svga->hwcursor.yoff * 16); + break; + case Video_hwCurLoc: + banshee->hwCurLoc = val; + svga->hwcursor.x = (val & 0x7ff) - 32; + svga->hwcursor.y = ((val >> 16) & 0x7ff) - 64; + if (svga->hwcursor.y < 0) + { + svga->hwcursor.yoff = -svga->hwcursor.y; + svga->hwcursor.y = 0; + } + else + svga->hwcursor.yoff = 0; + svga->hwcursor.addr = (banshee->hwCurPatAddr & 0xfffff0) + (svga->hwcursor.yoff * 16); + svga->hwcursor.xsize = 64; + svga->hwcursor.ysize = 64; +// pclog("hwCurLoc %08x %i\n", val, svga->hwcursor.y); + break; + case Video_hwCurC0: + banshee->hwCurC0 = val; + break; + case Video_hwCurC1: + banshee->hwCurC1 = val; + break; + + case Video_vidSerialParallelPort: + banshee->vidSerialParallelPort = val; +// pclog("vidSerialParallelPort: write %08x %08x %04x(%08x):%08x\n", val, 
val & (VIDSERIAL_DDC_DCK_W | VIDSERIAL_DDC_DDA_W), CS,cs,cpu_state.pc); + ddc_i2c_change((val & VIDSERIAL_DDC_DCK_W) ? 1 : 0, (val & VIDSERIAL_DDC_DDA_W) ? 1 : 0); + break; + + case Video_vidScreenSize: + banshee->vidScreenSize = val; + voodoo->h_disp = (val & 0xfff) + 1; + voodoo->v_disp = (val >> 12) & 0xfff; + break; + case Video_vidOverlayStartCoords: + voodoo->overlay.vidOverlayStartCoords = val; + voodoo->overlay.start_x = val & OVERLAY_START_X_MASK; + voodoo->overlay.start_y = (val & OVERLAY_START_Y_MASK) >> OVERLAY_START_Y_SHIFT; + voodoo->overlay.size_x = voodoo->overlay.end_x - voodoo->overlay.start_x; + voodoo->overlay.size_y = voodoo->overlay.end_y - voodoo->overlay.start_y; + svga_recalctimings(svga); + break; + case Video_vidOverlayEndScreenCoords: + voodoo->overlay.vidOverlayEndScreenCoords = val; + voodoo->overlay.end_x = val & OVERLAY_END_X_MASK; + voodoo->overlay.end_y = (val & OVERLAY_END_Y_MASK) >> OVERLAY_END_Y_SHIFT; + voodoo->overlay.size_x = (voodoo->overlay.end_x - voodoo->overlay.start_x) + 1; + voodoo->overlay.size_y = (voodoo->overlay.end_y - voodoo->overlay.start_y) + 1; + svga_recalctimings(svga); + break; + case Video_vidOverlayDudx: + voodoo->overlay.vidOverlayDudx = val & VID_DUDX_MASK; +// pclog("vidOverlayDudx=%08x\n", val); + break; + case Video_vidOverlayDudxOffsetSrcWidth: + voodoo->overlay.vidOverlayDudxOffsetSrcWidth = val; + voodoo->overlay.overlay_bytes = (val & OVERLAY_SRC_WIDTH_MASK) >> OVERLAY_SRC_WIDTH_SHIFT; +// pclog("vidOverlayDudxOffsetSrcWidth=%08x\n", val); + break; + case Video_vidOverlayDvdy: + voodoo->overlay.vidOverlayDvdy = val & VID_DVDY_MASK; +// pclog("vidOverlayDvdy=%08x\n", val); + break; + case Video_vidOverlayDvdyOffset: + voodoo->overlay.vidOverlayDvdyOffset = val; + break; + + + case Video_vidDesktopStartAddr: + banshee->vidDesktopStartAddr = val & 0xffffff; +// pclog("vidDesktopStartAddr=%08x\n", val); + svga->fullchange = changeframecount; + svga_recalctimings(svga); + break; + case 
Video_vidDesktopOverlayStride:
+                banshee->vidDesktopOverlayStride = val;
+//                pclog("vidDesktopOverlayStride=%08x\n", val);
+                svga->fullchange = changeframecount;
+                svga_recalctimings(svga);
+                break;
+//                default:
+//                fatal("bad banshee_ext_outl: addr=%04x val=%08x\n", addr, val);
+        }
+}
+
+/*Byte read from the extended I/O register block, decoded on the low 8 bits
+  of addr. Offsets Init_status..Init_status+3 return the matching byte of the
+  status word; offsets 0xb0-0xdf are forwarded to banshee_in() at 0x3b0-0x3df.
+  Anything else is logged and reads back 0xff.*/
+static uint8_t banshee_ext_in(uint16_t addr, void *p)
+{
+        banshee_t *banshee = (banshee_t *)p;
+        int reg = addr & 0xff;
+
+        if (reg >= Init_status && reg <= Init_status+3)
+                return (banshee_status(banshee) >> ((addr & 3) * 8)) & 0xff;
+
+        if (reg >= 0xb0 && reg <= 0xdf)
+                return banshee_in(reg+0x300, p);
+
+        pclog("bad banshee_ext_in: addr=%04x\n", addr);
+        return 0xff;
+}
+
+/*Assemble the status word:
+  bits 0-4   fifo_size (0xffff - FIFO_ENTRIES), capped at 0x1f
+  bit  5     set when fifo_size is non-zero
+  bit  6     set while bit 3 of svga->cgastat is clear
+  bits 7-10  set while any command, cmdfifo entry or render thread is pending
+  bit  11    set while the cmdfifo read and write depths differ
+  bits 28-30 swap_count, capped at 7
+  Also wakes the FIFO thread when voodoo_busy is clear.*/
+static uint32_t banshee_status(banshee_t *banshee)
+{
+        /*Keep the name 'voodoo': FIFO_ENTRIES is a macro defined elsewhere
+          and may refer to it*/
+        voodoo_t *voodoo = banshee->voodoo;
+        svga_t *svga = &banshee->svga;
+        int fifo_entries = FIFO_ENTRIES;
+        int fifo_size = 0xffff - fifo_entries;
+        int swap_count = voodoo->swap_count;
+        int written = voodoo->cmd_written + voodoo->cmd_written_fifo;
+        int busy = (written - voodoo->cmd_read) || (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr) ||
+                   voodoo->render_voodoo_busy[0] || voodoo->render_voodoo_busy[1] ||
+                   voodoo->render_voodoo_busy[2] || voodoo->render_voodoo_busy[3] ||
+                   voodoo->voodoo_busy;
+        uint32_t ret;
+
+        ret = (fifo_size < 0x20) ? fifo_size : 0x1f;
+        if (fifo_size)
+                ret |= 0x20;
+        ret |= ((swap_count < 7) ? swap_count : 7) << 28;
+        if (!(svga->cgastat & 8))
+                ret |= 0x40;
+        if (busy)
+                ret |= 0x780; /*Busy*/
+        if (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr)
+                ret |= (1 << 11);
+
+        if (!voodoo->voodoo_busy)
+                voodoo_wake_fifo_thread(voodoo);
+
+        return ret;
+}
+
+static uint32_t banshee_ext_inl(uint16_t addr, void *p)
+{
+        banshee_t *banshee = (banshee_t *)p;
+        voodoo_t *voodoo = banshee->voodoo;
+        svga_t *svga = &banshee->svga;
+        uint32_t ret = 0xffffffff;
+
+        cycles -= voodoo->read_time;
+
+        switch (addr & 0xff)
+        {
+                case Init_status:
+                ret = banshee_status(banshee);
+//                pclog("Read status reg!
%04x(%08x):%08x\n", CS, cs, cpu_state.pc); + break; + case Init_pciInit0: + ret = banshee->pciInit0; + break; + case Init_lfbMemoryConfig: + ret = banshee->lfbMemoryConfig; + break; + + case Init_miscInit0: + ret = banshee->miscInit0; + break; + case Init_miscInit1: + ret = banshee->miscInit1; + break; + case Init_dramInit0: + ret = banshee->dramInit0; + break; + case Init_dramInit1: + ret = banshee->dramInit1; + break; + case Init_agpInit0: + ret = banshee->agpInit0; + break; + + case Init_vgaInit0: + ret = banshee->vgaInit0; + break; + case Init_vgaInit1: + ret = banshee->vgaInit1; + break; + + case Init_2dCommand: + ret = banshee->command_2d; + break; + case Init_2dSrcBaseAddr: + ret = banshee->srcBaseAddr_2d; + break; + case Init_strapInfo: + ret = 0x00000040; /*8 MB SGRAM, PCI, IRQ enabled, 32kB BIOS*/ + break; + + case PLL_pllCtrl0: + ret = banshee->pllCtrl0; + break; + case PLL_pllCtrl1: + ret = banshee->pllCtrl1; + break; + case PLL_pllCtrl2: + ret = banshee->pllCtrl2; + break; + + case DAC_dacMode: + ret = banshee->dacMode; + break; + case DAC_dacAddr: + ret = banshee->dacAddr; + break; + case DAC_dacData: + ret = svga->pallook[banshee->dacAddr]; + break; + + case Video_vidProcCfg: + ret = banshee->vidProcCfg; + break; + + case Video_hwCurPatAddr: + ret = banshee->hwCurPatAddr; + break; + case Video_hwCurLoc: + ret = banshee->hwCurLoc; + break; + case Video_hwCurC0: + ret = banshee->hwCurC0; + break; + case Video_hwCurC1: + ret = banshee->hwCurC1; + break; + + case Video_vidSerialParallelPort: + ret = banshee->vidSerialParallelPort & ~(VIDSERIAL_DDC_DCK_R | VIDSERIAL_DDC_DDA_R); + if ((banshee->vidSerialParallelPort & VIDSERIAL_DDC_DCK_W) && ddc_read_clock()) + ret |= VIDSERIAL_DDC_DCK_R; + if ((banshee->vidSerialParallelPort & VIDSERIAL_DDC_DDA_W) && ddc_read_data()) + ret |= VIDSERIAL_DDC_DDA_R; + ret = ret & ~(VIDSERIAL_I2C_SCK_R | VIDSERIAL_I2C_SDA_R); + if (banshee->vidSerialParallelPort & VIDSERIAL_I2C_SCK_W) + ret |= VIDSERIAL_I2C_SCK_R; + if 
(banshee->vidSerialParallelPort & VIDSERIAL_I2C_SDA_W) + ret |= VIDSERIAL_I2C_SDA_R; +// pclog("vidSerialParallelPort: read %08x %08x %04x(%08x):%08x\n", ret, ret & (VIDSERIAL_DDC_DCK_R | VIDSERIAL_DDC_DDA_R), CS,cs,cpu_state.pc); + break; + + case Video_vidScreenSize: + ret = banshee->vidScreenSize; + break; + case Video_vidOverlayStartCoords: + ret = voodoo->overlay.vidOverlayStartCoords; + break; + case Video_vidOverlayEndScreenCoords: + ret = voodoo->overlay.vidOverlayEndScreenCoords; + break; + case Video_vidOverlayDudx: + ret = voodoo->overlay.vidOverlayDudx; + break; + case Video_vidOverlayDudxOffsetSrcWidth: + ret = voodoo->overlay.vidOverlayDudxOffsetSrcWidth; + break; + case Video_vidOverlayDvdy: + ret = voodoo->overlay.vidOverlayDvdy; + break; + case Video_vidOverlayDvdyOffset: + ret = voodoo->overlay.vidOverlayDvdyOffset; + break; + + case Video_vidDesktopStartAddr: + ret = banshee->vidDesktopStartAddr; + break; + case Video_vidDesktopOverlayStride: + ret = banshee->vidDesktopOverlayStride; + break; + + default: +// fatal("bad banshee_ext_inl: addr=%04x\n", addr); + break; + } + +// /*if (addr) */pclog("banshee_ext_inl: addr=%04x val=%08x\n", addr, ret); + + return ret; +} + + +static uint32_t banshee_reg_readl(uint32_t addr, void *p); + +static uint8_t banshee_reg_read(uint32_t addr, void *p) +{ +// pclog("banshee_reg_read: addr=%08x\n", addr); + return banshee_reg_readl(addr & ~3, p) >> (8*(addr & 3)); +} + +static uint16_t banshee_reg_readw(uint32_t addr, void *p) +{ +// pclog("banshee_reg_readw: addr=%08x\n", addr); + return banshee_reg_readl(addr & ~3, p) >> (8*(addr & 2)); +} + +static uint32_t banshee_cmd_read(banshee_t *banshee, uint32_t addr) +{ + voodoo_t *voodoo = banshee->voodoo; + uint32_t ret = 0xffffffff; + + switch (addr & 0x1fc) + { + case cmdBaseAddr0: + ret = voodoo->cmdfifo_base >> 12; +// pclog("Read cmdfifo_base %08x\n", ret); + break; + + case cmdRdPtrL0: + ret = voodoo->cmdfifo_rp; +// pclog("Read cmdfifo_rp %08x\n", ret); + 
break; + + case cmdFifoDepth0: + ret = voodoo->cmdfifo_depth_wr - voodoo->cmdfifo_depth_rd; +// pclog("Read cmdfifo_depth %08x\n", ret); + break; + + case 0x108: + break; + +#ifndef RELEASE_BUILD + default: + fatal("Unknown banshee_cmd_read %08x\n", addr); +#endif + } + + return ret; +} + +static uint32_t banshee_reg_readl(uint32_t addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + uint32_t ret = 0xffffffff; + + cycles -= voodoo->read_time; + + switch (addr & 0x1f00000) + { + case 0x0000000: /*IO remap*/ + if (!(addr & 0x80000)) + ret = banshee_ext_inl(addr & 0xff, banshee); + else + ret = banshee_cmd_read(banshee, addr); + break; + + case 0x0100000: /*2D registers*/ + voodoo_flush(voodoo); + switch (addr & 0x1fc) + { + case 0x08: + ret = voodoo->banshee_blt.clip0Min; + break; + case 0x0c: + ret = voodoo->banshee_blt.clip0Max; + break; + case 0x10: + ret = voodoo->banshee_blt.dstBaseAddr; + break; + case 0x14: + ret = voodoo->banshee_blt.dstFormat; + break; + case 0x34: + ret = voodoo->banshee_blt.srcBaseAddr; + break; + case 0x38: + ret = voodoo->banshee_blt.commandExtra; + break; + case 0x5c: + ret = voodoo->banshee_blt.srcXY; + break; + case 0x60: + ret = voodoo->banshee_blt.colorBack; + break; + case 0x64: + ret = voodoo->banshee_blt.colorFore; + break; + case 0x68: + ret = voodoo->banshee_blt.dstSize; + break; + case 0x6c: + ret = voodoo->banshee_blt.dstXY; + break; + case 0x70: + ret = voodoo->banshee_blt.command; + break; + default: + pclog("banshee_reg_readl: addr=%08x\n", addr); + } + break; + + case 0x0200000: case 0x0300000: case 0x0400000: case 0x0500000: /*3D registers*/ + switch (addr & 0x3fc) + { + case SST_status: + ret = banshee_status(banshee); + break; + + case SST_intrCtrl: + ret = banshee->intrCtrl & 0x0030003f; + break; + + case SST_fbzColorPath: + voodoo_flush(voodoo); + ret = voodoo->params.fbzColorPath; + break; + case SST_fogMode: + voodoo_flush(voodoo); + ret = voodoo->params.fogMode; + break; 
+ case SST_alphaMode: + voodoo_flush(voodoo); + ret = voodoo->params.alphaMode; + break; + case SST_fbzMode: + voodoo_flush(voodoo); + ret = voodoo->params.fbzMode; + break; + case SST_lfbMode: + voodoo_flush(voodoo); + ret = voodoo->lfbMode; + break; + case SST_clipLeftRight: + ret = voodoo->params.clipRight | (voodoo->params.clipLeft << 16); + break; + case SST_clipLowYHighY: + ret = voodoo->params.clipHighY | (voodoo->params.clipLowY << 16); + break; + + case SST_clipLeftRight1: + ret = voodoo->params.clipRight1 | (voodoo->params.clipLeft1 << 16); + break; + case SST_clipTopBottom1: + ret = voodoo->params.clipHighY1 | (voodoo->params.clipLowY1 << 16); + break; + + case SST_stipple: + voodoo_flush(voodoo); + ret = voodoo->params.stipple; + break; + case SST_color0: + voodoo_flush(voodoo); + ret = voodoo->params.color0; + break; + case SST_color1: + voodoo_flush(voodoo); + ret = voodoo->params.color1; + break; + + case SST_fbiPixelsIn: + ret = voodoo->fbiPixelsIn & 0xffffff; + break; + case SST_fbiChromaFail: + ret = voodoo->fbiChromaFail & 0xffffff; + break; + case SST_fbiZFuncFail: + ret = voodoo->fbiZFuncFail & 0xffffff; + break; + case SST_fbiAFuncFail: + ret = voodoo->fbiAFuncFail & 0xffffff; + break; + case SST_fbiPixelsOut: + ret = voodoo->fbiPixelsOut & 0xffffff; + break; + + default: + pclog("banshee_reg_readl: 3D addr=%08x\n", addr); + break; + } + break; + } + +// /*if (addr != 0xe0000000) */pclog("banshee_reg_readl: addr=%08x ret=%08x %04x(%08x):%08x\n", addr, ret, CS,cs,cpu_state.pc); +// if (cpu_state.pc == 0x1000e437) +// output = 3; + return ret; +} +

/*Byte writes to the register aperture are accepted and discarded*/
static void banshee_reg_write(uint32_t addr, uint8_t val, void *p)
{
        (void)addr;
        (void)val;
        (void)p;
}

/*
 * Word write to the register aperture.  Only the 3D LFB region
 * (0x1000000-0x1ffffff, i.e. bit 24 of the address set) accepts word
 * writes; they are queued as FIFO_WRITEW_FB commands.  Writes to every
 * other region are dropped after the write time has been charged.
 */
static void banshee_reg_writew(uint32_t addr, uint16_t val, void *p)
{
        banshee_t *banshee = (banshee_t *)p;
        voodoo_t *voodoo = banshee->voodoo;

        cycles -= voodoo->write_time;

        if (addr & 0x1000000) /*3D LFB*/
                voodoo_queue_command(voodoo, (addr & 0xffffff) | FIFO_WRITEW_FB, val);
}

/*
 * Recompute cmdfifo_end from cmdfifo_base and the low byte of cmdfifo_size,
 * which counts 4 KB units minus one.
 */
static void banshee_cmdfifo_update_end(voodoo_t *voodoo)
{
        voodoo->cmdfifo_end = voodoo->cmdfifo_base + (((voodoo->cmdfifo_size & 0xff) + 1) << 12);
}

/*
 * Write to one of the command FIFO control registers, selected by
 * addr & 0x1fc.
 *
 * Register offsets as noted by the original author:
 *   cmdBaseSize0 = 0x24, cmdBump0 = 0x28, cmdRdPtrL0 = 0x2c,
 *   cmdRdPtrH0 = 0x30, cmdAMin0 = 0x34, cmdAMax0 = 0x3c,
 *   cmdFifoDepth0 = 0x44, cmdHoleCnt0 = 0x48
 */
static void banshee_cmd_write(banshee_t *banshee, uint32_t addr, uint32_t val)
{
        voodoo_t *voodoo = banshee->voodoo;
        const uint32_t reg = addr & 0x1fc;

        switch (reg)
        {
                case cmdBaseAddr0:
                voodoo->cmdfifo_base = (val & 0xfff) << 12;
                banshee_cmdfifo_update_end(voodoo);
                break;

                case cmdBaseSize0:
                voodoo->cmdfifo_size = val;
                banshee_cmdfifo_update_end(voodoo);
                voodoo->cmdfifo_enabled = val & 0x100;
                if (!voodoo->cmdfifo_enabled)
                        voodoo->cmdfifo_in_sub = 0; /*Not sure exactly when this should be reset*/
                break;

                case cmdRdPtrL0:
                voodoo->cmdfifo_rp = val;
                break;

                case cmdAMin0:
                voodoo->cmdfifo_amin = val;
                break;

                case cmdAMax0:
                voodoo->cmdfifo_amax = val;
                break;

                case cmdFifoDepth0:
                /*Writing the depth restarts the read count from zero*/
                voodoo->cmdfifo_depth_rd = 0;
                voodoo->cmdfifo_depth_wr = val & 0xffff;
                break;

                default:
                pclog("Unknown banshee_cmd_write: addr=%08x val=%08x\n", addr, val);
                break;
        }
}

+static
void banshee_reg_writel(uint32_t addr, uint32_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + + if (addr == voodoo->last_write_addr+4) + cycles -= voodoo->burst_time; + else + cycles -= voodoo->write_time; + voodoo->last_write_addr = addr; + +// pclog("banshee_reg_writel: addr=%08x val=%08x\n", addr, val); + + switch (addr & 0x1f00000) + { + case 0x0000000: /*IO remap*/ + if (!(addr & 0x80000)) + banshee_ext_outl(addr & 0xff, val, banshee); + else + banshee_cmd_write(banshee, addr, val); +// pclog("CMD!!! write %08x %08x\n", addr, val); + break; + + case 0x0100000: /*2D registers*/ + voodoo_queue_command(voodoo, (addr & 0x1fc) | FIFO_WRITEL_2DREG, val); + break; + + case 0x0200000: case 0x0300000: case 0x0400000: case 0x0500000: /*3D registers*/ + switch (addr & 0x3fc) + { + case SST_intrCtrl: + banshee->intrCtrl = val & 0x0030003f; +// pclog("intrCtrl=%08x\n", val); + break; + + case SST_userIntrCMD: +#ifndef RELEASE_BUILD + fatal("userIntrCMD write %08x\n", val); +#endif + break; + + case SST_swapbufferCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); +// pclog("SST_swapbufferCMD write: %i %i\n", voodoo->cmd_written, voodoo->cmd_written_fifo); + break; + case SST_triangleCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_ftriangleCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_fastfillCMD: + voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + case SST_nopCMD: + 
voodoo->cmd_written++; + voodoo_queue_command(voodoo, (addr & 0x3fc) | FIFO_WRITEL_REG, val); + if (!voodoo->voodoo_busy) + voodoo_wake_fifo_threads(voodoo->set, voodoo); + break; + + case SST_swapPending: + thread_lock_mutex(voodoo->swap_mutex); + voodoo->swap_count++; + thread_unlock_mutex(voodoo->swap_mutex); +// voodoo->cmd_written++; + break; + + default: + voodoo_queue_command(voodoo, (addr & 0x3ffffc) | FIFO_WRITEL_REG, val); + break; + } + break; + + case 0x0600000: case 0x0700000: /*Texture download*/ + voodoo->tex_count++; + voodoo_queue_command(voodoo, (addr & 0x1ffffc) | FIFO_WRITEL_TEX, val); + break; + + case 0x1000000: case 0x1100000: case 0x1200000: case 0x1300000: /*3D LFB*/ + case 0x1400000: case 0x1500000: case 0x1600000: case 0x1700000: + case 0x1800000: case 0x1900000: case 0x1a00000: case 0x1b00000: + case 0x1c00000: case 0x1d00000: case 0x1e00000: case 0x1f00000: + voodoo_queue_command(voodoo, (addr & 0xfffffc) | FIFO_WRITEL_FB, val); + break; + } +} +

/*
 * Translate a linear-aperture address into a VRAM offset.
 *
 * The address is masked with svga->decode_mask.  Addresses at or above
 * voodoo->tile_base are then remapped through the tiled layout: tiles are
 * 128 bytes wide and 32 lines tall, stored as 128*32-byte units, with
 * voodoo->tile_x_real bytes between successive rows of tiles.
 */
static uint32_t banshee_linear_to_vram(const voodoo_t *voodoo, const svga_t *svga, uint32_t addr)
{
        addr &= svga->decode_mask;
        if (addr >= voodoo->tile_base)
        {
                int x, y;

                addr -= voodoo->tile_base;
                x = addr & (voodoo->tile_stride-1);
                y = addr >> voodoo->tile_stride_shift;

                addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real;
        }
        return addr;
}

/*
 * Byte read from the linear framebuffer aperture.  Charges the bus and
 * video read time to the CPU; an address beyond svga->vram_max reads back
 * as 0xff.
 */
static uint8_t banshee_read_linear(uint32_t addr, void *p)
{
        banshee_t *banshee = (banshee_t *)p;
        voodoo_t *voodoo = banshee->voodoo;
        svga_t *svga = &banshee->svga;

        cycles -= voodoo->read_time;
        cycles_lost += voodoo->read_time;

        addr = banshee_linear_to_vram(voodoo, svga, addr);
        if (addr >= svga->vram_max)
                return 0xff;

        egareads++;
        cycles -= video_timing_read_b;
        cycles_lost += video_timing_read_b;

        return svga->vram[addr & svga->vram_mask];
}

/*
 * Word read from the linear framebuffer aperture.  An odd address is split
 * into two byte reads; an address beyond svga->vram_max reads back as
 * 0xffff.
 */
static uint16_t banshee_read_linear_w(uint32_t addr, void *p)
{
        banshee_t *banshee = (banshee_t *)p;
        voodoo_t *voodoo = banshee->voodoo;
        svga_t *svga = &banshee->svga;

        if (addr & 1)
        {
                /*Low byte first, so the order of the two timed reads is defined*/
                uint16_t lo = banshee_read_linear(addr, p);
                uint16_t hi = banshee_read_linear(addr+1, p);

                return lo | (hi << 8);
        }

        cycles -= voodoo->read_time;
        cycles_lost += voodoo->read_time;

        addr = banshee_linear_to_vram(voodoo, svga, addr);
        if (addr >= svga->vram_max)
                return 0xffff; /*All 16 bits set, matching the byte reader's 0xff*/

        egareads++;
        cycles -= video_timing_read_w;
        cycles_lost += video_timing_read_w;

        return *(uint16_t *)&svga->vram[addr & svga->vram_mask];
}

/*
 * Dword read from the linear framebuffer aperture.  A non dword-aligned
 * address is split into two word reads; an address beyond svga->vram_max
 * reads back as 0xffffffff.
 */
static uint32_t banshee_read_linear_l(uint32_t addr, void *p)
{
        banshee_t *banshee = (banshee_t *)p;
        voodoo_t *voodoo = banshee->voodoo;
        svga_t *svga = &banshee->svga;

        if (addr & 3)
        {
                /*Widen before shifting: a uint16_t promotes to int, and
                  shifting it left by 16 overflows when bit 15 is set*/
                uint32_t lo = banshee_read_linear_w(addr, p);
                uint32_t hi = banshee_read_linear_w(addr+2, p);

                return lo | (hi << 16);
        }

        cycles -= voodoo->read_time;
        cycles_lost += voodoo->read_time;

        addr = banshee_linear_to_vram(voodoo, svga, addr);
        if (addr >= svga->vram_max)
                return 0xffffffff; /*All 32 bits set, matching the byte reader's 0xff*/

        egareads++;
        cycles -= video_timing_read_l;
        cycles_lost += video_timing_read_l;

        return *(uint32_t *)&svga->vram[addr & svga->vram_mask];
}

/*
 * Byte write to the linear framebuffer aperture.  Writes beyond
 * svga->vram_max are dropped; otherwise the 4 KB page containing the
 * translated address is stamped with the current change frame count.
 */
static void banshee_write_linear(uint32_t addr, uint8_t val, void *p)
{
        banshee_t *banshee = (banshee_t *)p;
        voodoo_t *voodoo = banshee->voodoo;
        svga_t *svga = &banshee->svga;

        cycles -= voodoo->write_time;
        cycles_lost += voodoo->write_time;

        addr = banshee_linear_to_vram(voodoo, svga, addr);
        if (addr >= svga->vram_max)
                return;

        egawrites++;

        cycles -= video_timing_write_b;
        cycles_lost += video_timing_write_b;

        svga->changedvram[addr >> 12] = changeframecount;
        svga->vram[addr & svga->vram_mask] = val;
}

/*
 * Word write to the linear framebuffer aperture.  An odd address is split
 * into two byte writes, low byte first.
 */
static void banshee_write_linear_w(uint32_t addr, uint16_t val, void *p)
{
        banshee_t *banshee = (banshee_t *)p;
        voodoo_t *voodoo = banshee->voodoo;
        svga_t *svga = &banshee->svga;

        if (addr & 1)
        {
                banshee_write_linear(addr, val, p);
                banshee_write_linear(addr + 1, val >> 8, p);
                return;
        }

        cycles -= voodoo->write_time;
        cycles_lost += voodoo->write_time;

        addr = banshee_linear_to_vram(voodoo, svga, addr);
        if (addr >= svga->vram_max)
                return;

        egawrites++;

        cycles -= video_timing_write_w;
        cycles_lost += video_timing_write_w;

        svga->changedvram[addr >> 12] = changeframecount;
        *(uint16_t *)&svga->vram[addr & svga->vram_mask] = val;
}

+static void banshee_write_linear_l(uint32_t addr, uint32_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p;
+ voodoo_t *voodoo = banshee->voodoo; + svga_t *svga = &banshee->svga; + int timing; + + if (addr & 3) + { + banshee_write_linear_w(addr, val, p); + banshee_write_linear_w(addr + 2, val >> 16, p); + return; + } + + if (addr == voodoo->last_write_addr+4) + timing = voodoo->burst_time; + else + timing = voodoo->write_time; + cycles -= timing; + cycles_lost += timing; + voodoo->last_write_addr = addr; + +// /*if (val) */pclog("write_linear_l: addr=%08x val=%08x %08x\n", addr, val, voodoo->tile_base); + addr &= svga->decode_mask; + if (addr >= voodoo->tile_base) + { + int x, y; + + addr -= voodoo->tile_base; + x = addr & (voodoo->tile_stride-1); + y = addr >> voodoo->tile_stride_shift; + + addr = voodoo->tile_base + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*voodoo->tile_x_real; +// pclog(" Tile %08x->%08x->%08x->%08x %i %i tile_x=%i\n", old_addr, addr_off, addr2, addr, x, y, voodoo->tile_x_real); + } + + if (addr >= svga->vram_max) + return; + + egawrites += 4; + + cycles -= video_timing_write_l; + cycles_lost += video_timing_write_l; + + svga->changedvram[addr >> 12] = changeframecount; + *(uint32_t *)&svga->vram[addr & svga->vram_mask] = val; + if (voodoo->cmdfifo_enabled && addr >= voodoo->cmdfifo_base && addr < voodoo->cmdfifo_end) + { +// pclog("CMDFIFO write %08x %08x old amin=%08x amax=%08x hlcnt=%i depth_wr=%i rp=%08x\n", addr, val, voodoo->cmdfifo_amin, voodoo->cmdfifo_amax, voodoo->cmdfifo_holecount, voodoo->cmdfifo_depth_wr, voodoo->cmdfifo_rp); + if (addr == voodoo->cmdfifo_base && !voodoo->cmdfifo_holecount) + { +// if (voodoo->cmdfifo_holecount) +// fatal("CMDFIFO reset pointers while outstanding holes\n"); + /*Reset pointers*/ + voodoo->cmdfifo_amin = voodoo->cmdfifo_base; + voodoo->cmdfifo_amax = voodoo->cmdfifo_base; + voodoo->cmdfifo_depth_wr++; + voodoo_wake_fifo_thread(voodoo); + } + else if (voodoo->cmdfifo_holecount) + { +// if ((addr <= voodoo->cmdfifo_amin && voodoo->cmdfifo_amin != -4) || addr >= voodoo->cmdfifo_amax) +// 
fatal("CMDFIFO holecount write outside of amin/amax - amin=%08x amax=%08x holecount=%i\n", voodoo->cmdfifo_amin, voodoo->cmdfifo_amax, voodoo->cmdfifo_holecount); +// pclog("holecount %i\n", voodoo->cmdfifo_holecount); + voodoo->cmdfifo_holecount--; + if (!voodoo->cmdfifo_holecount) + { + /*Filled in holes, resume normal operation*/ + voodoo->cmdfifo_depth_wr += ((voodoo->cmdfifo_amax - voodoo->cmdfifo_amin) >> 2); + voodoo->cmdfifo_amin = voodoo->cmdfifo_amax; + voodoo_wake_fifo_thread(voodoo); +// pclog("hole filled! amin=%08x amax=%08x added %i words\n", voodoo->cmdfifo_amin, voodoo->cmdfifo_amax, words_to_add); + } + } + else if (addr == voodoo->cmdfifo_amax+4) + { + /*In-order write*/ + voodoo->cmdfifo_amin = addr; + voodoo->cmdfifo_amax = addr; + voodoo->cmdfifo_depth_wr++; + voodoo_wake_fifo_thread(voodoo); + } + else + { + /*Out-of-order write*/ + if (addr < voodoo->cmdfifo_amin) + { + /*Reset back to start. Note that write is still out of order!*/ + voodoo->cmdfifo_amin = voodoo->cmdfifo_base-4; + + } +// else if (addr < voodoo->cmdfifo_amax) +// fatal("Out-of-order write really out of order\n"); + voodoo->cmdfifo_amax = addr; + voodoo->cmdfifo_holecount = ((voodoo->cmdfifo_amax - voodoo->cmdfifo_amin) >> 2) - 1; +// pclog("CMDFIFO out of order: amin=%08x amax=%08x holecount=%i\n", voodoo->cmdfifo_amin, voodoo->cmdfifo_amax, voodoo->cmdfifo_holecount); + } + } +} + +void banshee_hwcursor_draw(svga_t *svga, int displine) +{ + banshee_t *banshee = (banshee_t *)svga->p; + int x, c; + int x_off; + uint32_t col0 = banshee->hwCurC0; + uint32_t col1 = banshee->hwCurC1; + uint8_t plane0[8], plane1[8]; + + for (c = 0; c < 8; c++) + plane0[c] = svga->vram[svga->hwcursor_latch.addr + c]; + for (c = 0; c < 8; c++) + plane1[c] = svga->vram[svga->hwcursor_latch.addr + c + 8]; + svga->hwcursor_latch.addr += 16; + + x_off = svga->hwcursor_latch.x; + + if (banshee->vidProcCfg & VIDPROCCFG_CURSOR_MODE) + { + /*X11 mode*/ + for (x = 0; x < 64; x += 8) + { + if (x_off > 
(32-8)) + { + int xx; + + for (xx = 0; xx < 8; xx++) + { + if (plane0[x >> 3] & (1 << 7)) + ((uint32_t *)buffer32->line[displine])[x_off + xx] = (plane1[x >> 3] & (1 << 7)) ? col1 : col0; + + plane0[x >> 3] <<= 1; + plane1[x >> 3] <<= 1; + } + } + + x_off += 8; + } + } + else + { + /*Windows mode*/ + for (x = 0; x < 64; x += 8) + { + if (x_off > (32-8)) + { + int xx; + + for (xx = 0; xx < 8; xx++) + { + if (!(plane0[x >> 3] & (1 << 7))) + ((uint32_t *)buffer32->line[displine])[x_off + xx] = (plane1[x >> 3] & (1 << 7)) ? col1 : col0; + else if (plane1[x >> 3] & (1 << 7)) + ((uint32_t *)buffer32->line[displine])[x_off + xx] ^= 0xffffff; + + plane0[x >> 3] <<= 1; + plane1[x >> 3] <<= 1; + } + } + + x_off += 8; + } + } +} + +#define CLAMP(x) do \ + { \ + if ((x) & ~0xff) \ + x = ((x) < 0) ? 0 : 0xff; \ + } \ + while (0) + +#define DECODE_RGB565(buf) \ + do \ + { \ + int c; \ + int wp = 0; \ + \ + for (c = 0; c < voodoo->overlay.overlay_bytes; c += 2) \ + { \ + uint16_t data = *(uint16_t *)src; \ + int r = data & 0x1f; \ + int g = (data >> 5) & 0x3f; \ + int b = data >> 11; \ + \ + if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_CLUT_BYPASS) \ + buf[wp++] = (r << 3) | (g << 10) | (b << 19); \ + else \ + buf[wp++] = (clut[r << 3] & 0x0000ff) | \ + (clut[g << 2] & 0x00ff00) | \ + (clut[b << 3] & 0xff0000); \ + src += 2; \ + } \ + } while (0) + +#define DECODE_RGB565_TILED(buf) \ + do \ + { \ + int c; \ + int wp = 0; \ + uint32_t base_addr = (buf == banshee->overlay_buffer[1]) ? 
src_addr2 : src_addr; \ + \ + for (c = 0; c < voodoo->overlay.overlay_bytes; c += 2) \ + { \ + uint16_t data = *(uint16_t *)&svga->vram[(base_addr + (c & 127) + (c >> 7)*128*32) & svga->vram_mask]; \ + int r = data & 0x1f; \ + int g = (data >> 5) & 0x3f; \ + int b = data >> 11; \ + \ + if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_CLUT_BYPASS) \ + buf[wp++] = (r << 3) | (g << 10) | (b << 19); \ + else \ + buf[wp++] = (clut[r << 3] & 0x0000ff) | \ + (clut[g << 2] & 0x00ff00) | \ + (clut[b << 3] & 0xff0000); \ + } \ + } while (0) + +#define DECODE_YUYV422(buf) \ + do \ + { \ + int c; \ + int wp = 0; \ + \ + for (c = 0; c < voodoo->overlay.overlay_bytes; c += 4) \ + { \ + uint8_t y1, y2; \ + int8_t Cr, Cb; \ + int dR, dG, dB; \ + int r, g, b; \ + \ + y1 = src[0]; \ + Cr = src[1] - 0x80; \ + y2 = src[2]; \ + Cb = src[3] - 0x80; \ + src += 4; \ + \ + dR = (359*Cr) >> 8; \ + dG = (88*Cb + 183*Cr) >> 8; \ + dB = (453*Cb) >> 8; \ + \ + r = y1 + dR; \ + CLAMP(r); \ + g = y1 - dG; \ + CLAMP(g); \ + b = y1 + dB; \ + CLAMP(b); \ + buf[wp++] = r | (g << 8) | (b << 16); \ + \ + r = y2 + dR; \ + CLAMP(r); \ + g = y2 - dG; \ + CLAMP(g); \ + b = y2 + dB; \ + CLAMP(b); \ + buf[wp++] = r | (g << 8) | (b << 16); \ + } \ + } while (0) + +#define DECODE_UYUV422(buf) \ + do \ + { \ + int c; \ + int wp = 0; \ + \ + for (c = 0; c < voodoo->overlay.overlay_bytes; c += 4) \ + { \ + uint8_t y1, y2; \ + int8_t Cr, Cb; \ + int dR, dG, dB; \ + int r, g, b; \ + \ + Cr = src[0] - 0x80; \ + y1 = src[1]; \ + Cb = src[2] - 0x80; \ + y2 = src[3]; \ + src += 4; \ + \ + dR = (359*Cr) >> 8; \ + dG = (88*Cb + 183*Cr) >> 8; \ + dB = (453*Cb) >> 8; \ + \ + r = y1 + dR; \ + CLAMP(r); \ + g = y1 - dG; \ + CLAMP(g); \ + b = y1 + dB; \ + CLAMP(b); \ + buf[wp++] = r | (g << 8) | (b << 16); \ + \ + r = y2 + dR; \ + CLAMP(r); \ + g = y2 - dG; \ + CLAMP(g); \ + b = y2 + dB; \ + CLAMP(b); \ + buf[wp++] = r | (g << 8) | (b << 16); \ + } \ + } while (0) + + +#define OVERLAY_SAMPLE(buf) \ + do \ + { \ + switch 
(banshee->overlay_pix_fmt) \ + { \ + case 0: \ + break; \ + \ + case OVERLAY_FMT_YUYV422: \ + DECODE_YUYV422(buf); \ + break; \ + \ + case OVERLAY_FMT_UYVY422: \ + DECODE_UYUV422(buf); \ + break; \ + \ + case OVERLAY_FMT_565: \ + case OVERLAY_FMT_565_DITHER: \ + if (banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) \ + DECODE_RGB565_TILED(buf); \ + else \ + DECODE_RGB565(buf); \ + break; \ + } \ + } while (0) + +/* generate both filters for the static table here */ +void voodoo_generate_vb_filters(voodoo_t *voodoo, int fcr, int fcg) +{ + int g, h; + float difference, diffg; + float thiscol, thiscolg; + float clr, clg = 0; + float hack = 1.0f; + // pre-clamping + + fcr *= hack; + fcg *= hack; + + + /* box prefilter */ + for (g=0;g<256;g++) // pixel 1 - our target pixel we want to bleed into + { + for (h=0;h<256;h++) // pixel 2 - our main pixel + { + float avg; + float avgdiff; + + difference = (float)(g - h); + avg = g; + avgdiff = avg - h; + + avgdiff = avgdiff * 0.75f; + if (avgdiff < 0) avgdiff *= -1; + if (difference < 0) difference *= -1; + + thiscol = thiscolg = g; + + if (h > g) + { + clr = clg = avgdiff; + + if (clr>fcr) clr=fcr; + if (clg>fcg) clg=fcg; + + thiscol = g; + thiscolg = g; + + if (thiscol>g+fcr) + thiscol=g+fcr; + if (thiscolg>g+fcg) + thiscolg=g+fcg; + + if (thiscol>g+difference) + thiscol=g+difference; + if (thiscolg>g+difference) + thiscolg=g+difference; + + // hmm this might not be working out.. 
+ int ugh = g - h; + if (ugh < fcr) + thiscol = h; + if (ugh < fcg) + thiscolg = h; + } + + if (difference > fcr) + thiscol = g; + if (difference > fcg) + thiscolg = g; + + // clamp + if (thiscol < 0) thiscol = 0; + if (thiscolg < 0) thiscolg = 0; + + if (thiscol > 255) thiscol = 255; + if (thiscolg > 255) thiscolg = 255; + + vb_filter_bx_rb[g][h] = (thiscol); + vb_filter_bx_g [g][h] = (thiscolg); + + } + float lined = g + 4; + if (lined > 255) + lined = 255; + voodoo->purpleline[g][0] = lined; + voodoo->purpleline[g][2] = lined; + + lined = g + 0; + if (lined > 255) + lined = 255; + voodoo->purpleline[g][1] = lined; + } + + /* 4x1 and 2x2 filter */ + //fcr *= 5; + //fcg *= 6; + + for (g=0;g<256;g++) // pixel 1 + { + for (h=0;h<256;h++) // pixel 2 + { + difference = (float)(h - g); + diffg = difference; + + thiscol = thiscolg = g; + + if (difference > fcr) + difference = fcr; + if (difference < -fcr) + difference = -fcr; + + if (diffg > fcg) + diffg = fcg; + if (diffg < -fcg) + diffg = -fcg; + + if ((difference < fcr) || (-difference > -fcr)) + thiscol = g + (difference / 2); + if ((diffg < fcg) || (-diffg > -fcg)) + thiscolg = g + (diffg / 2); + + if (thiscol < 0) + thiscol = 0; + if (thiscol > 255) + thiscol = 255; + + if (thiscolg < 0) + thiscolg = 0; + if (thiscolg > 255) + thiscolg = 255; + + vb_filter_v1_rb[g][h] = thiscol; + vb_filter_v1_g [g][h] = thiscolg; + + } + } + +} + + +static void banshee_overlay_draw(svga_t *svga, int displine) +{ + banshee_t *banshee = (banshee_t *)svga->p; + voodoo_t *voodoo = banshee->voodoo; + uint32_t *p; + int x; + int y = voodoo->overlay.src_y >> 20; + uint32_t src_addr = svga->overlay_latch.addr + ((banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) ? + ((y & 31) * 128 + (y >> 5) * svga->overlay_latch.pitch) : + y * svga->overlay_latch.pitch); + uint32_t src_addr2 = svga->overlay_latch.addr + ((banshee->vidProcCfg & VIDPROCCFG_OVERLAY_TILE) ? 
+ (((y + 1) & 31) * 128 + ((y + 1) >> 5) * svga->overlay_latch.pitch) : + (y + 1) * svga->overlay_latch.pitch); + uint8_t *src = &svga->vram[src_addr & svga->vram_mask]; + uint32_t src_x = 0; + unsigned int y_coeff = (voodoo->overlay.src_y & 0xfffff) >> 4; + int skip_filtering; + uint32_t *clut = &svga->pallook[(banshee->vidProcCfg & VIDPROCCFG_OVERLAY_CLUT_SEL) ? 256 : 0]; + + if (svga->render == svga_render_null && + !svga->changedvram[src_addr >> 12] && !svga->changedvram[src_addr2 >> 12] && + !svga->fullchange && + ((voodoo->overlay.src_y >> 20) < 2048 && !voodoo->dirty_line[voodoo->overlay.src_y >> 20]) && + !(banshee->vidProcCfg & VIDPROCCFG_V_SCALE_ENABLE)) + { + voodoo->overlay.src_y += (1 << 20); + return; + } + + if ((voodoo->overlay.src_y >> 20) < 2048) + voodoo->dirty_line[voodoo->overlay.src_y >> 20] = 0; +// pclog("displine=%i addr=%08x %08x %08x %08x\n", displine, svga->overlay_latch.addr, src_addr, voodoo->overlay.vidOverlayDvdy, *(uint32_t *)src); +// if (src_addr >= 0x800000) +// fatal("overlay out of range!\n"); + p = &((uint32_t *)buffer32->line[displine])[svga->overlay_latch.x + 32]; + + if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled) + skip_filtering = ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) != VIDPROCCFG_FILTER_MODE_BILINEAR && + !(banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) && !(banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_DITHER_4X4) && + !(banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_DITHER_2X2)); + else + skip_filtering = ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) != VIDPROCCFG_FILTER_MODE_BILINEAR && + !(banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE)); + + if (skip_filtering) + { + /*No scaling or filtering required, just write straight to output buffer*/ + OVERLAY_SAMPLE(p); + } + else + { + OVERLAY_SAMPLE(banshee->overlay_buffer[0]); + + switch (banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) + { + case VIDPROCCFG_FILTER_MODE_BILINEAR: + src = &svga->vram[src_addr2 & 
svga->vram_mask]; + OVERLAY_SAMPLE(banshee->overlay_buffer[1]); + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + unsigned int x_coeff = (src_x & 0xfffff) >> 4; + unsigned int coeffs[4] = { + ((0x10000 - x_coeff) * (0x10000 - y_coeff)) >> 16, + ( x_coeff * (0x10000 - y_coeff)) >> 16, + ((0x10000 - x_coeff) * y_coeff) >> 16, + ( x_coeff * y_coeff) >> 16 + }; + uint32_t samp0 = banshee->overlay_buffer[0][src_x >> 20]; + uint32_t samp1 = banshee->overlay_buffer[0][(src_x >> 20) + 1]; + uint32_t samp2 = banshee->overlay_buffer[1][src_x >> 20]; + uint32_t samp3 = banshee->overlay_buffer[1][(src_x >> 20) + 1]; + int r = (((samp0 >> 16) & 0xff) * coeffs[0] + + ((samp1 >> 16) & 0xff) * coeffs[1] + + ((samp2 >> 16) & 0xff) * coeffs[2] + + ((samp3 >> 16) & 0xff) * coeffs[3]) >> 16; + int g = (((samp0 >> 8) & 0xff) * coeffs[0] + + ((samp1 >> 8) & 0xff) * coeffs[1] + + ((samp2 >> 8) & 0xff) * coeffs[2] + + ((samp3 >> 8) & 0xff) * coeffs[3]) >> 16; + int b = ((samp0 & 0xff) * coeffs[0] + + (samp1 & 0xff) * coeffs[1] + + (samp2 & 0xff) * coeffs[2] + + (samp3 & 0xff) * coeffs[3]) >> 16; + p[x] = (r << 16) | (g << 8) | b; + + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + uint32_t samp0 = banshee->overlay_buffer[0][src_x >> 20]; + uint32_t samp1 = banshee->overlay_buffer[1][src_x >> 20]; + int r = (((samp0 >> 16) & 0xff) * (0x10000 - y_coeff) + + ((samp1 >> 16) & 0xff) * y_coeff) >> 16; + int g = (((samp0 >> 8) & 0xff) * (0x10000 - y_coeff) + + ((samp1 >> 8) & 0xff) * y_coeff) >> 16; + int b = ((samp0 & 0xff) * (0x10000 - y_coeff) + + (samp1 & 0xff) * y_coeff) >> 16; + p[x] = (r << 16) | (g << 8) | b; + } + } + break; + + case VIDPROCCFG_FILTER_MODE_DITHER_4X4: + if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled) + { + uint8_t fil[(svga->overlay_latch.xsize) * 3]; + uint8_t fil3[(svga->overlay_latch.xsize) * 3]; + + if 
(banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) /* leilei HACK - don't know of real 4x1 hscaled behavior yet, double for now */ + { + for (x=0; xoverlay_latch.xsize;x++) + { + fil[x*3] = ((banshee->overlay_buffer[0][src_x >> 20])); + fil[x*3+1] = ((banshee->overlay_buffer[0][src_x >> 20] >> 8)); + fil[x*3+2] = ((banshee->overlay_buffer[0][src_x >> 20] >> 16)); + fil3[x*3+0] = fil[x*3+0]; + fil3[x*3+1] = fil[x*3+1]; + fil3[x*3+2] = fil[x*3+2]; + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x=0; xoverlay_latch.xsize;x++) + { + fil[x*3] = ((banshee->overlay_buffer[0][x])); + fil[x*3+1] = ((banshee->overlay_buffer[0][x] >> 8)); + fil[x*3+2] = ((banshee->overlay_buffer[0][x] >> 16)); + fil3[x*3+0] = fil[x*3+0]; + fil3[x*3+1] = fil[x*3+1]; + fil3[x*3+2] = fil[x*3+2]; + } + } + if (y % 2 == 0) + { + for (x=0; xoverlay_latch.xsize;x++) + { + fil[x*3] = banshee->voodoo->purpleline[fil[x*3+0]][0]; + fil[x*3+1] = banshee->voodoo->purpleline[fil[x*3+1]][1]; + fil[x*3+2] = banshee->voodoo->purpleline[fil[x*3+2]][2]; + } + } + + for (x=1; xoverlay_latch.xsize;x++) + { + fil3[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil[(x-1) *3]]; + fil3[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil[(x-1) *3+1]]; + fil3[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil[(x-1) *3+2]]; + } + for (x=1; xoverlay_latch.xsize;x++) + { + fil[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil3[(x-1) *3]]; + fil[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil3[(x-1) *3+1]]; + fil[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil3[(x-1) *3+2]]; + } + for (x=1; xoverlay_latch.xsize;x++) + { + fil3[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil[(x-1) *3]]; + fil3[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil[(x-1) *3+1]]; + fil3[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil[(x-1) *3+2]]; + } + for (x=0; xoverlay_latch.xsize;x++) + { + fil[(x)*3] = vb_filter_v1_rb [fil[x*3]] [fil3[(x+1) *3]]; + fil[(x)*3+1] = vb_filter_v1_g [fil[x*3+1]][fil3[(x+1) *3+1]]; + fil[(x)*3+2] = vb_filter_v1_rb [fil[x*3+2]] [fil3[(x+1) *3+2]]; + p[x] 
= (fil[x*3+2] << 16) | (fil[x*3+1] << 8) | fil[x*3]; + } + } + else /* filter disabled by emulator option */ + { + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + p[x] = banshee->overlay_buffer[0][src_x >> 20]; + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + p[x] = banshee->overlay_buffer[0][x]; + } + } + break; + + case VIDPROCCFG_FILTER_MODE_DITHER_2X2: + if (banshee->voodoo->scrfilter && banshee->voodoo->scrfilterEnabled) + { + uint8_t fil[(svga->overlay_latch.xsize) * 3]; + uint8_t soak[(svga->overlay_latch.xsize) * 3]; + uint8_t soak2[(svga->overlay_latch.xsize) * 3]; + + uint8_t samp1[(svga->overlay_latch.xsize) * 3]; + uint8_t samp2[(svga->overlay_latch.xsize) * 3]; + uint8_t samp3[(svga->overlay_latch.xsize) * 3]; + uint8_t samp4[(svga->overlay_latch.xsize) * 3]; + + src = &svga->vram[src_addr2 & svga->vram_mask]; + OVERLAY_SAMPLE(banshee->overlay_buffer[1]); + for (x=0; xoverlay_latch.xsize;x++) + { + samp1[x*3] = ((banshee->overlay_buffer[0][x])); + samp1[x*3+1] = ((banshee->overlay_buffer[0][x] >> 8)); + samp1[x*3+2] = ((banshee->overlay_buffer[0][x] >> 16)); + + samp2[x*3+0] = ((banshee->overlay_buffer[0][x+1])); + samp2[x*3+1] = ((banshee->overlay_buffer[0][x+1] >> 8)); + samp2[x*3+2] = ((banshee->overlay_buffer[0][x+1] >> 16)); + + samp3[x*3+0] = ((banshee->overlay_buffer[1][x])); + samp3[x*3+1] = ((banshee->overlay_buffer[1][x] >> 8)); + samp3[x*3+2] = ((banshee->overlay_buffer[1][x] >> 16)); + + samp4[x*3+0] = ((banshee->overlay_buffer[1][x+1])); + samp4[x*3+1] = ((banshee->overlay_buffer[1][x+1] >> 8)); + samp4[x*3+2] = ((banshee->overlay_buffer[1][x+1] >> 16)); + + /* sample two lines */ + + soak[x*3+0] = vb_filter_bx_rb [samp1[x*3+0]] [samp2[x*3+0]]; + soak[x*3+1] = vb_filter_bx_g [samp1[x*3+1]] [samp2[x*3+1]]; + soak[x*3+2] = vb_filter_bx_rb [samp1[x*3+2]] [samp2[x*3+2]]; + + soak2[x*3+0] = 
vb_filter_bx_rb[samp3[x*3+0]] [samp4[x*3+0]]; + soak2[x*3+1] = vb_filter_bx_g [samp3[x*3+1]] [samp4[x*3+1]]; + soak2[x*3+2] = vb_filter_bx_rb[samp3[x*3+2]] [samp4[x*3+2]]; + + /* then pour it on the rest */ + + fil[x*3+0] = vb_filter_v1_rb[soak[x*3+0]] [soak2[x*3+0]]; + fil[x*3+1] = vb_filter_v1_g [soak[x*3+1]] [soak2[x*3+1]]; + fil[x*3+2] = vb_filter_v1_rb[soak[x*3+2]] [soak2[x*3+2]]; + } + + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) /* 2x2 on a scaled low res */ + { + for (x=0; xoverlay_latch.xsize;x++) + { + p[x] = (fil[(src_x >> 20)*3+2] << 16) | (fil[(src_x >> 20)*3+1] << 8) | fil[(src_x >> 20)*3]; + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x=0; xoverlay_latch.xsize;x++) + { + p[x] = (fil[x*3+2] << 16) | (fil[x*3+1] << 8) | fil[x*3]; + } + } + } + else /* filter disabled by emulator option */ + { + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + p[x] = banshee->overlay_buffer[0][src_x >> 20]; + + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + p[x] = banshee->overlay_buffer[0][x]; + } + } + break; + + case VIDPROCCFG_FILTER_MODE_POINT: + default: + if (banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE) + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + { + p[x] = banshee->overlay_buffer[0][src_x >> 20]; + + src_x += voodoo->overlay.vidOverlayDudx; + } + } + else + { + for (x = 0; x < svga->overlay_latch.xsize; x++) + p[x] = banshee->overlay_buffer[0][x]; + } + break; + } + } + + if (banshee->vidProcCfg & VIDPROCCFG_V_SCALE_ENABLE) + voodoo->overlay.src_y += voodoo->overlay.vidOverlayDvdy; + else + voodoo->overlay.src_y += (1 << 20); +} + +void banshee_set_overlay_addr(void *p, uint32_t addr) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + + banshee->svga.overlay.addr = banshee->voodoo->leftOverlayBuf & 0xfffffff; + banshee->svga.overlay_latch.addr = 
banshee->voodoo->leftOverlayBuf & 0xfffffff; + memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line)); +} +/* Vertical-retrace hook, installed as svga.vsync_callback by banshee_init_common(). Counts the retrace, completes a pending buffer swap once retrace_count exceeds swap_interval, and always rewinds the overlay source row and the desktop scan-out position for the next frame. */ +static void banshee_vsync_callback(svga_t *svga) +{ + banshee_t *banshee = (banshee_t *)svga->p; + voodoo_t *voodoo = banshee->voodoo; + + voodoo->retrace_count++; + thread_lock_mutex(voodoo->swap_mutex); /* swap_pending and swap_count are tested and updated below with swap_mutex held */ + if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval)) + { + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->swap_pending = 0; + thread_unlock_mutex(voodoo->swap_mutex); + /* swap accepted: mark every overlay line dirty and restart the retrace counter */ + memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line)); + voodoo->retrace_count = 0; + banshee_set_overlay_addr(banshee, voodoo->swap_offset); /* NOTE(review): banshee_set_overlay_addr() ignores this argument and latches leftOverlayBuf instead */ + thread_set_event(voodoo->wake_fifo_thread); + voodoo->frame_count++; + } + else + thread_unlock_mutex(voodoo->swap_mutex); + /* start of a new frame: rewind overlay and desktop scan-out state */ + voodoo->overlay.src_y = 0; + banshee->desktop_addr = banshee->vidDesktopStartAddr; + banshee->desktop_y = 0; +} +/* PCI configuration-space byte read handler (passed to pci_add by banshee_init_common). Any function other than 0 reads as 0xff; offsets without a case below read as 0. */ +static uint8_t banshee_pci_read(int func, int addr, void *p) +{ + banshee_t *banshee = (banshee_t *)p; +// svga_t *svga = &banshee->svga; + uint8_t ret = 0; + + if (func) + return 0xff; +// pclog("Banshee PCI read %08X ", addr); + switch (addr) + { + case 0x00: ret = 0x1a; break; /*3DFX*/ + case 0x01: ret = 0x12; break; + + case 0x02: ret = (banshee->type == TYPE_BANSHEE) ? 0x03 : 0x05; break; /* value depends on the board type */ + case 0x03: ret = 0x00; break; + + case 0x04: ret = banshee->pci_regs[0x04] & 0x27; break; + + case 0x07: ret = banshee->pci_regs[0x07] & 0x36; break; /* NOTE(review): banshee_pci_write() stores this register masked with 0x3e - confirm which mask is intended */ + + case 0x08: ret = (banshee->type == TYPE_BANSHEE) ? 
3 : 1; break; /*Revision ID*/ + case 0x09: ret = 0; break; /*Programming interface*/ + + case 0x0a: ret = 0x00; break; /*Supports VGA interface*/ + case 0x0b: ret = 0x03; /*output = 3; */break; + + case 0x0d: ret = banshee->pci_regs[0x0d] & 0xf8; break; + + case 0x10: ret = 0x00; break; /*memBaseAddr0*/ + case 0x11: ret = 0x00; break; + case 0x12: ret = 0x00; break; + case 0x13: ret = banshee->memBaseAddr0 >> 24; break; /* only the top byte of the base address is exposed */ + + case 0x14: ret = 0x00; break; /*memBaseAddr1*/ + case 0x15: ret = 0x00; break; + case 0x16: ret = 0x00; break; + case 0x17: ret = banshee->memBaseAddr1 >> 24; break; + + case 0x18: ret = 0x01; break; /*ioBaseAddr*/ + case 0x19: ret = banshee->ioBaseAddr >> 8; break; + case 0x1a: ret = 0x00; break; + case 0x1b: ret = 0x00; break; + + /*Subsystem vendor ID*/ + case 0x2c: ret = banshee->pci_regs[0x2c]; break; + case 0x2d: ret = banshee->pci_regs[0x2d]; break; + case 0x2e: ret = banshee->pci_regs[0x2e]; break; + case 0x2f: ret = banshee->pci_regs[0x2f]; break; + + case 0x30: ret = banshee->pci_regs[0x30] & 0x01; break; /*BIOS ROM address*/ + case 0x31: ret = 0x00; break; + case 0x32: ret = banshee->pci_regs[0x32]; break; + case 0x33: ret = banshee->pci_regs[0x33]; break; + + case 0x3c: ret = banshee->pci_regs[0x3c]; break; + + case 0x3d: ret = 0x01; break; /*INTA*/ + + case 0x3e: ret = 0x04; break; + case 0x3f: ret = 0xff; break; + + } +// pclog("%02X\n", ret); + return ret; +} +/* PCI configuration-space byte write handler (passed to pci_add by banshee_init_common). Writes to any function other than 0, and to the offsets listed in the first case group, are ignored. Other writes may re-register I/O handlers, remap memory or enable/disable the BIOS ROM mapping as a side effect. */ +static void banshee_pci_write(int func, int addr, uint8_t val, void *p) +{ + banshee_t *banshee = (banshee_t *)p; +// svga_t *svga = &banshee->svga; + + if (func) + return; +// pclog("Banshee write %08X %02X %04X:%08X\n", addr, val, CS, cpu_state.pc); + switch (addr) + { + case 0x00: case 0x01: case 0x02: case 0x03: + case 0x08: case 0x09: case 0x0a: case 0x0b: + case 0x3d: case 0x3e: case 0x3f: + return; + + case PCI_REG_COMMAND: + if (val & PCI_COMMAND_IO) + { + io_removehandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee); + if 
(banshee->ioBaseAddr) + io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + + io_sethandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee); + if (banshee->ioBaseAddr) + io_sethandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + } + else + { + io_removehandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee); + io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + } + banshee->pci_regs[PCI_REG_COMMAND] = val & 0x27; + banshee_updatemapping(banshee); + return; + case 0x07: + banshee->pci_regs[0x07] = val & 0x3e; + return; + case 0x0d: + banshee->pci_regs[0x0d] = val & 0xf8; + return; + + case 0x13: + banshee->memBaseAddr0 = (val & 0xfe) << 24; + banshee_updatemapping(banshee); + return; + + case 0x17: + banshee->memBaseAddr1 = (val & 0xfe) << 24; + banshee_updatemapping(banshee); + return; + + case 0x19: + if (banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_IO) + io_removehandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + banshee->ioBaseAddr = val << 8; + if ((banshee->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_IO) && banshee->ioBaseAddr) + io_sethandler(banshee->ioBaseAddr, 0x0100, banshee_ext_in, NULL, banshee_ext_inl, banshee_ext_out, NULL, banshee_ext_outl, banshee); + pclog("Banshee ioBaseAddr=%08x\n", banshee->ioBaseAddr); +// s3_virge_updatemapping(virge); + return; + + case 0x30: case 0x32: case 0x33: + banshee->pci_regs[addr] = val; + if (banshee->pci_regs[0x30] & 0x01) + { + uint32_t addr = (banshee->pci_regs[0x32] << 16) | (banshee->pci_regs[0x33] << 24); + pclog("Banshee bios_rom enabled at %08x\n", addr); + mem_mapping_set_addr(&banshee->bios_rom.mapping, addr, 0x10000); + 
mem_mapping_enable(&banshee->bios_rom.mapping); + } + else + { + pclog("Banshee bios_rom disabled\n"); + mem_mapping_disable(&banshee->bios_rom.mapping); + } + return; + case 0x3c: + banshee->pci_regs[0x3c] = val; + return; + } +} +/* Configuration options of the SGRAM board (referenced by voodoo_banshee_device): memory size, bilinear filtering, screen filter, render thread count and, unless NO_CODEGEN is defined, the recompiler switch. The last entry has .type = -1 and appears to terminate the list. */ +static device_config_t banshee_sgram_config[] = +{ + { + .name = "memory", + .description = "Memory size", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "8 MB", + .value = 8 + }, + { + .description = "16 MB", + .value = 16 + }, + { + .description = "" + } + }, + .default_int = 16 + }, + { + .name = "bilinear", + .description = "Bilinear filtering", + .type = CONFIG_BINARY, + .default_int = 1 + }, + { + .name = "dacfilter", + .description = "Screen Filter", + .type = CONFIG_BINARY, + .default_int = 0 + }, + { + .name = "render_threads", + .description = "Render threads", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "1", + .value = 1 + }, + { + .description = "2", + .value = 2 + }, + { + .description = "4", + .value = 4 + }, + { + .description = "" + } + }, + .default_int = 2 + }, +#ifndef NO_CODEGEN + { + .name = "recompiler", + .description = "Recompiler", + .type = CONFIG_BINARY, + .default_int = 1 + }, +#endif + { + .type = -1 + } +}; +/* Configuration options of the SDRAM boards (referenced by the Creative Banshee and Voodoo 3 devices): the same entries as above without the memory-size choice, since banshee_init_common() fixes those boards at 16 MB. */ +static device_config_t banshee_sdram_config[] = +{ + { + .name = "bilinear", + .description = "Bilinear filtering", + .type = CONFIG_BINARY, + .default_int = 1 + }, + { + .name = "dacfilter", + .description = "Screen Filter", + .type = CONFIG_BINARY, + .default_int = 0 + }, + { + .name = "render_threads", + .description = "Render threads", + .type = CONFIG_SELECTION, + .selection = + { + { + .description = "1", + .value = 1 + }, + { + .description = "2", + .value = 2 + }, + { + .description = "4", + .value = 4 + }, + { + .description = "" + } + }, + .default_int = 2 + }, +#ifndef NO_CODEGEN + { + .name = "recompiler", + .description = "Recompiler", + .type = CONFIG_BINARY, + .default_int = 1 + }, +#endif + { + .type = -1 + } +}; + +static void 
*banshee_init_common(char *fn, int has_sgram, int type, int voodoo_type) +{ /* Shared constructor for every Banshee / Voodoo 3 variant. fn: BIOS ROM file name; has_sgram: non-zero reads the "memory" config option, zero fixes the size at 16 MB; type: TYPE_* board id stored in banshee->type; voodoo_type: passed to voodoo_2d3d_card_init(). Returns the new zero-initialised banshee_t. NOTE(review): the malloc() result is used without a NULL check. */ + int mem_size; + banshee_t *banshee = malloc(sizeof(banshee_t)); + memset(banshee, 0, sizeof(banshee_t)); + + banshee->type = type; + + rom_init(&banshee->bios_rom, fn, 0xc0000, 0x10000, 0xffff, 0, MEM_MAPPING_EXTERNAL); + mem_mapping_disable(&banshee->bios_rom.mapping); /* the ROM mapping stays off until banshee_pci_write() enables it via register 0x30 */ + + if (has_sgram) + mem_size = device_get_config_int("memory"); + else + mem_size = 16; /*SDRAM Banshee only supports 16 MB*/ + + svga_init(&banshee->svga, banshee, mem_size << 20, + banshee_recalctimings, + banshee_in, banshee_out, + banshee_hwcursor_draw, + banshee_overlay_draw); + banshee->svga.vsync_callback = banshee_vsync_callback; + /* the three mappings below are added with 0, 0 as their second and third arguments; presumably they are placed later by banshee_updatemapping() - confirm */ + mem_mapping_add(&banshee->linear_mapping, 0, 0, banshee_read_linear, + banshee_read_linear_w, + banshee_read_linear_l, + banshee_write_linear, + banshee_write_linear_w, + banshee_write_linear_l, + NULL, + MEM_MAPPING_EXTERNAL, + &banshee->svga); + mem_mapping_add(&banshee->reg_mapping_low, 0, 0,banshee_reg_read, + banshee_reg_readw, + banshee_reg_readl, + banshee_reg_write, + banshee_reg_writew, + banshee_reg_writel, + NULL, + MEM_MAPPING_EXTERNAL, + banshee); + mem_mapping_add(&banshee->reg_mapping_high, 0,0,banshee_reg_read, + banshee_reg_readw, + banshee_reg_readl, + banshee_reg_write, + banshee_reg_writew, + banshee_reg_writel, + NULL, + MEM_MAPPING_EXTERNAL, + banshee); + +// io_sethandler(0x03c0, 0x0020, banshee_in, NULL, NULL, banshee_out, NULL, NULL, banshee); + + banshee->svga.bpp = 8; + banshee->svga.miscout = 1; + + banshee->dramInit0 = 1 << 27; + if (has_sgram && mem_size == 16) + banshee->dramInit0 |= (1 << 26); /*2xSGRAM = 16 MB*/ + if (!has_sgram) + banshee->dramInit1 = 1 << 30; /*SDRAM*/ + banshee->svga.decode_mask = 0x1ffffff; + + pci_add(banshee_pci_read, banshee_pci_write, banshee); + /* the Voodoo core shares the SVGA video memory for framebuffer and both texture units */ + banshee->voodoo = voodoo_2d3d_card_init(voodoo_type); + banshee->voodoo->p = banshee; + banshee->voodoo->vram = banshee->svga.vram; + banshee->voodoo->changedvram = 
banshee->svga.changedvram; + banshee->voodoo->fb_mem = banshee->svga.vram; + banshee->voodoo->fb_mask = banshee->svga.vram_mask; + banshee->voodoo->tex_mem[0] = banshee->svga.vram; + banshee->voodoo->tex_mem_w[0] = (uint16_t *)banshee->svga.vram; + banshee->voodoo->tex_mem[1] = banshee->svga.vram; + banshee->voodoo->tex_mem_w[1] = (uint16_t *)banshee->svga.vram; + banshee->voodoo->texture_mask = banshee->svga.vram_mask; + voodoo_generate_filter_v1(banshee->voodoo); + + banshee->vidSerialParallelPort = VIDSERIAL_DDC_DCK_W | VIDSERIAL_DDC_DDA_W; + + ddc_init(); + /* subsystem ID bytes; banshee_pci_read() returns them for offsets 0x2c-0x2f */ + switch (type) + { + case TYPE_BANSHEE: + if (has_sgram) + { + banshee->pci_regs[0x2c] = 0x1a; + banshee->pci_regs[0x2d] = 0x12; + banshee->pci_regs[0x2e] = 0x04; + banshee->pci_regs[0x2f] = 0x00; + } + else + { + banshee->pci_regs[0x2c] = 0x02; + banshee->pci_regs[0x2d] = 0x11; + banshee->pci_regs[0x2e] = 0x17; + banshee->pci_regs[0x2f] = 0x10; + } + break; + + case TYPE_V3_2000: + banshee->pci_regs[0x2c] = 0x1a; + banshee->pci_regs[0x2d] = 0x12; + banshee->pci_regs[0x2e] = 0x30; + banshee->pci_regs[0x2f] = 0x00; + break; + + case TYPE_V3_3000: + banshee->pci_regs[0x2c] = 0x1a; + banshee->pci_regs[0x2d] = 0x12; + banshee->pci_regs[0x2e] = 0x3a; + banshee->pci_regs[0x2f] = 0x00; + break; + } + + return banshee; +} +/* Device init entry points: each binds a ROM file name, the SGRAM flag, a TYPE_* board id and a VOODOO_* core type to banshee_init_common(). */ +static void *banshee_init() +{ + return banshee_init_common("pci_sg.rom", 1, TYPE_BANSHEE, VOODOO_BANSHEE); +} +static void *creative_banshee_init() +{ + return banshee_init_common("blasterpci.rom", 0, TYPE_BANSHEE, VOODOO_BANSHEE); +} +static void *v3_2000_init() +{ + return banshee_init_common("voodoo3_2000/2k11sd.rom", 0, TYPE_V3_2000, VOODOO_3); +} +static void *v3_3000_init() +{ + return banshee_init_common("voodoo3_3000/3k12sd.rom", 0, TYPE_V3_3000, VOODOO_3); +} +/* Availability checks: each returns rom_present() for the ROM file its device's init function loads. */ +static int banshee_available() +{ + return rom_present("pci_sg.rom"); +} +static int creative_banshee_available() +{ + return rom_present("blasterpci.rom"); +} +static int v3_2000_available() +{ + return 
rom_present("voodoo3_2000/2k11sd.rom"); +} +static int v3_3000_available() +{ + return rom_present("voodoo3_3000/3k12sd.rom"); +} +/* Device close hook: shuts down the Voodoo core and the SVGA core, then frees the banshee_t. */ +static void banshee_close(void *p) +{ + banshee_t *banshee = (banshee_t *)p; + + voodoo_card_close(banshee->voodoo); + svga_close(&banshee->svga); + + free(banshee); +} +/* Speed-changed hook: recomputes the SVGA timings. */ +static void banshee_speed_changed(void *p) +{ + banshee_t *banshee = (banshee_t *)p; + + svga_recalctimings(&banshee->svga); +} +/* Force-redraw hook: flags the whole SVGA frame as changed. */ +static void banshee_force_redraw(void *p) +{ + banshee_t *banshee = (banshee_t *)p; + + banshee->svga.fullchange = changeframecount; +} +/* timer_read() value sampled by the previous banshee_add_status_info() call */ +static uint64_t status_time = 0; +/* Appends text to the NUL-terminated string s without ever writing past max_len bytes (terminator included); the text is truncated when it does not fit. strncat()'s third argument limits the characters appended, not the destination size, so the remaining room has to be computed here. */ +static void banshee_status_append(char *s, int max_len, const char *text) +{ + size_t used = strlen(s); + + if (max_len > 0 && used + 1 < (size_t)max_len) + strncat(s, text, (size_t)max_len - used - 1); +} +/* Status hook: appends the SVGA status followed by Voodoo throughput figures and the overlay filter/scaling mode to s, then resets the per-interval counters. s must hold a NUL-terminated string; max_len is treated as the total capacity of s in bytes (assumption - confirm against the caller), and output that does not fit is truncated. */ +static void banshee_add_status_info(char *s, int max_len, void *p) +{ + banshee_t *banshee = (banshee_t *)p; + voodoo_t *voodoo = banshee->voodoo; + char temps[512]; + int pixel_count_current[4]; + int pixel_count_total; + int texel_count_current[4]; + int texel_count_total; + int render_time[4]; + uint64_t new_time = timer_read(); + uint64_t status_diff = new_time - status_time; + int c; + status_time = new_time; + + svga_add_status_info(s, max_len, &banshee->svga); + + /* snapshot the counters once so the totals and the *_old values stored at the end agree */ + for (c = 0; c < 4; c++) + { + pixel_count_current[c] = voodoo->pixel_count[c]; + texel_count_current[c] = voodoo->texel_count[c]; + render_time[c] = voodoo->render_time[c]; + } + + pixel_count_total = (pixel_count_current[0] + pixel_count_current[1] + pixel_count_current[2] + pixel_count_current[3]) - + (voodoo->pixel_count_old[0] + voodoo->pixel_count_old[1] + voodoo->pixel_count_old[2] + voodoo->pixel_count_old[3]); + texel_count_total = (texel_count_current[0] + texel_count_current[1] + texel_count_current[2] + texel_count_current[3]) - + (voodoo->texel_count_old[0] + voodoo->texel_count_old[1] + voodoo->texel_count_old[2] + voodoo->texel_count_old[3]); + snprintf(temps, sizeof(temps), "%f Mpixels/sec (%f)\n%f Mtexels/sec (%f)\n%f ktris/sec\n%f%% CPU (%f%% real)\n%d frames/sec (%i)\n%f%% CPU (%f%% real)\n"/*%d reads/sec\n%d write/sec\n%d tex/sec\n*/, + (double)pixel_count_total/1000000.0, + 
((double)pixel_count_total/1000000.0) / ((double)render_time[0] / status_diff), + (double)texel_count_total/1000000.0, + ((double)texel_count_total/1000000.0) / ((double)render_time[0] / status_diff), + (double)voodoo->tri_count/1000.0, ((double)voodoo->time * 100.0) / timer_freq, ((double)voodoo->time * 100.0) / status_diff, voodoo->frame_count, voodoo_recomp, + ((double)voodoo->render_time[0] * 100.0) / timer_freq, ((double)voodoo->render_time[0] * 100.0) / status_diff); + if (voodoo->render_threads >= 2) + { + char temps2[512]; + snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n", + ((double)voodoo->render_time[1] * 100.0) / timer_freq, ((double)voodoo->render_time[1] * 100.0) / status_diff); + banshee_status_append(temps, (int)sizeof(temps), temps2); + } + if (voodoo->render_threads == 4) + { + char temps2[512]; + snprintf(temps2, sizeof(temps2), "%f%% CPU (%f%% real)\n%f%% CPU (%f%% real)\n", + ((double)voodoo->render_time[2] * 100.0) / timer_freq, ((double)voodoo->render_time[2] * 100.0) / status_diff, + ((double)voodoo->render_time[3] * 100.0) / timer_freq, ((double)voodoo->render_time[3] * 100.0) / status_diff); + banshee_status_append(temps, (int)sizeof(temps), temps2); + } + + banshee_status_append(s, max_len, temps); + + banshee_status_append(s, max_len, "Overlay mode: "); /* leilei debug additions */ + if ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) == VIDPROCCFG_FILTER_MODE_DITHER_2X2) + banshee_status_append(s, max_len, "2x2 box filter\n"); + if ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) == VIDPROCCFG_FILTER_MODE_DITHER_4X4) + banshee_status_append(s, max_len, "4x1 tap filter\n"); + if ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) == VIDPROCCFG_FILTER_MODE_POINT) + banshee_status_append(s, max_len, "Nearest neighbor\n"); + if ((banshee->vidProcCfg & VIDPROCCFG_FILTER_MODE_MASK) == VIDPROCCFG_FILTER_MODE_BILINEAR) + banshee_status_append(s, max_len, "Bilinear filtered\n"); + if ((banshee->vidProcCfg & VIDPROCCFG_H_SCALE_ENABLE)) + banshee_status_append(s, max_len, "H scaled \n"); + if ((banshee->vidProcCfg & VIDPROCCFG_V_SCALE_ENABLE)) + banshee_status_append(s, max_len, "V scaled \n"); + if 
((banshee->vidProcCfg & VIDPROCCFG_2X_MODE)) + banshee_status_append(s, max_len, "2X mode\n"); + + banshee_status_append(s, max_len, "\n"); + /* start the next measurement interval */ + for (c = 0; c < 4; c++) + { + voodoo->pixel_count_old[c] = pixel_count_current[c]; + voodoo->texel_count_old[c] = texel_count_current[c]; + voodoo->render_time[c] = 0; + } + + voodoo->tri_count = voodoo->frame_count = 0; + voodoo->rd_count = voodoo->wr_count = voodoo->tex_count = 0; + voodoo->time = 0; + + voodoo->read_time = pci_nonburst_time + pci_burst_time; + + voodoo_recomp = 0; +} +/* Device tables. Fields in order: name, flags, init, close, available, speed_changed, force_redraw, add_status_info, config (order inferred from the function names used - confirm against device_t). */ +device_t voodoo_banshee_device = +{ + "Voodoo Banshee PCI (reference)", + DEVICE_PCI, + banshee_init, + banshee_close, + banshee_available, + banshee_speed_changed, + banshee_force_redraw, + banshee_add_status_info, + banshee_sgram_config +}; + +device_t creative_voodoo_banshee_device = +{ + "Creative Labs 3D Blaster Banshee PCI", + DEVICE_PCI, + creative_banshee_init, + banshee_close, + creative_banshee_available, + banshee_speed_changed, + banshee_force_redraw, + banshee_add_status_info, + banshee_sdram_config +}; + +device_t voodoo_3_2000_device = +{ + "Voodoo 3 2000 PCI", + DEVICE_PCI, + v3_2000_init, + banshee_close, + v3_2000_available, + banshee_speed_changed, + banshee_force_redraw, + banshee_add_status_info, + banshee_sdram_config +}; + +device_t voodoo_3_3000_device = +{ + "Voodoo 3 3000 PCI", + DEVICE_PCI, + v3_3000_init, + banshee_close, + v3_3000_available, + banshee_speed_changed, + banshee_force_redraw, + banshee_add_status_info, + banshee_sdram_config +}; diff --git a/pcem/vid_voodoo_banshee.h b/pcem/vid_voodoo_banshee.h new file mode 100644 index 00000000..ddd7e3fd --- /dev/null +++ b/pcem/vid_voodoo_banshee.h @@ -0,0 +1,6 @@ +extern device_t voodoo_banshee_device; +extern device_t creative_voodoo_banshee_device; +extern device_t voodoo_3_2000_device; +extern device_t voodoo_3_3000_device; + +void banshee_set_overlay_addr(void *p, uint32_t addr); diff --git a/pcem/vid_voodoo_banshee_blitter.cpp b/pcem/vid_voodoo_banshee_blitter.cpp new file mode 
100644 index 00000000..f6f0d031 --- /dev/null +++ b/pcem/vid_voodoo_banshee_blitter.cpp @@ -0,0 +1,1450 @@ +/*Current issues : + - missing screen->screen scaled blits with format conversion + - missing YUV blits + - missing linestyle + - missing wait for vsync + - missing reversible lines + + Notes : + - 16 bpp runs with tiled framebuffer - to aid 3D? + 8 and 32 bpp use linear +*/ +#include +#include +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "thread.h" +#include "video.h" +#include "vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_banshee_blitter.h" +#include "vid_voodoo_render.h" + +#define COMMAND_CMD_MASK (0xf) +#define COMMAND_CMD_NOP (0 << 0) +#define COMMAND_CMD_SCREEN_TO_SCREEN_BLT (1 << 0) +#define COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT (2 << 0) +#define COMMAND_CMD_HOST_TO_SCREEN_BLT (3 << 0) +#define COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT (4 << 0) +#define COMMAND_CMD_RECTFILL (5 << 0) +#define COMMAND_CMD_LINE (6 << 0) +#define COMMAND_CMD_POLYLINE (7 << 0) +#define COMMAND_CMD_POLYFILL (8 << 0) +#define COMMAND_INITIATE (1 << 8) +#define COMMAND_INC_X_START (1 << 10) +#define COMMAND_INC_Y_START (1 << 11) +#define COMMAND_STIPPLE_LINE (1 << 12) +#define COMMAND_PATTERN_MONO (1 << 13) +#define COMMAND_DX (1 << 14) +#define COMMAND_DY (1 << 15) +#define COMMAND_TRANS_MONO (1 << 16) +#define COMMAND_PATOFF_X_MASK (7 << 17) +#define COMMAND_PATOFF_X_SHIFT (17) +#define COMMAND_PATOFF_Y_MASK (7 << 20) +#define COMMAND_PATOFF_Y_SHIFT (20) +#define COMMAND_CLIP_SEL (1 << 23) + +#define CMDEXTRA_SRC_COLORKEY (1 << 0) +#define CMDEXTRA_DST_COLORKEY (1 << 1) +#define CMDEXTRA_FORCE_PAT_ROW0 (1 << 3) + +#define SRC_FORMAT_STRIDE_MASK (0x1fff) +#define SRC_FORMAT_COL_MASK (0xf << 16) +#define SRC_FORMAT_COL_1_BPP (0 << 16) +#define SRC_FORMAT_COL_8_BPP (1 << 16) +#define SRC_FORMAT_COL_16_BPP (3 << 16) +#define SRC_FORMAT_COL_24_BPP (4 << 16) +#define SRC_FORMAT_COL_32_BPP (5 << 16) +#define 
SRC_FORMAT_COL_YUYV (8 << 16) +#define SRC_FORMAT_COL_UYVY (9 << 16) +#define SRC_FORMAT_BYTE_SWIZZLE (1 << 20) +#define SRC_FORMAT_WORD_SWIZZLE (1 << 21) +#define SRC_FORMAT_PACKING_MASK (3 << 22) +#define SRC_FORMAT_PACKING_STRIDE (0 << 22) +#define SRC_FORMAT_PACKING_BYTE (1 << 22) +#define SRC_FORMAT_PACKING_WORD (2 << 22) +#define SRC_FORMAT_PACKING_DWORD (3 << 22) +/* dstFormat fields: stride in the low 13 bits, colour format in bits 16-19 */ +#define DST_FORMAT_STRIDE_MASK (0x1fff) +#define DST_FORMAT_COL_MASK (0xf << 16) +#define DST_FORMAT_COL_8_BPP (1 << 16) +#define DST_FORMAT_COL_16_BPP (3 << 16) +#define DST_FORMAT_COL_24_BPP (4 << 16) +#define DST_FORMAT_COL_32_BPP (5 << 16) + +#define BRES_ERROR_MASK (0xffff) +#define BRES_ERROR_USE (1 << 31) /* NOTE(review): shifts a signed 1 into the sign bit; consider 1u << 31 */ +/* Pixel layouts understood by colorkey(): one 8-bit value, 5:6:5, or 8:8:8 */ +enum +{ + COLORKEY_8, + COLORKEY_16, + COLORKEY_32 +}; +/* Colour-key test. src_notdst selects the source key (non-zero) or the destination key (zero). Returns non-zero when that key is enabled in commandExtra and every channel of src lies inside the inclusive [min, max] range of the key; returns 0 when the key is disabled or color_format is not one of the COLORKEY_* values. */ +static int colorkey(voodoo_t *voodoo, uint32_t src, int src_notdst, int color_format) +{ + uint32_t min = src_notdst ? voodoo->banshee_blt.srcColorkeyMin : voodoo->banshee_blt.dstColorkeyMin; + uint32_t max = src_notdst ? voodoo->banshee_blt.srcColorkeyMax : voodoo->banshee_blt.dstColorkeyMax; + + if (!(voodoo->banshee_blt.commandExtra & (src_notdst ? 
CMDEXTRA_SRC_COLORKEY : CMDEXTRA_DST_COLORKEY))) + return 0; + + switch (color_format) + { + case COLORKEY_8: + return ((src & 0xff) >= (min & 0xff)) && ((src & 0xff) <= (max & 0xff)); + + case COLORKEY_16: + { /* 5:6:5 - each channel is range-checked on its own */ + int r = (src >> 11) & 0x1f, r_min = (min >> 11) & 0x1f, r_max = (max >> 11) & 0x1f; + int g = (src >> 5) & 0x3f, g_min = (min >> 5) & 0x3f, g_max = (max >> 5) & 0x3f; + int b = src & 0x1f, b_min = min & 0x1f, b_max = max & 0x1f; + + return (r >= r_min) && (r <= r_max) && (g >= g_min) && (g <= g_max) && + (b >= b_min) && (b <= b_max); + } + + case COLORKEY_32: + { /* 8:8:8 - bits 24-31 of src are ignored */ + int r = (src >> 16) & 0xff, r_min = (min >> 16) & 0xff, r_max = (max >> 16) & 0xff; + int g = (src >> 8) & 0xff, g_min = (min >> 8) & 0xff, g_max = (max >> 8) & 0xff; + int b = src & 0xff, b_min = min & 0xff, b_max = max & 0xff; + + return (r >= r_min) && (r <= r_max) && (g >= g_min) && (g <= g_max) && + (b >= b_min) && (b <= b_max); + } + + default: + return 0; + } +} +/* Combines pattern, src and dest with a ternary raster operation. The ROP byte is picked from banshee_blt.rops[] by the colour-key results (bit 1 of the index: src matches the source key, bit 0: dest matches the destination key) and is evaluated as an 8-entry truth table: each set bit ORs in one of the eight and-combinations of pattern, src and dest or their complements, bitwise across the whole 32-bit word. */ +static uint32_t MIX(voodoo_t *voodoo, uint32_t dest, uint32_t src, uint32_t pattern, int colour_format_src, int colour_format_dest) +{ + int rop_nr = 0; + uint32_t result = 0; + uint32_t rop; + + if (colorkey(voodoo, src, 1, colour_format_src)) + rop_nr |= 2; + if (colorkey(voodoo, dest, 0, colour_format_dest)) + rop_nr |= 1; + + rop = voodoo->banshee_blt.rops[rop_nr]; + + if (rop & 0x01) + result |= (~pattern & ~src & ~dest); + if (rop & 0x02) + result |= (~pattern & ~src & dest); + if (rop & 0x04) + result |= (~pattern & src & ~dest); + if (rop & 0x08) + result |= (~pattern & src & dest); + if (rop & 0x10) + result |= ( pattern & ~src & ~dest); + if (rop & 0x20) + result |= ( pattern & ~src & dest); + if (rop & 0x40) + result |= ( pattern & src & ~dest); + if (rop & 0x80) + result |= ( pattern & src & dest); + + return result; +} +/* Returns the VRAM byte address (masked with fb_mask) of byte offset x on row y of the source (src_notdst non-zero, using src_stride) or destination (zero, using banshee_blt.dst_stride) surface. Tiled surfaces are addressed in tiles of 128 bytes by 32 rows. */ +static uint32_t get_addr(voodoo_t *voodoo, int x, int y, int src_notdst, uint32_t src_stride) +{ + uint32_t stride = src_notdst ? 
src_stride : voodoo->banshee_blt.dst_stride; + uint32_t base_addr = src_notdst ? voodoo->banshee_blt.srcBaseAddr : voodoo->banshee_blt.dstBaseAddr; + + if (src_notdst ? voodoo->banshee_blt.srcBaseAddr_tiled : voodoo->banshee_blt.dstBaseAddr_tiled) + return (base_addr + (x & 127) + ((x >> 7) * 128*32) + ((y & 31) * 128) + (y >> 5)*stride) & voodoo->fb_mask; + else + return (base_addr + x + y*stride) & voodoo->fb_mask; +} +/* Writes one destination pixel at (x, y). The pattern colour is colorFore/colorBack chosen by bit (7 - pat_x % 8) of pattern_mask when COMMAND_PATTERN_MONO is set, otherwise entry (pat_x % 8, pat_y % 8) of the 8x8 colour pattern for the destination depth. src, pattern and the current destination pixel go through MIX(); the result is stored and the touched 4 KB page is stamped in changedvram. The rop parameter is not used here (MIX() takes the ROP from banshee_blt.rops); src_colorkey is the COLORKEY_* format of src. Destination formats other than 8/16/24/32 bpp are ignored. */ +static void PLOT(voodoo_t *voodoo, int x, int y, int pat_x, int pat_y, uint8_t pattern_mask, uint8_t rop, uint32_t src, int src_colorkey) +{ + switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) + { + case DST_FORMAT_COL_8_BPP: + { + uint32_t addr = get_addr(voodoo, x, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = voodoo->vram[addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern8[(pat_x & 7) + (pat_y & 7)*8]; + + voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_8); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_16_BPP: + { + uint32_t addr = get_addr(voodoo, x*2, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*2 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint16_t *)&voodoo->vram[addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? 
voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern16[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint16_t *)&voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_16); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_24_BPP: + { /* 24 bpp: a 32-bit word is read and written at a 3-byte pitch; the top byte is preserved from dest. NOTE(review): for a pixel in the last bytes of VRAM this word access could reach past addr & fb_mask - confirm the buffer has slack. */ + uint32_t addr = get_addr(voodoo, x*3, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*3 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint32_t *)&voodoo->vram[addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern24[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[addr] = (MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_32) & 0xffffff) | (dest & 0xff000000); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_32_BPP: + { + uint32_t addr = get_addr(voodoo, x*4, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*4 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint32_t *)&voodoo->vram[addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? 
voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[addr] = MIX(voodoo, dest, src, pattern, src_colorkey, COLORKEY_32); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + } +} +/* Line-drawing counterpart of PLOT(): writes one destination pixel at (x, y) using colorFore as the source colour and the caller-supplied pattern value, mixed with the current destination pixel by MIX(). The rop parameter is not used here. Destination formats other than 8/16/24/32 bpp are ignored. */ +static void PLOT_LINE(voodoo_t *voodoo, int x, int y, uint8_t rop, uint32_t pattern, int src_colorkey) +{ + switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) + { + case DST_FORMAT_COL_8_BPP: + { + uint32_t addr = get_addr(voodoo, x, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = voodoo->vram[addr]; + + voodoo->vram[addr] = MIX(voodoo, dest, voodoo->banshee_blt.colorFore, pattern, src_colorkey, COLORKEY_8); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_16_BPP: + { + uint32_t addr = get_addr(voodoo, x*2, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*2 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint16_t *)&voodoo->vram[addr]; + + *(uint16_t *)&voodoo->vram[addr] = MIX(voodoo, dest, voodoo->banshee_blt.colorFore, pattern, src_colorkey, COLORKEY_16); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_24_BPP: + { /* same 32-bit access with preserved top byte as in PLOT() */ + uint32_t addr = get_addr(voodoo, x*3, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*3 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint32_t *)&voodoo->vram[addr]; + + *(uint32_t *)&voodoo->vram[addr] = (MIX(voodoo, dest, voodoo->banshee_blt.colorFore, pattern, src_colorkey, COLORKEY_32) & 0xffffff) | (dest & 0xff000000); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_32_BPP: + { + uint32_t addr = get_addr(voodoo, x*4, y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + x*4 + y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t dest = *(uint32_t *)&voodoo->vram[addr]; + + *(uint32_t 
*)&voodoo->vram[addr] = MIX(voodoo, dest, voodoo->banshee_blt.colorFore, pattern, src_colorkey, COLORKEY_32); + voodoo->changedvram[addr >> 12] = changeframecount; + break; + } + } +} + +static void update_src_stride(voodoo_t *voodoo) +{ + int bpp; + + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) + { + case SRC_FORMAT_COL_1_BPP: + bpp = 1; + break; + case SRC_FORMAT_COL_8_BPP: + bpp = 8; + break; + case SRC_FORMAT_COL_16_BPP: + bpp = 16; + break; + case SRC_FORMAT_COL_24_BPP: + bpp = 24; + break; + case SRC_FORMAT_COL_32_BPP: + bpp = 32; + break; + + default: + bpp = 16; + break; + } + + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK) + { + case SRC_FORMAT_PACKING_STRIDE: + voodoo->banshee_blt.src_stride_src = voodoo->banshee_blt.src_stride; //voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK; + voodoo->banshee_blt.src_stride_dest = voodoo->banshee_blt.src_stride; //voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK; + voodoo->banshee_blt.host_data_size_src = (voodoo->banshee_blt.srcSizeX * bpp + 7) >> 3; + voodoo->banshee_blt.host_data_size_dest = (voodoo->banshee_blt.dstSizeX * bpp + 7) >> 3; +// pclog("Stride packing %08x %08x bpp=%i dstSizeX=%i\n", voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest, bpp, voodoo->banshee_blt.dstSizeX); + break; + + case SRC_FORMAT_PACKING_BYTE: + voodoo->banshee_blt.src_stride_src = (voodoo->banshee_blt.srcSizeX * bpp + 7) >> 3; + voodoo->banshee_blt.src_stride_dest = (voodoo->banshee_blt.dstSizeX * bpp + 7) >> 3; + voodoo->banshee_blt.host_data_size_src = voodoo->banshee_blt.src_stride_src; + voodoo->banshee_blt.host_data_size_dest = voodoo->banshee_blt.src_stride_dest; +// pclog("Byte packing %08x %08x\n", voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + break; + + case SRC_FORMAT_PACKING_WORD: + voodoo->banshee_blt.src_stride_src = ((voodoo->banshee_blt.srcSizeX * bpp + 15) >> 4) * 2; + voodoo->banshee_blt.src_stride_dest = 
((voodoo->banshee_blt.dstSizeX * bpp + 15) >> 4) * 2; + voodoo->banshee_blt.host_data_size_src = voodoo->banshee_blt.src_stride_src; + voodoo->banshee_blt.host_data_size_dest = voodoo->banshee_blt.src_stride_dest; +// pclog("Word packing %08x %08x\n", voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + break; + + case SRC_FORMAT_PACKING_DWORD: + voodoo->banshee_blt.src_stride_src = ((voodoo->banshee_blt.srcSizeX * bpp + 31) >> 5) * 4; + voodoo->banshee_blt.src_stride_dest = ((voodoo->banshee_blt.dstSizeX * bpp + 31) >> 5) * 4; + voodoo->banshee_blt.host_data_size_src = voodoo->banshee_blt.src_stride_src; + voodoo->banshee_blt.host_data_size_dest = voodoo->banshee_blt.src_stride_dest; +// pclog("Dword packing %08x %08x\n", voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + break; + } +} + +static void end_command(voodoo_t *voodoo) +{ + /*Update dest coordinates if required*/ + if (voodoo->banshee_blt.command & COMMAND_INC_X_START) + { + voodoo->banshee_blt.dstXY &= ~0x0000ffff; + voodoo->banshee_blt.dstXY |= (voodoo->banshee_blt.dstX & 0xffff); + } + + if (voodoo->banshee_blt.command & COMMAND_INC_Y_START) + { + voodoo->banshee_blt.dstXY &= ~0xffff0000; + voodoo->banshee_blt.dstXY |= (voodoo->banshee_blt.dstY << 16); + } +} + +static void banshee_do_rectfill(voodoo_t *voodoo) +{ + clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0]; + int dst_y = voodoo->banshee_blt.dstY; + uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern; + int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 
0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY); + int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) == + (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO); + uint8_t rop = voodoo->banshee_blt.command >> 24; + +// pclog("banshee_do_rectfill: size=%i,%i dst=%i,%i\n", voodoo->banshee_blt.dstSizeX, voodoo->banshee_blt.dstSizeY, voodoo->banshee_blt.dstX, voodoo->banshee_blt.dstY); +// pclog("clipping: %i,%i -> %i,%i\n", clip->x_min, clip->y_min, clip->x_max, clip->y_max); +// pclog("colorFore=%08x\n", voodoo->banshee_blt.colorFore); + for (voodoo->banshee_blt.cur_y = 0; voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY; voodoo->banshee_blt.cur_y++) + { + int dst_x = voodoo->banshee_blt.dstX; + + if (dst_y >= clip->y_min && dst_y < clip->y_max) + { + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX; + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + + for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++) + { + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + + if (dst_x >= clip->x_min && dst_x < clip->x_max && pattern_trans) + PLOT(voodoo, dst_x, dst_y, pat_x, pat_y, pattern_mask, rop, voodoo->banshee_blt.colorFore, COLORKEY_32); + + dst_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + pat_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + } + } + dst_y += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + if (!(voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0)) + pat_y += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + } + + end_command(voodoo); +} + +static void do_screen_to_screen_line(voodoo_t *voodoo, uint8_t *src_p, int use_x_dir, int src_x, int src_tiled) +{ + clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 
1 : 0]; + int dst_y = voodoo->banshee_blt.dstY; + int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY); + uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern; + int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) == + (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO); + uint8_t rop = voodoo->banshee_blt.command >> 24; + int src_colorkey; + + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) + { + case SRC_FORMAT_COL_8_BPP: + src_colorkey = COLORKEY_8; + break; + case SRC_FORMAT_COL_16_BPP: + src_colorkey = COLORKEY_16; + break; + default: + src_colorkey = COLORKEY_32; + break; + } +// pclog("do_screen_to_screen_line: srcFormat=%08x dst=%08x\n", voodoo->banshee_blt.srcFormat, voodoo->banshee_blt.dstFormat); + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == + (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK)) + { + /*No conversion required*/ + if (dst_y >= clip->y_min && dst_y < clip->y_max) + { + int dst_x = voodoo->banshee_blt.dstX; + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX; + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + + for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++) + { + int pattern_trans = use_pattern_trans ? 
(pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + int src_x_real = (src_x * voodoo->banshee_blt.src_bpp) >> 3; + + if (src_tiled) + src_x_real = (src_x_real & 127) + ((src_x_real >> 7) * 128*32); + + if (dst_x >= clip->x_min && dst_x < clip->x_max && pattern_trans) + { + switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) + { + case DST_FORMAT_COL_8_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = src_p[src_x_real]; + uint32_t dest = voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern8[(pat_x & 7) + (pat_y & 7)*8]; + + voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_8, COLORKEY_8); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_16_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*2, dst_y, 0, 0);//dst_addr = (voodoo->banshee_blt.dstBaseAddr + dst_x*2 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint16_t *)&src_p[src_x_real]; + uint32_t dest = *(uint16_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? 
voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern16[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint16_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_16, COLORKEY_16); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_24_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*3, dst_y, 0, 0);//dst_addr = (voodoo->banshee_blt.dstBaseAddr + dst_x*3 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint32_t *)&src_p[src_x_real]; + uint32_t dest = *(uint32_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern24[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[dst_addr] = (MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32) & 0xffffff) | (dest & 0xff000000); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_32_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*4, dst_y, 0, 0);//dst_addr = (voodoo->banshee_blt.dstBaseAddr + dst_x*4 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint32_t *)&src_p[src_x_real]; + uint32_t dest = *(uint32_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + voodoo->banshee_blt.colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + } + } + if (use_x_dir) + { + src_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + dst_x += (voodoo->banshee_blt.command & COMMAND_DX) ? 
-1 : 1; + pat_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + } + else + { + src_x++; + dst_x++; + pat_x++; + } + } + } + voodoo->banshee_blt.srcY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + voodoo->banshee_blt.dstY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + } + else + { + /*Conversion required*/ + if (dst_y >= clip->y_min && dst_y < clip->y_max) + { +// int src_x = voodoo->banshee_blt.srcX; + int dst_x = voodoo->banshee_blt.dstX; + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX; + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + + for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++) + { + int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + int src_x_real = (src_x * voodoo->banshee_blt.src_bpp) >> 3; + + if (src_tiled) + src_x_real = (src_x_real & 127) + ((src_x_real >> 7) * 128*32); + + if (dst_x >= clip->x_min && dst_x < clip->x_max && pattern_trans) + { + uint32_t src_data = 0; + int transparent = 0; + + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) + { + case SRC_FORMAT_COL_1_BPP: + { + uint8_t src_byte = src_p[src_x_real]; + src_data = (src_byte & (0x80 >> (src_x & 7))) ? 
voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack; + if (voodoo->banshee_blt.command & COMMAND_TRANS_MONO) + transparent = !(src_byte & (0x80 >> (src_x & 7))); +// pclog(" 1bpp src_byte=%02x src_x=%i src_data=%x transparent=%i\n", src_byte, src_x, src_data, transparent); + break; + } + case SRC_FORMAT_COL_8_BPP: + { + src_data = src_p[src_x_real]; + break; + } + case SRC_FORMAT_COL_16_BPP: + { + uint16_t src_16 = *(uint16_t *)&src_p[src_x_real]; + int r = (src_16 >> 11); + int g = (src_16 >> 5) & 0x3f; + int b = src_16 & 0x1f; + + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + src_data = (r << 16) | (g << 8) | b; + break; + } + case SRC_FORMAT_COL_24_BPP: + { + src_data = *(uint32_t *)&src_p[src_x_real]; + break; + } + case SRC_FORMAT_COL_32_BPP: + { + src_data = *(uint32_t *)&src_p[src_x_real]; + break; + } +#ifndef RELEASE_BUILD + default: + fatal("banshee_do_screen_to_screen_blt: unknown srcFormat %08x\n", voodoo->banshee_blt.srcFormat); +#endif + } + + if ((voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) == DST_FORMAT_COL_16_BPP && + (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) != SRC_FORMAT_COL_1_BPP) + { + int r = src_data >> 16; + int g = (src_data >> 8) & 0xff; + int b = src_data & 0xff; + + src_data = (b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11); + } + + if (!transparent) + PLOT(voodoo, dst_x, dst_y, pat_x, pat_y, pattern_mask, rop, src_data, src_colorkey); + } + if (use_x_dir) + { + src_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + dst_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + pat_x += (voodoo->banshee_blt.command & COMMAND_DX) ? -1 : 1; + } + else + { + src_x++; + dst_x++; + pat_x++; + } + } + } + voodoo->banshee_blt.srcY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + voodoo->banshee_blt.dstY += (voodoo->banshee_blt.command & COMMAND_DY) ? 
-1 : 1;
+        }
+}
+
+/*Screen-to-screen blit: for each of the dstSizeY rows, look up the source row
+  at srcY via get_addr() and hand it to do_screen_to_screen_line(), which draws
+  it and steps srcY / dstY. Direction flags are honoured (use_x_dir = 1).
+  NOTE(review): the last two get_addr() arguments look like a "source" selector
+  and a row stride - confirm against its definition.*/
+static void banshee_do_screen_to_screen_blt(voodoo_t *voodoo)
+{
+        for (voodoo->banshee_blt.cur_y = 0; voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY; voodoo->banshee_blt.cur_y++)
+        {
+                uint32_t src_addr = get_addr(voodoo, 0, voodoo->banshee_blt.srcY, 1, voodoo->banshee_blt.src_stride_dest);
+
+                do_screen_to_screen_line(voodoo, &voodoo->vram[src_addr], 1, voodoo->banshee_blt.srcX, voodoo->banshee_blt.srcBaseAddr_tiled);
+        }
+        end_command(voodoo);
+}
+
+/*Host-to-screen blit: consume one 32-bit word of host data.
+  The word is optionally byte/word swizzled, appended to host_data[], and every
+  time a complete source row has been collected it is drawn with
+  do_screen_to_screen_line() (left-to-right, untiled). end_command() runs once
+  cur_y reaches dstSizeY; rows received after that are dropped.
+  voodoo - device state
+  count  - not referenced here
+  data   - the host data word*/
+static void banshee_do_host_to_screen_blt(voodoo_t *voodoo, int count, uint32_t data)
+{
+        if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_BYTE_SWIZZLE)
+                data = (data >> 24) | ((data >> 8) & 0xff00) | ((data << 8) & 0xff0000) | (data << 24);
+        if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_WORD_SWIZZLE)
+                data = (data >> 16) | (data << 16);
+
+        if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK) == SRC_FORMAT_PACKING_STRIDE)
+        {
+                /*Explicit stride: each row starts on a fresh word, srcX carries the byte/bit phase*/
+                int last_byte;
+
+                if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP)
+                        last_byte = ((voodoo->banshee_blt.srcX & 31) + voodoo->banshee_blt.dstSizeX + 7) >> 3;
+                else
+                        last_byte = (voodoo->banshee_blt.srcX & 3) + voodoo->banshee_blt.host_data_size_dest;
+
+                *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data;
+                voodoo->banshee_blt.host_data_count += 4;
+                if (voodoo->banshee_blt.host_data_count >= last_byte)
+                {
+                        if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+                        {
+                                if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP)
+                                        do_screen_to_screen_line(voodoo, &voodoo->banshee_blt.host_data[(voodoo->banshee_blt.srcX >> 3) & 3], 0, voodoo->banshee_blt.srcX & 7, 0);
+                                else
+                                        do_screen_to_screen_line(voodoo, &voodoo->banshee_blt.host_data[voodoo->banshee_blt.srcX & 3], 0, 0, 0);
+                                voodoo->banshee_blt.cur_y++;
+                                if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY)
+                                        end_command(voodoo);
+                        }
+
+                        /*Advance the phase by the stride field: bytes, or bits for 1 bpp*/
+                        if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP)
+                                voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) << 3;
+                        else
+                                voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK);
+
+                        voodoo->banshee_blt.host_data_count = 0;
+                }
+        }
+        else
+        {
+                /*Packed rows: a word may hold the end of one row and the start of the next.
+                  A row stride of zero (e.g. dstSizeX == 0) or less would make the drain
+                  loop below spin forever, because host_data_count could never drop
+                  below it. There is no row to draw in that case, so discard the data.*/
+                if (voodoo->banshee_blt.src_stride_dest <= 0)
+                {
+                        voodoo->banshee_blt.host_data_count = 0;
+                        return;
+                }
+
+                *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data;
+                voodoo->banshee_blt.host_data_count += 4;
+                while (voodoo->banshee_blt.host_data_count >= voodoo->banshee_blt.src_stride_dest)
+                {
+                        voodoo->banshee_blt.host_data_count -= voodoo->banshee_blt.src_stride_dest;
+
+                        if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY)
+                        {
+                                do_screen_to_screen_line(voodoo, voodoo->banshee_blt.host_data, 0, 0, 0);
+                                voodoo->banshee_blt.cur_y++;
+                                if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY)
+                                        end_command(voodoo);
+                        }
+
+                        if (voodoo->banshee_blt.host_data_count)
+                        {
+                                /*Move the bytes of this word that belong to the next row to the buffer start*/
+                                *(uint32_t *)&voodoo->banshee_blt.host_data[0] = data >> (4-voodoo->banshee_blt.host_data_count)*8;
+                        }
+                }
+        }
+}
+
+static void do_screen_to_screen_stretch_line(voodoo_t 
*voodoo,uint8_t *src_p, int src_x, int *src_y) +{ + clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0]; +// int src_y = voodoo->banshee_blt.srcY; + int dst_y = voodoo->banshee_blt.dstY; + int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + voodoo->banshee_blt.dstY); + uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern; + int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) == + (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO); + uint32_t *colorPattern = voodoo->banshee_blt.colorPattern; + + //int error_y = voodoo->banshee_blt.dstSizeY / 2; + +/* pclog("banshee_do_screen_to_screen_stretch_blt:\n"); + pclog(" srcXY=%i,%i srcsizeXY=%i,%i\n", voodoo->banshee_blt.srcX, voodoo->banshee_blt.srcY, voodoo->banshee_blt.srcSizeX, voodoo->banshee_blt.srcSizeY); + pclog(" dstXY=%i,%i dstsizeXY=%i,%i\n", voodoo->banshee_blt.dstX, voodoo->banshee_blt.dstY, voodoo->banshee_blt.dstSizeX, voodoo->banshee_blt.dstSizeY);*/ + if (dst_y >= clip->y_min && dst_y < clip->y_max) + { +// int src_x = voodoo->banshee_blt.srcX; + int dst_x = voodoo->banshee_blt.dstX; + int pat_x = voodoo->banshee_blt.patoff_x + voodoo->banshee_blt.dstX; + uint8_t pattern_mask = pattern_mono[pat_y & 7]; + int error_x = voodoo->banshee_blt.dstSizeX / 2; + +// pclog(" Plot dest line %03i : src line %03i\n", dst_y, src_y); + for (voodoo->banshee_blt.cur_x = 0; voodoo->banshee_blt.cur_x < voodoo->banshee_blt.dstSizeX; voodoo->banshee_blt.cur_x++) + { + int pattern_trans = use_pattern_trans ? 
(pattern_mask & (1 << (7-(pat_x & 7)))) : 1; + + if (dst_x >= clip->x_min && dst_x < clip->x_max && pattern_trans) + { + switch (voodoo->banshee_blt.dstFormat & DST_FORMAT_COL_MASK) + { + case DST_FORMAT_COL_8_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = src_p[src_x]; + uint32_t dest = voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_8, COLORKEY_8); +// pclog("%i,%i : sdp=%02x,%02x,%02x res=%02x\n", voodoo->banshee_blt.cur_x, voodoo->banshee_blt.cur_y, src, dest, pattern, voodoo->vram[dst_addr]); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_16_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*2, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x*2 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint16_t *)&src_p[src_x*2]; + uint32_t dest = *(uint16_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? 
voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint16_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_16, COLORKEY_16); +// pclog("%i,%i : sdp=%02x,%02x,%02x res=%02x\n", voodoo->banshee_blt.cur_x, voodoo->banshee_blt.cur_y, src, dest, pattern, *(uint16_t *)&voodoo->vram[dst_addr]); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_24_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*3, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x*3 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint32_t *)&src_p[src_x*3]; + uint32_t dest = *(uint32_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[dst_addr] = (MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32) & 0xffffff) | (*(uint32_t *)&voodoo->vram[dst_addr] & 0xff000000); +// pclog("%i,%i : sdp=%02x,%02x,%02x res=%02x\n", voodoo->banshee_blt.cur_x, voodoo->banshee_blt.cur_y, src, dest, pattern, voodoo->vram[dst_addr]); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + case DST_FORMAT_COL_32_BPP: + { + uint32_t dst_addr = get_addr(voodoo, dst_x*4, dst_y, 0, 0);//(voodoo->banshee_blt.dstBaseAddr + dst_x*4 + dst_y*voodoo->banshee_blt.dst_stride) & voodoo->fb_mask; + uint32_t src = *(uint32_t *)&src_p[src_x*4]; + uint32_t dest = *(uint32_t *)&voodoo->vram[dst_addr]; + uint32_t pattern = (voodoo->banshee_blt.command & COMMAND_PATTERN_MONO) ? + ((pattern_mask & (1 << (7-(pat_x & 7)))) ? 
voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack) : + colorPattern[(pat_x & 7) + (pat_y & 7)*8]; + + *(uint32_t *)&voodoo->vram[dst_addr] = MIX(voodoo, dest, src, pattern, COLORKEY_32, COLORKEY_32); +// pclog("%i,%i : sdp=%02x,%02x,%02x res=%02x\n", voodoo->banshee_blt.cur_x, voodoo->banshee_blt.cur_y, src, dest, pattern, voodoo->vram[dst_addr]); + voodoo->changedvram[dst_addr >> 12] = changeframecount; + break; + } + } + } + + error_x -= voodoo->banshee_blt.srcSizeX; + while (error_x < 0) + { + error_x += voodoo->banshee_blt.dstSizeX; + src_x++; + } + dst_x++; + pat_x++; + } + } + + voodoo->banshee_blt.bres_error_0 -= voodoo->banshee_blt.srcSizeY; + while (voodoo->banshee_blt.bres_error_0 < 0) + { + voodoo->banshee_blt.bres_error_0 += voodoo->banshee_blt.dstSizeY; + if (src_y) + (*src_y) += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; + } + voodoo->banshee_blt.dstY += (voodoo->banshee_blt.command & COMMAND_DY) ? -1 : 1; +// pat_y += (voodoo->banshee_blt.command & COMMAND_DY) ? 
-1 : 1; +} + +static void banshee_do_screen_to_screen_stretch_blt(voodoo_t *voodoo) +{ +// pclog("screen_to_screen: %08x %08x %08x\n", voodoo->banshee_blt.srcFormat, voodoo->banshee_blt.src_stride, voodoo->banshee_blt.src_stride_dest); +// return; + for (voodoo->banshee_blt.cur_y = 0; voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY; voodoo->banshee_blt.cur_y++) + { + uint32_t src_addr = get_addr(voodoo, 0, voodoo->banshee_blt.srcY, 1, voodoo->banshee_blt.src_stride_src);//(voodoo->banshee_blt.srcBaseAddr + voodoo->banshee_blt.srcY*voodoo->banshee_blt.src_stride_src) & voodoo->fb_mask; +// pclog("scale_blit %i %08x %08x\n", voodoo->banshee_blt.cur_y, src_addr, voodoo->banshee_blt.command); +// if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) +// pclog(" srcY=%i src_addr=%08x\n", voodoo->banshee_blt.srcY, src_addr); + do_screen_to_screen_stretch_line(voodoo, &voodoo->vram[src_addr], voodoo->banshee_blt.srcX, &voodoo->banshee_blt.srcY); + } + end_command(voodoo); +} + +static void banshee_do_host_to_screen_stretch_blt(voodoo_t *voodoo, int count, uint32_t data) +{ +// if (voodoo->banshee_blt.dstBaseAddr == 0xee5194) +// pclog("banshee_do_host_to_screen_blt: data=%08x host_data_count=%i src_stride_dest=%i host_data_size_dest=%i\n", data, voodoo->banshee_blt.host_data_count, voodoo->banshee_blt.src_stride_dest, voodoo->banshee_blt.host_data_size_dest); + + if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_BYTE_SWIZZLE) + data = (data >> 24) | ((data >> 8) & 0xff00) | ((data << 8) & 0xff0000) | (data << 24); + if (voodoo->banshee_blt.srcFormat & SRC_FORMAT_WORD_SWIZZLE) + data = (data >> 16) | (data << 16); + + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_PACKING_MASK) == SRC_FORMAT_PACKING_STRIDE) + { + int last_byte = (voodoo->banshee_blt.srcX & 3) + voodoo->banshee_blt.host_data_size_src; + + *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data; + voodoo->banshee_blt.host_data_count += 4; + 
if (voodoo->banshee_blt.host_data_count >= last_byte) + { +// pclog(" %i %i srcX=%i srcFormat=%08x\n", voodoo->banshee_blt.cur_y, voodoo->banshee_blt.dstSizeY, voodoo->banshee_blt.srcX); + if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY) + { + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) + do_screen_to_screen_stretch_line(voodoo, &voodoo->banshee_blt.host_data[(voodoo->banshee_blt.srcX >> 3) & 3], voodoo->banshee_blt.srcX & 7, NULL); + else + do_screen_to_screen_stretch_line(voodoo, &voodoo->banshee_blt.host_data[voodoo->banshee_blt.srcX & 3], 0, NULL); + voodoo->banshee_blt.cur_y++; + if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY) + end_command(voodoo); + } + + if ((voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) == SRC_FORMAT_COL_1_BPP) + voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) << 3; + else + voodoo->banshee_blt.srcX += (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK); + + voodoo->banshee_blt.host_data_count = 0; + } + } + else + { + *(uint32_t *)&voodoo->banshee_blt.host_data[voodoo->banshee_blt.host_data_count] = data; + voodoo->banshee_blt.host_data_count += 4; + while (voodoo->banshee_blt.host_data_count >= voodoo->banshee_blt.src_stride_src) + { + voodoo->banshee_blt.host_data_count -= voodoo->banshee_blt.src_stride_src; + +// pclog(" %i %i\n", voodoo->banshee_blt.cur_y, voodoo->banshee_blt.dstSizeY); + if (voodoo->banshee_blt.cur_y < voodoo->banshee_blt.dstSizeY) + { + do_screen_to_screen_stretch_line(voodoo, voodoo->banshee_blt.host_data, 0, NULL); + voodoo->banshee_blt.cur_y++; + if (voodoo->banshee_blt.cur_y == voodoo->banshee_blt.dstSizeY) + end_command(voodoo); + } + + if (voodoo->banshee_blt.host_data_count) + { +// pclog(" remaining=%i\n", voodoo->banshee_blt.host_data_count); + *(uint32_t *)&voodoo->banshee_blt.host_data[0] = data >> (4-voodoo->banshee_blt.host_data_count)*8; + } + } + } +} + +static void step_line(voodoo_t 
*voodoo)
+{
+        /*Advance the line stipple position by one pixel: line_pix_pos counts up
+          to line_rep_cnt, and each time it wraps line_bit_pos moves to the next
+          stipple bit, wrapping to 0 after line_bit_mask_size.*/
+        if (voodoo->banshee_blt.line_pix_pos == voodoo->banshee_blt.line_rep_cnt)
+        {
+                voodoo->banshee_blt.line_pix_pos = 0;
+                if (voodoo->banshee_blt.line_bit_pos == voodoo->banshee_blt.line_bit_mask_size)
+                        voodoo->banshee_blt.line_bit_pos = 0;
+                else
+                        voodoo->banshee_blt.line_bit_pos++;
+        }
+        else
+                voodoo->banshee_blt.line_pix_pos++;
+}
+
+/*Plot the line pixel at (x, y) if it lies inside 'clip' and is not made
+  transparent by the stipple. The current stipple bit selects colorFore (set)
+  or colorBack (clear) as the pattern operand; with COMMAND_TRANS_MONO a clear
+  bit suppresses the pixel altogether.*/
+static void banshee_line_pixel(voodoo_t *voodoo, clip_t *clip, int x, int y, uint8_t rop, uint32_t stipple)
+{
+        /*Shift the stipple down rather than shifting 1 up: 1 << 31 overflows a signed int*/
+        int mask = (stipple >> voodoo->banshee_blt.line_bit_pos) & 1;
+        int pattern_trans = (voodoo->banshee_blt.command & COMMAND_TRANS_MONO) ? mask : 1;
+
+        if (y >= clip->y_min && y < clip->y_max && x >= clip->x_min && x < clip->x_max && pattern_trans)
+                PLOT_LINE(voodoo, x, y, rop, mask ? voodoo->banshee_blt.colorFore : voodoo->banshee_blt.colorBack, COLORKEY_32);
+}
+
+/*Draw a line from (srcX, srcY) towards (dstX, dstY) with an integer error
+  accumulator along the major axis. The end point itself is only plotted when
+  draw_last_pixel is non-zero. The stipple position is stepped once per pixel
+  walked, and on return srcX / srcY / srcXY hold the position the walk ended at.*/
+static void banshee_do_line(voodoo_t *voodoo, int draw_last_pixel)
+{
+        clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0];
+        uint8_t rop = voodoo->banshee_blt.command >> 24;
+        int dx = ABS(voodoo->banshee_blt.dstX - voodoo->banshee_blt.srcX);
+        int dy = ABS(voodoo->banshee_blt.dstY - voodoo->banshee_blt.srcY);
+        int x_inc = (voodoo->banshee_blt.dstX > voodoo->banshee_blt.srcX) ? 1 : -1;
+        int y_inc = (voodoo->banshee_blt.dstY > voodoo->banshee_blt.srcY) ? 1 : -1;
+        int x = voodoo->banshee_blt.srcX;
+        int y = voodoo->banshee_blt.srcY;
+        int error;
+        /*Without COMMAND_STIPPLE_LINE every stipple bit reads as set*/
+        uint32_t stipple = (voodoo->banshee_blt.command & COMMAND_STIPPLE_LINE) ?
+                        voodoo->banshee_blt.lineStipple : ~0;
+
+        if (dx > dy) /*X major*/
+        {
+                error = dx/2;
+                while (x != voodoo->banshee_blt.dstX)
+                {
+                        banshee_line_pixel(voodoo, clip, x, y, rop, stipple);
+
+                        error -= dy;
+                        if (error < 0)
+                        {
+                                error += dx;
+                                y += y_inc;
+                        }
+                        x += x_inc;
+                        step_line(voodoo);
+                }
+        }
+        else /*Y major*/
+        {
+                error = dy/2;
+                while (y != voodoo->banshee_blt.dstY)
+                {
+                        banshee_line_pixel(voodoo, clip, x, y, rop, stipple);
+
+                        error -= dx;
+                        if (error < 0)
+                        {
+                                error += dy;
+                                x += x_inc;
+                        }
+                        y += y_inc;
+                        step_line(voodoo);
+                }
+        }
+
+        if (draw_last_pixel)
+                banshee_line_pixel(voodoo, clip, x, y, rop, stipple);
+
+        /*y may be negative, so shift it as an unsigned value*/
+        voodoo->banshee_blt.srcXY = (x & 0xffff) | ((uint32_t)y << 16);
+        voodoo->banshee_blt.srcX = x;
+        voodoo->banshee_blt.srcY = y;
+}
+
+/*Start a polygon fill: both vertices of the left edge are set to
+  (srcX, srcY) and both vertices of the right edge to (dstX, dstY), and the
+  current span ends lx_cur / rx_cur start at srcX / dstX.*/
+static void banshee_polyfill_start(voodoo_t *voodoo)
+{
+        voodoo->banshee_blt.lx[0] = voodoo->banshee_blt.srcX;
+        voodoo->banshee_blt.ly[0] = voodoo->banshee_blt.srcY;
+        voodoo->banshee_blt.rx[0] = voodoo->banshee_blt.dstX;
+        voodoo->banshee_blt.ry[0] = voodoo->banshee_blt.dstY;
+        voodoo->banshee_blt.lx[1] = voodoo->banshee_blt.srcX;
+        voodoo->banshee_blt.ly[1] = voodoo->banshee_blt.srcY;
+        voodoo->banshee_blt.rx[1] = voodoo->banshee_blt.dstX;
+        voodoo->banshee_blt.ry[1] = voodoo->banshee_blt.dstY;
+        voodoo->banshee_blt.lx_cur = voodoo->banshee_blt.srcX;
+        voodoo->banshee_blt.rx_cur = voodoo->banshee_blt.dstX;
+}
+
+/*Continue a polygon fill with one more vertex.
+  'data' packs the vertex as two sign-extended 13-bit values: X in bits 0-12
+  and Y in bits 16-28. The vertex extends the left edge when the right edge
+  currently ends at or below the left one (ry[1] >= ly[1]), otherwise the right
+  edge. Spans from lx_cur to rx_cur are then filled with colorFore for every
+  row between the later of the two edge starts and the earlier of the two edge
+  ends, stepping each edge with its own error accumulator. Finally the edge(s)
+  that reached their end vertex are advanced to it.*/
+static void banshee_polyfill_continue(voodoo_t *voodoo, uint32_t data)
+{
+        clip_t *clip = &voodoo->banshee_blt.clip[(voodoo->banshee_blt.command & COMMAND_CLIP_SEL) ? 1 : 0];
+        uint8_t *pattern_mono = (uint8_t *)voodoo->banshee_blt.colorPattern;
+        int use_pattern_trans = (voodoo->banshee_blt.command & (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO)) ==
+                        (COMMAND_PATTERN_MONO | COMMAND_TRANS_MONO);
+        uint8_t rop = voodoo->banshee_blt.command >> 24;
+        int y = MAX(voodoo->banshee_blt.ly[0], voodoo->banshee_blt.ry[0]);
+        int y_end;
+
+        /*if r1.y>=l1.y, next vertex is left*/
+        if (voodoo->banshee_blt.ry[1] >= voodoo->banshee_blt.ly[1])
+        {
+                voodoo->banshee_blt.lx[1] = ((int32_t)(data << 19)) >> 19;
+                voodoo->banshee_blt.ly[1] = ((int32_t)(data << 3)) >> 19;
+                voodoo->banshee_blt.dx[0] = ABS(voodoo->banshee_blt.lx[1] - voodoo->banshee_blt.lx[0]);
+                voodoo->banshee_blt.dy[0] = ABS(voodoo->banshee_blt.ly[1] - voodoo->banshee_blt.ly[0]);
+                voodoo->banshee_blt.x_inc[0] = (voodoo->banshee_blt.lx[1] > voodoo->banshee_blt.lx[0]) ? 1 : -1;
+                voodoo->banshee_blt.error[0] = voodoo->banshee_blt.dy[0] / 2;
+        }
+        else
+        {
+                voodoo->banshee_blt.rx[1] = ((int32_t)(data << 19)) >> 19;
+                voodoo->banshee_blt.ry[1] = ((int32_t)(data << 3)) >> 19;
+                voodoo->banshee_blt.dx[1] = ABS(voodoo->banshee_blt.rx[1] - voodoo->banshee_blt.rx[0]);
+                voodoo->banshee_blt.dy[1] = ABS(voodoo->banshee_blt.ry[1] - voodoo->banshee_blt.ry[0]);
+                voodoo->banshee_blt.x_inc[1] = (voodoo->banshee_blt.rx[1] > voodoo->banshee_blt.rx[0]) ? 1 : -1;
+                voodoo->banshee_blt.error[1] = voodoo->banshee_blt.dy[1] / 2;
+        }
+
+        y_end = MIN(voodoo->banshee_blt.ly[1], voodoo->banshee_blt.ry[1]);
+        for (; y < y_end; y++)
+        {
+                /*Draw span from lx_cur to rx_cur*/
+                if (y >= clip->y_min && y < clip->y_max)
+                {
+                        int pat_y = (voodoo->banshee_blt.commandExtra & CMDEXTRA_FORCE_PAT_ROW0) ? 0 : (voodoo->banshee_blt.patoff_y + y);
+                        uint8_t pattern_mask = pattern_mono[pat_y & 7];
+                        int x;
+
+                        for (x = voodoo->banshee_blt.lx_cur; x < voodoo->banshee_blt.rx_cur; x++)
+                        {
+                                int pat_x = voodoo->banshee_blt.patoff_x + x;
+                                int pattern_trans = use_pattern_trans ? (pattern_mask & (1 << (7-(pat_x & 7)))) : 1;
+
+                                if (x >= clip->x_min && x < clip->x_max && pattern_trans)
+                                        PLOT(voodoo, x, y, pat_x, pat_y, pattern_mask, rop, voodoo->banshee_blt.colorFore, COLORKEY_32);
+                        }
+                }
+
+                /*Step the left edge, then the right edge, down one row*/
+                voodoo->banshee_blt.error[0] -= voodoo->banshee_blt.dx[0];
+                while (voodoo->banshee_blt.error[0] < 0)
+                {
+                        voodoo->banshee_blt.error[0] += voodoo->banshee_blt.dy[0];
+                        voodoo->banshee_blt.lx_cur += voodoo->banshee_blt.x_inc[0];
+                }
+                voodoo->banshee_blt.error[1] -= voodoo->banshee_blt.dx[1];
+                while (voodoo->banshee_blt.error[1] < 0)
+                {
+                        voodoo->banshee_blt.error[1] += voodoo->banshee_blt.dy[1];
+                        voodoo->banshee_blt.rx_cur += voodoo->banshee_blt.x_inc[1];
+                }
+        }
+
+        if (voodoo->banshee_blt.ry[1] == voodoo->banshee_blt.ly[1])
+        {
+                /*Both edges end on the same row: advance both*/
+                voodoo->banshee_blt.lx[0] = voodoo->banshee_blt.lx[1];
+                voodoo->banshee_blt.ly[0] = voodoo->banshee_blt.ly[1];
+                voodoo->banshee_blt.rx[0] = voodoo->banshee_blt.rx[1];
+                voodoo->banshee_blt.ry[0] = voodoo->banshee_blt.ry[1];
+        }
+        else if (voodoo->banshee_blt.ry[1] >= voodoo->banshee_blt.ly[1])
+        {
+                voodoo->banshee_blt.lx[0] = voodoo->banshee_blt.lx[1];
+                voodoo->banshee_blt.ly[0] = voodoo->banshee_blt.ly[1];
+        }
+        else
+        {
+                voodoo->banshee_blt.rx[0] = voodoo->banshee_blt.rx[1];
+                voodoo->banshee_blt.ry[0] = voodoo->banshee_blt.ry[1];
+        }
+}
+
+static void banshee_do_2d_blit(voodoo_t *voodoo, int count, uint32_t data)
+{
+        switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK)
+        {
+                case COMMAND_CMD_NOP:
+                break;
+
+                case COMMAND_CMD_SCREEN_TO_SCREEN_BLT:
+                banshee_do_screen_to_screen_blt(voodoo);
+                break;
+
+                case COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT:
+                banshee_do_screen_to_screen_stretch_blt(voodoo);
+                break;
+
+                case COMMAND_CMD_HOST_TO_SCREEN_BLT:
+                banshee_do_host_to_screen_blt(voodoo, count, data);
+                break;
+
+                case COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT:
+                banshee_do_host_to_screen_stretch_blt(voodoo, count, data);
+                break;
+
+                case COMMAND_CMD_RECTFILL:
+                banshee_do_rectfill(voodoo);
+                break;
+
+                case 
COMMAND_CMD_LINE: + banshee_do_line(voodoo, 1); + break; + + case COMMAND_CMD_POLYLINE: + banshee_do_line(voodoo, 0); + break; + +#ifndef RELEASE_BUILD + default: + fatal("banshee_do_2d_blit: unknown command=%08x\n", voodoo->banshee_blt.command); +#endif + } +} + +void voodoo_2d_reg_writel(voodoo_t *voodoo, uint32_t addr, uint32_t val) +{ +// /*if ((addr & 0x1fc) != 0x80) */pclog("2D reg write %03x %08x\n", addr & 0x1fc, val); + switch (addr & 0x1fc) + { + case 0x08: + voodoo->banshee_blt.clip0Min = val; + voodoo->banshee_blt.clip[0].x_min = val & 0xfff; + voodoo->banshee_blt.clip[0].y_min = (val >> 16) & 0xfff; + break; + case 0x0c: + voodoo->banshee_blt.clip0Max = val; + voodoo->banshee_blt.clip[0].x_max = val & 0xfff; + voodoo->banshee_blt.clip[0].y_max = (val >> 16) & 0xfff; + break; + case 0x10: + voodoo->banshee_blt.dstBaseAddr = val & 0xffffff; + voodoo->banshee_blt.dstBaseAddr_tiled = val & 0x80000000; + if (voodoo->banshee_blt.dstBaseAddr_tiled) + voodoo->banshee_blt.dst_stride = (voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK) * 128*32; + else + voodoo->banshee_blt.dst_stride = voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK; +// pclog("dstBaseAddr=%08x\n", val); + break; + case 0x14: + voodoo->banshee_blt.dstFormat = val; + if (voodoo->banshee_blt.dstBaseAddr_tiled) + voodoo->banshee_blt.dst_stride = (voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK) * 128*32; + else + voodoo->banshee_blt.dst_stride = voodoo->banshee_blt.dstFormat & DST_FORMAT_STRIDE_MASK; +// pclog("dstFormat=%08x\n", val); + break; + + case 0x18: + voodoo->banshee_blt.srcColorkeyMin = val & 0xffffff; + break; + case 0x1c: + voodoo->banshee_blt.srcColorkeyMax = val & 0xffffff; + break; + case 0x20: + voodoo->banshee_blt.dstColorkeyMin = val & 0xffffff; + break; + case 0x24: + voodoo->banshee_blt.dstColorkeyMax = val & 0xffffff; + break; + + case 0x28: + voodoo->banshee_blt.bresError0 = val; + voodoo->banshee_blt.bres_error_0 = val & 0xffff; + break; + case 0x2c: + 
voodoo->banshee_blt.bresError1 = val; + voodoo->banshee_blt.bres_error_1 = val & 0xffff; + break; + + case 0x30: + voodoo->banshee_blt.rop = val; + voodoo->banshee_blt.rops[1] = val & 0xff; + voodoo->banshee_blt.rops[2] = (val >> 8) & 0xff; + voodoo->banshee_blt.rops[3] = (val >> 16) & 0xff; +// pclog("rop=%08x\n", val); + break; + case 0x34: + voodoo->banshee_blt.srcBaseAddr = val & 0xffffff; + voodoo->banshee_blt.srcBaseAddr_tiled = val & 0x80000000; + if (voodoo->banshee_blt.srcBaseAddr_tiled) + voodoo->banshee_blt.src_stride = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) * 128*32; + else + voodoo->banshee_blt.src_stride = voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK; + update_src_stride(voodoo); +// pclog("srcBaseAddr=%08x\n", val); + break; + case 0x38: + voodoo->banshee_blt.commandExtra = val; +// pclog("commandExtra=%08x\n", val); + break; + case 0x3c: + voodoo->banshee_blt.lineStipple = val; + break; + case 0x40: + voodoo->banshee_blt.lineStyle = val; + voodoo->banshee_blt.line_rep_cnt = val & 0xff; + voodoo->banshee_blt.line_bit_mask_size = (val >> 8) & 0x1f; + voodoo->banshee_blt.line_pix_pos = (val >> 16) & 0xff; + voodoo->banshee_blt.line_bit_pos = (val >> 24) & 0x1f; + break; + case 0x44: + voodoo->banshee_blt.colorPattern[0] = val; +// pclog("colorPattern0=%08x\n", val); + voodoo->banshee_blt.colorPattern24[0] = val & 0xffffff; + voodoo->banshee_blt.colorPattern24[1] = (voodoo->banshee_blt.colorPattern24[1] & 0xffff00) | (val >> 24); + voodoo->banshee_blt.colorPattern16[0] = val & 0xffff; + voodoo->banshee_blt.colorPattern16[1] = (val >> 16) & 0xffff; + voodoo->banshee_blt.colorPattern8[0] = val & 0xff; + voodoo->banshee_blt.colorPattern8[1] = (val >> 8) & 0xff; + voodoo->banshee_blt.colorPattern8[2] = (val >> 16) & 0xff; + voodoo->banshee_blt.colorPattern8[3] = (val >> 24) & 0xff; + break; + case 0x48: + voodoo->banshee_blt.colorPattern[1] = val; +// pclog("colorPattern1=%08x\n", val); + voodoo->banshee_blt.colorPattern24[1] = 
(voodoo->banshee_blt.colorPattern24[1] & 0xff) | ((val & 0xffff) << 8); + voodoo->banshee_blt.colorPattern24[2] = (voodoo->banshee_blt.colorPattern24[2] & 0xff0000) | (val >> 16); + voodoo->banshee_blt.colorPattern16[2] = val & 0xffff; + voodoo->banshee_blt.colorPattern16[3] = (val >> 16) & 0xffff; + voodoo->banshee_blt.colorPattern8[4] = val & 0xff; + voodoo->banshee_blt.colorPattern8[5] = (val >> 8) & 0xff; + voodoo->banshee_blt.colorPattern8[6] = (val >> 16) & 0xff; + voodoo->banshee_blt.colorPattern8[7] = (val >> 24) & 0xff; + break; + case 0x4c: + voodoo->banshee_blt.clip1Min = val; + voodoo->banshee_blt.clip[1].x_min = val & 0xfff; + voodoo->banshee_blt.clip[1].y_min = (val >> 16) & 0xfff; + break; + case 0x50: + voodoo->banshee_blt.clip1Max = val; + voodoo->banshee_blt.clip[1].x_max = val & 0xfff; + voodoo->banshee_blt.clip[1].y_max = (val >> 16) & 0xfff; + break; + case 0x54: + voodoo->banshee_blt.srcFormat = val; + if (voodoo->banshee_blt.srcBaseAddr_tiled) + voodoo->banshee_blt.src_stride = (voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK) * 128*32; + else + voodoo->banshee_blt.src_stride = voodoo->banshee_blt.srcFormat & SRC_FORMAT_STRIDE_MASK; + update_src_stride(voodoo); + switch (voodoo->banshee_blt.srcFormat & SRC_FORMAT_COL_MASK) + { + case SRC_FORMAT_COL_1_BPP: + voodoo->banshee_blt.src_bpp = 1; + break; + case SRC_FORMAT_COL_8_BPP: + voodoo->banshee_blt.src_bpp = 8; + break; + case SRC_FORMAT_COL_24_BPP: + voodoo->banshee_blt.src_bpp = 24; + break; + case SRC_FORMAT_COL_32_BPP: + voodoo->banshee_blt.src_bpp = 32; + break; + case SRC_FORMAT_COL_16_BPP: default: + voodoo->banshee_blt.src_bpp = 16; + break; + } +// pclog("srcFormat=%08x\n", val); + break; + case 0x58: + voodoo->banshee_blt.srcSize = val; + voodoo->banshee_blt.srcSizeX = voodoo->banshee_blt.srcSize & 0x1fff; + voodoo->banshee_blt.srcSizeY = (voodoo->banshee_blt.srcSize >> 16) & 0x1fff; + update_src_stride(voodoo); +// pclog("srcSize=%08x\n", val); + break; + case 0x5c: + 
voodoo->banshee_blt.srcXY = val; + voodoo->banshee_blt.srcX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.srcY = ((int32_t)(val << 3)) >> 19; + update_src_stride(voodoo); +// pclog("srcXY=%08x\n", val); + break; + case 0x60: + voodoo->banshee_blt.colorBack = val; + break; + case 0x64: + voodoo->banshee_blt.colorFore = val; + break; + case 0x68: + voodoo->banshee_blt.dstSize = val; + voodoo->banshee_blt.dstSizeX = voodoo->banshee_blt.dstSize & 0x1fff; + voodoo->banshee_blt.dstSizeY = (voodoo->banshee_blt.dstSize >> 16) & 0x1fff; + update_src_stride(voodoo); +// pclog("dstSize=%08x\n", val); + break; + case 0x6c: + voodoo->banshee_blt.dstXY = val; + voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19; +// pclog("dstXY=%08x\n", val); + break; + case 0x70: + voodoo_wait_for_render_thread_idle(voodoo); + voodoo->banshee_blt.command = val; + voodoo->banshee_blt.rops[0] = val >> 24; +// pclog("command=%x %08x\n", voodoo->banshee_blt.command & COMMAND_CMD_MASK, val); + voodoo->banshee_blt.patoff_x = (val & COMMAND_PATOFF_X_MASK) >> COMMAND_PATOFF_X_SHIFT; + voodoo->banshee_blt.patoff_y = (val & COMMAND_PATOFF_Y_MASK) >> COMMAND_PATOFF_Y_SHIFT; + voodoo->banshee_blt.cur_x = 0; + voodoo->banshee_blt.cur_y = 0; + voodoo->banshee_blt.dstX = ((int32_t)(voodoo->banshee_blt.dstXY << 19)) >> 19; + voodoo->banshee_blt.dstY = ((int32_t)(voodoo->banshee_blt.dstXY << 3)) >> 19; + voodoo->banshee_blt.srcX = ((int32_t)(voodoo->banshee_blt.srcXY << 19)) >> 19; + voodoo->banshee_blt.srcY = ((int32_t)(voodoo->banshee_blt.srcXY << 3)) >> 19; + voodoo->banshee_blt.old_srcX = voodoo->banshee_blt.srcX; + voodoo->banshee_blt.host_data_remainder = 0; + voodoo->banshee_blt.host_data_count = 0; + switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK) + { +/* case COMMAND_CMD_SCREEN_TO_SCREEN_STRETCH_BLT: + if (voodoo->banshee_blt.bresError0 & BRES_ERROR_USE) + voodoo->banshee_blt.bres_error_0 = 
(int32_t)(int16_t)(voodoo->banshee_blt.bresError0 & BRES_ERROR_MASK); + else + voodoo->banshee_blt.bres_error_0 = voodoo->banshee_blt.dstSizeY / 2; + if (voodoo->banshee_blt.bresError1 & BRES_ERROR_USE) + voodoo->banshee_blt.bres_error_1 = (int32_t)(int16_t)(voodoo->banshee_blt.bresError1 & BRES_ERROR_MASK); + else + voodoo->banshee_blt.bres_error_1 = voodoo->banshee_blt.dstSizeX / 2; + + if (val & COMMAND_INITIATE) + banshee_do_2d_blit(voodoo, -1, 0); + break;*/ + + case COMMAND_CMD_POLYFILL: + if (val & COMMAND_INITIATE) + { + voodoo->banshee_blt.dstXY = voodoo->banshee_blt.srcXY; + voodoo->banshee_blt.dstX = voodoo->banshee_blt.srcX; + voodoo->banshee_blt.dstY = voodoo->banshee_blt.srcY; + } + banshee_polyfill_start(voodoo); + break; + + default: + if (val & COMMAND_INITIATE) + { + banshee_do_2d_blit(voodoo, -1, 0); + // fatal("Initiate command!\n"); + } + break; + } + break; + + case 0x80: case 0x84: case 0x88: case 0x8c: + case 0x90: case 0x94: case 0x98: case 0x9c: + case 0xa0: case 0xa4: case 0xa8: case 0xac: + case 0xb0: case 0xb4: case 0xb8: case 0xbc: + case 0xc0: case 0xc4: case 0xc8: case 0xcc: + case 0xd0: case 0xd4: case 0xd8: case 0xdc: + case 0xe0: case 0xe4: case 0xe8: case 0xec: + case 0xf0: case 0xf4: case 0xf8: case 0xfc: +// pclog("launch %08x %08x %08x %08x\n", voodoo->banshee_blt.command, voodoo->banshee_blt.commandExtra, voodoo->banshee_blt.srcColorkeyMin, voodoo->banshee_blt.srcColorkeyMax); + switch (voodoo->banshee_blt.command & COMMAND_CMD_MASK) + { + case COMMAND_CMD_SCREEN_TO_SCREEN_BLT: + voodoo->banshee_blt.srcXY = val; + voodoo->banshee_blt.srcX = ((int32_t)(val << 19)) >> 19; + voodoo->banshee_blt.srcY = ((int32_t)(val << 3)) >> 19; + banshee_do_screen_to_screen_blt(voodoo); + break; + + case COMMAND_CMD_HOST_TO_SCREEN_BLT: + banshee_do_2d_blit(voodoo, 32, val); + break; + + case COMMAND_CMD_HOST_TO_SCREEN_STRETCH_BLT: + banshee_do_2d_blit(voodoo, 32, val); + break; + + case COMMAND_CMD_RECTFILL: + voodoo->banshee_blt.dstXY = val; 
+			voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19;
+			voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19;
+			banshee_do_rectfill(voodoo);
+			break;
+
+			case COMMAND_CMD_LINE:
+			voodoo->banshee_blt.dstXY = val;
+			voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19;
+			voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19;
+			banshee_do_line(voodoo, 1);
+			break;
+
+			case COMMAND_CMD_POLYLINE:
+			voodoo->banshee_blt.dstXY = val;
+			voodoo->banshee_blt.dstX = ((int32_t)(val << 19)) >> 19;
+			voodoo->banshee_blt.dstY = ((int32_t)(val << 3)) >> 19;
+			banshee_do_line(voodoo, 0);
+			break;
+
+			case COMMAND_CMD_POLYFILL:
+			banshee_polyfill_continue(voodoo, val);
+			break;
+
+#ifndef RELEASE_BUILD
+			default:
+			fatal("launch area write, command=%08x\n", voodoo->banshee_blt.command);
+#endif
+		}
+		break;
+
+		case 0x100: case 0x104: case 0x108: case 0x10c:
+		case 0x110: case 0x114: case 0x118: case 0x11c:
+		case 0x120: case 0x124: case 0x128: case 0x12c:
+		case 0x130: case 0x134: case 0x138: case 0x13c:
+		case 0x140: case 0x144: case 0x148: case 0x14c:
+		case 0x150: case 0x154: case 0x158: case 0x15c:
+		case 0x160: case 0x164: case 0x168: case 0x16c:
+		case 0x170: case 0x174: case 0x178: case 0x17c:
+		case 0x180: case 0x184: case 0x188: case 0x18c:
+		case 0x190: case 0x194: case 0x198: case 0x19c:
+		case 0x1a0: case 0x1a4: case 0x1a8: case 0x1ac:
+		case 0x1b0: case 0x1b4: case 0x1b8: case 0x1bc:
+		case 0x1c0: case 0x1c4: case 0x1c8: case 0x1cc:
+		case 0x1d0: case 0x1d4: case 0x1d8: case 0x1dc:
+		case 0x1e0: case 0x1e4: case 0x1e8: case 0x1ec:
+		case 0x1f0: case 0x1f4: case 0x1f8: case 0x1fc:
+		voodoo->banshee_blt.colorPattern[(addr >> 2) & 63] = val;	/*raw 32bpp view of the pattern*/
+		if ((addr & 0x1fc) < 0x1c0)
+		{
+			int base_addr = (addr & 0xfc) / 0xc;	/*group of three dwords = four packed 24bpp pixels*/
+			int src = base_addr * 3;
+			int col24 = base_addr * 4;
+
+			/*Re-unpack the whole group with shifts on the stored dwords (same layout as the 0x44/0x48 handlers); no misaligned or out-of-group loads*/
+			voodoo->banshee_blt.colorPattern24[col24] = voodoo->banshee_blt.colorPattern[src] & 0xffffff;
+			voodoo->banshee_blt.colorPattern24[col24 + 1] = ((voodoo->banshee_blt.colorPattern[src] >> 24) & 0xff) | ((voodoo->banshee_blt.colorPattern[src + 1] & 0xffff) << 8);
+			voodoo->banshee_blt.colorPattern24[col24 + 2] = ((voodoo->banshee_blt.colorPattern[src + 1] >> 16) & 0xffff) | ((voodoo->banshee_blt.colorPattern[src + 2] & 0xff) << 16);
+			voodoo->banshee_blt.colorPattern24[col24 + 3] = (voodoo->banshee_blt.colorPattern[src + 2] >> 8) & 0xffffff;
+		}
+		if ((addr & 0x1fc) < 0x180)
+		{
+			/*16bpp view : two pixels per dword*/
+			voodoo->banshee_blt.colorPattern16[(addr >> 1) & 62] = val & 0xffff;
+			voodoo->banshee_blt.colorPattern16[((addr >> 1) & 62) + 1] = (val >> 16) & 0xffff;
+		}
+		if ((addr & 0x1fc) < 0x140)
+		{
+			/*8bpp view : four pixels per dword*/
+			voodoo->banshee_blt.colorPattern8[addr & 60] = val & 0xff;
+			voodoo->banshee_blt.colorPattern8[(addr & 60) + 1] = (val >> 8) & 0xff;
+			voodoo->banshee_blt.colorPattern8[(addr & 60) + 2] = (val >> 16) & 0xff;
+			voodoo->banshee_blt.colorPattern8[(addr & 60) + 3] = (val >> 24) & 0xff;
+		}
+//		pclog("colorPattern%02x=%08x\n", (addr >> 2) & 63, val);
+		break;
+
+#ifndef RELEASE_BUILD
+		default:
+		fatal("Unknown 2D reg write %03x %08x\n", addr & 0x1fc, val);
+#endif
+	}
+}
diff --git a/pcem/vid_voodoo_banshee_blitter.h b/pcem/vid_voodoo_banshee_blitter.h
new file mode 100644
index 00000000..cc7a8f2e
--- /dev/null
+++ b/pcem/vid_voodoo_banshee_blitter.h
@@ -0,0 +1 @@
+void voodoo_2d_reg_writel(voodoo_t *voodoo, uint32_t addr, uint32_t val);
diff --git a/pcem/vid_voodoo_blitter.cpp b/pcem/vid_voodoo_blitter.cpp
new file mode 100644
index 00000000..01048f9a
--- /dev/null
+++ b/pcem/vid_voodoo_blitter.cpp
@@ -0,0 +1,507 @@
+#include
+#include
+#include "ibm.h"
+#include "device.h"
+#include "mem.h"
+#include "thread.h"
+#include "video.h"
+#include "vid_svga.h"
+#include "vid_voodoo.h"
+#include "vid_voodoo_common.h"
+#include "vid_voodoo_blitter.h"
+#include "vid_voodoo_dither.h"
+#include "vid_voodoo_regs.h"
+#include "vid_voodoo_render.h"
+
+enum
+{
+	BLIT_COMMAND_SCREEN_TO_SCREEN = 0,
+	BLIT_COMMAND_CPU_TO_SCREEN = 1,
+	BLIT_COMMAND_RECT_FILL = 2,
+	BLIT_COMMAND_SGRAM_FILL = 3
+};
+
+enum
+{
+	BLIT_SRC_1BPP = (0 << 3),
+	BLIT_SRC_1BPP_BYTE_PACKED = (1 << 3),
+	BLIT_SRC_16BPP = (2 << 3),
+	BLIT_SRC_24BPP = (3 << 3),
+	BLIT_SRC_24BPP_DITHER_2X2 =
(4 << 3),
+	BLIT_SRC_24BPP_DITHER_4X4 = (5 << 3)
+};
+
+enum
+{
+	BLIT_SRC_RGB_ARGB = (0 << 6),
+	BLIT_SRC_RGB_ABGR = (1 << 6),
+	BLIT_SRC_RGB_RGBA = (2 << 6),
+	BLIT_SRC_RGB_BGRA = (3 << 6)
+};
+
+enum
+{
+	BLIT_COMMAND_MASK = 7,
+	BLIT_SRC_FORMAT = (7 << 3),
+	BLIT_SRC_RGB_FORMAT = (3 << 6),
+	BLIT_SRC_CHROMA = (1 << 10),
+	BLIT_DST_CHROMA = (1 << 12),
+	BLIT_CLIPPING_ENABLED = (1 << 16)
+};
+
+enum
+{
+	BLIT_ROP_DST_PASS = (1 << 0),
+	BLIT_ROP_SRC_PASS = (1 << 1)
+};
+
+/*Apply one of the 16 two-operand raster ops to a 16-bit pixel, leaving the
+  result in dst_dat. Expands to a bare switch statement, so it must be used
+  as a statement and its arguments are evaluated more than once.*/
+#define MIX(src_dat, dst_dat, rop) \
+	switch (rop) \
+	{ \
+		case 0x0: dst_dat = 0; break; \
+		case 0x1: dst_dat = ~(src_dat | dst_dat); break; \
+		case 0x2: dst_dat = ~src_dat & dst_dat; break; \
+		case 0x3: dst_dat = ~src_dat; break; \
+		case 0x4: dst_dat = src_dat & ~dst_dat; break; \
+		case 0x5: dst_dat = ~dst_dat; break; \
+		case 0x6: dst_dat = src_dat ^ dst_dat; break; \
+		case 0x7: dst_dat = ~(src_dat & dst_dat); break; \
+		case 0x8: dst_dat = src_dat & dst_dat; break; \
+		case 0x9: dst_dat = ~(src_dat ^ dst_dat); break; \
+		case 0xa: dst_dat = dst_dat; break; \
+		case 0xb: dst_dat = ~src_dat | dst_dat; break; \
+		case 0xc: dst_dat = src_dat; break; \
+		case 0xd: dst_dat = src_dat | ~dst_dat; break; \
+		case 0xe: dst_dat = src_dat | dst_dat; break; \
+		case 0xf: dst_dat = 0xffff; break; \
+	}
+
+/*Start the blit described by the blt* registers.
+
+  Screen-to-screen copies, rectangle fills and SGRAM fills are carried out
+  here in full. A CPU-to-screen blit only latches its geometry into
+  voodoo->blt; the pixels are written as data words reach
+  voodoo_v2_blit_data(). bltSizeX/bltSizeY are signed : their sign selects
+  the direction and the loops run over ABS(size) + 1 pixels/lines. Any other
+  command value is fatal.*/
+void voodoo_v2_blit_start(voodoo_t *voodoo)
+{
+	uint64_t dat64;
+	int size_x = ABS(voodoo->bltSizeX), size_y = ABS(voodoo->bltSizeY);
+	int x_dir = (voodoo->bltSizeX > 0) ? 1 : -1;
+	int y_dir = (voodoo->bltSizeY > 0) ? 1 : -1;
+	int dst_x;
+	int src_y = voodoo->bltSrcY & 0x7ff, dst_y = voodoo->bltDstY & 0x7ff;
+	int src_stride = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcXYStride & 0x3f) * 32*2) : (voodoo->bltSrcXYStride & 0xff8);
+	int dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8);
+	uint32_t src_base_addr = (voodoo->bltCommand & BLTCMD_SRC_TILED) ? ((voodoo->bltSrcBaseAddr & 0x3ff) << 12) : (voodoo->bltSrcBaseAddr & 0x3ffff8);
+	uint32_t dst_base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8);
+	int x, y;
+
+/*	pclog("blit_start: command=%08x srcX=%i srcY=%i dstX=%i dstY=%i sizeX=%i sizeY=%i color=%04x,%04x\n",
+		voodoo->bltCommand, voodoo->bltSrcX, voodoo->bltSrcY, voodoo->bltDstX, voodoo->bltDstY, voodoo->bltSizeX, voodoo->bltSizeY, voodoo->bltColorFg, voodoo->bltColorBg);*/
+
+	/*The blitter writes fb_mem directly, so wait for the render thread to go idle first*/
+	voodoo_wait_for_render_thread_idle(voodoo);
+
+	switch (voodoo->bltCommand & BLIT_COMMAND_MASK)
+	{
+		case BLIT_COMMAND_SCREEN_TO_SCREEN:
+		for (y = 0; y <= size_y; y++)
+		{
+			uint16_t *src = (uint16_t *)&voodoo->fb_mem[src_base_addr + src_y*src_stride];
+			uint16_t *dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + dst_y*dst_stride];
+			int src_x = voodoo->bltSrcX, dst_x = voodoo->bltDstX;
+
+			for (x = 0; x <= size_x; x++)
+			{
+				uint16_t src_dat = src[src_x];
+				uint16_t dst_dat = dst[dst_x];
+				int rop = 0;
+
+				if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED)
+				{
+					if (dst_x < voodoo->bltClipLeft || dst_x >= voodoo->bltClipRight ||
+					    dst_y < voodoo->bltClipLowY || dst_y >= voodoo->bltClipHighY)
+						goto skip_pixel_blit;
+				}
+
+				/*The chroma tests select which of the four bltRop entries is applied*/
+				if (voodoo->bltCommand & BLIT_SRC_CHROMA)
+				{
+					int r = (src_dat >> 11);
+					int g = (src_dat >> 5) & 0x3f;
+					int b = src_dat & 0x1f;
+
+					if (r >= voodoo->bltSrcChromaMinR && r <= voodoo->bltSrcChromaMaxR &&
+					    g >= voodoo->bltSrcChromaMinG && g <= voodoo->bltSrcChromaMaxG &&
+					    b >= voodoo->bltSrcChromaMinB && b <= voodoo->bltSrcChromaMaxB)
+						rop |= BLIT_ROP_SRC_PASS;
+				}
+				if (voodoo->bltCommand & BLIT_DST_CHROMA)
+				{
+					int r = (dst_dat >> 11);
+					int g = (dst_dat >> 5) & 0x3f;
+					int b = dst_dat & 0x1f;
+
+					if (r >= voodoo->bltDstChromaMinR && r <= voodoo->bltDstChromaMaxR &&
+					    g >= voodoo->bltDstChromaMinG && g <= voodoo->bltDstChromaMaxG &&
+					    b >= voodoo->bltDstChromaMinB && b <= voodoo->bltDstChromaMaxB)
+						rop |= BLIT_ROP_DST_PASS;
+				}
+
+				MIX(src_dat, dst_dat, voodoo->bltRop[rop]);
+
+				dst[dst_x] = dst_dat;
+skip_pixel_blit:
+				src_x += x_dir;
+				dst_x += x_dir;
+			}
+
+			src_y += y_dir;
+			dst_y += y_dir;
+		}
+		break;
+
+		case BLIT_COMMAND_CPU_TO_SCREEN:
+		/*Only latch the geometry; voodoo_v2_blit_data() does the drawing*/
+		voodoo->blt.dst_x = voodoo->bltDstX;
+		voodoo->blt.dst_y = voodoo->bltDstY;
+		voodoo->blt.cur_x = 0;
+		voodoo->blt.size_x = size_x;
+		voodoo->blt.size_y = size_y;
+		voodoo->blt.x_dir = x_dir;
+		voodoo->blt.y_dir = y_dir;
+		voodoo->blt.dst_stride = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstXYStride & 0x3f) * 32*2) : (voodoo->bltDstXYStride & 0xff8);
+		break;
+
+		case BLIT_COMMAND_RECT_FILL:
+		for (y = 0; y <= size_y; y++)
+		{
+			uint16_t *dst;
+			int dst_x = voodoo->bltDstX;
+
+			if (SLI_ENABLED)
+			{
+				/*Each card owns alternate lines. Test the line being filled (dst_y);
+				  voodoo->blt.dst_y belongs to the CPU-to-screen state and is never
+				  updated by this loop, so testing it skipped every line or none*/
+				if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (dst_y & 1)) ||
+				    ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(dst_y & 1)))
+					goto skip_line_fill;
+				dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + (dst_y >> 1) * dst_stride];
+			}
+			else
+				dst = (uint16_t *)&voodoo->fb_mem[dst_base_addr + dst_y*dst_stride];
+
+			for (x = 0; x <= size_x; x++)
+			{
+				if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED)
+				{
+					if (dst_x < voodoo->bltClipLeft || dst_x >= voodoo->bltClipRight ||
+					    dst_y < voodoo->bltClipLowY || dst_y >= voodoo->bltClipHighY)
+						goto skip_pixel_fill;
+				}
+
+				dst[dst_x] = voodoo->bltColorFg;
+skip_pixel_fill:
+				dst_x += x_dir;
+			}
+skip_line_fill:
+			dst_y += y_dir;
+		}
+		break;
+
+		case BLIT_COMMAND_SGRAM_FILL:
+		/*32x32 tiles - 2kb*/
+		dst_y = voodoo->bltDstY & 0x3ff;
+		size_x = voodoo->bltSizeX & 0x1ff; //512*8 = 4kb
+		size_y = voodoo->bltSizeY & 0x3ff;
+
+		/*Replicate the 16-bit fill colour into all four lanes of a 64-bit word*/
+		dat64 = voodoo->bltColorFg | ((uint64_t)voodoo->bltColorFg << 16) |
+			((uint64_t)voodoo->bltColorFg << 32) | ((uint64_t)voodoo->bltColorFg << 48);
+
+		for (y = 0; y <= size_y; y++)
+		{
+			uint64_t *dst;
+
+			/*This may be wrong*/
+			if (!y)
+			{
+				dst_x = voodoo->bltDstX & 0x1ff;
+				size_x = 511 - dst_x;
+			}
+			else if (y < size_y)
+			{
+
dst_x = 0;
+				size_x = 511;
+			}
+			else
+			{
+				dst_x = 0;
+				size_x = voodoo->bltSizeX & 0x1ff;
+			}
+
+			dst = (uint64_t *)&voodoo->fb_mem[(dst_y*512*8 + dst_x*8) & voodoo->fb_mask];
+
+			for (x = 0; x <= size_x; x++)
+				dst[x] = dat64;
+
+			dst_y++;
+		}
+		break;
+
+		default:
+		fatal("bad blit command %08x\n", voodoo->bltCommand);
+	}
+}
+
+/*Consume one 32-bit word of CPU-to-screen blit data.
+
+  Ignored unless the current command is BLIT_COMMAND_CPU_TO_SCREEN. The word
+  is unpacked according to the source format in bltCommand (32 1bpp pixels,
+  two 16bpp pixels or one 24bpp pixel), each pixel is converted to RGB565,
+  clipped, chroma tested and combined with the destination through MIX().
+  Unused bits are dropped when the end of a line is reached; the blit then
+  advances to the next line until size_y lines have been written.*/
+void voodoo_v2_blit_data(voodoo_t *voodoo, uint32_t data)
+{
+	int src_bits = 32;
+	uint32_t base_addr = (voodoo->bltCommand & BLTCMD_DST_TILED) ? ((voodoo->bltDstBaseAddr & 0x3ff) << 12) : (voodoo->bltDstBaseAddr & 0x3ffff8);
+	uint32_t addr;
+	uint16_t *dst;
+
+	if ((voodoo->bltCommand & BLIT_COMMAND_MASK) != BLIT_COMMAND_CPU_TO_SCREEN)
+		return;
+
+	if (SLI_ENABLED)
+	{
+		/*Each card stores only every other line*/
+		addr = base_addr + (voodoo->blt.dst_y >> 1) * voodoo->blt.dst_stride;
+		dst = (uint16_t *)&voodoo->fb_mem[addr];
+	}
+	else
+	{
+		addr = base_addr + voodoo->blt.dst_y*voodoo->blt.dst_stride;
+		dst = (uint16_t *)&voodoo->fb_mem[addr];
+	}
+
+	/*Flag the touched display line as dirty when the write lands at or after front_offset*/
+	if (addr >= voodoo->front_offset && voodoo->row_width)
+	{
+		int y = (addr - voodoo->front_offset) / voodoo->row_width;
+		if (y < voodoo->v_disp)
+			voodoo->dirty_line[y] = 2;
+	}
+
+	while (src_bits && voodoo->blt.cur_x <= voodoo->blt.size_x)
+	{
+		int r = 0, g = 0, b = 0;
+		uint16_t src_dat = 0, dst_dat;
+		int x = (voodoo->blt.x_dir > 0) ? (voodoo->blt.dst_x + voodoo->blt.cur_x) : (voodoo->blt.dst_x - voodoo->blt.cur_x);
+		int rop = 0;
+
+		switch (voodoo->bltCommand & BLIT_SRC_FORMAT)
+		{
+			case BLIT_SRC_1BPP: case BLIT_SRC_1BPP_BYTE_PACKED:
+			src_dat = (data & 1) ? voodoo->bltColorFg : voodoo->bltColorBg;
+			data >>= 1;
+			src_bits--;
+			break;
+			case BLIT_SRC_16BPP:
+			switch (voodoo->bltCommand & BLIT_SRC_RGB_FORMAT)
+			{
+				case BLIT_SRC_RGB_ARGB: case BLIT_SRC_RGB_RGBA:
+				src_dat = data & 0xffff;
+				break;
+				case BLIT_SRC_RGB_ABGR: case BLIT_SRC_RGB_BGRA:
+				/*Swap the 5-bit red and blue fields of a 5:6:5 pixel and keep all six
+				  green bits. The previous masks (0x07c0 / 0x0038) did not match the
+				  5:6:5 field layout used everywhere else in this function*/
+				src_dat = ((data & 0xf800) >> 11) | (data & 0x07e0) | ((data & 0x001f) << 11);
+				break;
+			}
+			data >>= 16;
+			src_bits -= 16;
+			break;
+			case BLIT_SRC_24BPP: case BLIT_SRC_24BPP_DITHER_2X2: case BLIT_SRC_24BPP_DITHER_4X4:
+			switch (voodoo->bltCommand & BLIT_SRC_RGB_FORMAT)
+			{
+				case BLIT_SRC_RGB_ARGB:
+				r = (data >> 16) & 0xff;
+				g = (data >> 8) & 0xff;
+				b = data & 0xff;
+				break;
+				case BLIT_SRC_RGB_ABGR:
+				r = data & 0xff;
+				g = (data >> 8) & 0xff;
+				b = (data >> 16) & 0xff;
+				break;
+				case BLIT_SRC_RGB_RGBA:
+				r = (data >> 24) & 0xff;
+				g = (data >> 16) & 0xff;
+				b = (data >> 8) & 0xff;
+				break;
+				case BLIT_SRC_RGB_BGRA:
+				r = (data >> 8) & 0xff;
+				g = (data >> 16) & 0xff;
+				b = (data >> 24) & 0xff;
+				break;
+			}
+			switch (voodoo->bltCommand & BLIT_SRC_FORMAT)
+			{
+				case BLIT_SRC_24BPP:
+				src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8);
+				break;
+				case BLIT_SRC_24BPP_DITHER_2X2:
+				r = dither_rb2x2[r][voodoo->blt.dst_y & 1][x & 1];
+				g = dither_g2x2[g][voodoo->blt.dst_y & 1][x & 1];
+				b = dither_rb2x2[b][voodoo->blt.dst_y & 1][x & 1];
+				src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8);
+				break;
+				case BLIT_SRC_24BPP_DITHER_4X4:
+				r = dither_rb[r][voodoo->blt.dst_y & 3][x & 3];
+				g = dither_g[g][voodoo->blt.dst_y & 3][x & 3];
+				b = dither_rb[b][voodoo->blt.dst_y & 3][x & 3];
+				src_dat = (b >> 3) | ((g & 0xfc) << 3) | ((r & 0xf8) << 8);
+				break;
+			}
+			/*One 24bpp pixel uses up the whole word*/
+			src_bits = 0;
+			break;
+		}
+
+		if (SLI_ENABLED)
+		{
+			/*Skip lines owned by the other card; the pixel is still consumed*/
+			if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (voodoo->blt.dst_y & 1)) ||
+			    ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(voodoo->blt.dst_y & 1)))
+				goto skip_pixel;
+		}
+
+		if (voodoo->bltCommand & BLIT_CLIPPING_ENABLED)
+		{
+			if (x < voodoo->bltClipLeft || x >= voodoo->bltClipRight ||
+			    voodoo->blt.dst_y < voodoo->bltClipLowY || voodoo->blt.dst_y >= voodoo->bltClipHighY)
+				goto skip_pixel;
+		}
+
+		dst_dat = dst[x];
+
+		/*The chroma tests select which of the four bltRop entries is applied*/
+		if (voodoo->bltCommand & BLIT_SRC_CHROMA)
+		{
+			r = (src_dat >> 11);
+			g = (src_dat >> 5) & 0x3f;
+			b = src_dat & 0x1f;
+
+			if (r >= voodoo->bltSrcChromaMinR && r <= voodoo->bltSrcChromaMaxR &&
+			    g >= voodoo->bltSrcChromaMinG && g <= voodoo->bltSrcChromaMaxG &&
+			    b >= voodoo->bltSrcChromaMinB && b <= voodoo->bltSrcChromaMaxB)
+				rop |= BLIT_ROP_SRC_PASS;
+		}
+		if (voodoo->bltCommand & BLIT_DST_CHROMA)
+		{
+			r = (dst_dat >> 11);
+			g = (dst_dat >> 5) & 0x3f;
+			b = dst_dat & 0x1f;
+
+			if (r >= voodoo->bltDstChromaMinR && r <= voodoo->bltDstChromaMaxR &&
+			    g >= voodoo->bltDstChromaMinG && g <= voodoo->bltDstChromaMaxG &&
+			    b >= voodoo->bltDstChromaMinB && b <= voodoo->bltDstChromaMaxB)
+				rop |= BLIT_ROP_DST_PASS;
+		}
+
+		MIX(src_dat, dst_dat, voodoo->bltRop[rop]);
+
+		dst[x] = dst_dat;
+
+skip_pixel:
+		voodoo->blt.cur_x++;
+	}
+
+	/*End of line : move on to the next one while lines remain*/
+	if (voodoo->blt.cur_x > voodoo->blt.size_x)
+	{
+		voodoo->blt.size_y--;
+		if (voodoo->blt.size_y >= 0)
+		{
+			voodoo->blt.cur_x = 0;
+			voodoo->blt.dst_y += voodoo->blt.y_dir;
+		}
+	}
+}
+
+
+void voodoo_fastfill(voodoo_t *voodoo, voodoo_params_t *params)
+{
+	int y;
+	int low_y, high_y;
+
+	if (params->fbzMode & (1 << 17))
+	{
+		high_y = voodoo->v_disp - params->clipLowY;
+		low_y = voodoo->v_disp - params->clipHighY;
+	}
+	else
+	{
+		low_y = params->clipLowY;
+		high_y = params->clipHighY;
+	}
+
+	if (params->fbzMode & FBZ_RGB_WMASK)
+	{
+		int r, g, b;
+		uint16_t col;
+
+		r = ((params->color1 >> 16) >> 3) & 0x1f;
+		g = ((params->color1 >> 8) >> 2) & 0x3f;
+		b = (params->color1 >> 3) & 0x1f;
+		col = b | (g << 5) | (r << 11);
+
+		if (SLI_ENABLED)
+		{
+			for (y = low_y; y < high_y; y += 2)
+			{
+				uint16_t *cbuf = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + (y >> 1) * voodoo->row_width) & voodoo->fb_mask];
+				int x;
+
+				for (x = params->clipLeft; x <
params->clipRight; x++) + cbuf[x] = col; + } + } + else + { + for (y = low_y; y < high_y; y++) + { + if (voodoo->col_tiled) + { + uint16_t *cbuf = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + (y >> 5) * voodoo->row_width + (y & 31) * 128) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + { + int x2 = (x & 63) | ((x >> 6) * 128*32/2); + cbuf[x2] = col; + } + } + else + { + uint16_t *cbuf = (uint16_t *)&voodoo->fb_mem[(params->draw_offset + y * voodoo->row_width) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + cbuf[x] = col; + } + } + } + } + if (params->fbzMode & FBZ_DEPTH_WMASK) + { + if (SLI_ENABLED) + { + for (y = low_y; y < high_y; y += 2) + { + uint16_t *abuf = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (y >> 1) * voodoo->row_width) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + abuf[x] = params->zaColor & 0xffff; + } + } + else + { + for (y = low_y; y < high_y; y++) + { + if (voodoo->aux_tiled) + { + uint16_t *abuf = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (y >> 5) * voodoo->aux_row_width + (y & 31) * 128) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + { + int x2 = (x & 63) | ((x >> 6) * 128*32/2); + abuf[x2] = params->zaColor & 0xffff; + } + } + else + { + uint16_t *abuf = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + y * voodoo->aux_row_width) & voodoo->fb_mask]; + int x; + + for (x = params->clipLeft; x < params->clipRight; x++) + abuf[x] = params->zaColor & 0xffff; + } + } + } + } +} diff --git a/pcem/vid_voodoo_blitter.h b/pcem/vid_voodoo_blitter.h new file mode 100644 index 00000000..8d315dfe --- /dev/null +++ b/pcem/vid_voodoo_blitter.h @@ -0,0 +1,3 @@ +void voodoo_v2_blit_start(voodoo_t *voodoo); +void voodoo_v2_blit_data(voodoo_t *voodoo, uint32_t data); +void voodoo_fastfill(voodoo_t *voodoo, voodoo_params_t *params); diff --git 
a/pcem/vid_voodoo_codegen_x86-64.h b/pcem/vid_voodoo_codegen_x86-64.h new file mode 100644 index 00000000..35ed9e57 --- /dev/null +++ b/pcem/vid_voodoo_codegen_x86-64.h @@ -0,0 +1,3467 @@ +/*Registers : + + alphaMode + fbzMode & 0x1f3fff + fbzColorPath +*/ + +#if defined(__linux__) || defined(__APPLE__) +#include +#include +#endif +#if WIN64 +#define BITMAP windows_BITMAP +#include +#undef BITMAP +#endif + +#include + +#define BLOCK_NUM 8 +#define BLOCK_MASK (BLOCK_NUM-1) +#define BLOCK_SIZE 8192 + +#define LOD_MASK (LOD_TMIRROR_S | LOD_TMIRROR_T) + +typedef struct voodoo_x86_data_t +{ + uint8_t code_block[BLOCK_SIZE]; + int xdir; + uint32_t alphaMode; + uint32_t fbzMode; + uint32_t fogMode; + uint32_t fbzColorPath; + uint32_t textureMode[2]; + uint32_t tLOD[2]; + uint32_t trexInit1; + int is_tiled; +} voodoo_x86_data_t; + +//static voodoo_x86_data_t voodoo_x86_data[2][BLOCK_NUM]; + +static int last_block[4] = {0, 0}; +static int next_block_to_write[4] = {0, 0}; + +#define addbyte(val) \ + do { \ + code_block[block_pos++] = val; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) + +#define addword(val) \ + do { \ + *(uint16_t *)&code_block[block_pos] = val; \ + block_pos += 2; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) + +#define addlong(val) \ + do { \ + *(uint32_t *)&code_block[block_pos] = val; \ + block_pos += 4; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) + +#define addquad(val) \ + do { \ + *(uint64_t *)&code_block[block_pos] = val; \ + block_pos += 8; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) + + +static __m128i xmm_01_w;// = 0x0001000100010001ull; +static __m128i xmm_ff_w;// = 0x00ff00ff00ff00ffull; +static __m128i xmm_ff_b;// = 0x00000000ffffffffull; + +static __m128i alookup[257], aminuslookup[256]; +static __m128i minus_254;// = 0xff02ff02ff02ff02ull; +static __m128i bilinear_lookup[256*2]; +static __m128i xmm_00_ff_w[2]; +static uint32_t i_00_ff_w[2] 
= {0, 0xff}; +/*Emit x86-64 code that fetches one texel from texture unit 'tmu' and leaves it in EAX. Bytes are appended to code_block starting at block_pos and the updated block_pos is returned. The emitted code addresses voodoo_state_t through RDI and voodoo_params_t through RSI, and relies on R9 = logtable, R10 = alookup (entry 0 is used as a zero source for CMOV), R15 = saved copy of RSI and XMM2 = 0. If textureMode bit 0 is set, S and T are multiplied by (1 << 48) / W and a LOD is computed and clamped to lod_min/lod_max; otherwise S and T are just shifted right by 28 and lod_min is used. The texel read itself is only emitted when FBZCP_TEXTURE_ENABLED is set: bilinear when voodoo->bilinear_enabled and (textureMode & 6), point sampled otherwise. The short jump distances are hard-coded byte counts of the instructions they skip, so they must be kept in step with any change to the emitted bytes.*/ +static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu) +{ + if (params->textureMode[tmu] & 1) + { + addbyte(0x48); /*MOV RBX, state->tmu0_s*/ + addbyte(0x8b); + addbyte(0x9f); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s)); + addbyte(0x48); /*MOV RAX, (1 << 48)*/ + addbyte(0xb8); + addquad(1ULL << 48); + addbyte(0x48); /*XOR RDX, RDX*/ + addbyte(0x31); + addbyte(0xd2); + addbyte(0x48); /*MOV RCX, state->tmu0_t*/ + addbyte(0x8b); + addbyte(0x8f); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t)); + addbyte(0x48); /*CMP state->tmu_w, 0*/ + addbyte(0x83); + addbyte(0xbf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w)); + addbyte(0); + addbyte(0x74); /*JZ + - skip the 7 byte IDIV when tmu_w is zero*/ + addbyte(7); + addbyte(0x48); /*IDIV state->tmu_w*/ + addbyte(0xf7); + addbyte(0xbf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w)); + addbyte(0x48); /*SAR RBX, 14*/ + addbyte(0xc1); + addbyte(0xfb); + addbyte(14); + addbyte(0x48); /*SAR RCX, 14*/ + addbyte(0xc1); + addbyte(0xf9); + addbyte(14); + addbyte(0x48); /*IMUL RBX, RAX*/ + addbyte(0x0f); + addbyte(0xaf); + addbyte(0xd8); + addbyte(0x48); /*IMUL RCX, RAX*/ + addbyte(0x0f); + addbyte(0xaf); + addbyte(0xc8); + addbyte(0x48); /*SAR RBX, 30*/ + addbyte(0xc1); + addbyte(0xfb); + addbyte(30); + addbyte(0x48); /*SAR RCX, 30*/ + addbyte(0xc1); + addbyte(0xf9); + addbyte(30); + addbyte(0x48); /*BSR RDX, RAX*/ + addbyte(0x0f); + addbyte(0xbd); + addbyte(0xd0); + addbyte(0x48); /*SHL RAX, 8*/ + addbyte(0xc1); + addbyte(0xe0); + addbyte(8); + addbyte(0x89); /*MOV state->tex_t, ECX*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0x89); /*MOV ECX, EDX*/ + addbyte(0xd1); + addbyte(0x83); /*SUB EDX, 19*/ + addbyte(0xea); + addbyte(19); + addbyte(0x48); /*SHR RAX, CL*/ + addbyte(0xd3); +
addbyte(0xe8); + addbyte(0xc1); /*SHL EDX, 8*/ + addbyte(0xe2); + addbyte(8); + addbyte(0x25); /*AND EAX, 0xff*/ + addlong(0xff); + addbyte(0x89); /*MOV state->tex_s, EBX*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x41); /*MOVZX EAX, R9(logtable)[RAX]*/ + addbyte(0x0f); + addbyte(0xb6); + addbyte(0x04); + addbyte(0x01); + addbyte(0x09); /*OR EAX, EDX*/ + addbyte(0xd0); + addbyte(0x03); /*ADD EAX, state->tmu[tmu].lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tmu[tmu].lod)); + addbyte(0x3b); /*CMP EAX, state->lod_min*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x0f); /*CMOVL EAX, state->lod_min*/ + addbyte(0x4c); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x3b); /*CMP EAX, state->lod_max*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_max[tmu])); + addbyte(0x0f); /*CMOVNL EAX, state->lod_max*/ + addbyte(0x4d); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_max[tmu])); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(8); + addbyte(0x89); /*MOV state->lod, EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + } + else + { + addbyte(0x48); /*MOV RAX, state->tmu0_s*/ + addbyte(0x8b); + addbyte(0x87); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s)); + addbyte(0x48); /*MOV RCX, state->tmu0_t*/ + addbyte(0x8b); + addbyte(0x8f); + addlong(tmu ?
offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t)); + addbyte(0x48); /*SHR RAX, 28*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(28); + addbyte(0x8b); /*MOV EBX, state->lod_min*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x48); /*SHR RCX, 28*/ + addbyte(0xc1); + addbyte(0xe9); + addbyte(28); + addbyte(0x48); /*MOV state->tex_s, RAX*/ + addbyte(0x89); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x48); /*MOV state->tex_t, RCX*/ + addbyte(0x89); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0x89); /*MOV state->lod, EBX*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod)); + } + /*state->tex_s, state->tex_t and state->lod are now set; emit the texel read (result in EAX)*/ + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if (voodoo->bilinear_enabled && (params->textureMode[tmu] & 6)) + { + addbyte(0xb2); /*MOV DL, 8*/ + addbyte(8); + addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xbd); /*MOV EBP, 1*/ + addlong(1); + addbyte(0x28); /*SUB DL, CL*/ + addbyte(0xca); +// addbyte(0x8a); /*MOV DL, params->tex_shift[RSI+ECX*4]*/ +// addbyte(0x94); +// addbyte(0x8e); +// addlong(offsetof(voodoo_params_t, tex_shift)); + addbyte(0xd3); /*SHL EBP, CL*/ + addbyte(0xe5); + addbyte(0x8b); /*MOV EAX, state->tex_s[RDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0xc1); /*SHL EBP, 3*/ + addbyte(0xe5); + addbyte(3); + addbyte(0x8b); /*MOV EBX, state->tex_t[RDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_t)); + if (params->tLOD[tmu] & LOD_TMIRROR_S) + { + addbyte(0xa9); /*TEST EAX, 0x1000*/ + addlong(0x1000); + addbyte(0x74); /*JZ +*/ + addbyte(2); + addbyte(0xf7); /*NOT EAX*/ + addbyte(0xd0); + } + if (params->tLOD[tmu] & LOD_TMIRROR_T) + { + addbyte(0xf7); /*TEST EBX, 0x1000*/ + addbyte(0xc3); + addlong(0x1000); + addbyte(0x74); /*JZ +*/ + addbyte(2); + addbyte(0xf7); /*NOT EBX*/ +
addbyte(0xd3); + } + addbyte(0x29); /*SUB EAX, EBP*/ + addbyte(0xe8); + addbyte(0x29); /*SUB EBX, EBP*/ + addbyte(0xeb); + addbyte(0xd3); /*SAR EAX, CL*/ + addbyte(0xf8); + addbyte(0xd3); /*SAR EBX, CL*/ + addbyte(0xfb); + addbyte(0x89); /*MOV EBP, EAX*/ + addbyte(0xc5); + addbyte(0x89); /*MOV ECX, EBX*/ + addbyte(0xd9); + addbyte(0x83); /*AND EBP, 0xf*/ + addbyte(0xe5); + addbyte(0xf); + addbyte(0xc1); /*SHL ECX, 4*/ + addbyte(0xe1); + addbyte(4); + addbyte(0xc1); /*SAR EAX, 4*/ + addbyte(0xf8); + addbyte(4); + addbyte(0x81); /*AND ECX, 0xf0*/ + addbyte(0xe1); + addlong(0xf0); + addbyte(0xc1); /*SAR EBX, 4*/ + addbyte(0xfb); + addbyte(4); + addbyte(0x09); /*OR EBP, ECX*/ + addbyte(0xcd); + addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xc1); /*SHL EBP, 5 - 32 bytes (two __m128i) per bilinear_lookup index*/ + addbyte(0xe5); + addbyte(5); + /*EAX = S, EBX = T, ECX = LOD, EDX = tex_shift, ESI=params, EDI=state, EBP = bilinear shift*/ + addbyte(0x48); /*LEA RSI, [RSI+RCX*4] - bias params by LOD so the mask accesses below need no index*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x8e); + addbyte(0x89); /*MOV ebp_store, EBP*/ + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x48); /*MOV RBP, state->tex[RDI+RCX*8]*/ + addbyte(0x8b); + addbyte(0xac); + addbyte(0xcf); + addlong(offsetof(voodoo_state_t, tex[tmu])); + addbyte(0x88); /*MOV CL, DL*/ + addbyte(0xd1); + addbyte(0x89); /*MOV EDX, EBX*/ + addbyte(0xda); + if (!state->clamp_s[tmu]) + { + addbyte(0x23); /*AND EAX, params->tex_w_mask[ESI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + } + addbyte(0x83); /*ADD EDX, 1*/ + addbyte(0xc2); + addbyte(1); + if (state->clamp_t[tmu]) + { + addbyte(0x41); /*CMOVS EDX, R10(alookup[0](zero)) - sign flag comes from the ADD above*/ + addbyte(0x0f); + addbyte(0x48); + addbyte(0x12); + addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x0f); /*CMOVA EDX, params->tex_h_mask[ESI]*/ + addbyte(0x47); + addbyte(0x96); +
addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x85); /*TEST EBX,EBX*/ + addbyte(0xdb); + addbyte(0x41); /*CMOVS EBX, R10(alookup[0](zero))*/ + addbyte(0x0f); + addbyte(0x48); + addbyte(0x1a); + addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x0f); /*CMOVA EBX, params->tex_h_mask[ESI]*/ + addbyte(0x47); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + } + else + { + addbyte(0x23); /*AND EDX, params->tex_h_mask[ESI]*/ + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x23); /*AND EBX, params->tex_h_mask[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + } + /*EAX = S, EBX = T0, EDX = T1*/ + addbyte(0xd3); /*SHL EBX, CL*/ + addbyte(0xe3); + addbyte(0xd3); /*SHL EDX, CL*/ + addbyte(0xe2); + addbyte(0x48); /*LEA RBX,[RBP+RBX*4]*/ + addbyte(0x8d); + addbyte(0x5c); + addbyte(0x9d); + addbyte(0); + addbyte(0x48); /*LEA RDX,[RBP+RDX*4]*/ + addbyte(0x8d); + addbyte(0x54); + addbyte(0x95); + addbyte(0); + if (state->clamp_s[tmu]) + { + addbyte(0x8b); /*MOV EBP, params->tex_w_mask[ESI]*/ + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x8b); /*MOV ESI, state->ebp_store[RDI] - reload the bilinear_lookup offset saved above; MOV leaves the flags from TEST intact*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x41); /*CMOVS EAX, R10(alookup[0](zero))*/ + addbyte(0x0f); + addbyte(0x48); + addbyte(0x02); + addbyte(0x78); /*JS + - clamp on 0*/ + addbyte(2+3+2+ 5+5+2); + addbyte(0x3b); /*CMP EAX, EBP*/ + addbyte(0xc5); + addbyte(0x0f); /*CMOVAE EAX, EBP*/ + addbyte(0x43); + addbyte(0xc5); + addbyte(0x73); /*JAE + - clamp on +*/ + addbyte(5+5+2); + } + else + { + addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI] - is S at texture edge (ie will wrap/clamp)?*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + addbyte(0x8b); /*MOV ESI, state->ebp_store[RDI] - reload the bilinear_lookup offset saved above; MOV leaves the flags from CMP intact*/ +
addbyte(0xb7); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x74); /*JE +*/ + addbyte(5+5+2); + } + /*Common case: one 8 byte load per row picks up two adjacent 4 byte texels*/ + addbyte(0xf3); /*MOVQ XMM0, [RBX+RAX*4]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x04); + addbyte(0x83); + addbyte(0xf3); /*MOVQ XMM1, [RDX+RAX*4]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x0c); + addbyte(0x82); + + if (state->clamp_s[tmu]) + { + addbyte(0xeb); /*JMP + - skip the clamped case below*/ + addbyte(5+5+4+4); + + /*S clamped - the two S coordinates are the same*/ + addbyte(0x66); /*MOVD XMM0, [RBX+RAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x04); + addbyte(0x83); + addbyte(0x66); /*MOVD XMM1, [RDX+RAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x0c); + addbyte(0x82); + addbyte(0x66); /*PUNPCKLDQ XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc0); + addbyte(0x66); /*PUNPCKLDQ XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc9); + } + else + { + addbyte(0xeb); /*JMP + - skip the wrapped case below*/ + addbyte(5+5+5+5+6+6); + + /*S wrapped - the two S coordinates are not contiguous*/ + addbyte(0x66); /*MOVD XMM0, [RBX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x04); + addbyte(0x83); + addbyte(0x66); /*MOVD XMM1, [RDX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x0c); + addbyte(0x82); + addbyte(0x66); /*PINSRW XMM0, [RBX], 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x03); + addbyte(0x02); + addbyte(0x66); /*PINSRW XMM1, [RDX], 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x0a); + addbyte(0x02); + addbyte(0x66); /*PINSRW XMM0, 2[RBX], 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x43); + addbyte(0x02); + addbyte(0x03); + addbyte(0x66); /*PINSRW XMM1, 2[RDX], 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x4a); + addbyte(0x02); + addbyte(0x03); + } + + addbyte(0x49); /*MOV R8, bilinear_lookup*/ + addbyte(0xb8); + addquad((uintptr_t)bilinear_lookup); + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x60); +
addbyte(0xca); + + addbyte(0x4c); /*ADD RSI, R8*/ + addbyte(0x01); + addbyte(0xc6); + + addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x06); + addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x4e); + addbyte(0x10); + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0 | 1 | (0 << 3)); + addbyte(0x66); /*MOVDQA XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xc0 | 0 | (1 << 3)); + addbyte(0x66); /*PSRLDQ XMM0, 8 - byte shift, ie 64 bits*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xd8); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0 | 1 | (0 << 3)); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0 | 0); + addbyte(8); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + + addbyte(0x4c); /*MOV RSI, R15 - RSI was reused for the lookup address above; reload it from R15*/ + addbyte(0x89); + addbyte(0xfe); + + addbyte(0x66); /*MOVD EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + } + else + { + addbyte(0xb2); /*MOV DL, 8*/ + addbyte(8); + addbyte(0x8b); /*MOV ECX, state->lod[RDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0x48); /*MOV RBP, state->tex[RDI+RCX*8]*/ + addbyte(0x8b); + addbyte(0xac); + addbyte(0xcf); + addlong(offsetof(voodoo_state_t, tex[tmu])); + addbyte(0x28); /*SUB DL, CL*/ + addbyte(0xca); + addbyte(0x80); /*ADD CL, 4 - S/T are shifted by LOD+4; this is also why the [ESI+ECX*4] displacements below subtract 0x10*/ + addbyte(0xc1); + addbyte(4); + addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_t)); + if (params->tLOD[tmu] & LOD_TMIRROR_S) + { + addbyte(0xa9); /*TEST EAX, 0x1000*/ + addlong(0x1000); + addbyte(0x74); /*JZ +*/ + addbyte(2); + addbyte(0xf7); /*NOT EAX*/ + addbyte(0xd0); + } + if (params->tLOD[tmu] & LOD_TMIRROR_T) + { + addbyte(0xf7); /*TEST EBX,
0x1000*/ + addbyte(0xc3); + addlong(0x1000); + addbyte(0x74); /*JZ +*/ + addbyte(2); + addbyte(0xf7); /*NOT EBX*/ + addbyte(0xd3); + } + addbyte(0xd3); /*SHR EAX, CL*/ + addbyte(0xe8); + addbyte(0xd3); /*SHR EBX, CL*/ + addbyte(0xeb); + if (state->clamp_s[tmu]) + { + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x41); /*CMOVS EAX, R10(alookup[0](zero))*/ + addbyte(0x0f); + addbyte(0x48); + addbyte(0x02); + addbyte(0x3b); /*CMP EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/ + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + addbyte(0x0f); /*CMOVAE EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/ + addbyte(0x43); + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + + } + else + { + addbyte(0x23); /*AND EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/ + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + } + if (state->clamp_t[tmu]) + { + addbyte(0x85); /*TEST EBX, EBX*/ + addbyte(0xdb); + addbyte(0x41); /*CMOVS EBX, R10(alookup[0](zero))*/ + addbyte(0x0f); + addbyte(0x48); + addbyte(0x1a); + addbyte(0x3b); /*CMP EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/ + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + addbyte(0x0f); /*CMOVAE EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/ + addbyte(0x43); + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + } + else + { + addbyte(0x23); /*AND EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/ + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + } + addbyte(0x88); /*MOV CL, DL - DL holds 8 - LOD, the row shift*/ + addbyte(0xd1); + addbyte(0xd3); /*SHL EBX, CL*/ + addbyte(0xe3); + addbyte(0x01); /*ADD EBX, EAX - texel index = (T << (8 - LOD)) + S*/ + addbyte(0xc3); + + addbyte(0x8b); /*MOV EAX, [RBP+RBX*4]*/ + addbyte(0x44); + addbyte(0x9d); + addbyte(0); + } + } + + return block_pos; +} + +static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo,
voodoo_params_t *params, voodoo_state_t *state, int depthop) +{ + int block_pos = 0; + int z_skip_pos = 0; + int a_skip_pos = 0; + int chroma_skip_pos = 0; + int depth_jump_pos = 0; + int depth_jump_pos2 = 0; + int loop_jump_pos = 0; +// xmm_01_w = (__m128i)0x0001000100010001ull; +// xmm_ff_w = (__m128i)0x00ff00ff00ff00ffull; +// xmm_ff_b = (__m128i)0x00000000ffffffffull; + xmm_01_w = _mm_set_epi32(0, 0, 0x00010001, 0x00010001); + xmm_ff_w = _mm_set_epi32(0, 0, 0x00ff00ff, 0x00ff00ff); + xmm_ff_b = _mm_set_epi32(0, 0, 0, 0x00ffffff); + minus_254 = _mm_set_epi32(0, 0, 0xff02ff02, 0xff02ff02); +// *(uint64_t *)&const_1_48 = 0x45b0000000000000ull; +// block_pos = 0; +// voodoo_get_depth = &code_block[block_pos]; + /*W at (%esp+4) + Z at (%esp+12) + new_depth at (%esp+16)*/ +// if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depth_op == DEPTHOP_NEVER)) +// { +// addbyte(0xC3); /*RET*/ +// return; +// } + addbyte(0x55); /*PUSH RBP*/ + addbyte(0x57); /*PUSH RDI*/ + addbyte(0x56); /*PUSH RSI*/ + addbyte(0x53); /*PUSH RBX*/ + addbyte(0x41); /*PUSH R12*/ + addbyte(0x54); + addbyte(0x41); /*PUSH R13*/ + addbyte(0x55); + addbyte(0x41); /*PUSH R14*/ + addbyte(0x56); + addbyte(0x41); /*PUSH R15*/ + addbyte(0x57); + + addbyte(0x49); /*MOV R15, xmm_01_w*/ + addbyte(0xbf); + addquad((uint64_t)(uintptr_t)&xmm_01_w); + addbyte(0x66); /*MOVDQA XMM8, [R15]*/ + addbyte(0x45); + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x07 | (0 << 3)); + addbyte(0x49); /*MOV R15, xmm_ff_w*/ + addbyte(0xbf); + addquad((uint64_t)(uintptr_t)&xmm_ff_w); + addbyte(0x66); /*MOVDQA XMM9, [R15]*/ + addbyte(0x45); + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x07 | (1 << 3)); + addbyte(0x49); /*MOV R15, xmm_ff_b*/ + addbyte(0xbf); + addquad((uint64_t)(uintptr_t)&xmm_ff_b); + addbyte(0x66); /*MOVDQA XMM10, [R15]*/ + addbyte(0x45); + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x07 | (2 << 3)); + addbyte(0x49); /*MOV R15, minus_254*/ + addbyte(0xbf); + addquad((uint64_t)(uintptr_t)&minus_254); + addbyte(0x66); 
/*MOVDQA XMM11, [R15]*/ + addbyte(0x45); + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x07 | (3 << 3)); + +#if WIN64 + addbyte(0x48); /*MOV RDI, RCX (voodoo_state)*/ + addbyte(0x89); + addbyte(0xcf); + addbyte(0x49); /*MOV R15, RDX (voodoo_params)*/ + addbyte(0x89); + addbyte(0xd7); + addbyte(0x4d); /*MOV R14, R9 (real_y)*/ + addbyte(0x89); + addbyte(0xce); +#else + addbyte(0x49); /*MOV R14, RCX (real_y)*/ + addbyte(0x89); + addbyte(0xce); + addbyte(0x49); /*MOV R15, RSI (voodoo_state)*/ + addbyte(0x89); + addbyte(0xf7); +#endif + + addbyte(0x49); /*MOV R9, logtable*/ + addbyte(0xb8 | (9 & 7)); + addquad((uint64_t)(uintptr_t)&logtable); + addbyte(0x49); /*MOV R10, alookup*/ + addbyte(0xb8 | (10 & 7)); + addquad((uint64_t)(uintptr_t)&alookup); + addbyte(0x49); /*MOV R11, aminuslookup*/ + addbyte(0xb8 | (11 & 7)); + addquad((uint64_t)(uintptr_t)&aminuslookup); + addbyte(0x49); /*MOV R12, xmm_00_ff_w*/ + addbyte(0xb8 | (12 & 7)); + addquad((uint64_t)(uintptr_t)&xmm_00_ff_w); + addbyte(0x49); /*MOV R13, i_00_ff_w*/ + addbyte(0xb8 | (13 & 7)); + addquad((uint64_t)(uintptr_t)&i_00_ff_w); + + loop_jump_pos = block_pos; + addbyte(0x4c); /*MOV RSI, R15*/ + addbyte(0x89); + addbyte(0xfe); + if (params->col_tiled || params->aux_tiled) + { + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + addbyte(0x83); /*AND EAX, 63*/ + addbyte(0xe0); + addbyte(63); + addbyte(0xc1); /*SHR EBX, 6*/ + addbyte(0xeb); + addbyte(6); + addbyte(0xc1); /*SHL EBX, 11 - tile is 128*32, << 12, div 2 because word index*/ + addbyte(0xe3); + addbyte(11); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + addbyte(0x89); /*MOV state->x_tiled[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x_tiled)); + } + addbyte(0x66); /*PXOR XMM2, XMM2*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xd2); + + if ((params->fbzMode & FBZ_W_BUFFER) || (params->fogMode & 
(FOG_ENABLE|FOG_CONSTANT|FOG_Z|FOG_ALPHA)) == FOG_ENABLE) + { + addbyte(0xb8); /*MOV new_depth, 0*/ + addlong(0); + addbyte(0x66); /*TEST w+4, 0xffff*/ + addbyte(0xf7); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w)+4); + addword(0xffff); + addbyte(0x75); /*JNZ got_depth*/ + depth_jump_pos = block_pos; + addbyte(0); +// addbyte(4+5+2+3+2+5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3); + addbyte(0x8b); /*MOV EDX, w*/ + addbyte(0x97); + addlong(offsetof(voodoo_state_t, w)); + addbyte(0xb8); /*MOV new_depth, 0xf001*/ + addlong(0xf001); + addbyte(0x89); /*MOV EBX, EDX*/ + addbyte(0xd3); + addbyte(0xc1); /*SHR EDX, 16*/ + addbyte(0xea); + addbyte(16); + addbyte(0x74); /*JZ got_depth*/ + depth_jump_pos2 = block_pos; + addbyte(0); +// addbyte(5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3); + addbyte(0xb9); /*MOV ECX, 19*/ + addlong(19); + addbyte(0x0f); /*BSR EAX, EDX*/ + addbyte(0xbd); + addbyte(0xc2); + addbyte(0xba); /*MOV EDX, 15*/ + addlong(15); + addbyte(0xf7); /*NOT EBX*/ + addbyte(0xd3); + addbyte(0x29); /*SUB EDX, EAX - EDX = exp*/ + addbyte(0xc2); + addbyte(0x29); /*SUB ECX, EDX*/ + addbyte(0xd1); + addbyte(0xc1); /*SHL EDX, 12*/ + addbyte(0xe2); + addbyte(12); + addbyte(0xd3); /*SHR EBX, CL*/ + addbyte(0xeb); + addbyte(0x81); /*AND EBX, 0xfff - EBX = mant*/ + addbyte(0xe3); + addlong(0xfff); + addbyte(0x67); /*LEA EAX, 1[EDX, EBX]*/ + addbyte(0x8d); + addbyte(0x44); + addbyte(0x13); + addbyte(1); + addbyte(0xbb); /*MOV EBX, 0xffff*/ + addlong(0xffff); + addbyte(0x39); /*CMP EAX, EBX*/ + addbyte(0xd8); + addbyte(0x0f); /*CMOVA EAX, EBX*/ + addbyte(0x47); + addbyte(0xc3); + + if (depth_jump_pos) + *(uint8_t *)&code_block[depth_jump_pos] = (block_pos - depth_jump_pos) - 1; + if (depth_jump_pos) + *(uint8_t *)&code_block[depth_jump_pos2] = (block_pos - depth_jump_pos2) - 1; + + if ((params->fogMode & (FOG_ENABLE|FOG_CONSTANT|FOG_Z|FOG_ALPHA)) == FOG_ENABLE) + { + addbyte(0x89); /*MOV state->w_depth[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w_depth)); + } + } + if 
(!(params->fbzMode & FBZ_W_BUFFER)) + { + addbyte(0x8b); /*MOV EAX, z*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, z)); + addbyte(0xbb); /*MOV EBX, 0xffff*/ + addlong(0xffff); + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + addbyte(0xc1); /*SAR EAX, 12*/ + addbyte(0xf8); + addbyte(12); + addbyte(0x0f); /*CMOVS EAX, ECX*/ + addbyte(0x48); + addbyte(0xc1); + addbyte(0x39); /*CMP EAX, EBX*/ + addbyte(0xd8); + addbyte(0x0f); /*CMOVA EAX, EBX*/ + addbyte(0x47); + addbyte(0xc3); + } + + if (params->fbzMode & FBZ_DEPTH_BIAS) + { + addbyte(0x03); /*ADD EAX, params->zaColor[ESI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, zaColor)); + addbyte(0x25); /*AND EAX, 0xffff*/ + addlong(0xffff); + } + + addbyte(0x89); /*MOV state->new_depth[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, new_depth)); + + if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop != DEPTHOP_ALWAYS) && (depthop != DEPTHOP_NEVER)) + { + addbyte(0x8b); /*MOV EBX, state->x[EDI]*/ + addbyte(0x9f); + if (params->aux_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x48); /*MOV RCX, aux_mem[RDI]*/ + addbyte(0x8b); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, aux_mem)); + addbyte(0x0f); /*MOVZX EBX, [ECX+EBX*2]*/ + addbyte(0xb7); + addbyte(0x1c); + addbyte(0x59); + if (params->fbzMode & FBZ_DEPTH_SOURCE) + { + addbyte(0x0f); /*MOVZX EAX, zaColor[RSI]*/ + addbyte(0xb7); + addbyte(0x86); + addlong(offsetof(voodoo_params_t, zaColor)); + } + addbyte(0x39); /*CMP EAX, EBX*/ + addbyte(0xd8); + if (depthop == DEPTHOP_LESSTHAN) + { + addbyte(0x0f); /*JAE skip*/ + addbyte(0x83); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_EQUAL) + { + addbyte(0x0f); /*JNE skip*/ + addbyte(0x85); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_LESSTHANEQUAL) + { + addbyte(0x0f); /*JA skip*/ + addbyte(0x87); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == 
DEPTHOP_GREATERTHAN) + { + addbyte(0x0f); /*JBE skip*/ + addbyte(0x86); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_NOTEQUAL) + { + addbyte(0x0f); /*JE skip*/ + addbyte(0x84); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_GREATERTHANEQUAL) + { + addbyte(0x0f); /*JB skip*/ + addbyte(0x82); + z_skip_pos = block_pos; + addlong(0); + } + else + fatal("Bad depth_op\n"); + } + else if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop == DEPTHOP_NEVER)) + { + addbyte(0xC3); /*RET*/ + } + + /*XMM0 = colour*/ + /*XMM2 = 0 (for unpacking*/ + + /*EDI = state, ESI = params*/ + + if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus) + { + /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/ + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + addbyte(0x89); /*MOV state->tex_a[RDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + } + else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH) + { + /*TMU0 in pass-through mode, only sample TMU1*/ + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + addbyte(0x89); /*MOV state->tex_a[RDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + } + else + { + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1); + + addbyte(0x66); /*MOVD XMM3, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xd8); + if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && tc_sub_clocal_1) + { + addbyte(0x8b); /*MOV EAX, state->lod*/ + addbyte(0x87); + 
addlong(offsetof(voodoo_state_t, lod)); + if (!tc_reverse_blend_1) + { + addbyte(0xbb); /*MOV EBX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + addbyte(0x83); /*AND EAX, 1*/ + addbyte(0xe0); + addbyte(1); + if (!tca_reverse_blend_1) + { + addbyte(0xb9); /*MOV ECX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x31); /*XOR ECX, EAX*/ + addbyte(0xc1); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/ + } + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + if (tc_sub_clocal_1) + { + switch (tc_mselect_1) + { + case TC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case TC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOVQ XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + break; + case TC_MSELECT_AOTHER: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case TC_MSELECT_ALOCAL: + addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc3); + addbyte(0xff); + break; + case TC_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/ + addbyte(0xe0); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVNL EAX, EDX*/ + addbyte(0x4d); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0); + 
addbyte(0); + break; + case TC_MSELECT_LOD_FRAC: + addbyte(0x66); /*MOVD XMM0, state->lod_frac[1]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[1])); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0); + addbyte(0); + break; + } + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x66); /*PXOR XMM0, R12(xmm_00_ff_w)[EBX]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0x04); + addbyte(0x1c); + } + else if (!tc_reverse_blend_1) + { + addbyte(0x66); /*PXOR XMM0, XMM9(xmm_ff_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc1); + } + addbyte(0x66); /*PADDW XMM0, XMM8(xmm_01_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0); + addbyte(0xf3); /*MOVQ XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xca); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PMULLW XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc3); + addbyte(0x66); /*PMULHW XMM5, XMM3*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xeb); + addbyte(0x66); /*PUNPCKLWD XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xc5); + addbyte(0x66); /*PSRAD XMM0, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PSUBW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc8); + if (tc_add_clocal_1) + { + addbyte(0x66); /*PADDW XMM1, XMM3*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xcb); + } + else if (tc_add_alocal_1) + { + addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc3); + addbyte(0xff); + addbyte(0x66); /*PADDW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc8); + } + addbyte(0x66); /*PACKUSWB XMM3, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xd9); + if (tca_sub_clocal_1) 
+ { + addbyte(0x66); /*MOVD EBX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdb); + } + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + } + + if (tca_sub_clocal_1) + { + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + switch (tca_mselect_1) + { + case TCA_MSELECT_ZERO: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + break; + case TCA_MSELECT_CLOCAL: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case TCA_MSELECT_AOTHER: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + break; + case TCA_MSELECT_ALOCAL: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case TCA_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/ + addbyte(0xe0); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVNL EAX, EDX*/ + addbyte(0x4d); + addbyte(0xc2); + break; + case TCA_MSELECT_LOD_FRAC: + addbyte(0x8b); /*MOV EAX, state->lod_frac[1]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[1])); + break; + } + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x41); /*XOR EAX, R13(i_00_ff_w)[ECX*4]*/ + addbyte(0x33); + addbyte(0x44); + addbyte(0x8d); + addbyte(0); + } + else if (!tc_reverse_blend_1) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + addbyte(0x8e); /*ADD EAX, 1*/ + addbyte(0xc0); + addbyte(1); + addbyte(0x0f); /*IMUL EAX, EBX*/ + addbyte(0xaf); + addbyte(0xc3); + addbyte(0xb9); /*MOV ECX, 0xff*/ + addlong(0xff); + addbyte(0xf7); /*NEG EAX*/ + addbyte(0xd8); + addbyte(0xc1); /*SAR EAX, 8*/ + addbyte(0xf8); + addbyte(8); + if (tca_add_clocal_1 || tca_add_alocal_1) + { + addbyte(0x01); /*ADD EAX, 
EBX*/ + addbyte(0xd8); + } + addbyte(0x39); /*CMP ECX, EAX*/ + addbyte(0xc1); + addbyte(0x0f); /*CMOVA ECX, EAX*/ + addbyte(0x47); + addbyte(0xc8); + addbyte(0x66); /*PINSRW 3, XMM3, XMM0*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0xd8); + addbyte(3); + } + + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0x66); /*MOVD XMM7, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xf8); + + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x8b); /*MOV EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + if (!tc_reverse_blend) + { + addbyte(0xbb); /*MOV EBX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + addbyte(0x83); /*AND EAX, 1*/ + addbyte(0xe0); + addbyte(1); + if (!tca_reverse_blend) + { + addbyte(0xb9); /*MOV ECX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x31); /*XOR ECX, EAX*/ + addbyte(0xc1); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/ + } + + /*XMM0 = TMU0 output, XMM3 = TMU1 output*/ + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + if (tc_zero_other) + { + addbyte(0x66); /*PXOR XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc9); + } + else + { + addbyte(0xf3); /*MOV XMM1, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xcb); + } + if (tc_sub_clocal) + { + addbyte(0x66); /*PSUBW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc8); + } + + switch (tc_mselect) + { + case TC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM4, XMM4*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe4); + break; + case TC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOV XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe0); 
+ break; + case TC_MSELECT_AOTHER: + addbyte(0xf2); /*PSHUFLW XMM4, XMM3, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe3); + addbyte(0xff); + break; + case TC_MSELECT_ALOCAL: + addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe0); + addbyte(0xff); + break; + case TC_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[0]*/ + addlong(params->detail_bias[0]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[0]*/ + addlong(params->detail_max[0]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[0]*/ + addbyte(0xe0); + addbyte(params->detail_scale[0]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVNL EAX, EDX*/ + addbyte(0x4d); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM4, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xe0); + addbyte(0xf2); /*PSHUFLW XMM4, XMM4, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe4); + addbyte(0); + break; + case TC_MSELECT_LOD_FRAC: + addbyte(0x66); /*MOVD XMM0, state->lod_frac[0]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, lod_frac[0])); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe4); + addbyte(0); + break; + } + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x66); /*PXOR XMM4, R12(xmm_00_ff_w)[EBX]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0x24); + addbyte(0x1c); + } + else if (!tc_reverse_blend) + { + addbyte(0x66); /*PXOR XMM4, XMM9(xmm_ff_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe1); + } + addbyte(0x66); /*PADDW XMM4, XMM8(xmm_01_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe0); + addbyte(0xf3); /*MOVQ XMM5, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe9); + addbyte(0x66); /*PMULLW XMM1, XMM4*/ + addbyte(0x0f); + addbyte(0xd5); + 
addbyte(0xcc); + + if (tca_sub_clocal) + { + addbyte(0x66); /*MOV EBX, XMM7*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xfb); + } + + addbyte(0x66); /*PMULHW XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xec); + addbyte(0x66); /*PUNPCKLWD XMM1, XMM5*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xcd); + addbyte(0x66); /*PSRAD XMM1, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe1); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc9); + + if (tca_sub_clocal) + { + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + } + + if (tc_add_clocal) + { + addbyte(0x66); /*PADDW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc8); + } + else if (tc_add_alocal) + { + addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe0); + addbyte(0xff); + addbyte(0x66); /*PADDW XMM1, XMM4*/ + addbyte(0x0f); + addbyte(0xfc); + addbyte(0xcc); + } + if (tc_invert_output) + { + addbyte(0x66); /*PXOR XMM1, XMM9(xmm_ff_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc9); + } + + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xdb); + addbyte(0x66); /*PACKUSWB XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc9); + + if (tca_zero_other) + { + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + } + else + { + addbyte(0x66); /*MOV EAX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xd8); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + } + if (tca_sub_clocal) + { + addbyte(0x29); /*SUB EAX, EBX*/ + addbyte(0xd8); + } + switch (tca_mselect) + { + case TCA_MSELECT_ZERO: + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + break; + case TCA_MSELECT_CLOCAL: + addbyte(0x66); /*MOV EBX, XMM7*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xfb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); 
+ addbyte(24); + break; + case TCA_MSELECT_AOTHER: + addbyte(0x66); /*MOV EBX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_ALOCAL: + addbyte(0x66); /*MOV EBX, XMM7*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xfb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_DETAIL: + addbyte(0xbb); /*MOV EBX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EBX, state->lod*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EBX, params->detail_scale[1]*/ + addbyte(0xe3); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EBX, EDX*/ + addbyte(0xd3); + addbyte(0x0f); /*CMOVNL EBX, EDX*/ + addbyte(0x4d); + addbyte(0xda); + break; + case TCA_MSELECT_LOD_FRAC: + addbyte(0x8b); /*MOV EBX, state->lod_frac[0]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_frac[0])); + break; + } + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x41); /*XOR EBX, R13(i_00_ff_w)[ECX*4]*/ + addbyte(0x33); + addbyte(0x5c); + addbyte(0x8d); + addbyte(0); + } + else if (!tca_reverse_blend) + { + addbyte(0x81); /*XOR EBX, 0xFF*/ + addbyte(0xf3); + addlong(0xff); + } + + addbyte(0x83); /*ADD EBX, 1*/ + addbyte(0xc3); + addbyte(1); + addbyte(0x0f); /*IMUL EAX, EBX*/ + addbyte(0xaf); + addbyte(0xc3); + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + addbyte(0xc1); /*SAR EAX, 8*/ + addbyte(0xf8); + addbyte(8); + if (tca_add_clocal || tca_add_alocal) + { + addbyte(0x66); /*MOV EBX, XMM7*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xfb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + } + addbyte(0x0f); /*CMOVS EAX, EDX*/ + addbyte(0x48); + addbyte(0xc2); + addbyte(0xba); /*MOV EDX, 0xff*/ + 
addlong(0xff); + addbyte(0x3d); /*CMP EAX, 0xff*/ + addlong(0xff); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + if (tca_invert_output) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + + addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + + addbyte(0xf3); /*MOVQ XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc1); + } + if (cc_mselect == CC_MSELECT_TEXRGB) + { + addbyte(0xf3); /*MOVD XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe0); + } + + if ((params->fbzMode & FBZ_CHROMAKEY)) + { + switch (_rgb_sel) + { + case CC_LOCALSELECT_ITER_RGB: + addbyte(0xf3); /*MOVDQU XMM0, ib*/ /* ir, ig and ib must be in same dqword!*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0x66); /*PSRAD XMM0, 12*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(12); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + addbyte(0x66); /*MOVD EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + break; + case CC_LOCALSELECT_COLOR1: + addbyte(0x8b); /*MOV EAX, params->color1[RSI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, color1)); + break; + case CC_LOCALSELECT_TEX: + addbyte(0x66); /*MOVD EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + break; + } + addbyte(0x8b); /*MOV EBX, params->chromaKey[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, chromaKey)); + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x81); /*AND EBX, 0xffffff*/ + addbyte(0xe3); + addlong(0xffffff); + addbyte(0x0f); /*JE skip*/ + addbyte(0x84); + chroma_skip_pos = block_pos; + addlong(0); + } + + if (voodoo->trexInit1[0] & (1 << 18)) + { + addbyte(0xb8); /*MOV EAX, tmuConfig*/ + addlong(voodoo->tmuConfig); + addbyte(0x66); /*MOVD XMM0, EAX*/ + 
addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + } + + if (params->alphaMode & ((1 << 0) | (1 << 4))) + { + /*EBX = a_other*/ + switch (a_sel) + { + case A_SEL_ITER_A: + addbyte(0x8b); /*MOV EBX, state->ia*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, ia)); + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + addbyte(0xc1); /*SAR EBX, 12*/ + addbyte(0xfb); + addbyte(12); + addbyte(0x0f); /*CMOVS EBX, EAX*/ + addbyte(0x48); + addbyte(0xd8); + addbyte(0x39); /*CMP EBX, EDX*/ + addbyte(0xd3); + addbyte(0x0f); /*CMOVA EBX, EDX*/ + addbyte(0x47); + addbyte(0xda); + break; + case A_SEL_TEX: + addbyte(0x8b); /*MOV EBX, state->tex_a*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_a)); + break; + case A_SEL_COLOR1: + addbyte(0x0f); /*MOVZX EBX, params->color1+3*/ + addbyte(0xb6); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, color1)+3); + break; + default: + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + break; + } + /*ECX = a_local*/ + switch (cca_localselect) + { + case CCA_LOCALSELECT_ITER_A: + if (a_sel == A_SEL_ITER_A) + { + addbyte(0x89); /*MOV ECX, EBX*/ + addbyte(0xd9); + } + else + { + addbyte(0x8b); /*MOV ECX, state->ia*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ia)); + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + addbyte(0xc1);/*SAR ECX, 12*/ + addbyte(0xf9); + addbyte(12); + addbyte(0x0f); /*CMOVS ECX, EAX*/ + addbyte(0x48); + addbyte(0xc8); + addbyte(0x39); /*CMP ECX, EDX*/ + addbyte(0xd1); + addbyte(0x0f); /*CMOVA ECX, EDX*/ + addbyte(0x47); + addbyte(0xca); + } + break; + case CCA_LOCALSELECT_COLOR0: + addbyte(0x0f); /*MOVZX ECX, params->color0+3*/ + addbyte(0xb6); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, color0)+3); + break; + case CCA_LOCALSELECT_ITER_Z: + addbyte(0x8b); /*MOV ECX, state->z*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, z)); + if (a_sel != A_SEL_ITER_A) + { + 
addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + } + addbyte(0xc1);/*SAR ECX, 20*/ + addbyte(0xf9); + addbyte(20); + addbyte(0x0f); /*CMOVS ECX, EAX*/ + addbyte(0x48); + addbyte(0xc8); + addbyte(0x39); /*CMP ECX, EDX*/ + addbyte(0xd1); + addbyte(0x0f); /*CMOVA ECX, EDX*/ + addbyte(0x47); + addbyte(0xca); + break; + + default: + addbyte(0xb9); /*MOV ECX, 0xff*/ + addlong(0xff); + break; + } + + if (cca_zero_other) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x89); /*MOV EDX, EBX*/ + addbyte(0xda); + } + + if (cca_sub_clocal) + { + addbyte(0x29); /*SUB EDX, ECX*/ + addbyte(0xca); + } + } + + if (cc_sub_clocal || cc_mselect == 1 || cc_add == 1) + { + /*XMM1 = local*/ + if (!cc_localselect_override) + { + if (cc_localselect) + { + addbyte(0x66); /*MOVD XMM1, params->color0*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, color0)); + } + else + { + addbyte(0xf3); /*MOVDQU XMM1, ib*/ /* ir, ig and ib must be in same dqword!*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0x66); /*PSRAD XMM1, 12*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe1); + addbyte(12); + addbyte(0x66); /*PACKSSDW XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc9); + addbyte(0x66); /*PACKUSWB XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc9); + } + } + else + { + addbyte(0xf6); /*TEST state->tex_a, 0x80*/ + addbyte(0x87); + addbyte(0x23); + addlong(offsetof(voodoo_state_t, tex_a)); + addbyte(0x80); + addbyte(0x74);/*JZ !cc_localselect*/ + addbyte(8+2); + addbyte(0x66); /*MOVD XMM1, params->color0*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, color0)); + addbyte(0xeb); /*JMP +*/ + addbyte(8+5+4+4); + /*!cc_localselect:*/ + addbyte(0xf3); /*MOVDQU XMM1, ib*/ /* ir, ig and ib must be in same dqword!*/ + addbyte(0x0f); + addbyte(0x6f); + 
addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0x66); /*PSRAD XMM1, 12*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe1); + addbyte(12); + addbyte(0x66); /*PACKSSDW XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc9); + addbyte(0x66); /*PACKUSWB XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc9); + } + addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xca); + } + if (!cc_zero_other) + { + if (_rgb_sel == CC_LOCALSELECT_ITER_RGB) + { + addbyte(0xf3); /*MOVDQU XMM0, ib*/ /* ir, ig and ib must be in same dqword!*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0x66); /*PSRAD XMM0, 12*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(12); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + } + else if (_rgb_sel == CC_LOCALSELECT_TEX) + { +#if 0 + addbyte(0xf3); /*MOVDQU XMM0, state->tex_b*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_b)); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); +#endif + } + else if (_rgb_sel == CC_LOCALSELECT_COLOR1) + { + addbyte(0x66); /*MOVD XMM0, params->color1*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x86); + addlong(offsetof(voodoo_params_t, color1)); + } + else + { + /*MOVD XMM0, src_r*/ + } + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + if (cc_sub_clocal) + { + addbyte(0x66); /*PSUBW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc1); + } + } + else + { + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + if (cc_sub_clocal) + { + addbyte(0x66); /*PSUBW XMM0, XMM1*/ + addbyte(0x0f); 
+ addbyte(0xf9); + addbyte(0xc1); + } + } + + if (params->alphaMode & ((1 << 0) | (1 << 4))) + { + if (!(cca_mselect == 0 && cca_reverse_blend == 0)) + { + switch (cca_mselect) + { + case CCA_MSELECT_ALOCAL: + addbyte(0x89); /*MOV EAX, ECX*/ + addbyte(0xc8); + break; + case CCA_MSELECT_AOTHER: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case CCA_MSELECT_ALOCAL2: + addbyte(0x89); /*MOV EAX, ECX*/ + addbyte(0xc8); + break; + case CCA_MSELECT_TEX: + addbyte(0x0f); /*MOVZX EAX, state->tex_a*/ + addbyte(0xb6); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + break; + + case CCA_MSELECT_ZERO: + default: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + break; + } + if (!cca_reverse_blend) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + addbyte(0x83); /*ADD EAX, 1*/ + addbyte(0xc0); + addbyte(1); + addbyte(0x0f); /*IMUL EDX, EAX*/ + addbyte(0xaf); + addbyte(0xd0); + addbyte(0xc1); /*SHR EDX, 8*/ + addbyte(0xea); + addbyte(8); + } + } + + if ((params->alphaMode & ((1 << 0) | (1 << 4)))) + { + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + } + + if (!(cc_mselect == 0 && cc_reverse_blend == 0) && cc_mselect == CC_MSELECT_AOTHER) + { + /*Copy a_other to XMM3 before it gets modified*/ + addbyte(0x66); /*MOVD XMM3, EDX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xda); + addbyte(0xf2); /*PSHUFLW XMM3, XMM3, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xdb); + addbyte(0x00); + } + + if (cca_add && (params->alphaMode & ((1 << 0) | (1 << 4)))) + { + addbyte(0x01); /*ADD EDX, ECX*/ + addbyte(0xca); + } + + if ((params->alphaMode & ((1 << 0) | (1 << 4)))) + { + addbyte(0x85); /*TEST EDX, EDX*/ + addbyte(0xd2); + addbyte(0x0f); /*CMOVS EDX, EAX*/ + addbyte(0x48); + addbyte(0xd0); + addbyte(0xb8); /*MOV EAX, 0xff*/ + addlong(0xff); + addbyte(0x81); /*CMP EDX, 0xff*/ + addbyte(0xfa); + addlong(0xff); + addbyte(0x0f); /*CMOVA EDX, EAX*/ + addbyte(0x47); + addbyte(0xd0); + if (cca_invert_output) + { + addbyte(0x81); /*XOR EDX, 
0xff*/ + addbyte(0xf2); + addlong(0xff); + } + } + + if (!(cc_mselect == 0 && cc_reverse_blend == 0)) + { + switch (cc_mselect) + { + case CC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xdb); + break; + case CC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOV XMM3, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xd9); + break; + case CC_MSELECT_ALOCAL: + addbyte(0x66); /*MOVD XMM3, ECX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xd9); + addbyte(0xf2); /*PSHUFLW XMM3, XMM3, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xdb); + addbyte(0x00); + break; + case CC_MSELECT_AOTHER: + /*Handled above*/ + break; + case CC_MSELECT_TEX: + addbyte(0x66); /*PINSRW XMM3, state->tex_a, 0*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_a)); + addbyte(0); + addbyte(0x66); /*PINSRW XMM3, state->tex_a, 1*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_a)); + addbyte(1); + addbyte(0x66); /*PINSRW XMM3, state->tex_a, 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_a)); + addbyte(2); + break; + case CC_MSELECT_TEXRGB: + addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xe2); + addbyte(0xf3); /*MOVQ XMM3, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdc); + break; + default: + addbyte(0x66); /*PXOR XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xdb); + break; + } + addbyte(0xf3); /*MOV XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe0); + if (!cc_reverse_blend) + { + addbyte(0x66); /*PXOR XMM3, XMM9(xmm_ff_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0xd9); + } + addbyte(0x66); /*PADDW XMM3, XMM8(xmm_01_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xd8); + addbyte(0x66); /*PMULLW XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc3); + addbyte(0x66); /*PMULHW XMM4, XMM3*/ + addbyte(0x0f); + 
addbyte(0xe5); + addbyte(0xe3); + addbyte(0x66); /*PUNPCKLWD XMM0, XMM4*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xc4); + addbyte(0x66); /*PSRLD XMM0, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + } + + if (cc_add == 1) + { + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc1); + } + + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + + if (cc_invert_output) + { + addbyte(0x66); /*PXOR XMM0, XMM10(xmm_ff_b)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc2); + } + + if (params->fogMode & FOG_ENABLE) + { + if (params->fogMode & FOG_CONSTANT) + { + addbyte(0x66); /*MOVD XMM3, params->fogColor[ESI]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, fogColor)); + addbyte(0x66); /*PADDUSB XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xdc); + addbyte(0xc3); + } + else + { + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + + if (!(params->fogMode & FOG_ADD)) + { + addbyte(0x66); /*MOVD XMM3, params->fogColor[ESI]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, fogColor)); + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + } + else + { + addbyte(0x66); /*PXOR XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xdb); + } + + if (!(params->fogMode & FOG_MULT)) + { + addbyte(0x66); /*PSUBW XMM3, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xd8); + } + + /*Divide by 2 to prevent overflow on multiply*/ + addbyte(0x66); /*PSRAW XMM3, 1*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xe3); + addbyte(1); + + switch (params->fogMode & (FOG_Z|FOG_ALPHA)) + { + case 0: + addbyte(0x8b); /*MOV EBX, state->w_depth[EDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, w_depth)); + addbyte(0x89); 
/*MOV EAX, EBX*/ + addbyte(0xd8); + addbyte(0xc1); /*SHR EBX, 10*/ + addbyte(0xeb); + addbyte(10); + addbyte(0xc1); /*SHR EAX, 2*/ + addbyte(0xe8); + addbyte(2); + addbyte(0x83); /*AND EBX, 0x3f*/ + addbyte(0xe3); + addbyte(0x3f); + addbyte(0x25); /*AND EAX, 0xff*/ + addlong(0xff); + addbyte(0xf6); /*MUL params->fogTable+1[ESI+EBX*2]*/ + addbyte(0xa4); + addbyte(0x5e); + addlong(offsetof(voodoo_params_t, fogTable)+1); + addbyte(0x0f); /*MOVZX EBX, params->fogTable[ESI+EBX*2]*/ + addbyte(0xb6); + addbyte(0x9c); + addbyte(0x5e); + addlong(offsetof(voodoo_params_t, fogTable)); + addbyte(0xc1); /*SHR EAX, 10*/ + addbyte(0xe8); + addbyte(10); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); +/* int fog_idx = (w_depth >> 10) & 0x3f; + + fog_a = params->fogTable[fog_idx].fog; + fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10;*/ + break; + + case FOG_Z: + addbyte(0x8b); /*MOV EAX, state->z[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, z)); + addbyte(0xc1); /*SHR EAX, 12*/ + addbyte(0xe8); + addbyte(12); + addbyte(0x25); /*AND EAX, 0xff*/ + addlong(0xff); +// fog_a = (z >> 20) & 0xff; + break; + + case FOG_ALPHA: + addbyte(0x8b); /*MOV EAX, state->ia[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, ia)); + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + addbyte(0xc1); /*SAR EAX, 12*/ + addbyte(0xf8); + addbyte(12); + addbyte(0x0f); /*CMOVS EAX, EBX*/ + addbyte(0x48); + addbyte(0xc3); + addbyte(0xbb); /*MOV EBX, 0xff*/ + addlong(0xff); + addbyte(0x3d); /*CMP EAX, 0xff*/ + addlong(0xff); + addbyte(0x0f); /*CMOVAE EAX, EBX*/ + addbyte(0x43); + addbyte(0xc3); +// fog_a = CLAMP(ia >> 12); + break; + + case FOG_W: + addbyte(0x8b); /*MOV EAX, state->w[EDI]+4*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w)+4); + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + addbyte(0x09); /*OR EAX, EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*CMOVS EAX, EBX*/ + addbyte(0x48); + addbyte(0xc3); + addbyte(0xbb); /*MOV EBX, 0xff*/ + 
addlong(0xff); + addbyte(0x3d); /*CMP EAX, 0xff*/ + addlong(0xff); + addbyte(0x0f); /*CMOVAE EAX, EBX*/ + addbyte(0x43); + addbyte(0xc3); +// fog_a = CLAMP(w >> 32); + break; + } + addbyte(0x01); /*ADD EAX, EAX*/ + addbyte(0xc0); + + addbyte(0x66); /*PMULLW XMM3, alookup+4[EAX*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x5c); + addbyte(0xc2); + addbyte(16); + addbyte(0x66); /*PSRAW XMM3, 7*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xe3); + addbyte(7); + + if (params->fogMode & FOG_MULT) + { + addbyte(0xf3); /*MOV XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + } + else + { + addbyte(0x66); /*PADDW XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc3); + } + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + } + } + + if ((params->alphaMode & 1) && (alpha_func != AFUNC_NEVER) && (alpha_func != AFUNC_ALWAYS)) + { + addbyte(0x0f); /*MOVZX ECX, params->alphaMode+3*/ + addbyte(0xb6); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, alphaMode) + 3); + addbyte(0x39); /*CMP EDX, ECX*/ + addbyte(0xca); + + switch (alpha_func) + { + case AFUNC_LESSTHAN: + addbyte(0x0f); /*JAE skip*/ + addbyte(0x83); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_EQUAL: + addbyte(0x0f); /*JNE skip*/ + addbyte(0x85); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_LESSTHANEQUAL: + addbyte(0x0f); /*JA skip*/ + addbyte(0x87); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_GREATERTHAN: + addbyte(0x0f); /*JBE skip*/ + addbyte(0x86); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_NOTEQUAL: + addbyte(0x0f); /*JE skip*/ + addbyte(0x84); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_GREATERTHANEQUAL: + addbyte(0x0f); /*JB skip*/ + addbyte(0x82); + a_skip_pos = block_pos; + addlong(0); + break; + } + } + else if ((params->alphaMode & 1) && (alpha_func == AFUNC_NEVER)) + { + addbyte(0xC3); /*RET*/ + } + + if (params->alphaMode & (1 
<< 4)) + { + addbyte(0x49); /*MOV R8, rgb565*/ + addbyte(0xb8); + addquad((uintptr_t)rgb565); + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ + addbyte(0x87); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x48); /*MOV RBP, fb_mem*/ + addbyte(0x8b); + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, fb_mem)); + addbyte(0x01); /*ADD EDX, EDX*/ + addbyte(0xd2); + addbyte(0x0f); /*MOVZX EAX, [RBP+RAX*2]*/ + addbyte(0xb7); + addbyte(0x44); + addbyte(0x45); + addbyte(0); + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM4, rgb565[EAX*4]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x24); + addbyte(0x80); + addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xe2); + addbyte(0xf3); /*MOV XMM6, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xf4); + + switch (dest_afunc) + { + case AFUNC_AZERO: + addbyte(0x66); /*PXOR XMM4, XMM4*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe4); + break; + case AFUNC_ASRC_ALPHA: + addbyte(0x66); /*PMULLW XMM4, R10(alookup)[EDX*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x24); + addbyte(0xd2); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, R10(alookup)[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x62); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + break; + case AFUNC_A_COLOR: + addbyte(0x66); /*PMULLW XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xe0); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); 
/*PADDW XMM4, R10(alookup)[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x62); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + break; + case AFUNC_ADST_ALPHA: + break; + case AFUNC_AONE: + break; + case AFUNC_AOMSRC_ALPHA: + addbyte(0x66); /*PMULLW XMM4, R11(aminuslookup)[EDX*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x24); + addbyte(0xd3); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, R10(alookup)[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x62); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + break; + case AFUNC_AOM_COLOR: + addbyte(0xf3); /*MOVQ XMM5, XMM9(xmm_ff_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe9); + addbyte(0x66); /*PSUBW XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xe8); + addbyte(0x66); /*PMULLW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xe5); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x62); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + 
break; + case AFUNC_AOMDST_ALPHA: + addbyte(0x66); /*PXOR XMM4, XMM4*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe4); + break; + case AFUNC_ASATURATE: + addbyte(0x66); /*PMULLW XMM4, XMM11(minus_254)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xe3); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x62); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + } + + switch (src_afunc) + { + case AFUNC_AZERO: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case AFUNC_ASRC_ALPHA: + addbyte(0x66); /*PMULLW XMM0, R10(alookup)[EDX*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x04); + addbyte(0xd2); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PADDW XMM0, R10(alookup)[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x42); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc5); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0); + addbyte(8); + break; + case AFUNC_A_COLOR: + addbyte(0x66); /*PMULLW XMM0, XMM6*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc6); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PADDW XMM0, R10(alookup)[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x42); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + 
addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc5); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0); + addbyte(8); + break; + case AFUNC_ADST_ALPHA: + break; + case AFUNC_AONE: + break; + case AFUNC_AOMSRC_ALPHA: + addbyte(0x66); /*PMULLW XMM0, R11(aminuslookup)[EDX*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x04); + addbyte(0xd3); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x42); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc5); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0); + addbyte(8); + break; + case AFUNC_AOM_COLOR: + addbyte(0xf3); /*MOVQ XMM5, XMM9(xmm_ff_w)*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe9); + addbyte(0x66); /*PSUBW XMM5, XMM6*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xee); + addbyte(0x66); /*PMULLW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc5); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x41); + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x42); + addbyte(8*2); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc5); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0); + addbyte(8); + break; + case AFUNC_AOMDST_ALPHA: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case AFUNC_ACOLORBEFOREFOG: + 
break; + } + + addbyte(0x66); /*PADDW XMM0, XMM4*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc4); + + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + } + + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ + addbyte(0x97); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + + addbyte(0x66); /*MOV EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + + if (params->fbzMode & FBZ_RGB_WMASK) + { + if (dither) + { + addbyte(0x49); /*MOV R8, dither_rb*/ + addbyte(0xb8); + addquad(dither2x2 ? (uintptr_t)dither_rb2x2 : (uintptr_t)dither_rb); + addbyte(0x4c); /*MOV ESI, real_y (R14)*/ + addbyte(0x89); + addbyte(0xf6); + addbyte(0x0f); /*MOVZX EBX, AH*/ /*G*/ + addbyte(0xb6); + addbyte(0xdc); + if (dither2x2) + { + addbyte(0x83); /*AND EDX, 1*/ + addbyte(0xe2); + addbyte(1); + addbyte(0x83); /*AND ESI, 1*/ + addbyte(0xe6); + addbyte(1); + addbyte(0xc1); /*SHL EBX, 2*/ + addbyte(0xe3); + addbyte(2); + } + else + { + addbyte(0x83); /*AND EDX, 3*/ + addbyte(0xe2); + addbyte(3); + addbyte(0x83); /*AND ESI, 3*/ + addbyte(0xe6); + addbyte(3); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + } + addbyte(0x0f); /*MOVZX ECX, AL*/ /*R*/ + addbyte(0xb6); + addbyte(0xc8); + if (dither2x2) + { + addbyte(0xc1); /*SHR EAX, 14*/ + addbyte(0xe8); + addbyte(14); + addbyte(0x8d); /*LEA ESI, RDX+RSI*2*/ + addbyte(0x34); + addbyte(0x72); + } + else + { + addbyte(0xc1); /*SHR EAX, 12*/ + addbyte(0xe8); + addbyte(12); + addbyte(0x8d); /*LEA ESI, RDX+RSI*4*/ + addbyte(0x34); + addbyte(0xb2); + } + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ + addbyte(0x97); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x4c); /*ADD RSI, R8*/ + addbyte(0x01); + addbyte(0xc6); + if (dither2x2) + { + addbyte(0xc1); /*SHL ECX, 2*/ + addbyte(0xe1); + addbyte(2); + addbyte(0x25); /*AND EAX, 0x3fc*/ /*B*/ + 
addlong(0x3fc); + } + else + { + addbyte(0xc1); /*SHL ECX, 4*/ + addbyte(0xe1); + addbyte(4); + addbyte(0x25); /*AND EAX, 0xff0*/ /*B*/ + addlong(0xff0); + } + addbyte(0x0f); /*MOVZX EBX, dither_g[EBX+ESI]*/ + addbyte(0xb6); + addbyte(0x9c); + addbyte(0x1e); + addlong(dither2x2 ? ((uintptr_t)dither_g2x2 - (uintptr_t)dither_rb2x2) : ((uintptr_t)dither_g - (uintptr_t)dither_rb)); + addbyte(0x0f); /*MOVZX ECX, dither_rb[RCX+RSI]*/ + addbyte(0xb6); + addbyte(0x0c); + addbyte(0x0e); + addbyte(0x0f); /*MOVZX EAX, dither_rb[RAX+RSI]*/ + addbyte(0xb6); + addbyte(0x04); + addbyte(0x06); + addbyte(0xc1); /*SHL EBX, 5*/ + addbyte(0xe3); + addbyte(5); + addbyte(0xc1); /*SHL EAX, 11*/ + addbyte(0xe0); + addbyte(11); + addbyte(0x09); /*OR EAX, EBX*/ + addbyte(0xd8); + addbyte(0x09); /*OR EAX, ECX*/ + addbyte(0xc8); + } + else + { + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + addbyte(0x0f); /*MOVZX ECX, AH*/ + addbyte(0xb6); + addbyte(0xcc); + addbyte(0xc1); /*SHR EAX, 3*/ + addbyte(0xe8); + addbyte(3); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0xc1); /*SHL ECX, 3*/ + addbyte(0xe1); + addbyte(3); + addbyte(0x81); /*AND EAX, 0x001f*/ + addbyte(0xe0); + addlong(0x001f); + addbyte(0x81); /*AND EBX, 0xf800*/ + addbyte(0xe3); + addlong(0xf800); + addbyte(0x81); /*AND ECX, 0x07e0*/ + addbyte(0xe1); + addlong(0x07e0); + addbyte(0x09); /*OR EAX, EBX*/ + addbyte(0xd8); + addbyte(0x09); /*OR EAX, ECX*/ + addbyte(0xc8); + } + addbyte(0x48); /*MOV RSI, fb_mem*/ + addbyte(0x8b); + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, fb_mem)); + addbyte(0x66); /*MOV [ESI+EDX*2], AX*/ + addbyte(0x89); + addbyte(0x04); + addbyte(0x56); + } + + if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) + { + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ + addbyte(0x97); + if (params->aux_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x66); /*MOV AX, new_depth*/ 
+ addbyte(0x8b); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, new_depth)); + addbyte(0x48); /*MOV RSI, aux_mem*/ + addbyte(0x8b); + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, aux_mem)); + addbyte(0x66); /*MOV [ESI+EDX*2], AX*/ + addbyte(0x89); + addbyte(0x04); + addbyte(0x56); + } + + if (z_skip_pos) + *(uint32_t *)&code_block[z_skip_pos] = (block_pos - z_skip_pos) - 4; + if (a_skip_pos) + *(uint32_t *)&code_block[a_skip_pos] = (block_pos - a_skip_pos) - 4; + if (chroma_skip_pos) + *(uint32_t *)&code_block[chroma_skip_pos] = (block_pos - chroma_skip_pos) - 4; + + addbyte(0x4c); /*MOV RSI, R15*/ + addbyte(0x89); + addbyte(0xfe); + + addbyte(0xf3); /*MOVDQU XMM1, state->ib[EDI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0xf3); /*MOVDQU XMM3, state->tmu0_s[EDI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu0_s)); + addbyte(0xf3); /*MOVQ XMM4, state->tmu0_w[EDI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu0_w)); + addbyte(0xf3); /*MOVDQU XMM0, params->dBdX[ESI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x86); + addlong(offsetof(voodoo_params_t, dBdX)); + addbyte(0x8b); /*MOV EAX, params->dZdX[ESI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, dZdX)); + addbyte(0xf3); /*MOVDQU XMM5, params->tmu[0].dSdX[ESI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tmu[0].dSdX)); + addbyte(0xf3); /*MOVQ XMM6, params->tmu[0].dWdX[ESI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xb6); + addlong(offsetof(voodoo_params_t, tmu[0].dWdX)); + + if (state->xdir > 0) + { + addbyte(0x66); /*PADDD XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfe); + addbyte(0xc8); + } + else + { + addbyte(0x66); /*PSUBD XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfa); + addbyte(0xc8); + } + + addbyte(0xf3); /*MOVQ XMM0, state->w*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x87); + 
addlong(offsetof(voodoo_state_t, w)); + addbyte(0xf3); /*MOVDQU state->ib, XMM1*/ + addbyte(0x0f); + addbyte(0x7f); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0xf3); /*MOVQ XMM7, params->dWdX*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xbe); + addlong(offsetof(voodoo_params_t, dWdX)); + + if (state->xdir > 0) + { + addbyte(0x66); /*PADDQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xdd); + addbyte(0x66); /*PADDQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xe6); + addbyte(0x66); /*PADDQ XMM0, XMM7*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xc7); + addbyte(0x01); /*ADD state->z[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, z)); + } + else + { + addbyte(0x66); /*PSUBQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xdd); + addbyte(0x66); /*PSUBQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xe6); + addbyte(0x66); /*PSUBQ XMM0, XMM7*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xc7); + addbyte(0x29); /*SUB state->z[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, z)); + } + + if (voodoo->dual_tmus) + { + addbyte(0xf3); /*MOVDQU XMM5, params->tmu[1].dSdX[ESI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tmu[1].dSdX)); + addbyte(0xf3); /*MOVQ XMM6, params->tmu[1].dWdX[ESI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xb6); + addlong(offsetof(voodoo_params_t, tmu[1].dWdX)); + } + + addbyte(0xf3); /*MOVDQU state->tmu0_s, XMM3*/ + addbyte(0x0f); + addbyte(0x7f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu0_s)); + addbyte(0x66); /*MOVQ state->tmu0_w, XMM4*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu0_w)); + addbyte(0x66); /*MOVQ state->w, XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w)); + + if (voodoo->dual_tmus) + { + addbyte(0xf3); /*MOVDQU XMM3, state->tmu1_s[EDI]*/ + addbyte(0x0f); + addbyte(0x6f); + 
addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu1_s)); + addbyte(0xf3); /*MOVQ XMM4, state->tmu1_w[EDI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu1_w)); + + if (state->xdir > 0) + { + addbyte(0x66); /*PADDQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xdd); + addbyte(0x66); /*PADDQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xe6); + } + else + { + addbyte(0x66); /*PSUBQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xdd); + addbyte(0x66); /*PSUBQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xe6); + } + + addbyte(0xf3); /*MOVDQU state->tmu1_s, XMM3*/ + addbyte(0x0f); + addbyte(0x7f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu1_s)); + addbyte(0x66); /*MOVQ state->tmu1_w, XMM4*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu1_w)); + } + + addbyte(0x83); /*ADD state->pixel_count[EDI], 1*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, pixel_count)); + addbyte(1); + + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH || + (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL) + { + addbyte(0x83); /*ADD state->texel_count[EDI], 1*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, texel_count)); + addbyte(1); + } + else + { + addbyte(0x83); /*ADD state->texel_count[EDI], 2*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, texel_count)); + addbyte(2); + } + } + + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x)); + + if (state->xdir > 0) + { + addbyte(0x83); /*ADD state->x[EDI], 1*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x)); + addbyte(1); + } + else + { + addbyte(0x83); /*SUB state->x[EDI], 1*/ + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, x)); + addbyte(1); + } + + addbyte(0x3b); /*CMP EAX, state->x2[EDI]*/ + addbyte(0x87); + 
addlong(offsetof(voodoo_state_t, x2)); + addbyte(0x0f); /*JNZ loop_jump_pos*/ + addbyte(0x85); + addlong(loop_jump_pos - (block_pos + 4)); + + addbyte(0x41); /*POP R15*/ + addbyte(0x5f); + addbyte(0x41); /*POP R14*/ + addbyte(0x5e); + addbyte(0x41); /*POP R13*/ + addbyte(0x5d); + addbyte(0x41); /*POP R12*/ + addbyte(0x5c); + addbyte(0x5b); /*POP RBX*/ + addbyte(0x5e); /*POP RSI*/ + addbyte(0x5f); /*POP RDI*/ + addbyte(0x5d); /*POP RBP*/ + + addbyte(0xC3); /*RET*/ +}
+/*Incremented every time voodoo_get_block() has to generate a new code block.*/
+int voodoo_recomp = 0;
+
+/*Return the generated code block matching the current render state,
+  generating and caching a new one when no cached block matches.
+
+  voodoo->codegen_data is used as an array of voodoo_x86_data_t in which the
+  entries belonging to one odd_even value are 4 apart (index = odd_even + n*4).
+  A cached block matches when xdir, alphaMode, fbzMode, fogMode, fbzColorPath,
+  bit 18 of trexInit1[0], both textureMode values, the LOD_MASK bits of both
+  tLOD values and the tiled flag equal the values recorded when it was built.
+
+  The search starts at last_block[odd_even] (the slot of the previous hit) and
+  wraps around all BLOCK_NUM slots. On a miss the slot named by
+  next_block_to_write[odd_even] is regenerated, round-robin.*/
+static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even)
+{
+        int c;
+        int b = last_block[odd_even];
+        voodoo_x86_data_t *voodoo_x86_data = voodoo->codegen_data;
+        voodoo_x86_data_t *data;
+
+        for (c = 0; c < BLOCK_NUM; c++)
+        {
+                /*Index by the MRU cursor b, not by c: the same slots are visited,
+                  but the most recently matched one is tested first and the value
+                  written to last_block[] really is the slot that matched.*/
+                data = &voodoo_x86_data[odd_even + b*4];
+
+                if (state->xdir == data->xdir &&
+                    params->alphaMode == data->alphaMode &&
+                    params->fbzMode == data->fbzMode &&
+                    params->fogMode == data->fogMode &&
+                    params->fbzColorPath == data->fbzColorPath &&
+                    (voodoo->trexInit1[0] & (1 << 18)) == data->trexInit1 &&
+                    params->textureMode[0] == data->textureMode[0] &&
+                    params->textureMode[1] == data->textureMode[1] &&
+                    (params->tLOD[0] & LOD_MASK) == data->tLOD[0] &&
+                    (params->tLOD[1] & LOD_MASK) == data->tLOD[1] &&
+                    ((params->col_tiled || params->aux_tiled) ? 1 : 0) == data->is_tiled)
+                {
+                        last_block[odd_even] = b;
+                        return data->code_block;
+                }
+
+                b = (b + 1) & (BLOCK_NUM - 1);
+        }
+
+        /*Miss: rebuild the next round-robin slot and record the state it was built for.*/
+        voodoo_recomp++;
+        data = &voodoo_x86_data[odd_even + next_block_to_write[odd_even]*4];
+
+        voodoo_generate(data->code_block, voodoo, params, state, depth_op);
+
+        data->xdir = state->xdir;
+        data->alphaMode = params->alphaMode;
+        data->fbzMode = params->fbzMode;
+        data->fogMode = params->fogMode;
+        data->fbzColorPath = params->fbzColorPath;
+        data->trexInit1 = voodoo->trexInit1[0] & (1 << 18);
+        data->textureMode[0] = params->textureMode[0];
+        data->textureMode[1] = params->textureMode[1];
+        data->tLOD[0] = params->tLOD[0] & LOD_MASK;
+        data->tLOD[1] = params->tLOD[1] & LOD_MASK;
+        data->is_tiled = (params->col_tiled || params->aux_tiled) ? 1 : 0;
+
+        next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) & (BLOCK_NUM - 1);
+
+        return data->code_block;
+}
+
+/*Allocate the executable code cache and fill the constant lookup tables.
+
+  Maps BLOCK_NUM*4 voodoo_x86_data_t entries as readable, writable and
+  executable memory and stores the pointer in voodoo->codegen_data; a failed
+  allocation is reported through fatal() instead of being used later.
+  Then fills alookup[0..256], aminuslookup[0..255], bilinear_lookup[] (two
+  entries per 8-bit index, built from its low and high nibbles) and
+  xmm_00_ff_w[].*/
+void voodoo_codegen_init(voodoo_t *voodoo)
+{
+        int c;
+
+#if WIN64
+        voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM * 4, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+        if (!voodoo->codegen_data)
+                fatal("voodoo_codegen_init : VirtualAlloc failed\n");
+#else
+        /*fd is -1: portable code must not pass a real descriptor with MAP_ANON.*/
+        voodoo->codegen_data = mmap(0, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0);
+        if (voodoo->codegen_data == MAP_FAILED)
+                fatal("voodoo_codegen_init : mmap failed\n");
+#endif
+
+        for (c = 0; c < 256; c++)
+        {
+                int d[4];
+                int _ds = c & 0xf;
+                int dt = c >> 4;
+
+                /*c (resp. 255-c) replicated into the four low 16-bit lanes.*/
+                alookup[c] = _mm_set_epi32(0, 0, c | (c << 16), c | (c << 16));
+                aminuslookup[c] = _mm_set_epi32(0, 0, (255-c) | ((255-c) << 16), (255-c) | ((255-c) << 16));
+
+                /*Four weights formed from the two nibbles; they always sum to 256.*/
+                d[0] = (16 - _ds) * (16 - dt);
+                d[1] = _ds * (16 - dt);
+                d[2] = (16 - _ds) * dt;
+                d[3] = _ds * dt;
+
+                bilinear_lookup[c*2] = _mm_set_epi32(d[1] | (d[1] << 16), d[1] | (d[1] << 16), d[0] | (d[0] << 16), d[0] | (d[0] << 16));
+                bilinear_lookup[c*2 + 1] = _mm_set_epi32(d[3] | (d[3] << 16), d[3] | (d[3] << 16), d[2] | (d[2] << 16), d[2] | (d[2] << 16));
+        }
+        alookup[256] = _mm_set_epi32(0, 0, 256 | (256 << 16), 256 | (256 << 16));
+        xmm_00_ff_w[0] = _mm_set_epi32(0, 0, 0, 0);
+        xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16));
+}
+
+/*Release the code cache mapped by voodoo_codegen_init().
+  The munmap length is the same expression that was used for the mapping.*/
+void voodoo_codegen_close(voodoo_t *voodoo)
+{
+#if WIN64
+        VirtualFree(voodoo->codegen_data, 0, MEM_RELEASE);
+#else
+        munmap(voodoo->codegen_data, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4);
+#endif
+}
+
diff --git a/pcem/vid_voodoo_codegen_x86.h b/pcem/vid_voodoo_codegen_x86.h new file mode 100644 index 00000000..c925d5b1 --- /dev/null +++ b/pcem/vid_voodoo_codegen_x86.h @@ -0,0 +1,3412 @@ +/*Registers : + + alphaMode + fbzMode & 0x1f3fff + fbzColorPath +*/ + +#if defined(__linux__) || defined(__APPLE__) +#include +#include +#endif +#if defined WIN32 || defined _WIN32 || defined _WIN32 +#define BITMAP windows_BITMAP +#include +#undef BITMAP +#endif + +#include + +#define BLOCK_NUM 8 +#define BLOCK_MASK (BLOCK_NUM-1) +#define BLOCK_SIZE 8192 + +#define LOD_MASK (LOD_TMIRROR_S | LOD_TMIRROR_T) + +typedef struct voodoo_x86_data_t +{ + uint8_t code_block[BLOCK_SIZE]; + int xdir; + uint32_t alphaMode; + uint32_t fbzMode; + uint32_t fogMode; + uint32_t fbzColorPath; + uint32_t textureMode[2]; + uint32_t tLOD[2]; + uint32_t trexInit1; + int is_tiled; +} voodoo_x86_data_t; + +static int last_block[4] = {0, 0}; +static int next_block_to_write[4] = {0, 0}; + +#define addbyte(val) \ + do { \ + code_block[block_pos++] = val; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) + +#define addword(val) \ + do { \ + *(uint16_t *)&code_block[block_pos] = val; \ + block_pos += 2; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) + +#define addlong(val) \ + do { \ + *(uint32_t *)&code_block[block_pos] = val; \ + block_pos += 4; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) + +#define addquad(val) \ + do { \ + *(uint64_t *)&code_block[block_pos] = val; \ + block_pos += 8; \ + if (block_pos >= BLOCK_SIZE) \ + fatal("Over!\n"); \ + } while (0) + + +static __m128i xmm_01_w;// = 0x0001000100010001ull; +static __m128i xmm_ff_w;// =
0x00ff00ff00ff00ffull; +static __m128i xmm_ff_b;// = 0x00000000ffffffffull; + +static uint32_t zero = 0; +static double const_1_48 = (double)(1ull << 4); /*NOTE(review): the name reads like 1<<48 but the initializer is 1<<4 (16.0) - confirm the intended scale*/ + +static __m128i alookup[257], aminuslookup[256]; +static __m128i minus_254;// = 0xff02ff02ff02ff02ull; +static __m128i bilinear_lookup[256*2]; +static __m128i xmm_00_ff_w[2]; +static uint32_t i_00_ff_w[2] = {0, 0xff}; + +/*Emit x86 machine code that fetches one texel for texture unit 'tmu'. + Bytes are appended to code_block starting at block_pos and the updated block_pos is returned. + When textureMode[tmu] bit 0 is set the emitted code divides S and T by W on the x87 FPU and derives the LOD from the bits of the float stored in log_temp (via logtable), clamped to lod_min/lod_max; + otherwise S and T are shifted right by 28 and lod_min is used directly. + When FBZCP_TEXTURE_ENABLED is set, a bilinear fetch is emitted if voodoo->bilinear_enabled and (textureMode[tmu] & 6), else a single-texel fetch; + per the inline comments both paths leave the texel in EAX.*/ +static inline int codegen_texture_fetch(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int block_pos, int tmu) +{ + if (params->textureMode[tmu] & 1) + { + addbyte(0xdf); /*FILDq state->tmu0_w*/ + addbyte(0xaf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_w) : offsetof(voodoo_state_t, tmu0_w)); + addbyte(0xdd); /*FLDq const_1_48*/ + addbyte(0x05); + addlong((uint32_t)&const_1_48); + addbyte(0xde); /*FDIV ST(1)*/ + addbyte(0xf1); + addbyte(0xdf); /*FILDq state->tmu0_s*/ + addbyte(0xaf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s)); + addbyte(0xdf); /*FILDq state->tmu0_t*/ /*ST(0)=t, ST(1)=s, ST(2)=1/w*/ + addbyte(0xaf); + addlong(tmu ?
offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t)); + addbyte(0xd9); /*FXCH ST(1)*/ /*ST(0)=s, ST(1)=t, ST(2)=1/w*/ + addbyte(0xc9); + addbyte(0xd8); /*FMUL ST(2)*/ /*ST(0)=s/w, ST(1)=t, ST(2)=1/w*/ + addbyte(0xca); + addbyte(0xd9); /*FXCH ST(1)*/ /*ST(0)=t, ST(1)=s/w, ST(2)=1/w*/ + addbyte(0xc9); + addbyte(0xd8); /*FMUL ST(2)*/ /*ST(0)=t/w, ST(1)=s/w, ST(2)=1/w*/ + addbyte(0xca); + addbyte(0xd9); /*FXCH ST(2)*/ /*ST(0)=1/w, ST(1)=s/w, ST(2)=t/w*/ + addbyte(0xca); + addbyte(0xd9); /*FSTPs log_temp*/ /*ST(0)=s/w, ST(1)=t/w*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, log_temp)); + addbyte(0xdf); /*FISTPq state->tex_s*/ + addbyte(0xbf); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x8b); /*MOV EAX, log_temp*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, log_temp)); + addbyte(0xdf); /*FISTPq state->tex_t*/ + addbyte(0xbf); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0xc1); /*SHR EAX, 23-8*/ + addbyte(0xe8); + addbyte(15); + addbyte(0x0f); /*MOVZX EBX, AL*/ + addbyte(0xb6); + addbyte(0xd8); + addbyte(0x25); /*AND EAX, 0xff00*/ + addlong(0xff00); + addbyte(0x2d); /*SUB EAX, (127-44+19)<<8*/ + addlong((127-44+19) << 8); + addbyte(0x0f); /*MOVZX EBX, logtable[EBX]*/ + addbyte(0xb6); + addbyte(0x9b); + addlong((uint32_t)logtable); + addbyte(0x09); /*OR EAX, EBX*/ + addbyte(0xd8); + addbyte(0x03); /*ADD EAX, state->tmu[tmu].lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tmu[tmu].lod)); + addbyte(0x3b); /*CMP EAX, state->lod_min*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x0f); /*CMOVL EAX, state->lod_min*/ + addbyte(0x4c); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x3b); /*CMP EAX, state->lod_max*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_max[tmu])); + addbyte(0x0f); /*CMOVNL EAX, state->lod_max*/ + addbyte(0x4d); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_max[tmu])); + addbyte(0x0f); /*MOVZX EBX, AL*/ + addbyte(0xb6); +
addbyte(0xd8); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(8); + addbyte(0x89); /*MOV state->lod_frac[tmu], EBX*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_frac[tmu])); + addbyte(0x89); /*MOV state->lod, EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + } + else + { + addbyte(0xf3); /*MOVQ XMM4, state->tmu0_s*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xa7); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_s) : offsetof(voodoo_state_t, tmu0_s)); + addbyte(0xf3); /*MOVQ XMM5, state->tmu0_t*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xaf); + addlong(tmu ? offsetof(voodoo_state_t, tmu1_t) : offsetof(voodoo_state_t, tmu0_t)); + addbyte(0xc7); /*MOV state->lod_frac[tmu], 0*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[tmu])); + addlong(0); + addbyte(0x8b); /*MOV EAX, state->lod_min*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_min[tmu])); + addbyte(0x66); /*PSRLQ XMM4, 28*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xd4); + addbyte(28); + addbyte(0x66); /*PSRLQ XMM5, 28*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xd5); + addbyte(28); + addbyte(0x0f); /*MOVZX EBX, AL*/ + addbyte(0xb6); + addbyte(0xd8); + addbyte(0xc1); /*SHR EAX, 8*/ + addbyte(0xe8); + addbyte(8); + addbyte(0x66); /*MOVQ state->tex_s, XMM4*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x66); /*MOVQ state->tex_t, XMM5*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, tex_t)); + addbyte(0x89); /*MOV state->lod_frac[tmu], EBX*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_frac[tmu])); + addbyte(0x89); /*MOV state->lod, EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + } + /*EAX = state->lod*/ + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if (voodoo->bilinear_enabled && (params->textureMode[tmu] & 6)) + { + addbyte(0x8b); /*MOV ECX, state->tex_lod[tmu]*/ + addbyte(0x8f); +
addlong(offsetof(voodoo_state_t, tex_lod[tmu])); + addbyte(0xb2); /*MOV DL, 8*/ + addbyte(8); + addbyte(0x8b); /*MOV ECX, [ECX+EAX*4]*/ + addbyte(0x0c); + addbyte(0x81); + addbyte(0xbd); /*MOV EBP, 8*/ + addlong(8); + addbyte(0x28); /*SUB DL, CL*/ + addbyte(0xca); + addbyte(0xd3); /*SHL EBP, CL*/ + addbyte(0xe5); + addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_t)); + if (params->tLOD[tmu] & LOD_TMIRROR_S) + { + addbyte(0xa9); /*TEST EAX, 0x1000*/ + addlong(0x1000); + addbyte(0x74); /*JZ +*/ + addbyte(2); + addbyte(0xf7); /*NOT EAX*/ + addbyte(0xd0); + } + if (params->tLOD[tmu] & LOD_TMIRROR_T) + { + addbyte(0xf7); /*TEST EBX, 0x1000*/ + addbyte(0xc3); + addlong(0x1000); + addbyte(0x74); /*JZ +*/ + addbyte(2); + addbyte(0xf7); /*NOT EBX*/ + addbyte(0xd3); + } + addbyte(0x29); /*SUB EAX, EBP*/ + addbyte(0xe8); + addbyte(0x29); /*SUB EBX, EBP*/ + addbyte(0xeb); + addbyte(0xd3); /*SAR EAX, CL*/ + addbyte(0xf8); + addbyte(0xd3); /*SAR EBX, CL*/ + addbyte(0xfb); + addbyte(0x89); /*MOV EBP, EAX*/ + addbyte(0xc5); + addbyte(0x89); /*MOV ECX, EBX*/ + addbyte(0xd9); + addbyte(0x83); /*AND EBP, 0xf*/ + addbyte(0xe5); + addbyte(0xf); + addbyte(0xc1); /*SHL ECX, 4*/ + addbyte(0xe1); + addbyte(4); + addbyte(0xc1); /*SAR EAX, 4*/ + addbyte(0xf8); + addbyte(4); + addbyte(0x81); /*AND ECX, 0xf0*/ + addbyte(0xe1); + addlong(0xf0); + addbyte(0xc1); /*SAR EBX, 4*/ + addbyte(0xfb); + addbyte(4); + addbyte(0x09); /*OR EBP, ECX*/ + addbyte(0xcd); + addbyte(0x8b); /*MOV ECX, state->lod[EDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xc1); /*SHL EBP, 5*/ + addbyte(0xe5); + addbyte(5); + /*EAX = S, EBX = T, ECX = LOD, EDX = tex_shift, ESI=params, EDI=state, EBP = bilinear shift*/ + addbyte(0x8d); /*LEA ESI, [ESI+ECX*4]*/ + addbyte(0x34); + addbyte(0x8e); + addbyte(0x89); /*MOV ebp_store, EBP*/ +
addbyte(0xaf); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x8b); /*MOV EBP, state->tex[EDI+ECX*4]*/ + addbyte(0xac); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex[tmu])); + addbyte(0x88); /*MOV CL, DL*/ + addbyte(0xd1); + addbyte(0x89); /*MOV EDX, EBX*/ + addbyte(0xda); + if (!state->clamp_s[tmu]) + { + addbyte(0x23); /*AND EAX, params->tex_w_mask[ESI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + } + addbyte(0x83); /*ADD EDX, 1*/ + addbyte(0xc2); + addbyte(1); + if (state->clamp_t[tmu]) + { + addbyte(0x0f); /*CMOVS EDX, zero*/ + addbyte(0x48); + addbyte(0x15); + addlong((uint32_t)&zero); + addbyte(0x3b); /*CMP EDX, params->tex_h_mask[ESI]*/ + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x0f); /*CMOVA EDX, params->tex_h_mask[ESI]*/ + addbyte(0x47); + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x85); /*TEST EBX,EBX*/ + addbyte(0xdb); + addbyte(0x0f); /*CMOVS EBX, zero*/ + addbyte(0x48); + addbyte(0x1d); + addlong((uint32_t)&zero); + addbyte(0x3b); /*CMP EBX, params->tex_h_mask[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x0f); /*CMOVA EBX, params->tex_h_mask[ESI]*/ + addbyte(0x47); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + } + else + { + addbyte(0x23); /*AND EDX, params->tex_h_mask[ESI]*/ + addbyte(0x96); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + addbyte(0x23); /*AND EBX, params->tex_h_mask[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu])); + } + /*EAX = S, EBX = T0, EDX = T1*/ + addbyte(0xd3); /*SHL EBX, CL*/ + addbyte(0xe3); + addbyte(0xd3); /*SHL EDX, CL*/ + addbyte(0xe2); + addbyte(0x8d); /*LEA EBX,[EBP+EBX*4]*/ + addbyte(0x5c); + addbyte(0x9d); + addbyte(0); + addbyte(0x8d); /*LEA EDX,[EBP+EDX*4]*/ + addbyte(0x54); + addbyte(0x95); + addbyte(0); + if (state->clamp_s[tmu]) + { + addbyte(0x8b); /*MOV EBP,
params->tex_w_mask[ESI]*/ + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x8b); /*MOV ESI, ebp_store*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x0f); /*CMOVS EAX, zero*/ + addbyte(0x48); + addbyte(0x05); + addlong((uint32_t)&zero); + addbyte(0x78); /*JS + - clamp on 0*/ + addbyte(2+3+2+ 5+5+2); + addbyte(0x3b); /*CMP EAX, EBP*/ + addbyte(0xc5); + addbyte(0x0f); /*CMOVAE EAX, EBP*/ + addbyte(0x43); + addbyte(0xc5); + addbyte(0x73); /*JAE + - clamp on +*/ + addbyte(5+5+2); + } + else + { + addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI] - is S at texture edge (ie will wrap/clamp)?*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu])); + addbyte(0x8b); /*MOV ESI, ebp_store*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, ebp_store)); + addbyte(0x74); /*JE +*/ + addbyte(5+5+2); + } + + addbyte(0xf3); /*MOVQ XMM0, [EBX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x04); + addbyte(0x83); + addbyte(0xf3); /*MOVQ XMM1, [EDX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x0c); + addbyte(0x82); + + if (state->clamp_s[tmu]) + { + addbyte(0xeb); /*JMP +*/ + addbyte(5+5+4+4); + + /*S clamped - the two S coordinates are the same*/ + addbyte(0x66); /*MOVD XMM0, [EBX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x04); + addbyte(0x83); + addbyte(0x66); /*MOVD XMM1, [EDX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x0c); + addbyte(0x82); + addbyte(0x66); /*PUNPCKLDQ XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc0); + addbyte(0x66); /*PUNPCKLDQ XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x62); + addbyte(0xc9); + } + else + { + addbyte(0xeb); /*JMP +*/ + addbyte(5+5+5+5+6+6); + + /*S wrapped - the two S coordinates are not contiguous*/ + addbyte(0x66); /*MOVD XMM0, [EBX+EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x04); + addbyte(0x83); + addbyte(0x66); /*MOVD XMM1, [EDX+EAX*4]*/ +
addbyte(0x0f); + addbyte(0x6e); + addbyte(0x0c); + addbyte(0x82); + addbyte(0x66); /*PINSRW XMM0, [EBX], 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x03); + addbyte(0x02); + addbyte(0x66); /*PINSRW XMM1, [EDX], 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x0a); + addbyte(0x02); + addbyte(0x66); /*PINSRW XMM0, 2[EBX], 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x43); + addbyte(0x02); + addbyte(0x03); + addbyte(0x66); /*PINSRW XMM1, 2[EDX], 3*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x4a); + addbyte(0x02); + addbyte(0x03); + } + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xca); + + addbyte(0x81); /*ADD ESI, bilinear_lookup*/ + addbyte(0xc6); + addlong((uint32_t)bilinear_lookup); + + addbyte(0x66); /*PMULLW XMM0, bilinear_lookup[ESI]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x06); + addbyte(0x66); /*PMULLW XMM1, bilinear_lookup[ESI]+0x10*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x4e); + addbyte(0x10); + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0 | 1 | (0 << 3)); + addbyte(0x66); /*MOVDQA XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xc0 | 0 | (1 << 3)); + addbyte(0x66); /*PSRLDQ XMM0, 8 (byte count, i.e. 64 bits)*/ + addbyte(0x0f); + addbyte(0x73); + addbyte(0xd8); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc0 | 1 | (0 << 3)); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0 | 0); + addbyte(8); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + + addbyte(0x8b); /*MOV ESI, [ESP+8]*/ + addbyte(0x74); + addbyte(0x24); + addbyte(8+16); /*CHECK!*/ + + addbyte(0x66); /*MOV EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + } + else + { + addbyte(0x8b); /*MOV ECX, state->tex_lod[tmu]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t,
tex_lod[tmu])); + addbyte(0xb2); /*MOV DL, 8*/ + addbyte(8); + addbyte(0x8b); /*MOV ECX, [ECX+EAX*4]*/ + addbyte(0x0c); + addbyte(0x81); + addbyte(0x8b); /*MOV EBP, state->tex[EDI+ECX*4]*/ + addbyte(0xac); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, tex[tmu])); + addbyte(0x28); /*SUB DL, CL*/ + addbyte(0xca); + addbyte(0x80); /*ADD CL, 4*/ + addbyte(0xc1); + addbyte(4); + addbyte(0x8b); /*MOV EAX, state->tex_s[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_s)); + addbyte(0x8b); /*MOV EBX, state->tex_t[EDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_t)); + if (params->tLOD[tmu] & LOD_TMIRROR_S) + { + addbyte(0xa9); /*TEST EAX, 0x1000*/ + addlong(0x1000); + addbyte(0x74); /*JZ +*/ + addbyte(2); + addbyte(0xf7); /*NOT EAX*/ + addbyte(0xd0); + } + if (params->tLOD[tmu] & LOD_TMIRROR_T) + { + addbyte(0xf7); /*TEST EBX, 0x1000*/ + addbyte(0xc3); + addlong(0x1000); + addbyte(0x74); /*JZ +*/ + addbyte(2); + addbyte(0xf7); /*NOT EBX*/ + addbyte(0xd3); + } + addbyte(0xd3); /*SHR EAX, CL*/ + addbyte(0xe8); + addbyte(0xd3); /*SHR EBX, CL*/ + addbyte(0xeb); + if (state->clamp_s[tmu]) + { + addbyte(0x85); /*TEST EAX, EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*CMOVS EAX, zero*/ + addbyte(0x48); + addbyte(0x05); + addlong((uint32_t)&zero); + addbyte(0x3b); /*CMP EAX, params->tex_w_mask[ESI+ECX*4]*/ + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + addbyte(0x0f); /*CMOVAE EAX, params->tex_w_mask[ESI+ECX*4]*/ + addbyte(0x43); + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + + } + else + { + addbyte(0x23); /*AND EAX, params->tex_w_mask-0x10[ESI+ECX*4]*/ + addbyte(0x84); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_w_mask[tmu]) - 0x10); + } + if (state->clamp_t[tmu]) + { + addbyte(0x85); /*TEST EBX, EBX*/ + addbyte(0xdb); + addbyte(0x0f); /*CMOVS EBX, zero*/ + addbyte(0x48); + addbyte(0x1d); + addlong((uint32_t)&zero); + addbyte(0x3b);
/*CMP EBX, params->tex_h_mask[ESI+ECX*4]*/ + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + addbyte(0x0f); /*CMOVAE EBX, params->tex_h_mask[ESI+ECX*4]*/ + addbyte(0x43); + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + } + else + { + addbyte(0x23); /*AND EBX, params->tex_h_mask-0x10[ESI+ECX*4]*/ + addbyte(0x9c); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, tex_h_mask[tmu]) - 0x10); + } + addbyte(0x88); /*MOV CL, DL*/ + addbyte(0xd1); + addbyte(0xd3); /*SHL EBX, CL*/ + addbyte(0xe3); + addbyte(0x01); /*ADD EBX, EAX*/ + addbyte(0xc3); + + addbyte(0x8b); /*MOV EAX,[EBP+EBX*4]*/ + addbyte(0x44); + addbyte(0x9d); + addbyte(0); + } + } + + return block_pos; +} + +static inline void voodoo_generate(uint8_t *code_block, voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int depthop) +{ + int block_pos = 0; + int z_skip_pos = 0; + int a_skip_pos = 0; + int chroma_skip_pos = 0; + int depth_jump_pos = 0; + int depth_jump_pos2 = 0; + int loop_jump_pos = 0; +// xmm_01_w = (__m128i)0x0001000100010001ull; +// xmm_ff_w = (__m128i)0x00ff00ff00ff00ffull; +// xmm_ff_b = (__m128i)0x00000000ffffffffull; + xmm_01_w = _mm_set_epi32(0, 0, 0x00010001, 0x00010001); + xmm_ff_w = _mm_set_epi32(0, 0, 0x00ff00ff, 0x00ff00ff); + xmm_ff_b = _mm_set_epi32(0, 0, 0, 0x00ffffff); + minus_254 = _mm_set_epi32(0, 0, 0xff02ff02, 0xff02ff02); +// *(uint64_t *)&const_1_48 = 0x45b0000000000000ull; +// block_pos = 0; +// voodoo_get_depth = &code_block[block_pos]; + /*W at (%esp+4) + Z at (%esp+12) + new_depth at (%esp+16)*/ +// if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depth_op == DEPTHOP_NEVER)) +// { +// addbyte(0xC3); /*RET*/ +// return; +// } + addbyte(0x55); /*PUSH EBP*/ + addbyte(0x57); /*PUSH EDI*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0x53); /*PUSH EBX*/ + + addbyte(0x8b); /*MOV EDI, [ESP+4]*/ + addbyte(0x7c); + addbyte(0x24); + addbyte(4+16); + loop_jump_pos = block_pos; +
addbyte(0x8b); /*MOV ESI, [ESP+8]*/ + addbyte(0x74); + addbyte(0x24); + addbyte(8+16); + if (params->col_tiled || params->aux_tiled) + { + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + addbyte(0x83); /*AND EAX, 63*/ + addbyte(0xe0); + addbyte(63); + addbyte(0xc1); /*SHR EBX, 6*/ + addbyte(0xeb); + addbyte(6); + addbyte(0xc1); /*SHL EBX, 11 - tile is 128*32, << 12, div 2 because word index*/ + addbyte(0xe3); + addbyte(11); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + addbyte(0x89); /*MOV state->x_tiled[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x_tiled)); + } + addbyte(0x66); /*PXOR XMM2, XMM2*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xd2); + + if ((params->fbzMode & FBZ_W_BUFFER) || (params->fogMode & (FOG_ENABLE|FOG_CONSTANT|FOG_Z|FOG_ALPHA)) == FOG_ENABLE) + { + addbyte(0xb8); /*MOV new_depth, 0*/ + addlong(0); + addbyte(0x66); /*TEST w+4, 0xffff*/ + addbyte(0xf7); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w)+4); + addword(0xffff); + addbyte(0x75); /*JNZ got_depth*/ + depth_jump_pos = block_pos; + addbyte(0); +// addbyte(4+5+2+3+2+5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3); + addbyte(0x8b); /*MOV EDX, w*/ + addbyte(0x97); + addlong(offsetof(voodoo_state_t, w)); + addbyte(0xb8); /*MOV new_depth, 0xf001*/ + addlong(0xf001); + addbyte(0x89); /*MOV EBX, EDX*/ + addbyte(0xd3); + addbyte(0xc1); /*SHR EDX, 16*/ + addbyte(0xea); + addbyte(16); + addbyte(0x74); /*JZ got_depth*/ + depth_jump_pos2 = block_pos; + addbyte(0); +// addbyte(5+5+3+2+2+2+/*3+*/3+2+6+4+5+2+3); + addbyte(0xb9); /*MOV ECX, 19*/ + addlong(19); + addbyte(0x0f); /*BSR EAX, EDX*/ + addbyte(0xbd); + addbyte(0xc2); + addbyte(0xba); /*MOV EDX, 15*/ + addlong(15); + addbyte(0xf7); /*NOT EBX*/ + addbyte(0xd3); + addbyte(0x29); /*SUB EDX, EAX - EDX = exp*/ + addbyte(0xc2); + addbyte(0x29); /*SUB ECX, EDX*/ + addbyte(0xd1); + addbyte(0xc1); /*SHL EDX, 12*/ + addbyte(0xe2); 
+ addbyte(12); + addbyte(0xd3); /*SHR EBX, CL*/ + addbyte(0xeb); + addbyte(0x81); /*AND EBX, 0xfff - EBX = mant*/ + addbyte(0xe3); + addlong(0xfff); + addbyte(0x8d); /*LEA EAX, 1[EDX, EBX]*/ + addbyte(0x44); + addbyte(0x13); + addbyte(1); + addbyte(0xbb); /*MOV EBX, 0xffff*/ + addlong(0xffff); + addbyte(0x39); /*CMP EAX, EBX*/ + addbyte(0xd8); + addbyte(0x0f); /*CMOVA EAX, EBX*/ + addbyte(0x47); + addbyte(0xc3); + + if (depth_jump_pos) + *(uint8_t *)&code_block[depth_jump_pos] = (block_pos - depth_jump_pos) - 1; + if (depth_jump_pos) + *(uint8_t *)&code_block[depth_jump_pos2] = (block_pos - depth_jump_pos2) - 1; + + if ((params->fogMode & (FOG_ENABLE|FOG_CONSTANT|FOG_Z|FOG_ALPHA)) == FOG_ENABLE) + { + addbyte(0x89); /*MOV state->w_depth[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w_depth)); + } + } + if (!(params->fbzMode & FBZ_W_BUFFER)) + { + addbyte(0x8b); /*MOV EAX, z*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, z)); + addbyte(0xbb); /*MOV EBX, 0xffff*/ + addlong(0xffff); + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + addbyte(0xc1); /*SAR EAX, 12*/ + addbyte(0xf8); + addbyte(12); + addbyte(0x0f); /*CMOVS EAX, ECX*/ + addbyte(0x48); + addbyte(0xc1); + addbyte(0x39); /*CMP EAX, EBX*/ + addbyte(0xd8); + addbyte(0x0f); /*CMOVA EAX, EBX*/ + addbyte(0x47); + addbyte(0xc3); + } + + if (params->fbzMode & FBZ_DEPTH_BIAS) + { + addbyte(0x0f); /*MOVSX EDX, params->zaColor[ESI]*/ + addbyte(0xbf); + addbyte(0x96); + addlong(offsetof(voodoo_params_t, zaColor)); + if (params->fbzMode & FBZ_W_BUFFER) + { + addbyte(0xbb); /*MOV EBX, 0xffff*/ + addlong(0xffff); + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x01); /*ADD EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVS EAX, ECX*/ + addbyte(0x48); + addbyte(0xc1); + addbyte(0x39); /*CMP EAX, EBX*/ + addbyte(0xd8); + addbyte(0x0f); /*CMOVA EAX, EBX*/ + addbyte(0x47); + addbyte(0xc3); + } + + addbyte(0x89); /*MOV state->new_depth[EDI], EAX*/ + addbyte(0x87); + 
addlong(offsetof(voodoo_state_t, new_depth)); + + if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop != DEPTHOP_ALWAYS) && (depthop != DEPTHOP_NEVER)) + { + addbyte(0x8b); /*MOV EBX, state->x[EDI]*/ + addbyte(0x9f); + if (params->aux_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x8b);/*MOV ECX, aux_mem[EDI]*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, aux_mem)); + addbyte(0x0f); /*MOVZX EBX, [ECX+EBX*2]*/ + addbyte(0xb7); + addbyte(0x1c); + addbyte(0x59); + if (params->fbzMode & FBZ_DEPTH_SOURCE) + { + addbyte(0x0f); /*MOVZX EAX, zaColor[ESI]*/ + addbyte(0xb7); + addbyte(0x86); + addlong(offsetof(voodoo_params_t, zaColor)); + } + addbyte(0x39); /*CMP EAX, EBX*/ + addbyte(0xd8); + if (depthop == DEPTHOP_LESSTHAN) + { + addbyte(0x0f); /*JAE skip*/ + addbyte(0x83); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_EQUAL) + { + addbyte(0x0f); /*JNE skip*/ + addbyte(0x85); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_LESSTHANEQUAL) + { + addbyte(0x0f); /*JA skip*/ + addbyte(0x87); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_GREATERTHAN) + { + addbyte(0x0f); /*JBE skip*/ + addbyte(0x86); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_NOTEQUAL) + { + addbyte(0x0f); /*JE skip*/ + addbyte(0x84); + z_skip_pos = block_pos; + addlong(0); + } + else if (depthop == DEPTHOP_GREATERTHANEQUAL) + { + addbyte(0x0f); /*JB skip*/ + addbyte(0x82); + z_skip_pos = block_pos; + addlong(0); + } + else + fatal("Bad depth_op\n"); + } + else if ((params->fbzMode & FBZ_DEPTH_ENABLE) && (depthop == DEPTHOP_NEVER)) + { + addbyte(0xC3); /*RET*/ +// addbyte(0x30); /*XOR EAX, EAX*/ +// addbyte(0xc0); + } +// else +// { +// addbyte(0xb0); /*MOV AL, 1*/ +// addbyte(1); +// } + + +// voodoo_combine = &code_block[block_pos]; + /*XMM0 = colour*/ + /*XMM2 = 0 (for unpacking*/ + + /*EDI = state, ESI = params*/ + + if 
((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus) + { + /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/ + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + } + else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH) + { + /*TMU0 in pass-through mode, only sample TMU1*/ + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + } + else + { + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 1); + + addbyte(0x66); /*MOVD XMM3, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xd8); + if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && tc_sub_clocal_1) + { + addbyte(0x8b); /*MOV EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + if (!tc_reverse_blend_1) + { + addbyte(0xbb); /*MOV EBX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + addbyte(0x83); /*AND EAX, 1*/ + addbyte(0xe0); + addbyte(1); + if (!tca_reverse_blend_1) + { + addbyte(0xb9); /*MOV ECX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x31); /*XOR ECX, EAX*/ + addbyte(0xc1); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/ + } + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ 
+ addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + if (tc_sub_clocal_1) + { + switch (tc_mselect_1) + { + case TC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case TC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOVQ XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + break; + case TC_MSELECT_AOTHER: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case TC_MSELECT_ALOCAL: + addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc3); + addbyte(0xff); + break; + case TC_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/ + addbyte(0xe0); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVNL EAX, EDX*/ + addbyte(0x4d); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0); + addbyte(0); + break; + case TC_MSELECT_LOD_FRAC: + addbyte(0x66); /*MOVD XMM0, state->lod_frac[1]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[1])); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc0); + addbyte(0); + break; + } + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x66); /*PXOR XMM0, xmm_00_ff_w[EBX]*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x83); + addlong((uint32_t)&xmm_00_ff_w[0]); + } + else if (!tc_reverse_blend_1) + { + addbyte(0x66); /*PXOR XMM0, xmm_ff_w*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x05); + 
addlong((uint32_t)&xmm_ff_w); + } + addbyte(0x66); /*PADD XMM0, xmm_01_w*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x05); + addlong((uint32_t)&xmm_01_w); + addbyte(0xf3); /*MOVQ XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xca); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PMULLW XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc3); + addbyte(0x66); /*PMULHW XMM5, XMM3*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xeb); + addbyte(0x66); /*PUNPCKLWD XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xc5); + addbyte(0x66); /*PSRAD XMM0, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PSUBW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc8); + if (tc_add_clocal_1) + { + addbyte(0x66); /*PADDW XMM1, XMM3*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xcb); + } + else if (tc_add_alocal_1) + { + addbyte(0xf2); /*PSHUFLW XMM0, XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xc3); + addbyte(0xff); + addbyte(0x66); /*PADDW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc8); + } + addbyte(0xf3); /*MOVD XMM3, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xd9); + addbyte(0x66); /*PACKUSWB XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xdb); + if (tca_sub_clocal_1) + { + addbyte(0x66); /*MOVD EBX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdb); + } + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + } + + if (tca_sub_clocal_1) + { + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + switch (tca_mselect_1) + { + case TCA_MSELECT_ZERO: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + break; + case TCA_MSELECT_CLOCAL: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case TCA_MSELECT_AOTHER: + addbyte(0x31); /*XOR EAX, EAX*/ 
+ addbyte(0xc0); + break; + case TCA_MSELECT_ALOCAL: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case TCA_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[1]*/ + addbyte(0xe0); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVNL EAX, EDX*/ + addbyte(0x4d); + addbyte(0xc2); + break; + case TCA_MSELECT_LOD_FRAC: + addbyte(0x8b); /*MOV EAX, state->lod_frac[1]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod_frac[1])); + break; + } + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x33); /*XOR EAX, i_00_ff_w[ECX*4]*/ + addbyte(0x04); + addbyte(0x8d); + addlong((uint32_t)i_00_ff_w); + } + else if (!tc_reverse_blend_1) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + addbyte(0x83); /*ADD EAX, 1*/ + addbyte(0xc0); + addbyte(1); + addbyte(0x0f); /*IMUL EAX, EBX*/ + addbyte(0xaf); + addbyte(0xc3); + addbyte(0xb9); /*MOV ECX, 0xff*/ + addlong(0xff); + addbyte(0xf7); /*NEG EAX*/ + addbyte(0xd8); + addbyte(0xc1); /*SAR EAX, 8*/ + addbyte(0xf8); + addbyte(8); + if (tca_add_clocal_1 || tca_add_alocal_1) + { + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + } + addbyte(0x39); /*CMP ECX, EAX*/ + addbyte(0xc1); + addbyte(0x0f); /*CMOVA ECX, EAX*/ + addbyte(0x47); + addbyte(0xc8); + addbyte(0x66); /*PINSRW 3, XMM3, XMM0*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0xd8); + addbyte(3); + } + + block_pos = codegen_texture_fetch(code_block, voodoo, params, state, block_pos, 0); + + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + addbyte(0x66); /*MOVD XMM7, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xf8); + + if (params->textureMode[0] 
& TEXTUREMODE_TRILINEAR) + { + addbyte(0x8b); /*MOV EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + if (!tc_reverse_blend) + { + addbyte(0xbb); /*MOV EBX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + } + addbyte(0x83); /*AND EAX, 1*/ + addbyte(0xe0); + addbyte(1); + if (!tca_reverse_blend) + { + addbyte(0xb9); /*MOV ECX, 1*/ + addlong(1); + } + else + { + addbyte(0x31); /*XOR ECX, ECX*/ + addbyte(0xc9); + } + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x31); /*XOR ECX, EAX*/ + addbyte(0xc1); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + /*EBX = tc_reverse_blend, ECX=tca_reverse_blend*/ + } + + /*XMM0 = TMU0 output, XMM3 = TMU1 output*/ + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + if (tc_zero_other) + { + addbyte(0x66); /*PXOR XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc9); + } + else + { + addbyte(0xf3); /*MOV XMM1, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xcb); + } + if (tc_sub_clocal) + { + addbyte(0x66); /*PSUBW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc8); + } + + switch (tc_mselect) + { + case TC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM4, XMM4*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe4); + break; + case TC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOV XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe0); + break; + case TC_MSELECT_AOTHER: + addbyte(0xf2); /*PSHUFLW XMM4, XMM3, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe3); + addbyte(0xff); + break; + case TC_MSELECT_ALOCAL: + addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe0); + addbyte(0xff); + break; + case TC_MSELECT_DETAIL: + addbyte(0xb8); /*MOV EAX, params->detail_bias[0]*/ + addlong(params->detail_bias[0]); + addbyte(0x2b); /*SUB EAX, state->lod*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); 
/*MOV EDX, params->detail_max[0]*/ + addlong(params->detail_max[0]); + addbyte(0xc1); /*SHL EAX, params->detail_scale[0]*/ + addbyte(0xe0); + addbyte(params->detail_scale[0]); + addbyte(0x39); /*CMP EAX, EDX*/ + addbyte(0xd0); + addbyte(0x0f); /*CMOVNL EAX, EDX*/ + addbyte(0x4d); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM4, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xe0); + addbyte(0xf2); /*PSHUFLW XMM4, XMM4, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe4); + addbyte(0); + break; + case TC_MSELECT_LOD_FRAC: + addbyte(0x66); /*MOVD XMM0, state->lod_frac[0]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, lod_frac[0])); + addbyte(0xf2); /*PSHUFLW XMM0, XMM0, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe4); + addbyte(0); + break; + } + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x66); /*PXOR XMM4, xmm_00_ff_w[EBX]*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xa3); + addlong((uint32_t)&xmm_00_ff_w[0]); + } + else if (!tc_reverse_blend) + { + addbyte(0x66); /*PXOR XMM4, FF*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x25); + addlong((uint32_t)&xmm_ff_w); + } + addbyte(0x66); /*PADDW XMM4, 1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x25); + addlong((uint32_t)&xmm_01_w); + addbyte(0xf3); /*MOVQ XMM5, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe9); + addbyte(0x66); /*PMULLW XMM1, XMM4*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xcc); + + if (tca_sub_clocal) + { + addbyte(0x66); /*MOV EBX, XMM7*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xfb); + } + + addbyte(0x66); /*PMULHW XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xec); + addbyte(0x66); /*PUNPCKLWD XMM1, XMM5*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xcd); + addbyte(0x66); /*PSRAD XMM1, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe1); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc9); + + if (tca_sub_clocal) + { + addbyte(0xc1); 
/*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + } + + if (tc_add_clocal) + { + addbyte(0x66); /*PADDW XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc8); + } + else if (tc_add_alocal) + { + addbyte(0xf2); /*PSHUFLW XMM4, XMM0, 3, 3, 3, 3*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xe0); + addbyte(0xff); + addbyte(0x66); /*PADDW XMM1, XMM4*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xcc); + } + + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xdb); + addbyte(0x66); /*PACKUSWB XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc9); + if (tc_invert_output) + { + addbyte(0x66); /*PXOR XMM1, FF*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x0d); + addlong((uint32_t)&xmm_ff_b); + } + + if (tca_zero_other) + { + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + } + else + { + addbyte(0x66); /*MOV EAX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xd8); + addbyte(0xc1); /*SHR EAX, 24*/ + addbyte(0xe8); + addbyte(24); + } + if (tca_sub_clocal) + { + addbyte(0x29); /*SUB EAX, EBX*/ + addbyte(0xd8); + } + switch (tca_mselect) + { + case TCA_MSELECT_ZERO: + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + break; + case TCA_MSELECT_CLOCAL: + addbyte(0x66); /*MOV EBX, XMM7*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xfb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_AOTHER: + addbyte(0x66); /*MOV EBX, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_ALOCAL: + addbyte(0x66); /*MOV EBX, XMM7*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xfb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + break; + case TCA_MSELECT_DETAIL: + addbyte(0xbb); /*MOV EBX, params->detail_bias[1]*/ + addlong(params->detail_bias[1]); + addbyte(0x2b); /*SUB EBX, 
state->lod*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod)); + addbyte(0xba); /*MOV EDX, params->detail_max[1]*/ + addlong(params->detail_max[1]); + addbyte(0xc1); /*SHL EBX, params->detail_scale[1]*/ + addbyte(0xe3); + addbyte(params->detail_scale[1]); + addbyte(0x39); /*CMP EBX, EDX*/ + addbyte(0xd3); + addbyte(0x0f); /*CMOVNL EBX, EDX*/ + addbyte(0x4d); + addbyte(0xda); + break; + case TCA_MSELECT_LOD_FRAC: + addbyte(0x8b); /*MOV EBX, state->lod_frac[0]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, lod_frac[0])); + break; + } + if (params->textureMode[0] & TEXTUREMODE_TRILINEAR) + { + addbyte(0x33); /*XOR EBX, i_00_ff_w[ECX*4]*/ + addbyte(0x1c); + addbyte(0x8d); + addlong((uint32_t)i_00_ff_w); + } + else if (!tca_reverse_blend) + { + addbyte(0x81); /*XOR EBX, 0xFF*/ + addbyte(0xf3); + addlong(0xff); + } + + addbyte(0x83); /*ADD EBX, 1*/ + addbyte(0xc3); + addbyte(1); + addbyte(0x0f); /*IMUL EAX, EBX*/ + addbyte(0xaf); + addbyte(0xc3); + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + addbyte(0xc1); /*SAR EAX, 8*/ + addbyte(0xf8); + addbyte(8); + if (tca_add_clocal || tca_add_alocal) + { + addbyte(0x66); /*MOV EBX, XMM7*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xfb); + addbyte(0xc1); /*SHR EBX, 24*/ + addbyte(0xeb); + addbyte(24); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + } + addbyte(0x0f); /*CMOVS EAX, EDX*/ + addbyte(0x48); + addbyte(0xc2); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + addbyte(0x3d); /*CMP EAX, 0xff*/ + addlong(0xff); + addbyte(0x0f); /*CMOVA EAX, EDX*/ + addbyte(0x47); + addbyte(0xc2); + if (tca_invert_output) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + + addbyte(0x89); /*MOV state->tex_a[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + + addbyte(0xf3); /*MOVQ XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc1); + } + if (cc_mselect == CC_MSELECT_TEXRGB) + { + addbyte(0xf3); /*MOVD XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe0); 
+ } + + if ((params->fbzMode & FBZ_CHROMAKEY)) + { + switch (_rgb_sel) + { + case CC_LOCALSELECT_ITER_RGB: + addbyte(0xf3); /*MOVDQU XMM0, ib*/ /* ir, ig and ib must be in same dqword!*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0x66); /*PSRAD XMM0, 12*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(12); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + addbyte(0x66); /*MOVD EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + break; + case CC_LOCALSELECT_COLOR1: + addbyte(0x8b); /*MOV EAX, params->color1[ESI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, color1)); + break; + case CC_LOCALSELECT_TEX: + addbyte(0x66); /*MOVD EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + break; + } + addbyte(0x8b); /*MOV EBX, params->chromaKey[ESI]*/ + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, chromaKey)); + addbyte(0x31); /*XOR EBX, EAX*/ + addbyte(0xc3); + addbyte(0x81); /*AND EBX, 0xffffff*/ + addbyte(0xe3); + addlong(0xffffff); + addbyte(0x0f); /*JE skip*/ + addbyte(0x84); + chroma_skip_pos = block_pos; + addlong(0); + } + + if (voodoo->trexInit1[0] & (1 << 18)) + { + addbyte(0xb8); /*MOV EAX, tmuConfig*/ + addlong(voodoo->tmuConfig); + addbyte(0x66); /*MOVD XMM0, EAX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xc0); + } + + if ((params->alphaMode & ((1 << 0) | (1 << 4))) || (!(cc_mselect == 0 && cc_reverse_blend == 0) && (cc_mselect == CC_MSELECT_AOTHER || cc_mselect == CC_MSELECT_ALOCAL))) + { + /*EBX = a_other*/ + switch (a_sel) + { + case A_SEL_ITER_A: + addbyte(0x8b); /*MOV EBX, state->ia*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, ia)); + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + addbyte(0xc1); /*SAR EBX, 12*/ + addbyte(0xfb); + addbyte(12); + 
addbyte(0x0f); /*CMOVS EBX, EAX*/ + addbyte(0x48); + addbyte(0xd8); + addbyte(0x39); /*CMP EBX, EDX*/ + addbyte(0xd3); + addbyte(0x0f); /*CMOVA EBX, EDX*/ + addbyte(0x47); + addbyte(0xda); + break; + case A_SEL_TEX: + addbyte(0x8b); /*MOV EBX, state->tex_a*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_a)); + break; + case A_SEL_COLOR1: + addbyte(0x0f); /*MOVZX EBX, params->color1+3*/ + addbyte(0xb6); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, color1)+3); + break; + default: + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + break; + } + /*ECX = a_local*/ + switch (cca_localselect) + { + case CCA_LOCALSELECT_ITER_A: + if (a_sel == A_SEL_ITER_A) + { + addbyte(0x89); /*MOV ECX, EBX*/ + addbyte(0xd9); + } + else + { + addbyte(0x8b); /*MOV ECX, state->ia*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ia)); + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + addbyte(0xc1);/*SAR ECX, 12*/ + addbyte(0xf9); + addbyte(12); + addbyte(0x0f); /*CMOVS ECX, EAX*/ + addbyte(0x48); + addbyte(0xc8); + addbyte(0x39); /*CMP ECX, EDX*/ + addbyte(0xd1); + addbyte(0x0f); /*CMOVA ECX, EDX*/ + addbyte(0x47); + addbyte(0xca); + } + break; + case CCA_LOCALSELECT_COLOR0: + addbyte(0x0f); /*MOVZX ECX, params->color0+3*/ + addbyte(0xb6); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, color0)+3); + break; + case CCA_LOCALSELECT_ITER_Z: + addbyte(0x8b); /*MOV ECX, state->z*/ + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, z)); + if (a_sel != A_SEL_ITER_A) + { + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + addbyte(0xba); /*MOV EDX, 0xff*/ + addlong(0xff); + } + addbyte(0xc1);/*SAR ECX, 20*/ + addbyte(0xf9); + addbyte(20); + addbyte(0x0f); /*CMOVS ECX, EAX*/ + addbyte(0x48); + addbyte(0xc8); + addbyte(0x39); /*CMP ECX, EDX*/ + addbyte(0xd1); + addbyte(0x0f); /*CMOVA ECX, EDX*/ + addbyte(0x47); + addbyte(0xca); + break; + + default: + addbyte(0xb9); /*MOV ECX, 0xff*/ + addlong(0xff); + break; + } + + 
if (cca_zero_other) + { + addbyte(0x31); /*XOR EDX, EDX*/ + addbyte(0xd2); + } + else + { + addbyte(0x89); /*MOV EDX, EBX*/ + addbyte(0xda); + } + + if (cca_sub_clocal) + { + addbyte(0x29); /*SUB EDX, ECX*/ + addbyte(0xca); + } + } + + if (cc_sub_clocal || cc_mselect == 1 || cc_add == 1) + { + /*XMM1 = local*/ + if (!cc_localselect_override) + { + if (cc_localselect) + { + addbyte(0x66); /*MOVD XMM1, params->color0*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, color0)); + } + else + { + addbyte(0xf3); /*MOVDQU XMM1, ib*/ /* ir, ig and ib must be in same dqword!*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0x66); /*PSRAD XMM1, 12*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe1); + addbyte(12); + addbyte(0x66); /*PACKSSDW XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc9); + addbyte(0x66); /*PACKUSWB XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc9); + } + } + else + { + addbyte(0xf6); /*TEST state->tex_a, 0x80*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + addbyte(0x80); + addbyte(0x74);/*JZ !cc_localselect*/ + addbyte(8+2); + addbyte(0x66); /*MOVD XMM1, params->color0*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, color0)); + addbyte(0xeb); /*JMP +*/ + addbyte(8+5+4+4); + /*!cc_localselect:*/ + addbyte(0xf3); /*MOVDQU XMM1, ib*/ /* ir, ig and ib must be in same dqword!*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0x66); /*PSRAD XMM1, 12*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe1); + addbyte(12); + addbyte(0x66); /*PACKSSDW XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc9); + addbyte(0x66); /*PACKUSWB XMM1, XMM1*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc9); + } + addbyte(0x66); /*PUNPCKLBW XMM1, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xca); + } + if (!cc_zero_other) + { + if 
(_rgb_sel == CC_LOCALSELECT_ITER_RGB) + { + addbyte(0xf3); /*MOVDQU XMM0, ib*/ /* ir, ig and ib must be in same dqword!*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0x66); /*PSRAD XMM0, 12*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(12); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + } + else if (_rgb_sel == CC_LOCALSELECT_TEX) + { +#if 0 + addbyte(0xf3); /*MOVDQU XMM0, state->tex_b*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_b)); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); +#endif + } + else if (_rgb_sel == CC_LOCALSELECT_COLOR1) + { + addbyte(0x66); /*MOVD XMM0, params->color1*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x86); + addlong(offsetof(voodoo_params_t, color1)); + } + else + { + /*MOVD XMM0, src_r*/ + } + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + if (cc_sub_clocal) + { + addbyte(0x66); /*PSUBW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc1); + } + } + else + { + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + if (cc_sub_clocal) + { + addbyte(0x66); /*PSUBW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xc1); + } + } + + if (params->alphaMode & ((1 << 0) | (1 << 4))) + { + if (!(cca_mselect == 0 && cca_reverse_blend == 0)) + { + switch (cca_mselect) + { + case CCA_MSELECT_ALOCAL: + addbyte(0x89); /*MOV EAX, ECX*/ + addbyte(0xc8); + break; + case CCA_MSELECT_AOTHER: + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + break; + case CCA_MSELECT_ALOCAL2: + addbyte(0x89); /*MOV EAX, ECX*/ + addbyte(0xc8); + break; + case CCA_MSELECT_TEX: + 
addbyte(0x0f); /*MOVZX EAX, state->tex_a*/ + addbyte(0xb6); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, tex_a)); + break; + + case CCA_MSELECT_ZERO: + default: + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + break; + } + if (!cca_reverse_blend) + { + addbyte(0x35); /*XOR EAX, 0xff*/ + addlong(0xff); + } + addbyte(0x83); /*ADD EAX, 1*/ + addbyte(0xc0); + addbyte(1); + addbyte(0x0f); /*IMUL EDX, EAX*/ + addbyte(0xaf); + addbyte(0xd0); + addbyte(0xc1); /*SHR EDX, 8*/ + addbyte(0xea); + addbyte(8); + } + } + + if ((params->alphaMode & ((1 << 0) | (1 << 4)))) + { + addbyte(0x31); /*XOR EAX, EAX*/ + addbyte(0xc0); + } + + if (!(cc_mselect == 0 && cc_reverse_blend == 0) && cc_mselect == CC_MSELECT_AOTHER) + { + /*Copy a_other to XMM3 before it gets modified*/ + addbyte(0x66); /*MOVD XMM3, EDX*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0xda); + addbyte(0xf2); /*PSHUFLW XMM3, XMM3, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xdb); + addbyte(0x00); + } + + if (cca_add && (params->alphaMode & ((1 << 0) | (1 << 4)))) + { + addbyte(0x01); /*ADD EDX, ECX*/ + addbyte(0xca); + } + + if ((params->alphaMode & ((1 << 0) | (1 << 4)))) + { + addbyte(0x85); /*TEST EDX, EDX*/ + addbyte(0xd2); + addbyte(0x0f); /*CMOVS EDX, EAX*/ + addbyte(0x48); + addbyte(0xd0); + addbyte(0xb8); /*MOV EAX, 0xff*/ + addlong(0xff); + addbyte(0x81); /*CMP EDX, 0xff*/ + addbyte(0xfa); + addlong(0xff); + addbyte(0x0f); /*CMOVA EDX, EAX*/ + addbyte(0x47); + addbyte(0xd0); + + if (cca_invert_output) + { + addbyte(0x81); /*XOR EDX, 0xff*/ + addbyte(0xf2); + addlong(0xff); + } + } + + if (!(cc_mselect == 0 && cc_reverse_blend == 0)) + { + switch (cc_mselect) + { + case CC_MSELECT_ZERO: + addbyte(0x66); /*PXOR XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xdb); + break; + case CC_MSELECT_CLOCAL: + addbyte(0xf3); /*MOV XMM3, XMM1*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xd9); + break; + case CC_MSELECT_ALOCAL: + addbyte(0x66); /*MOVD XMM3, ECX*/ + addbyte(0x0f); + 
addbyte(0x6e); + addbyte(0xd9); + addbyte(0xf2); /*PSHUFLW XMM3, XMM3, 0*/ + addbyte(0x0f); + addbyte(0x70); + addbyte(0xdb); + addbyte(0x00); + break; + case CC_MSELECT_AOTHER: + /*Handled above*/ + break; + case CC_MSELECT_TEX: + addbyte(0x66); /*PINSRW XMM3, state->tex_a, 0*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_a)); + addbyte(0); + addbyte(0x66); /*PINSRW XMM3, state->tex_a, 1*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_a)); + addbyte(1); + addbyte(0x66); /*PINSRW XMM3, state->tex_a, 2*/ + addbyte(0x0f); + addbyte(0xc4); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tex_a)); + addbyte(2); + break; + case CC_MSELECT_TEXRGB: + addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xe2); + addbyte(0xf3); /*MOVQ XMM3, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xdc); + break; + default: + addbyte(0x66); /*PXOR XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xdb); + break; + } + addbyte(0xf3); /*MOV XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe0); + if (!cc_reverse_blend) + { + addbyte(0x66); /*PXOR XMM3, 0xff*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x1d); + addlong((uint32_t)&xmm_ff_w); + } + addbyte(0x66); /*PADDW XMM3, 1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x1d); + addlong((uint32_t)&xmm_01_w); + addbyte(0x66); /*PMULLW XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc3); + addbyte(0x66); /*PMULHW XMM4, XMM3*/ + addbyte(0x0f); + addbyte(0xe5); + addbyte(0xe3); + addbyte(0x66); /*PUNPCKLWD XMM0, XMM4*/ + addbyte(0x0f); + addbyte(0x61); + addbyte(0xc4); + addbyte(0x66); /*PSRLD XMM0, 8*/ + addbyte(0x0f); + addbyte(0x72); + addbyte(0xe0); + addbyte(8); + addbyte(0x66); /*PACKSSDW XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x6b); + addbyte(0xc0); + } + + if (cc_add == 1) + { + addbyte(0x66); /*PADDW XMM0, XMM1*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc1); + } + + 
addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + + if (cc_invert_output) + { + addbyte(0x66); /*PXOR XMM0, 0xff*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0x05); + addlong((uint32_t)&xmm_ff_b); + } +//#if 0 +// addbyte(0x66); /*MOVD state->out[EDI], XMM0*/ +// addbyte(0x0f); +// addbyte(0x7e); +// addbyte(0x87); +// addlong(offsetof(voodoo_state_t, out)); + if (params->fogMode & FOG_ENABLE) + { + if (params->fogMode & FOG_CONSTANT) + { + addbyte(0x66); /*MOVD XMM3, params->fogColor[ESI]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, fogColor)); + addbyte(0x66); /*PADDUSB XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xdc); + addbyte(0xc3); +/* src_r += params->fogColor.r; + src_g += params->fogColor.g; + src_b += params->fogColor.b; */ + } + else + { + /*int fog_r, fog_g, fog_b, fog_a; */ + + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + + if (!(params->fogMode & FOG_ADD)) + { + addbyte(0x66); /*MOVD XMM3, params->fogColor[ESI]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x9e); + addlong(offsetof(voodoo_params_t, fogColor)); + addbyte(0x66); /*PUNPCKLBW XMM3, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xda); + } + else + { + addbyte(0x66); /*PXOR XMM3, XMM3*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xdb); + } + + if (!(params->fogMode & FOG_MULT)) + { + addbyte(0x66); /*PSUBW XMM3, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xd8); + } + + /*Divide by 2 to prevent overflow on multiply*/ + addbyte(0x66); /*PSRAW XMM3, 1*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xe3); + addbyte(1); + + switch (params->fogMode & (FOG_Z|FOG_ALPHA)) + { + case 0: + addbyte(0x8b); /*MOV EBX, state->w_depth[EDI]*/ + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, w_depth)); + addbyte(0x89); /*MOV EAX, EBX*/ + addbyte(0xd8); + addbyte(0xc1); /*SHR EBX, 10*/ + addbyte(0xeb); + addbyte(10); + addbyte(0xc1); /*SHR EAX, 2*/ + 
addbyte(0xe8); + addbyte(2); + addbyte(0x83); /*AND EBX, 0x3f*/ + addbyte(0xe3); + addbyte(0x3f); + addbyte(0x25); /*AND EAX, 0xff*/ + addlong(0xff); + addbyte(0xf6); /*MUL params->fogTable+1[ESI+EBX*2]*/ + addbyte(0xa4); + addbyte(0x5e); + addlong(offsetof(voodoo_params_t, fogTable)+1); + addbyte(0x0f); /*MOVZX EBX, params->fogTable[ESI+EBX*2]*/ + addbyte(0xb6); + addbyte(0x9c); + addbyte(0x5e); + addlong(offsetof(voodoo_params_t, fogTable)); + addbyte(0xc1); /*SHR EAX, 10*/ + addbyte(0xe8); + addbyte(10); + addbyte(0x01); /*ADD EAX, EBX*/ + addbyte(0xd8); + +/* int fog_idx = (w_depth >> 10) & 0x3f; + + fog_a = params->fogTable[fog_idx].fog; + fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10;*/ + break; + + case FOG_Z: + addbyte(0x8b); /*MOV EAX, state->z[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, z)); + addbyte(0xc1); /*SHR EAX, 12*/ + addbyte(0xe8); + addbyte(12); + addbyte(0x25); /*AND EAX, 0xff*/ + addlong(0xff); +// fog_a = (z >> 20) & 0xff; + break; + + case FOG_ALPHA: + addbyte(0x8b); /*MOV EAX, state->ia[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, ia)); + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + addbyte(0xc1); /*SAR EAX, 12*/ + addbyte(0xf8); + addbyte(12); + addbyte(0x0f); /*CMOVS EAX, EBX*/ + addbyte(0x48); + addbyte(0xc3); + addbyte(0xbb); /*MOV EBX, 0xff*/ + addlong(0xff); + addbyte(0x3d); /*CMP EAX, 0xff*/ + addlong(0xff); + addbyte(0x0f); /*CMOVAE EAX, EBX*/ + addbyte(0x43); + addbyte(0xc3); +// fog_a = CLAMP(ia >> 12); + break; + + case FOG_W: + addbyte(0x8b); /*MOV EAX, state->w[EDI]+4*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w)+4); + addbyte(0x31); /*XOR EBX, EBX*/ + addbyte(0xdb); + addbyte(0x09); /*OR EAX, EAX*/ + addbyte(0xc0); + addbyte(0x0f); /*CMOVS EAX, EBX*/ + addbyte(0x48); + addbyte(0xc3); + addbyte(0xbb); /*MOV EBX, 0xff*/ + addlong(0xff); + addbyte(0x3d); /*CMP EAX, 0xff*/ + addlong(0xff); + addbyte(0x0f); /*CMOVAE EAX, EBX*/ + addbyte(0x43); + 
addbyte(0xc3); +// fog_a = CLAMP(w >> 32); + break; + } + addbyte(0x01); /*ADD EAX, EAX*/ + addbyte(0xc0); +// fog_a++; + + addbyte(0x66); /*PMULLW XMM3, alookup+4[EAX*8]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x1c); + addbyte(0xc5); + addlong(((uintptr_t)alookup) + 16); + addbyte(0x66); /*PSRAW XMM3, 7*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xe3); + addbyte(7); +/* fog_r = (fog_r * fog_a) >> 8; + fog_g = (fog_g * fog_a) >> 8; + fog_b = (fog_b * fog_a) >> 8;*/ + + if (params->fogMode & FOG_MULT) + { + addbyte(0xf3); /*MOV XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc3); + } + else + { + addbyte(0x66); /*PADDW XMM0, XMM3*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc3); +/* src_r += fog_r; + src_g += fog_g; + src_b += fog_b;*/ + } + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + } + +/* src_r = CLAMP(src_r); + src_g = CLAMP(src_g); + src_b = CLAMP(src_b);*/ + } + + if ((params->alphaMode & 1) && (alpha_func != AFUNC_NEVER) && (alpha_func != AFUNC_ALWAYS)) + { + addbyte(0x0f); /*MOVZX ECX, params->alphaMode+3*/ + addbyte(0xb6); + addbyte(0x8e); + addlong(offsetof(voodoo_params_t, alphaMode) + 3); + addbyte(0x39); /*CMP EDX, ECX*/ + addbyte(0xca); + + switch (alpha_func) + { + case AFUNC_LESSTHAN: + addbyte(0x0f); /*JAE skip*/ + addbyte(0x83); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_EQUAL: + addbyte(0x0f); /*JNE skip*/ + addbyte(0x85); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_LESSTHANEQUAL: + addbyte(0x0f); /*JA skip*/ + addbyte(0x87); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_GREATERTHAN: + addbyte(0x0f); /*JBE skip*/ + addbyte(0x86); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_NOTEQUAL: + addbyte(0x0f); /*JE skip*/ + addbyte(0x84); + a_skip_pos = block_pos; + addlong(0); + break; + case AFUNC_GREATERTHANEQUAL: + addbyte(0x0f); /*JB skip*/ + addbyte(0x82); + a_skip_pos = block_pos; + addlong(0); + break; + } + } 
+ else if ((params->alphaMode & 1) && (alpha_func == AFUNC_NEVER)) + { + addbyte(0xC3); /*RET*/ + } + + if (params->alphaMode & (1 << 4)) + { + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ + addbyte(0x87); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x8b); /*MOV EBP, fb_mem*/ + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, fb_mem)); + addbyte(0x01); /*ADD EDX, EDX*/ + addbyte(0xd2); + addbyte(0x0f); /*MOVZX EAX, [EBP+EAX*2]*/ + addbyte(0xb7); + addbyte(0x44); + addbyte(0x45); + addbyte(0); + addbyte(0x66); /*PUNPCKLBW XMM0, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xc2); + addbyte(0x66); /*MOVD XMM4, rgb565[EAX*4]*/ + addbyte(0x0f); + addbyte(0x6e); + addbyte(0x24); + addbyte(0x85); + addlong((uint32_t)rgb565); + addbyte(0x66); /*PUNPCKLBW XMM4, XMM2*/ + addbyte(0x0f); + addbyte(0x60); + addbyte(0xe2); + addbyte(0xf3); /*MOV XMM6, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xf4); + + switch (dest_afunc) + { + case AFUNC_AZERO: + addbyte(0x66); /*PXOR XMM4, XMM4*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe4); + break; + case AFUNC_ASRC_ALPHA: + addbyte(0x66); /*PMULLW XMM4, alookup[EDX*8]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x24); + addbyte(0xd5); + addlong((uint32_t)alookup); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x25); + addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + break; + case AFUNC_A_COLOR: + addbyte(0x66); /*PMULLW XMM4, XMM0*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xe0); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + 
addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x25); + addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + break; + case AFUNC_ADST_ALPHA: + break; + case AFUNC_AONE: + break; + case AFUNC_AOMSRC_ALPHA: + addbyte(0x66); /*PMULLW XMM4, aminuslookup[EDX*8]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x24); + addbyte(0xd5); + addlong((uint32_t)aminuslookup); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x25); + addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + break; + case AFUNC_AOM_COLOR: + addbyte(0xf3); /*MOVQ XMM5, xmm_ff_w*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x2d); + addlong((uint32_t)&xmm_ff_w); + addbyte(0x66); /*PSUBW XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xe8); + addbyte(0x66); /*PMULLW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xe5); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x25); + addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + 
addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + break; + case AFUNC_AOMDST_ALPHA: + addbyte(0x66); /*PXOR XMM4, XMM4*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xe4); + break; + case AFUNC_ASATURATE: + addbyte(0x66); /*PMULLW XMM4, minus_254*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x25); + addlong((uint32_t)&minus_254); + addbyte(0xf3); /*MOVQ XMM5, XMM4*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xec); + addbyte(0x66); /*PADDW XMM4, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x25); + addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM4, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xe5); + addbyte(0x66); /*PSRLW XMM4, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd4); + addbyte(8); + } + + switch (src_afunc) + { + case AFUNC_AZERO: + addbyte(0x66); /*PXOR XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case AFUNC_ASRC_ALPHA: + addbyte(0x66); /*PMULLW XMM0, alookup[EDX*8]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x04); + addbyte(0xd5); + addlong((uint32_t)alookup); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x05); + addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc5); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0); + addbyte(8); + break; + case AFUNC_A_COLOR: + addbyte(0x66); /*PMULLW XMM0, XMM6*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc6); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x05); + 
addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc5); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0); + addbyte(8); + break; + case AFUNC_ADST_ALPHA: + break; + case AFUNC_AONE: + break; + case AFUNC_AOMSRC_ALPHA: + addbyte(0x66); /*PMULLW XMM0, aminuslookup[EDX*8]*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0x04); + addbyte(0xd5); + addlong((uint32_t)aminuslookup); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x05); + addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc5); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0); + addbyte(8); + break; + case AFUNC_AOM_COLOR: + addbyte(0xf3); /*MOVQ XMM5, xmm_ff_w*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x2d); + addlong((uint32_t)&xmm_ff_w); + addbyte(0x66); /*PSUBW XMM5, XMM6*/ + addbyte(0x0f); + addbyte(0xf9); + addbyte(0xee); + addbyte(0x66); /*PMULLW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xd5); + addbyte(0xc5); + addbyte(0xf3); /*MOVQ XMM5, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xe8); + addbyte(0x66); /*PADDW XMM0, alookup[1*8]*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0x05); + addlong((uint32_t)alookup + 16); + addbyte(0x66); /*PSRLW XMM5, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd5); + addbyte(8); + addbyte(0x66); /*PADDW XMM0, XMM5*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc5); + addbyte(0x66); /*PSRLW XMM0, 8*/ + addbyte(0x0f); + addbyte(0x71); + addbyte(0xd0); + addbyte(8); + break; + case AFUNC_AOMDST_ALPHA: + addbyte(0x66); /*PXOR XMM0, 
XMM0*/ + addbyte(0x0f); + addbyte(0xef); + addbyte(0xc0); + break; + case AFUNC_ACOLORBEFOREFOG: + break; + } + + addbyte(0x66); /*PADDW XMM0, XMM4*/ + addbyte(0x0f); + addbyte(0xfd); + addbyte(0xc4); + + addbyte(0x66); /*PACKUSWB XMM0, XMM0*/ + addbyte(0x0f); + addbyte(0x67); + addbyte(0xc0); + } +//#endif + +// addbyte(0x8b); /*MOV EDX, x (ESP+12)*/ +// addbyte(0x54); +// addbyte(0x24); +// addbyte(12); + + + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ + addbyte(0x97); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + + addbyte(0x66); /*MOV EAX, XMM0*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xc0); + + if (params->fbzMode & FBZ_RGB_WMASK) + { +// addbyte(0x89); /*MOV state->rgb_out[EDI], EAX*/ +// addbyte(0x87); +// addlong(offsetof(voodoo_state_t, rgb_out)); + + if (dither) + { + addbyte(0x8b); /*MOV ESI, real_y (ESP+16)*/ + addbyte(0x74); + addbyte(0x24); + addbyte(16+16); + addbyte(0x0f); /*MOVZX EBX, AH*/ /*G*/ + addbyte(0xb6); + addbyte(0xdc); + if (dither2x2) + { + addbyte(0x83); /*AND EDX, 1*/ + addbyte(0xe2); + addbyte(1); + addbyte(0x83); /*AND ESI, 1*/ + addbyte(0xe6); + addbyte(1); + addbyte(0xc1); /*SHL EBX, 2*/ + addbyte(0xe3); + addbyte(2); + } + else + { + addbyte(0x83); /*AND EDX, 3*/ + addbyte(0xe2); + addbyte(3); + addbyte(0x83); /*AND ESI, 3*/ + addbyte(0xe6); + addbyte(3); + addbyte(0xc1); /*SHL EBX, 4*/ + addbyte(0xe3); + addbyte(4); + } + addbyte(0x0f); /*MOVZX ECX, AL*/ /*R*/ + addbyte(0xb6); + addbyte(0xc8); + if (dither2x2) + { + addbyte(0xc1); /*SHR EAX, 14*/ + addbyte(0xe8); + addbyte(14); + addbyte(0x8d); /*LEA ESI, EDX+ESI*2*/ + addbyte(0x34); + addbyte(0x72); + } + else + { + addbyte(0xc1); /*SHR EAX, 12*/ + addbyte(0xe8); + addbyte(12); + addbyte(0x8d); /*LEA ESI, EDX+ESI*4*/ + addbyte(0x34); + addbyte(0xb2); + } + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ + addbyte(0x97); + if (params->col_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + 
addlong(offsetof(voodoo_state_t, x)); + if (dither2x2) + { + addbyte(0xc1); /*SHL ECX, 2*/ + addbyte(0xe1); + addbyte(2); + addbyte(0x25); /*AND EAX, 0x3fc*/ /*B*/ + addlong(0x3fc); + } + else + { + addbyte(0xc1); /*SHL ECX, 4*/ + addbyte(0xe1); + addbyte(4); + addbyte(0x25); /*AND EAX, 0xff0*/ /*B*/ + addlong(0xff0); + } + addbyte(0x0f); /*MOVZX EBX, dither_g[EBX+ESI]*/ + addbyte(0xb6); + addbyte(0x9c); + addbyte(0x33); + addlong(dither2x2 ? (uint32_t)dither_g2x2 : (uint32_t)dither_g); + addbyte(0x0f); /*MOVZX ECX, dither_rb[ECX+ESI]*/ + addbyte(0xb6); + addbyte(0x8c); + addbyte(0x31); + addlong(dither2x2 ? (uint32_t)dither_rb2x2 : (uint32_t)dither_rb); + addbyte(0x0f); /*MOVZX EAX, dither_rb[EAX+ESI]*/ + addbyte(0xb6); + addbyte(0x84); + addbyte(0x30); + addlong(dither2x2 ? (uint32_t)dither_rb2x2 : (uint32_t)dither_rb); + addbyte(0xc1); /*SHL EBX, 5*/ + addbyte(0xe3); + addbyte(5); + addbyte(0xc1); /*SHL EAX, 11*/ + addbyte(0xe0); + addbyte(11); + addbyte(0x09); /*OR EAX, EBX*/ + addbyte(0xd8); + addbyte(0x09); /*OR EAX, ECX*/ + addbyte(0xc8); + } + else + { + addbyte(0x89); /*MOV EBX, EAX*/ + addbyte(0xc3); + addbyte(0x0f); /*MOVZX ECX, AH*/ + addbyte(0xb6); + addbyte(0xcc); + addbyte(0xc1); /*SHR EAX, 3*/ + addbyte(0xe8); + addbyte(3); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0xc1); /*SHL ECX, 3*/ + addbyte(0xe1); + addbyte(3); + addbyte(0x81); /*AND EAX, 0x001f*/ + addbyte(0xe0); + addlong(0x001f); + addbyte(0x81); /*AND EBX, 0xf800*/ + addbyte(0xe3); + addlong(0xf800); + addbyte(0x81); /*AND ECX, 0x07e0*/ + addbyte(0xe1); + addlong(0x07e0); + addbyte(0x09); /*OR EAX, EBX*/ + addbyte(0xd8); + addbyte(0x09); /*OR EAX, ECX*/ + addbyte(0xc8); + } + addbyte(0x8b); /*MOV ESI, fb_mem*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, fb_mem)); + addbyte(0x66); /*MOV [ESI+EDX*2], AX*/ + addbyte(0x89); + addbyte(0x04); + addbyte(0x56); + } + + if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | 
FBZ_DEPTH_ENABLE)) + { + addbyte(0x8b); /*MOV EDX, state->x[EDI]*/ + addbyte(0x97); + if (params->aux_tiled) + addlong(offsetof(voodoo_state_t, x_tiled)); + else + addlong(offsetof(voodoo_state_t, x)); + addbyte(0x66); /*MOV AX, new_depth*/ + addbyte(0x8b); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, new_depth)); + addbyte(0x8b); /*MOV ESI, aux_mem*/ + addbyte(0xb7); + addlong(offsetof(voodoo_state_t, aux_mem)); + addbyte(0x66); /*MOV [ESI+EDX*2], AX*/ + addbyte(0x89); + addbyte(0x04); + addbyte(0x56); + } + + if (z_skip_pos) + *(uint32_t *)&code_block[z_skip_pos] = (block_pos - z_skip_pos) - 4; + if (a_skip_pos) + *(uint32_t *)&code_block[a_skip_pos] = (block_pos - a_skip_pos) - 4; + if (chroma_skip_pos) + *(uint32_t *)&code_block[chroma_skip_pos] = (block_pos - chroma_skip_pos) - 4; + + + addbyte(0x8b); /*MOV ESI, [ESP+8]*/ + addbyte(0x74); + addbyte(0x24); + addbyte(8+16); + + if (voodoo->dual_tmus) + { + addbyte(0xf3); /*MOVDQU XMM3, state->tmu1_s[EDI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu1_s)); + addbyte(0xf3); /*MOVQ XMM4, state->tmu1_w[EDI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu1_w)); + addbyte(0xf3); /*MOVDQU XMM5, params->tmu[1].dSdX[ESI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tmu[1].dSdX)); + addbyte(0xf3); /*MOVQ XMM6, params->tmu[1].dWdX[ESI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xb6); + addlong(offsetof(voodoo_params_t, tmu[1].dWdX)); + if (state->xdir > 0) + { + addbyte(0x66); /*PADDQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xdd); + addbyte(0x66); /*PADDQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xe6); + } + else + { + addbyte(0x66); /*PSUBQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xdd); + addbyte(0x66); /*PSUBQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xe6); + } + addbyte(0xf3); /*MOVDQU state->tmu1_s, XMM3*/ + 
addbyte(0x0f); + addbyte(0x7f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu1_s)); + addbyte(0x66); /*MOVQ state->tmu1_w, XMM4*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu1_w)); + } + + addbyte(0xf3); /*MOVDQU XMM1, state->ib[EDI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0xf3); /*MOVDQU XMM3, state->tmu0_s[EDI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu0_s)); + addbyte(0xf3); /*MOVQ XMM4, state->tmu0_w[EDI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu0_w)); + addbyte(0xf3); /*MOVDQU XMM0, params->dBdX[ESI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0x86); + addlong(offsetof(voodoo_params_t, dBdX)); + addbyte(0x8b); /*MOV EAX, params->dZdX[ESI]*/ + addbyte(0x86); + addlong(offsetof(voodoo_params_t, dZdX)); + addbyte(0xf3); /*MOVDQU XMM5, params->tmu[0].dSdX[ESI]*/ + addbyte(0x0f); + addbyte(0x6f); + addbyte(0xae); + addlong(offsetof(voodoo_params_t, tmu[0].dSdX)); + addbyte(0xf3); /*MOVQ XMM6, params->tmu[0].dWdX[ESI]*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xb6); + addlong(offsetof(voodoo_params_t, tmu[0].dWdX)); + + if (state->xdir > 0) + { + addbyte(0x66); /*PADDD XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfe); + addbyte(0xc8); + } + else + { + addbyte(0x66); /*PSUBD XMM1, XMM0*/ + addbyte(0x0f); + addbyte(0xfa); + addbyte(0xc8); + } + + addbyte(0xf3); /*MOVQ XMM0, state->w*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w)); + addbyte(0xf3); /*MOVDQU state->ib, XMM1*/ + addbyte(0x0f); + addbyte(0x7f); + addbyte(0x8f); + addlong(offsetof(voodoo_state_t, ib)); + addbyte(0xf3); /*MOVQ XMM7, params->dWdX*/ + addbyte(0x0f); + addbyte(0x7e); + addbyte(0xbe); + addlong(offsetof(voodoo_params_t, dWdX)); + + if (state->xdir > 0) + { + addbyte(0x66); /*PADDQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xd4); + 
addbyte(0xdd); + addbyte(0x66); /*PADDQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xe6); + addbyte(0x66); /*PADDQ XMM0, XMM7*/ + addbyte(0x0f); + addbyte(0xd4); + addbyte(0xc7); + addbyte(0x01); /*ADD state->z[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, z)); + } + else + { + addbyte(0x66); /*PSUBQ XMM3, XMM5*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xdd); + addbyte(0x66); /*PSUBQ XMM4, XMM6*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xe6); + addbyte(0x66); /*PSUBQ XMM0, XMM7*/ + addbyte(0x0f); + addbyte(0xfb); + addbyte(0xc7); + addbyte(0x29); /*SUB state->z[EDI], EAX*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, z)); + } + + addbyte(0xf3); /*MOVDQU state->tmu0_s, XMM3*/ + addbyte(0x0f); + addbyte(0x7f); + addbyte(0x9f); + addlong(offsetof(voodoo_state_t, tmu0_s)); + addbyte(0x66); /*MOVQ state->tmu0_w, XMM4*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0xa7); + addlong(offsetof(voodoo_state_t, tmu0_w)); + addbyte(0x66); /*MOVQ state->w, XMM0*/ + addbyte(0x0f); + addbyte(0xd6); + addbyte(0x87); + addlong(offsetof(voodoo_state_t, w)); + + addbyte(0x83); /*ADD state->pixel_count[EDI], 1*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, pixel_count)); + addbyte(1); + + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH || + (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL) + { + addbyte(0x83); /*ADD state->texel_count[EDI], 1*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, texel_count)); + addbyte(1); + } + else + { + addbyte(0x83); /*ADD state->texel_count[EDI], 2*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, texel_count)); + addbyte(2); + } + } + addbyte(0x8b); /*MOV EAX, state->x[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x)); + + if (state->xdir > 0) + { + addbyte(0x83); /*ADD state->x[EDI], 1*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x)); + addbyte(1); + } + else + { 
+ addbyte(0x83); /*SUB state->x[EDI], 1*/ + addbyte(0xaf); + addlong(offsetof(voodoo_state_t, x)); + addbyte(1); + } + + addbyte(0x3b); /*CMP EAX, state->x2[EDI]*/ + addbyte(0x87); + addlong(offsetof(voodoo_state_t, x2)); + addbyte(0x0f); /*JNZ loop_jump_pos*/ + addbyte(0x85); + addlong(loop_jump_pos - (block_pos + 4)); + + addbyte(0x5b); /*POP EBX*/ + addbyte(0x5e); /*POP ESI*/ + addbyte(0x5f); /*POP EDI*/ + addbyte(0x5d); /*POP EBP*/ + + addbyte(0xC3); /*RET*/ + + if (params->textureMode[1] & TEXTUREMODE_TRILINEAR) + cs = cs; +} +int voodoo_recomp = 0; /* incremented by voodoo_get_block() every time it has to generate a new block */ + +static inline void *voodoo_get_block(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int odd_even) /* Return the generated code block whose recorded key (xdir, alphaMode, fbzMode, fogMode, fbzColorPath, bit 18 of trexInit1[0], textureMode[0..1], tLOD[0..1] & LOD_MASK, tiled flag) matches the current state; on a miss, generate one with voodoo_generate(), record its key and return it. Entries for one odd_even value live at codegen_data[odd_even + n*4], n = 0..7. NOTE(review): the 8 and the stride of 4 are hard-coded while the buffer is sized with BLOCK_NUM*4 - confirm BLOCK_NUM is 8. */ +{ + int c; + int b = last_block[odd_even]; + voodoo_x86_data_t *data; + voodoo_x86_data_t *codegen_data = voodoo->codegen_data; + + for (c = 0; c < 8; c++) /* probe all 8 entries, starting with the one that matched last time */ + { + data = &codegen_data[odd_even + b*4]; + + if (state->xdir == data->xdir && + params->alphaMode == data->alphaMode && + params->fbzMode == data->fbzMode && + params->fogMode == data->fogMode && + params->fbzColorPath == data->fbzColorPath && + (voodoo->trexInit1[0] & (1 << 18)) == data->trexInit1 && + params->textureMode[0] == data->textureMode[0] && + params->textureMode[1] == data->textureMode[1] && + (params->tLOD[0] & LOD_MASK) == data->tLOD[0] && + (params->tLOD[1] & LOD_MASK) == data->tLOD[1] && + ((params->col_tiled || params->aux_tiled) ? 
1 : 0) == data->is_tiled) + { + last_block[odd_even] = b; + return data->code_block; + } + + b = (b + 1) & 7; + } +voodoo_recomp++; /* cache miss: overwrite the next entry in round-robin order */ + data = &codegen_data[odd_even + next_block_to_write[odd_even]*4]; +// code_block = data->code_block; + + voodoo_generate(data->code_block, voodoo, params, state, depth_op); + + data->xdir = state->xdir; + data->alphaMode = params->alphaMode; + data->fbzMode = params->fbzMode; + data->fogMode = params->fogMode; + data->fbzColorPath = params->fbzColorPath; + data->trexInit1 = voodoo->trexInit1[0] & (1 << 18); + data->textureMode[0] = params->textureMode[0]; + data->textureMode[1] = params->textureMode[1]; + data->tLOD[0] = params->tLOD[0] & LOD_MASK; + data->tLOD[1] = params->tLOD[1] & LOD_MASK; + data->is_tiled = (params->col_tiled || params->aux_tiled) ? 1 : 0; + + next_block_to_write[odd_even] = (next_block_to_write[odd_even] + 1) & 7; + + return data->code_block; +} + +void voodoo_codegen_init(voodoo_t *voodoo) /* Allocate the read/write/execute buffer that holds BLOCK_NUM*4 voodoo_x86_data_t entries and fill the constant tables (alookup, aminuslookup, bilinear_lookup, xmm_00_ff_w) that the generated code references by absolute address. */ +{ + int c; +#if defined(__linux__) || defined(__APPLE__) + void *start; + size_t len; + long pagesize = sysconf(_SC_PAGESIZE); + long pagemask = ~(pagesize - 1); /* start, len, pagesize and pagemask are not referenced anywhere else in this function */ +#endif + +#if defined WIN32 || defined _WIN32 + voodoo->codegen_data = VirtualAlloc(NULL, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, MEM_COMMIT, PAGE_EXECUTE_READWRITE); +#else + voodoo->codegen_data = mmap(0, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); /* anonymous mappings must pass fd -1 to be portable; NOTE(review): neither allocation result is checked for failure (NULL / MAP_FAILED) */ +#endif + + for (c = 0; c < 256; c++) + { + int d[4]; + int _ds = c & 0xf; + int dt = c >> 4; /* c packs two 4-bit fractions; d[0..3] below are the four bilinear weights and always sum to 256 */ + + alookup[c] = _mm_set_epi32(0, 0, c | (c << 16), c | (c << 16)); /* c in each of the four low 16-bit lanes */ + aminuslookup[c] = _mm_set_epi32(0, 0, (255-c) | ((255-c) << 16), (255-c) | ((255-c) << 16)); + + d[0] = (16 - _ds) * (16 - dt); + d[1] = _ds * (16 - dt); + d[2] = (16 - _ds) * dt; + d[3] = _ds * dt; + + bilinear_lookup[c*2] = _mm_set_epi32(d[1] | (d[1] << 16), d[1] | (d[1] << 16), d[0] | (d[0] << 16), d[0] | (d[0] << 16)); + bilinear_lookup[c*2 + 1] = _mm_set_epi32(d[3] | (d[3] << 
16), d[3] | (d[3] << 16), d[2] | (d[2] << 16), d[2] | (d[2] << 16)); + } + alookup[256] = _mm_set_epi32(0, 0, 256 | (256 << 16), 256 | (256 << 16)); /* extra entry for index 256, which the 0..255 loop above does not cover */ + xmm_00_ff_w[0] = _mm_set_epi32(0, 0, 0, 0); + xmm_00_ff_w[1] = _mm_set_epi32(0, 0, 0xff | (0xff << 16), 0xff | (0xff << 16)); +} + +void voodoo_codegen_close(voodoo_t *voodoo) /* Release the buffer obtained in voodoo_codegen_init(); the munmap length is the same expression used for the mmap there. */ +{ +#if defined WIN32 || defined _WIN32 + VirtualFree(voodoo->codegen_data, 0, MEM_RELEASE); +#else + munmap(voodoo->codegen_data, sizeof(voodoo_x86_data_t) * BLOCK_NUM*4); +#endif +} diff --git a/pcem/vid_voodoo_common.h b/pcem/vid_voodoo_common.h new file mode 100644 index 00000000..df261af7 --- /dev/null +++ b/pcem/vid_voodoo_common.h @@ -0,0 +1,509 @@ +#ifdef MIN +#undef MIN +#endif +#ifdef CLAMP +#undef CLAMP +#endif + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +#define CLAMP(x) (((x) < 0) ? 0 : (((x) > 0xff) ? 0xff : (x))) +#define CLAMP16(x) (((x) < 0) ? 0 : (((x) > 0xffff) ? 0xffff : (x))) + + +#define LOD_MAX 8 + +#define TEX_DIRTY_SHIFT 10 + +#define TEX_CACHE_MAX 64 + +enum +{ + VOODOO_1 = 0, + VOODOO_SB50, + VOODOO_2, + VOODOO_BANSHEE, + VOODOO_3 +}; + +typedef union int_float +{ + uint32_t i; + float f; +} int_float; + +typedef struct rgb_t +{ + uint8_t b, g, r; + uint8_t pad; +} rgb_t; +typedef struct rgba8_t +{ + uint8_t b, g, r, a; +} rgba8_t; + +typedef union rgba_u +{ + struct + { + uint8_t b, g, r, a; + } rgba; + uint32_t u; +} rgba_u; + +#define FIFO_SIZE 65536 +#define FIFO_MASK (FIFO_SIZE - 1) +#define FIFO_ENTRY_SIZE (1 << 31) + +#define FIFO_ENTRIES (voodoo->fifo_write_idx - voodoo->fifo_read_idx) +#define FIFO_FULL ((voodoo->fifo_write_idx - voodoo->fifo_read_idx) >= FIFO_SIZE-4) +#define FIFO_EMPTY (voodoo->fifo_read_idx == voodoo->fifo_write_idx) + +#define FIFO_TYPE 0xff000000 +#define FIFO_ADDR 0x00ffffff + +enum +{ + FIFO_INVALID = (0x00 << 24), + FIFO_WRITEL_REG = (0x01 << 24), + FIFO_WRITEW_FB = (0x02 << 24), + FIFO_WRITEL_FB = (0x03 << 24), + FIFO_WRITEL_TEX = (0x04 << 24), + 
FIFO_WRITEL_2DREG = (0x05 << 24) +}; + +#define PARAM_SIZE 1024 +#define PARAM_MASK (PARAM_SIZE - 1) +#define PARAM_ENTRY_SIZE (1 << 31) /* NOTE(review): 1 << 31 does not fit a 32-bit signed int - confirm the intended type before using this in arithmetic */ + +#define PARAM_ENTRIES(x) (voodoo->params_write_idx - voodoo->params_read_idx[x]) /* the PARAM_* macros need a pointer named voodoo in scope; x selects one of the params_read_idx[] slots */ +#define PARAM_FULL(x) ((voodoo->params_write_idx - voodoo->params_read_idx[x]) >= PARAM_SIZE) +#define PARAM_EMPTY(x) (voodoo->params_read_idx[x] == voodoo->params_write_idx) + +typedef struct /* element type of voodoo_t.fifo[]; addr_type presumably combines a FIFO_* type (FIFO_TYPE bits) with an address (FIFO_ADDR bits) - confirm against the FIFO code */ +{ + uint32_t addr_type; + uint32_t val; +} fifo_entry_t; + +typedef struct voodoo_params_t /* Rendering parameters; voodoo_t holds one live copy plus a params_buffer[] queue of them. Layout is load-bearing: the x86 code generator in this patch addresses members through offsetof() and with fixed-width loads, so fields must not be reordered or repacked. */ +{ + int command; + + int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy; + + uint32_t startR, startG, startB, startZ, startA; + + int32_t dBdX, dGdX, dRdX, dAdX, dZdX; /* generated code fetches dBdX..dAdX with one 16-byte load starting at dBdX, and dZdX separately */ + + int32_t dBdY, dGdY, dRdY, dAdY, dZdY; + + int64_t startW, dWdX, dWdY; + + struct + { + int64_t startS, startT, startW, p1; + int64_t dSdX, dTdX, dWdX, p2; /* generated code fetches dSdX+dTdX with one 16-byte load and dWdX with an 8-byte load; p1..p3 are presumably padding - confirm */ + int64_t dSdY, dTdY, dWdY, p3; + } tmu[2]; + + uint32_t color0, color1; + + uint32_t fbzMode; + uint32_t fbzColorPath; + + uint32_t fogMode; + rgb_t fogColor; /* read as one 32-bit value by generated code */ + struct + { + uint8_t fog, dfog; /* generated code indexes this table with a 2-byte stride: fog at +0, dfog at +1 */ + } fogTable[64]; + + uint32_t alphaMode; + + uint32_t zaColor; + + int chromaKey_r, chromaKey_g, chromaKey_b; + uint32_t chromaKey; + + uint32_t textureMode[2]; + uint32_t tLOD[2]; + + uint32_t texBaseAddr[2], texBaseAddr1[2], texBaseAddr2[2], texBaseAddr38[2]; + + uint32_t tex_base[2][LOD_MAX+2]; + uint32_t tex_end[2][LOD_MAX+2]; + int tex_width[2]; + int tex_w_mask[2][LOD_MAX+2]; + int tex_w_nmask[2][LOD_MAX+2]; + int tex_h_mask[2][LOD_MAX+2]; + int tex_shift[2][LOD_MAX+2]; + int tex_lod[2][LOD_MAX+2]; + int tex_entry[2]; + int detail_max[2], detail_bias[2], detail_scale[2]; + + uint32_t draw_offset, aux_offset; + + int tformat[2]; + + int clipLeft, clipRight, clipLowY, clipHighY; + int clipLeft1, clipRight1, clipLowY1, clipHighY1; + + int sign; + + uint32_t front_offset; + + uint32_t swapbufferCMD; + + uint32_t stipple; + + int col_tiled, aux_tiled; /* either flag being non-zero selects the tiled variant of the generated code (x_tiled instead of x) */ + int row_width, aux_row_width; +} voodoo_params_t; + +typedef struct texture_t /* element type of voodoo_t.texture_cache[][] */ +{ + 
uint32_t base; + uint32_t tLOD; + volatile int refcount, refcount_r[4]; + int is16; + uint32_t palette_checksum; + uint32_t addr_start[4], addr_end[4]; + uint32_t *data; +} texture_t; + +typedef struct vert_t /* floating-point vertex attributes; voodoo_t keeps four of these in verts[] */ +{ + float sVx, sVy; + float sRed, sGreen, sBlue, sAlpha; + float sVz, sWb; + float sW0, sS0, sT0; + float sW1, sS1, sT1; +} vert_t; + +typedef struct clip_t /* rectangle bounds used by banshee_blt.clip[]; whether the max values are inclusive is not visible here */ +{ + int x_min, x_max; + int y_min, y_max; +} clip_t; + +typedef struct voodoo_t /* Complete state of one emulated card: register shadows, frame buffer and texture memory pointers, FIFO and parameter queues, worker thread handles, blitter and overlay state, lookup tables and the code generator buffer. */ +{ + mem_mapping_t mapping; + + int pci_enable; + + uint8_t dac_data[8]; + int dac_reg, dac_reg_ff; + uint8_t dac_readdata; + uint16_t dac_pll_regs[16]; + + float pixel_clock; + uint64_t line_time; + + voodoo_params_t params; + + uint32_t fbiInit0, fbiInit1, fbiInit2, fbiInit3, fbiInit4; + uint32_t fbiInit5, fbiInit6, fbiInit7; /*Voodoo 2*/ + + uint32_t initEnable; + + uint32_t lfbMode; + + uint32_t memBaseAddr; + + int_float fvertexAx, fvertexAy, fvertexBx, fvertexBy, fvertexCx, fvertexCy; + + uint32_t front_offset, back_offset; + + uint32_t fb_read_offset, fb_write_offset; + + int row_width, aux_row_width; + int block_width; + + int col_tiled, aux_tiled; + + uint8_t *fb_mem, *tex_mem[2]; + uint16_t *tex_mem_w[2]; + + int rgb_sel; + + uint32_t trexInit1[2]; /* bit 18 of trexInit1[0] is part of the generated-code cache key */ + + uint32_t tmuConfig; + + mutex_t *swap_mutex; + int swap_count; + + int disp_buffer, draw_buffer; + pc_timer_t timer; + + int line; + svga_t *svga; + + uint32_t backPorch; + uint32_t videoDimensions; + uint32_t hSync, vSync; + + int h_total, v_total, v_disp; + int h_disp; + int v_retrace; + + struct + { + uint32_t y[4], i[4], q[4]; + } nccTable[2][2]; + + rgba_u palette[2][256]; + + rgba_u ncc_lookup[2][2][256]; + int ncc_dirty[2]; + + thread_t *fifo_thread; + thread_t *render_thread[4]; + event_t *wake_fifo_thread; + event_t *wake_main_thread; + event_t *fifo_not_full_event; + event_t *render_not_full_event[4]; + event_t *wake_render_thread[4]; + + int voodoo_busy; + int render_voodoo_busy[4]; + + int render_threads; + int odd_even_mask; + + int pixel_count[4], 
texel_count[4], tri_count, frame_count; + int pixel_count_old[4], texel_count_old[4]; + int wr_count, rd_count, tex_count; + + int retrace_count; + int swap_interval; + uint32_t swap_offset; + int swap_pending; + + int bilinear_enabled; + + int fb_size; + uint32_t fb_mask; + + int texture_size; + uint32_t texture_mask; + + int dual_tmus; /* when non-zero the generated code also steps the tmu1_s / tmu1_w interpolators */ + int type; + + fifo_entry_t fifo[FIFO_SIZE]; /* FIFO_ENTRIES / FIFO_FULL / FIFO_EMPTY compare the two indices declared next */ + volatile int fifo_read_idx, fifo_write_idx; + volatile int cmd_read, cmd_written, cmd_written_fifo; + + voodoo_params_t params_buffer[PARAM_SIZE]; /* PARAM_ENTRIES / PARAM_FULL / PARAM_EMPTY compare params_write_idx with one params_read_idx[] slot */ + volatile int params_read_idx[4], params_write_idx; + + uint32_t cmdfifo_base, cmdfifo_end, cmdfifo_size; + int cmdfifo_rp, cmdfifo_ret_addr; + int cmdfifo_in_sub; + volatile int cmdfifo_depth_rd, cmdfifo_depth_wr; + volatile int cmdfifo_enabled; + uint32_t cmdfifo_amin, cmdfifo_amax; + int cmdfifo_holecount; + + uint32_t sSetupMode; + vert_t verts[4]; + unsigned int vertex_ages[3]; + unsigned int vertex_next_age; + int num_verticies; + int cull_pingpong; + + int flush; + + int scrfilter; + int scrfilterEnabled; + int scrfilterThreshold; + int scrfilterThresholdOld; + + uint32_t last_write_addr; + + uint32_t fbiPixelsIn; + uint32_t fbiChromaFail; + uint32_t fbiZFuncFail; + uint32_t fbiAFuncFail; + uint32_t fbiPixelsOut; + + uint32_t bltSrcBaseAddr; + uint32_t bltDstBaseAddr; + int bltSrcXYStride, bltDstXYStride; + uint32_t bltSrcChromaRange, bltDstChromaRange; + int bltSrcChromaMinR, bltSrcChromaMinG, bltSrcChromaMinB; + int bltSrcChromaMaxR, bltSrcChromaMaxG, bltSrcChromaMaxB; + int bltDstChromaMinR, bltDstChromaMinG, bltDstChromaMinB; + int bltDstChromaMaxR, bltDstChromaMaxG, bltDstChromaMaxB; + + int bltClipRight, bltClipLeft; + int bltClipHighY, bltClipLowY; + + int bltSrcX, bltSrcY; + int bltDstX, bltDstY; + int bltSizeX, bltSizeY; + int bltRop[4]; + uint16_t bltColorFg, bltColorBg; + + uint32_t bltCommand; + + uint32_t leftOverlayBuf; + + struct + { + int dst_x, dst_y; + int cur_x; + int size_x, size_y; + int x_dir, y_dir; + int 
dst_stride; + } blt; + + struct + { + uint32_t bresError0, bresError1; + uint32_t clip0Min, clip0Max; + uint32_t clip1Min, clip1Max; + uint32_t colorBack, colorFore; + uint32_t command, commandExtra; + uint32_t dstBaseAddr; + uint32_t dstFormat; + uint32_t dstSize; + uint32_t dstXY; + uint32_t lineStipple; + uint32_t lineStyle; + uint32_t rop; + uint32_t srcBaseAddr; + uint32_t srcFormat; + uint32_t srcSize; + uint32_t srcXY; + + uint32_t colorPattern[64]; + + int bres_error_0, bres_error_1; + uint32_t colorPattern8[64], colorPattern16[64], colorPattern24[64]; + int cur_x, cur_y; + uint32_t dstBaseAddr_tiled; + uint32_t dstColorkeyMin, dstColorkeyMax; + int dstSizeX, dstSizeY; + int dstX, dstY; + int dst_stride; + int patoff_x, patoff_y; + uint8_t rops[4]; + uint32_t srcBaseAddr_tiled; + uint32_t srcColorkeyMin, srcColorkeyMax; + int srcSizeX, srcSizeY; + int srcX, srcY; + int src_stride; + int old_srcX; + + /*Used for handling packed 24bpp host data*/ + int host_data_remainder; + uint32_t old_host_data; + + /*Polyfill coordinates*/ + int lx[2], rx[2]; + int ly[2], ry[2]; + + /*Polyfill state*/ + int error[2]; + int dx[2], dy[2]; + int x_inc[2]; /*y_inc is always 1 for polyfill*/ + int lx_cur, rx_cur; + + clip_t clip[2]; + + uint8_t host_data[16384]; + int host_data_count; + int host_data_size_src, host_data_size_dest; + int src_stride_src, src_stride_dest; + + int src_bpp; + + int line_pix_pos, line_bit_pos; + int line_rep_cnt, line_bit_mask_size; + } banshee_blt; + + struct + { + uint32_t vidOverlayStartCoords; + uint32_t vidOverlayEndScreenCoords; + uint32_t vidOverlayDudx, vidOverlayDudxOffsetSrcWidth; + uint32_t vidOverlayDvdy, vidOverlayDvdyOffset; + //uint32_t vidDesktopOverlayStride; + + int start_x, start_y; + int end_x, end_y; + int size_x, size_y; + int overlay_bytes; + + unsigned int src_y; + } overlay; + + rgb_t clutData[33]; + int clutData_dirty; + rgb_t clutData256[256]; + uint32_t video_16to32[0x10000]; + + uint8_t dirty_line[2048]; + int 
dirty_line_low, dirty_line_high; + + int fb_write_buffer, fb_draw_buffer; + int buffer_cutoff; + + uint32_t tile_base, tile_stride; + int tile_stride_shift, tile_x, tile_x_real; + + int read_time, write_time, burst_time; + + pc_timer_t wake_timer; + + /* screen filter tables */ + uint8_t thefilter[256][256]; + uint8_t thefilterg[256][256]; + uint8_t thefilterb[256][256]; + uint16_t purpleline[256][3]; + + texture_t texture_cache[2][TEX_CACHE_MAX]; + uint8_t texture_present[2][16384]; + int texture_last_removed; + + uint32_t palette_checksum[2]; + int palette_dirty[2]; + + uint64_t time; + int render_time[4]; + + int use_recompiler; + void *codegen_data; /* executable buffer of voodoo_x86_data_t entries, allocated by voodoo_codegen_init() and released by voodoo_codegen_close() */ + + struct voodoo_set_t *set; + + + uint8_t *vram, *changedvram; + + void *p; +} voodoo_t; + +typedef struct voodoo_set_t /* groups up to two cards; nr_cards presumably counts the populated voodoos[] slots - confirm */ +{ + voodoo_t *voodoos[2]; + + mem_mapping_t snoop_mapping; + + int nr_cards; +} voodoo_set_t; + + +extern rgba8_t rgb332[0x100], ai44[0x100], rgb565[0x10000], argb1555[0x10000], argb4444[0x10000], ai88[0x10000]; + + +void voodoo_generate_vb_filters(voodoo_t *voodoo, int fcr, int fcg); + +void voodoo_recalc(voodoo_t *voodoo); +void voodoo_update_ncc(voodoo_t *voodoo, int tmu); + +void *voodoo_2d3d_card_init(int type); +void voodoo_card_close(voodoo_t *voodoo); diff --git a/pcem/vid_voodoo_display.cpp b/pcem/vid_voodoo_display.cpp new file mode 100644 index 00000000..9ce92dde --- /dev/null +++ b/pcem/vid_voodoo_display.cpp @@ -0,0 +1,609 @@ +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "thread.h" +#include "video.h" +#include "vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_display.h" +#include "vid_voodoo_regs.h" +#include "vid_voodoo_render.h" + +void voodoo_update_ncc(voodoo_t *voodoo, int tmu) +{ + int tbl; + + for (tbl = 0; tbl < 2; tbl++) + { + int col; + + for (col = 0; col < 256; col++) + { + int y = (col >> 4), i = (col >> 2) & 3, q = col & 3; + int i_r, i_g, i_b; + int q_r, q_g, q_b; + + y = (voodoo->nccTable[tmu][tbl].y[y
>> 2] >> ((y & 3) * 8)) & 0xff; + + i_r = (voodoo->nccTable[tmu][tbl].i[i] >> 18) & 0x1ff; + if (i_r & 0x100) + i_r |= 0xfffffe00; + i_g = (voodoo->nccTable[tmu][tbl].i[i] >> 9) & 0x1ff; + if (i_g & 0x100) + i_g |= 0xfffffe00; + i_b = voodoo->nccTable[tmu][tbl].i[i] & 0x1ff; + if (i_b & 0x100) + i_b |= 0xfffffe00; + + q_r = (voodoo->nccTable[tmu][tbl].q[q] >> 18) & 0x1ff; + if (q_r & 0x100) + q_r |= 0xfffffe00; + q_g = (voodoo->nccTable[tmu][tbl].q[q] >> 9) & 0x1ff; + if (q_g & 0x100) + q_g |= 0xfffffe00; + q_b = voodoo->nccTable[tmu][tbl].q[q] & 0x1ff; + if (q_b & 0x100) + q_b |= 0xfffffe00; + + voodoo->ncc_lookup[tmu][tbl][col].rgba.r = CLAMP(y + i_r + q_r); + voodoo->ncc_lookup[tmu][tbl][col].rgba.g = CLAMP(y + i_g + q_g); + voodoo->ncc_lookup[tmu][tbl][col].rgba.b = CLAMP(y + i_b + q_b); + voodoo->ncc_lookup[tmu][tbl][col].rgba.a = 0xff; + } + } +} + +void voodoo_pixelclock_update(voodoo_t *voodoo) +{ + int m = (voodoo->dac_pll_regs[0] & 0x7f) + 2; + int n1 = ((voodoo->dac_pll_regs[0] >> 8) & 0x1f) + 2; + int n2 = ((voodoo->dac_pll_regs[0] >> 13) & 0x07); + float t = (14318184.0 * ((float)m / (float)n1)) / (float)(1 << n2); + double clock_const; + int line_length; + + if ((voodoo->dac_data[6] & 0xf0) == 0x20 || + (voodoo->dac_data[6] & 0xf0) == 0x60 || + (voodoo->dac_data[6] & 0xf0) == 0x70) + t /= 2.0f; + + line_length = (voodoo->hSync & 0xff) + ((voodoo->hSync >> 16) & 0x3ff); + +// pclog("Pixel clock %f MHz hsync %08x line_length %d\n", t, voodoo->hSync, line_length); + + voodoo->pixel_clock = t; + + clock_const = cpuclock / t; + voodoo->line_time = (uint64_t)((double)line_length * clock_const * (double)(1ull << 32)); +} + +static void voodoo_calc_clutData(voodoo_t *voodoo) +{ + int c; + + for (c = 0; c < 256; c++) + { + voodoo->clutData256[c].r = (voodoo->clutData[c >> 3].r*(8-(c & 7)) + + voodoo->clutData[(c >> 3)+1].r*(c & 7)) >> 3; + voodoo->clutData256[c].g = (voodoo->clutData[c >> 3].g*(8-(c & 7)) + + voodoo->clutData[(c >> 3)+1].g*(c & 7)) >> 3; + 
voodoo->clutData256[c].b = (voodoo->clutData[c >> 3].b*(8-(c & 7)) + + voodoo->clutData[(c >> 3)+1].b*(c & 7)) >> 3; + } + + for (c = 0; c < 65536; c++) + { + int r = (c >> 8) & 0xf8; + int g = (c >> 3) & 0xfc; + int b = (c << 3) & 0xf8; +// r |= (r >> 5); +// g |= (g >> 6); +// b |= (b >> 5); + + voodoo->video_16to32[c] = (voodoo->clutData256[r].r << 16) | (voodoo->clutData256[g].g << 8) | voodoo->clutData256[b].b; + } +} + + + +#define FILTDIV 256 + +static int FILTCAP, FILTCAPG, FILTCAPB = 0; /* color filter threshold values */ + +void voodoo_generate_filter_v1(voodoo_t *voodoo) +{ + int g, h; + float difference, diffg, diffb; + float thiscol, thiscolg, thiscolb, lined; + float fcr, fcg, fcb; + + fcr = FILTCAP * 5; + fcg = FILTCAPG * 6; + fcb = FILTCAPB * 5; + + for (g=0;g FILTCAP) + difference = FILTCAP; + if (difference < -FILTCAP) + difference = -FILTCAP; + + if (diffg > FILTCAPG) + diffg = FILTCAPG; + if (diffg < -FILTCAPG) + diffg = -FILTCAPG; + + if (diffb > FILTCAPB) + diffb = FILTCAPB; + if (diffb < -FILTCAPB) + diffb = -FILTCAPB; + + // hack - to make it not bleed onto black + //if (g == 0){ + //difference = diffg = diffb = 0; + //} + + if ((difference < fcr) || (-difference > -fcr)) + thiscol = g + (difference / 2); + if ((diffg < fcg) || (-diffg > -fcg)) + thiscolg = g + (diffg / 2); /* need these divides so we can actually undither! 
*/ + if ((diffb < fcb) || (-diffb > -fcb)) + thiscolb = g + (diffb / 2); + + if (thiscol < 0) + thiscol = 0; + if (thiscol > FILTDIV-1) + thiscol = FILTDIV-1; + + if (thiscolg < 0) + thiscolg = 0; + if (thiscolg > FILTDIV-1) + thiscolg = FILTDIV-1; + + if (thiscolb < 0) + thiscolb = 0; + if (thiscolb > FILTDIV-1) + thiscolb = FILTDIV-1; + + voodoo->thefilter[g][h] = thiscol; + voodoo->thefilterg[g][h] = thiscolg; + voodoo->thefilterb[g][h] = thiscolb; + } + + lined = g + 4; + if (lined > 255) + lined = 255; + voodoo->purpleline[g][0] = lined; + voodoo->purpleline[g][2] = lined; + + lined = g + 0; + if (lined > 255) + lined = 255; + voodoo->purpleline[g][1] = lined; + } +} + +void voodoo_generate_filter_v2(voodoo_t *voodoo) +{ + int g, h; + float difference; + float thiscol, thiscolg, thiscolb; + float clr, clg, clb = 0; + float fcr, fcg, fcb = 0; + + // pre-clamping + + fcr = FILTCAP; + fcg = FILTCAPG; + fcb = FILTCAPB; + + if (fcr > 32) fcr = 32; + if (fcg > 32) fcg = 32; + if (fcb > 32) fcb = 32; + + for (g=0;g<256;g++) // pixel 1 - our target pixel we want to bleed into + { + for (h=0;h<256;h++) // pixel 2 - our main pixel + { + float avg; + float avgdiff; + + difference = (float)(g - h); + avg = (float)((g + g + g + g + h) / 5); + avgdiff = avg - (float)((g + h + h + h + h) / 5); + if (avgdiff < 0) avgdiff *= -1; + if (difference < 0) difference *= -1; + + thiscol = thiscolg = thiscolb = g; + + // try lighten + if (h > g) + { + clr = clg = clb = avgdiff; + + if (clr>fcr) clr=fcr; + if (clg>fcg) clg=fcg; + if (clb>fcb) clb=fcb; + + + thiscol = g + clr; + thiscolg = g + clg; + thiscolb = g + clb; + + if (thiscol>g+FILTCAP) + thiscol=g+FILTCAP; + if (thiscolg>g+FILTCAPG) + thiscolg=g+FILTCAPG; + if (thiscolb>g+FILTCAPB) + thiscolb=g+FILTCAPB; + + + if (thiscol>g+avgdiff) + thiscol=g+avgdiff; + if (thiscolg>g+avgdiff) + thiscolg=g+avgdiff; + if (thiscolb>g+avgdiff) + thiscolb=g+avgdiff; + + } + + if (difference > FILTCAP) + thiscol = g; + if (difference > FILTCAPG) 
+ thiscolg = g; + if (difference > FILTCAPB) + thiscolb = g; + + // clamp + if (thiscol < 0) thiscol = 0; + if (thiscolg < 0) thiscolg = 0; + if (thiscolb < 0) thiscolb = 0; + + if (thiscol > 255) thiscol = 255; + if (thiscolg > 255) thiscolg = 255; + if (thiscolb > 255) thiscolb = 255; + + // add to the table + voodoo->thefilter[g][h] = (thiscol); + voodoo->thefilterg[g][h] = (thiscolg); + voodoo->thefilterb[g][h] = (thiscolb); + + // debug the ones that don't give us much of a difference + //if (difference < FILTCAP) + //pclog("Voodoofilter: %ix%i - %f difference, %f average difference, R=%f, G=%f, B=%f\n", g, h, difference, avgdiff, thiscol, thiscolg, thiscolb); + } + + } +} + +void voodoo_threshold_check(voodoo_t *voodoo) +{ + int r, g, b; + + if (!voodoo->scrfilterEnabled) + return; /* considered disabled; don't check and generate */ + + /* Check for changes, to generate anew table */ + if (voodoo->scrfilterThreshold != voodoo->scrfilterThresholdOld) + { + r = (voodoo->scrfilterThreshold >> 16) & 0xFF; + g = (voodoo->scrfilterThreshold >> 8 ) & 0xFF; + b = voodoo->scrfilterThreshold & 0xFF; + + FILTCAP = r; + FILTCAPG = g; + FILTCAPB = b; + + pclog("Voodoo Filter Threshold Check: %06x - RED %i GREEN %i BLUE %i\n", voodoo->scrfilterThreshold, r, g, b); + + voodoo->scrfilterThresholdOld = voodoo->scrfilterThreshold; + + if (voodoo->type == VOODOO_2) + voodoo_generate_filter_v2(voodoo); + else + voodoo_generate_filter_v1(voodoo); + + if (voodoo->type >= VOODOO_BANSHEE) + voodoo_generate_vb_filters(voodoo, FILTCAP, FILTCAPG); + } +} + +static void voodoo_filterline_v1(voodoo_t *voodoo, uint8_t *fil, int column, uint16_t *src, int line) +{ + int x; + + // Scratchpad for avoiding feedback streaks + uint8_t fil3[(voodoo->h_disp) * 3]; + + /* 16 to 32-bit */ + for (x=0; x> 5) & 63) << 2); + fil[x*3+2] = (((src[x] >> 11) & 31) << 3); + + // Copy to our scratchpads + fil3[x*3+0] = fil[x*3+0]; + fil3[x*3+1] = fil[x*3+1]; + fil3[x*3+2] = fil[x*3+2]; + } + + + /* lines */ 
+ + if (line & 1) + { + for (x=0; xpurpleline[fil[x*3]][0]; + fil[x*3+1] = voodoo->purpleline[fil[x*3+1]][1]; + fil[x*3+2] = voodoo->purpleline[fil[x*3+2]][2]; + } + } + + + /* filtering time */ + + for (x=1; xthefilterb[fil[x*3]][fil[ (x-1) *3]]; + fil3[(x)*3+1] = voodoo->thefilterg[fil[x*3+1]][fil[ (x-1) *3+1]]; + fil3[(x)*3+2] = voodoo->thefilter[fil[x*3+2]][fil[ (x-1) *3+2]]; + } + + for (x=1; xthefilterb[fil3[x*3]][fil3[ (x-1) *3]]; + fil[(x)*3+1] = voodoo->thefilterg[fil3[x*3+1]][fil3[ (x-1) *3+1]]; + fil[(x)*3+2] = voodoo->thefilter[fil3[x*3+2]][fil3[ (x-1) *3+2]]; + } + + for (x=1; xthefilterb[fil[x*3]][fil[ (x-1) *3]]; + fil3[(x)*3+1] = voodoo->thefilterg[fil[x*3+1]][fil[ (x-1) *3+1]]; + fil3[(x)*3+2] = voodoo->thefilter[fil[x*3+2]][fil[ (x-1) *3+2]]; + } + + for (x=0; xthefilterb[fil3[x*3]][fil3[ (x+1) *3]]; + fil[(x)*3+1] = voodoo->thefilterg[fil3[x*3+1]][fil3[ (x+1) *3+1]]; + fil[(x)*3+2] = voodoo->thefilter[fil3[x*3+2]][fil3[ (x+1) *3+2]]; + } +} + + +static void voodoo_filterline_v2(voodoo_t *voodoo, uint8_t *fil, int column, uint16_t *src, int line) +{ + int x; + + // Scratchpad for blending filter + uint8_t fil3[(voodoo->h_disp) * 3]; + + /* 16 to 32-bit */ + for (x=0; x> 5) & 63) << 2); + fil3[x*3+2] = fil[x*3+2] = (((src[x] >> 11) & 31) << 3); + } + + /* filtering time */ + + for (x=1; xthefilterb [((src[x+3] & 31) << 3)] [((src[x] & 31) << 3)]; + fil3[(x+3)*3+1] = voodoo->thefilterg [(((src[x+3] >> 5) & 63) << 2)] [(((src[x] >> 5) & 63) << 2)]; + fil3[(x+3)*3+2] = voodoo->thefilter [(((src[x+3] >> 11) & 31) << 3)] [(((src[x] >> 11) & 31) << 3)]; + + fil[(x+2)*3] = voodoo->thefilterb [fil3[(x+2)*3]][((src[x] & 31) << 3)]; + fil[(x+2)*3+1] = voodoo->thefilterg [fil3[(x+2)*3+1]][(((src[x] >> 5) & 63) << 2)]; + fil[(x+2)*3+2] = voodoo->thefilter [fil3[(x+2)*3+2]][(((src[x] >> 11) & 31) << 3)]; + + fil3[(x+1)*3] = voodoo->thefilterb [fil[(x+1)*3]][((src[x] & 31) << 3)]; + fil3[(x+1)*3+1] = voodoo->thefilterg [fil[(x+1)*3+1]][(((src[x] >> 5) & 63) << 
2)]; + fil3[(x+1)*3+2] = voodoo->thefilter [fil[(x+1)*3+2]][(((src[x] >> 11) & 31) << 3)]; + + fil[(x-1)*3] = voodoo->thefilterb [fil3[(x-1)*3]][((src[x] & 31) << 3)]; + fil[(x-1)*3+1] = voodoo->thefilterg [fil3[(x-1)*3+1]][(((src[x] >> 5) & 63) << 2)]; + fil[(x-1)*3+2] = voodoo->thefilter [fil3[(x-1)*3+2]][(((src[x] >> 11) & 31) << 3)]; + } + + // unroll for edge cases + + fil3[(column-3)*3] = voodoo->thefilterb [((src[column-3] & 31) << 3)] [((src[column] & 31) << 3)]; + fil3[(column-3)*3+1] = voodoo->thefilterg [(((src[column-3] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; + fil3[(column-3)*3+2] = voodoo->thefilter [(((src[column-3] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; + + fil3[(column-2)*3] = voodoo->thefilterb [((src[column-2] & 31) << 3)] [((src[column] & 31) << 3)]; + fil3[(column-2)*3+1] = voodoo->thefilterg [(((src[column-2] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; + fil3[(column-2)*3+2] = voodoo->thefilter [(((src[column-2] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; + + fil3[(column-1)*3] = voodoo->thefilterb [((src[column-1] & 31) << 3)] [((src[column] & 31) << 3)]; + fil3[(column-1)*3+1] = voodoo->thefilterg [(((src[column-1] >> 5) & 63) << 2)] [(((src[column] >> 5) & 63) << 2)]; + fil3[(column-1)*3+2] = voodoo->thefilter [(((src[column-1] >> 11) & 31) << 3)] [(((src[column] >> 11) & 31) << 3)]; + + fil[(column-2)*3] = voodoo->thefilterb [fil3[(column-2)*3]][((src[column] & 31) << 3)]; + fil[(column-2)*3+1] = voodoo->thefilterg [fil3[(column-2)*3+1]][(((src[column] >> 5) & 63) << 2)]; + fil[(column-2)*3+2] = voodoo->thefilter [fil3[(column-2)*3+2]][(((src[column] >> 11) & 31) << 3)]; + + fil[(column-1)*3] = voodoo->thefilterb [fil3[(column-1)*3]][((src[column] & 31) << 3)]; + fil[(column-1)*3+1] = voodoo->thefilterg [fil3[(column-1)*3+1]][(((src[column] >> 5) & 63) << 2)]; + fil[(column-1)*3+2] = voodoo->thefilter [fil3[(column-1)*3+2]][(((src[column] >> 11) & 31) << 3)]; + + fil3[(column-1)*3] = 
voodoo->thefilterb [fil[(column-1)*3]][((src[column] & 31) << 3)]; + fil3[(column-1)*3+1] = voodoo->thefilterg [fil[(column-1)*3+1]][(((src[column] >> 5) & 63) << 2)]; + fil3[(column-1)*3+2] = voodoo->thefilter [fil[(column-1)*3+2]][(((src[column] >> 11) & 31) << 3)]; +} + +void voodoo_callback(void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + + if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS) + { + if (voodoo->line < voodoo->v_disp) + { + voodoo_t *draw_voodoo; + int draw_line; + + if (SLI_ENABLED) + { + if (voodoo == voodoo->set->voodoos[1]) + goto skip_draw; + + if (((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) ? 1 : 0) == (voodoo->line & 1)) + draw_voodoo = voodoo; + else + draw_voodoo = voodoo->set->voodoos[1]; + draw_line = voodoo->line >> 1; + } + else + { + if (!(voodoo->fbiInit0 & 1)) + goto skip_draw; + draw_voodoo = voodoo; + draw_line = voodoo->line; + } + + if (draw_voodoo->dirty_line[draw_line]) + { + uint32_t *p = &((uint32_t *)buffer32->line[voodoo->line])[32]; + uint16_t *src = (uint16_t *)&draw_voodoo->fb_mem[draw_voodoo->front_offset + draw_line*draw_voodoo->row_width]; + int x; + + draw_voodoo->dirty_line[draw_line] = 0; + + if (voodoo->line < voodoo->dirty_line_low) + { + voodoo->dirty_line_low = voodoo->line; + video_wait_for_buffer(); + } + if (voodoo->line > voodoo->dirty_line_high) + voodoo->dirty_line_high = voodoo->line; + + if (voodoo->scrfilter && voodoo->scrfilterEnabled) + { + uint8_t fil[(voodoo->h_disp) * 3]; /* interleaved 24-bit RGB */ + + if (voodoo->type == VOODOO_2) + voodoo_filterline_v2(voodoo, fil, voodoo->h_disp, src, voodoo->line); + else + voodoo_filterline_v1(voodoo, fil, voodoo->h_disp, src, voodoo->line); + + for (x = 0; x < voodoo->h_disp; x++) + { + p[x] = (voodoo->clutData256[fil[x*3]].b << 0 | voodoo->clutData256[fil[x*3+1]].g << 8 | voodoo->clutData256[fil[x*3+2]].r << 16); + } + } + else + { + for (x = 0; x < voodoo->h_disp; x++) + { + p[x] = draw_voodoo->video_16to32[src[x]]; + } + } + } + } + } +skip_draw: + if 
(voodoo->line == voodoo->v_disp) + { +// pclog("retrace %i %i %08x %i\n", voodoo->retrace_count, voodoo->swap_interval, voodoo->swap_offset, voodoo->swap_pending); + voodoo->retrace_count++; + if (SLI_ENABLED && (voodoo->fbiInit2 & FBIINIT2_SWAP_ALGORITHM_MASK) == FBIINIT2_SWAP_ALGORITHM_SLI_SYNC) + { + if (voodoo == voodoo->set->voodoos[0]) + { + voodoo_t *voodoo_1 = voodoo->set->voodoos[1]; + + thread_lock_mutex(voodoo->swap_mutex); + /*Only swap if both Voodoos are waiting for buffer swap*/ + if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval) && + voodoo_1->swap_pending && (voodoo_1->retrace_count > voodoo_1->swap_interval)) + { + memset(voodoo->dirty_line, 1, 1024); + voodoo->retrace_count = 0; + voodoo->front_offset = voodoo->swap_offset; + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->swap_pending = 0; + + memset(voodoo_1->dirty_line, 1, 1024); + voodoo_1->retrace_count = 0; + voodoo_1->front_offset = voodoo_1->swap_offset; + if (voodoo_1->swap_count > 0) + voodoo_1->swap_count--; + voodoo_1->swap_pending = 0; + thread_unlock_mutex(voodoo->swap_mutex); + + thread_set_event(voodoo->wake_fifo_thread); + thread_set_event(voodoo_1->wake_fifo_thread); + + voodoo->frame_count++; + voodoo_1->frame_count++; + } + else + thread_unlock_mutex(voodoo->swap_mutex); + } + } + else + { + thread_lock_mutex(voodoo->swap_mutex); + if (voodoo->swap_pending && (voodoo->retrace_count > voodoo->swap_interval)) + { + voodoo->front_offset = voodoo->swap_offset; + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->swap_pending = 0; + thread_unlock_mutex(voodoo->swap_mutex); + + memset(voodoo->dirty_line, 1, 1024); + voodoo->retrace_count = 0; + thread_set_event(voodoo->wake_fifo_thread); + voodoo->frame_count++; + } + else + thread_unlock_mutex(voodoo->swap_mutex); + } + voodoo->v_retrace = 1; + } + voodoo->line++; + + if (voodoo->fbiInit0 & FBIINIT0_VGA_PASS) + { + if (voodoo->line == voodoo->v_disp) + { + if 
(voodoo->dirty_line_high > voodoo->dirty_line_low) + svga_doblit(0, voodoo->v_disp, voodoo->h_disp, voodoo->v_disp-1, voodoo->svga); + if (voodoo->clutData_dirty) + { + voodoo->clutData_dirty = 0; + voodoo_calc_clutData(voodoo); + } + voodoo->dirty_line_high = -1; + voodoo->dirty_line_low = 2000; + } + } + + if (voodoo->line >= voodoo->v_total) + { + voodoo->line = 0; + voodoo->v_retrace = 0; + } + if (voodoo->line_time) + timer_advance_u64(&voodoo->timer, voodoo->line_time); + else + timer_advance_u64(&voodoo->timer, TIMER_USEC * 32); +} diff --git a/pcem/vid_voodoo_display.h b/pcem/vid_voodoo_display.h new file mode 100644 index 00000000..46cbc00d --- /dev/null +++ b/pcem/vid_voodoo_display.h @@ -0,0 +1,6 @@ +void voodoo_update_ncc(voodoo_t *voodoo, int tmu); +void voodoo_pixelclock_update(voodoo_t *voodoo); +void voodoo_generate_filter_v1(voodoo_t *voodoo); +void voodoo_generate_filter_v2(voodoo_t *voodoo); +void voodoo_threshold_check(voodoo_t *voodoo); +void voodoo_callback(void *p); diff --git a/pcem/vid_voodoo_dither.h b/pcem/vid_voodoo_dither.h new file mode 100644 index 00000000..67d48388 --- /dev/null +++ b/pcem/vid_voodoo_dither.h @@ -0,0 +1,5136 @@ +static const uint8_t dither_rb[256][4][4] = +{ + { + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + }, + { + {0, 0, 0, 0}, + {0, 0, 1, 0}, + {0, 0, 0, 0}, + {1, 0, 0, 0}, + }, + { + {0, 0, 0, 0}, + {1, 0, 1, 0}, + {0, 0, 0, 0}, + {1, 0, 1, 0}, + }, + { + {0, 0, 0, 1}, + {1, 0, 1, 0}, + {0, 1, 0, 0}, + {1, 0, 1, 0}, + }, + { + {0, 1, 0, 1}, + {1, 0, 1, 0}, + {0, 1, 0, 1}, + {1, 0, 1, 0}, + }, + { + {0, 1, 0, 1}, + {1, 0, 1, 1}, + {0, 1, 0, 1}, + {1, 1, 1, 0}, + }, + { + {0, 1, 0, 1}, + {1, 1, 1, 1}, + {0, 1, 0, 1}, + {1, 1, 1, 1}, + }, + { + {0, 1, 1, 1}, + {1, 1, 1, 1}, + {1, 1, 0, 1}, + {1, 1, 1, 1}, + }, + { + {1, 1, 1, 1}, + {1, 1, 1, 1}, + {1, 1, 1, 1}, + {1, 1, 1, 1}, + }, + { + {1, 1, 1, 1}, + {1, 1, 2, 1}, + {1, 1, 1, 1}, + {2, 1, 1, 1}, + }, + { + {1, 1, 1, 1}, + {2, 1, 2, 1}, + {1, 
1, 1, 1}, + {2, 1, 2, 1}, + }, + { + {1, 1, 1, 2}, + {2, 1, 2, 1}, + {1, 2, 1, 1}, + {2, 1, 2, 1}, + }, + { + {1, 2, 1, 2}, + {2, 1, 2, 1}, + {1, 2, 1, 2}, + {2, 1, 2, 1}, + }, + { + {1, 2, 1, 2}, + {2, 1, 2, 2}, + {1, 2, 1, 2}, + {2, 2, 2, 1}, + }, + { + {1, 2, 1, 2}, + {2, 2, 2, 2}, + {1, 2, 1, 2}, + {2, 2, 2, 2}, + }, + { + {1, 2, 2, 2}, + {2, 2, 2, 2}, + {2, 2, 1, 2}, + {2, 2, 2, 2}, + }, + { + {1, 2, 2, 2}, + {2, 2, 2, 2}, + {2, 2, 2, 2}, + {2, 2, 2, 2}, + }, + { + {2, 2, 2, 2}, + {2, 2, 2, 2}, + {2, 2, 2, 2}, + {3, 2, 2, 2}, + }, + { + {2, 2, 2, 2}, + {2, 2, 3, 2}, + {2, 2, 2, 2}, + {3, 2, 3, 2}, + }, + { + {2, 2, 2, 2}, + {3, 2, 3, 2}, + {2, 3, 2, 2}, + {3, 2, 3, 2}, + }, + { + {2, 2, 2, 3}, + {3, 2, 3, 2}, + {2, 3, 2, 3}, + {3, 2, 3, 2}, + }, + { + {2, 3, 2, 3}, + {3, 2, 3, 2}, + {2, 3, 2, 3}, + {3, 3, 3, 2}, + }, + { + {2, 3, 2, 3}, + {3, 2, 3, 3}, + {2, 3, 2, 3}, + {3, 3, 3, 3}, + }, + { + {2, 3, 2, 3}, + {3, 3, 3, 3}, + {3, 3, 2, 3}, + {3, 3, 3, 3}, + }, + { + {2, 3, 3, 3}, + {3, 3, 3, 3}, + {3, 3, 3, 3}, + {3, 3, 3, 3}, + }, + { + {3, 3, 3, 3}, + {3, 3, 3, 3}, + {3, 3, 3, 3}, + {4, 3, 3, 3}, + }, + { + {3, 3, 3, 3}, + {3, 3, 4, 3}, + {3, 3, 3, 3}, + {4, 3, 4, 3}, + }, + { + {3, 3, 3, 3}, + {4, 3, 4, 3}, + {3, 4, 3, 3}, + {4, 3, 4, 3}, + }, + { + {3, 3, 3, 4}, + {4, 3, 4, 3}, + {3, 4, 3, 4}, + {4, 3, 4, 3}, + }, + { + {3, 4, 3, 4}, + {4, 3, 4, 3}, + {3, 4, 3, 4}, + {4, 4, 4, 3}, + }, + { + {3, 4, 3, 4}, + {4, 3, 4, 4}, + {3, 4, 3, 4}, + {4, 4, 4, 4}, + }, + { + {3, 4, 3, 4}, + {4, 4, 4, 4}, + {4, 4, 3, 4}, + {4, 4, 4, 4}, + }, + { + {3, 4, 4, 4}, + {4, 4, 4, 4}, + {4, 4, 3, 4}, + {4, 4, 4, 4}, + }, + { + {4, 4, 4, 4}, + {4, 4, 4, 4}, + {4, 4, 4, 4}, + {4, 4, 4, 4}, + }, + { + {4, 4, 4, 4}, + {4, 4, 5, 4}, + {4, 4, 4, 4}, + {5, 4, 4, 4}, + }, + { + {4, 4, 4, 4}, + {5, 4, 5, 4}, + {4, 4, 4, 4}, + {5, 4, 5, 4}, + }, + { + {4, 4, 4, 5}, + {5, 4, 5, 4}, + {4, 5, 4, 4}, + {5, 4, 5, 4}, + }, + { + {4, 5, 4, 5}, + {5, 4, 5, 4}, + {4, 5, 4, 5}, + {5, 4, 5, 4}, + 
}, + { + {4, 5, 4, 5}, + {5, 4, 5, 5}, + {4, 5, 4, 5}, + {5, 5, 5, 4}, + }, + { + {4, 5, 4, 5}, + {5, 5, 5, 5}, + {4, 5, 4, 5}, + {5, 5, 5, 5}, + }, + { + {4, 5, 5, 5}, + {5, 5, 5, 5}, + {5, 5, 4, 5}, + {5, 5, 5, 5}, + }, + { + {5, 5, 5, 5}, + {5, 5, 5, 5}, + {5, 5, 5, 5}, + {5, 5, 5, 5}, + }, + { + {5, 5, 5, 5}, + {5, 5, 6, 5}, + {5, 5, 5, 5}, + {6, 5, 5, 5}, + }, + { + {5, 5, 5, 5}, + {6, 5, 6, 5}, + {5, 5, 5, 5}, + {6, 5, 6, 5}, + }, + { + {5, 5, 5, 6}, + {6, 5, 6, 5}, + {5, 6, 5, 5}, + {6, 5, 6, 5}, + }, + { + {5, 6, 5, 6}, + {6, 5, 6, 5}, + {5, 6, 5, 6}, + {6, 5, 6, 5}, + }, + { + {5, 6, 5, 6}, + {6, 5, 6, 6}, + {5, 6, 5, 6}, + {6, 6, 6, 5}, + }, + { + {5, 6, 5, 6}, + {6, 6, 6, 6}, + {5, 6, 5, 6}, + {6, 6, 6, 6}, + }, + { + {5, 6, 5, 6}, + {6, 6, 6, 6}, + {6, 6, 5, 6}, + {6, 6, 6, 6}, + }, + { + {5, 6, 6, 6}, + {6, 6, 6, 6}, + {6, 6, 6, 6}, + {6, 6, 6, 6}, + }, + { + {6, 6, 6, 6}, + {6, 6, 6, 6}, + {6, 6, 6, 6}, + {7, 6, 6, 6}, + }, + { + {6, 6, 6, 6}, + {6, 6, 7, 6}, + {6, 6, 6, 6}, + {7, 6, 7, 6}, + }, + { + {6, 6, 6, 6}, + {7, 6, 7, 6}, + {6, 7, 6, 6}, + {7, 6, 7, 6}, + }, + { + {6, 6, 6, 7}, + {7, 6, 7, 6}, + {6, 7, 6, 7}, + {7, 6, 7, 6}, + }, + { + {6, 7, 6, 7}, + {7, 6, 7, 6}, + {6, 7, 6, 7}, + {7, 7, 7, 6}, + }, + { + {6, 7, 6, 7}, + {7, 6, 7, 7}, + {6, 7, 6, 7}, + {7, 7, 7, 7}, + }, + { + {6, 7, 6, 7}, + {7, 7, 7, 7}, + {7, 7, 6, 7}, + {7, 7, 7, 7}, + }, + { + {6, 7, 7, 7}, + {7, 7, 7, 7}, + {7, 7, 7, 7}, + {7, 7, 7, 7}, + }, + { + {7, 7, 7, 7}, + {7, 7, 7, 7}, + {7, 7, 7, 7}, + {8, 7, 7, 7}, + }, + { + {7, 7, 7, 7}, + {7, 7, 8, 7}, + {7, 7, 7, 7}, + {8, 7, 8, 7}, + }, + { + {7, 7, 7, 7}, + {8, 7, 8, 7}, + {7, 8, 7, 7}, + {8, 7, 8, 7}, + }, + { + {7, 7, 7, 8}, + {8, 7, 8, 7}, + {7, 8, 7, 8}, + {8, 7, 8, 7}, + }, + { + {7, 8, 7, 8}, + {8, 7, 8, 7}, + {7, 8, 7, 8}, + {8, 8, 8, 7}, + }, + { + {7, 8, 7, 8}, + {8, 7, 8, 8}, + {7, 8, 7, 8}, + {8, 8, 8, 8}, + }, + { + {7, 8, 7, 8}, + {8, 8, 8, 8}, + {7, 8, 7, 8}, + {8, 8, 8, 8}, + }, + { + {7, 8, 8, 8}, + {8, 
8, 8, 8}, + {8, 8, 7, 8}, + {8, 8, 8, 8}, + }, + { + {8, 8, 8, 8}, + {8, 8, 8, 8}, + {8, 8, 8, 8}, + {8, 8, 8, 8}, + }, + { + {8, 8, 8, 8}, + {8, 8, 9, 8}, + {8, 8, 8, 8}, + {9, 8, 8, 8}, + }, + { + {8, 8, 8, 8}, + {9, 8, 9, 8}, + {8, 8, 8, 8}, + {9, 8, 9, 8}, + }, + { + {8, 8, 8, 9}, + {9, 8, 9, 8}, + {8, 9, 8, 8}, + {9, 8, 9, 8}, + }, + { + {8, 9, 8, 9}, + {9, 8, 9, 8}, + {8, 9, 8, 9}, + {9, 8, 9, 8}, + }, + { + {8, 9, 8, 9}, + {9, 8, 9, 9}, + {8, 9, 8, 9}, + {9, 9, 9, 8}, + }, + { + {8, 9, 8, 9}, + {9, 9, 9, 9}, + {8, 9, 8, 9}, + {9, 9, 9, 9}, + }, + { + {8, 9, 9, 9}, + {9, 9, 9, 9}, + {9, 9, 8, 9}, + {9, 9, 9, 9}, + }, + { + {9, 9, 9, 9}, + {9, 9, 9, 9}, + {9, 9, 9, 9}, + {9, 9, 9, 9}, + }, + { + {9, 9, 9, 9}, + {9, 9, 10, 9}, + {9, 9, 9, 9}, + {10, 9, 9, 9}, + }, + { + {9, 9, 9, 9}, + {10, 9, 10, 9}, + {9, 9, 9, 9}, + {10, 9, 10, 9}, + }, + { + {9, 9, 9, 10}, + {10, 9, 10, 9}, + {9, 10, 9, 9}, + {10, 9, 10, 9}, + }, + { + {9, 10, 9, 10}, + {10, 9, 10, 9}, + {9, 10, 9, 10}, + {10, 9, 10, 9}, + }, + { + {9, 10, 9, 10}, + {10, 9, 10, 10}, + {9, 10, 9, 10}, + {10, 10, 10, 9}, + }, + { + {9, 10, 9, 10}, + {10, 9, 10, 10}, + {9, 10, 9, 10}, + {10, 10, 10, 10}, + }, + { + {9, 10, 9, 10}, + {10, 10, 10, 10}, + {10, 10, 9, 10}, + {10, 10, 10, 10}, + }, + { + {9, 10, 10, 10}, + {10, 10, 10, 10}, + {10, 10, 10, 10}, + {10, 10, 10, 10}, + }, + { + {10, 10, 10, 10}, + {10, 10, 10, 10}, + {10, 10, 10, 10}, + {11, 10, 10, 10}, + }, + { + {10, 10, 10, 10}, + {10, 10, 11, 10}, + {10, 10, 10, 10}, + {11, 10, 11, 10}, + }, + { + {10, 10, 10, 10}, + {11, 10, 11, 10}, + {10, 11, 10, 10}, + {11, 10, 11, 10}, + }, + { + {10, 10, 10, 11}, + {11, 10, 11, 10}, + {10, 11, 10, 11}, + {11, 10, 11, 10}, + }, + { + {10, 11, 10, 11}, + {11, 10, 11, 10}, + {10, 11, 10, 11}, + {11, 11, 11, 10}, + }, + { + {10, 11, 10, 11}, + {11, 10, 11, 11}, + {10, 11, 10, 11}, + {11, 11, 11, 11}, + }, + { + {10, 11, 10, 11}, + {11, 11, 11, 11}, + {11, 11, 10, 11}, + {11, 11, 11, 11}, + }, + { + {10, 11, 11, 
11}, + {11, 11, 11, 11}, + {11, 11, 11, 11}, + {11, 11, 11, 11}, + }, + { + {11, 11, 11, 11}, + {11, 11, 11, 11}, + {11, 11, 11, 11}, + {12, 11, 11, 11}, + }, + { + {11, 11, 11, 11}, + {11, 11, 12, 11}, + {11, 11, 11, 11}, + {12, 11, 12, 11}, + }, + { + {11, 11, 11, 11}, + {12, 11, 12, 11}, + {11, 12, 11, 11}, + {12, 11, 12, 11}, + }, + { + {11, 11, 11, 12}, + {12, 11, 12, 11}, + {11, 12, 11, 12}, + {12, 11, 12, 11}, + }, + { + {11, 12, 11, 12}, + {12, 11, 12, 11}, + {11, 12, 11, 12}, + {12, 12, 12, 11}, + }, + { + {11, 12, 11, 12}, + {12, 11, 12, 12}, + {11, 12, 11, 12}, + {12, 12, 12, 11}, + }, + { + {11, 12, 11, 12}, + {12, 12, 12, 12}, + {11, 12, 11, 12}, + {12, 12, 12, 12}, + }, + { + {11, 12, 12, 12}, + {12, 12, 12, 12}, + {12, 12, 11, 12}, + {12, 12, 12, 12}, + }, + { + {12, 12, 12, 12}, + {12, 12, 12, 12}, + {12, 12, 12, 12}, + {12, 12, 12, 12}, + }, + { + {12, 12, 12, 12}, + {12, 12, 13, 12}, + {12, 12, 12, 12}, + {13, 12, 12, 12}, + }, + { + {12, 12, 12, 12}, + {13, 12, 13, 12}, + {12, 12, 12, 12}, + {13, 12, 13, 12}, + }, + { + {12, 12, 12, 13}, + {13, 12, 13, 12}, + {12, 13, 12, 12}, + {13, 12, 13, 12}, + }, + { + {12, 13, 12, 13}, + {13, 12, 13, 12}, + {12, 13, 12, 13}, + {13, 12, 13, 12}, + }, + { + {12, 13, 12, 13}, + {13, 12, 13, 13}, + {12, 13, 12, 13}, + {13, 13, 13, 12}, + }, + { + {12, 13, 12, 13}, + {13, 13, 13, 13}, + {12, 13, 12, 13}, + {13, 13, 13, 13}, + }, + { + {12, 13, 13, 13}, + {13, 13, 13, 13}, + {13, 13, 12, 13}, + {13, 13, 13, 13}, + }, + { + {13, 13, 13, 13}, + {13, 13, 13, 13}, + {13, 13, 13, 13}, + {13, 13, 13, 13}, + }, + { + {13, 13, 13, 13}, + {13, 13, 14, 13}, + {13, 13, 13, 13}, + {14, 13, 13, 13}, + }, + { + {13, 13, 13, 13}, + {14, 13, 14, 13}, + {13, 13, 13, 13}, + {14, 13, 14, 13}, + }, + { + {13, 13, 13, 14}, + {14, 13, 14, 13}, + {13, 14, 13, 13}, + {14, 13, 14, 13}, + }, + { + {13, 14, 13, 14}, + {14, 13, 14, 13}, + {13, 14, 13, 14}, + {14, 13, 14, 13}, + }, + { + {13, 14, 13, 14}, + {14, 13, 14, 13}, + {13, 14, 13, 
14}, + {14, 14, 14, 13}, + }, + { + {13, 14, 13, 14}, + {14, 13, 14, 14}, + {13, 14, 13, 14}, + {14, 14, 14, 14}, + }, + { + {13, 14, 13, 14}, + {14, 14, 14, 14}, + {14, 14, 13, 14}, + {14, 14, 14, 14}, + }, + { + {13, 14, 14, 14}, + {14, 14, 14, 14}, + {14, 14, 14, 14}, + {14, 14, 14, 14}, + }, + { + {14, 14, 14, 14}, + {14, 14, 14, 14}, + {14, 14, 14, 14}, + {15, 14, 14, 14}, + }, + { + {14, 14, 14, 14}, + {14, 14, 15, 14}, + {14, 14, 14, 14}, + {15, 14, 15, 14}, + }, + { + {14, 14, 14, 14}, + {15, 14, 15, 14}, + {14, 15, 14, 14}, + {15, 14, 15, 14}, + }, + { + {14, 14, 14, 15}, + {15, 14, 15, 14}, + {14, 15, 14, 15}, + {15, 14, 15, 14}, + }, + { + {14, 15, 14, 15}, + {15, 14, 15, 14}, + {14, 15, 14, 15}, + {15, 15, 15, 14}, + }, + { + {14, 15, 14, 15}, + {15, 14, 15, 15}, + {14, 15, 14, 15}, + {15, 15, 15, 15}, + }, + { + {14, 15, 14, 15}, + {15, 15, 15, 15}, + {15, 15, 14, 15}, + {15, 15, 15, 15}, + }, + { + {14, 15, 15, 15}, + {15, 15, 15, 15}, + {15, 15, 15, 15}, + {15, 15, 15, 15}, + }, + { + {15, 15, 15, 15}, + {15, 15, 15, 15}, + {15, 15, 15, 15}, + {16, 15, 15, 15}, + }, + { + {15, 15, 15, 15}, + {15, 15, 16, 15}, + {15, 15, 15, 15}, + {16, 15, 16, 15}, + }, + { + {15, 15, 15, 15}, + {16, 15, 16, 15}, + {15, 16, 15, 15}, + {16, 15, 16, 15}, + }, + { + {15, 15, 15, 16}, + {16, 15, 16, 15}, + {15, 16, 15, 16}, + {16, 15, 16, 15}, + }, + { + {15, 16, 15, 16}, + {16, 15, 16, 15}, + {15, 16, 15, 16}, + {16, 16, 16, 15}, + }, + { + {15, 16, 15, 16}, + {16, 15, 16, 16}, + {15, 16, 15, 16}, + {16, 16, 16, 16}, + }, + { + {15, 16, 15, 16}, + {16, 16, 16, 16}, + {16, 16, 15, 16}, + {16, 16, 16, 16}, + }, + { + {15, 16, 16, 16}, + {16, 16, 16, 16}, + {16, 16, 16, 16}, + {16, 16, 16, 16}, + }, + { + {16, 16, 16, 16}, + {16, 16, 16, 16}, + {16, 16, 16, 16}, + {17, 16, 16, 16}, + }, + { + {16, 16, 16, 16}, + {16, 16, 17, 16}, + {16, 16, 16, 16}, + {17, 16, 17, 16}, + }, + { + {16, 16, 16, 16}, + {17, 16, 17, 16}, + {16, 17, 16, 16}, + {17, 16, 17, 16}, + }, + { + {16, 
16, 16, 17}, + {17, 16, 17, 16}, + {16, 17, 16, 17}, + {17, 16, 17, 16}, + }, + { + {16, 17, 16, 17}, + {17, 16, 17, 16}, + {16, 17, 16, 17}, + {17, 17, 17, 16}, + }, + { + {16, 17, 16, 17}, + {17, 16, 17, 17}, + {16, 17, 16, 17}, + {17, 17, 17, 17}, + }, + { + {16, 17, 16, 17}, + {17, 17, 17, 17}, + {17, 17, 16, 17}, + {17, 17, 17, 17}, + }, + { + {16, 17, 17, 17}, + {17, 17, 17, 17}, + {17, 17, 17, 17}, + {17, 17, 17, 17}, + }, + { + {17, 17, 17, 17}, + {17, 17, 17, 17}, + {17, 17, 17, 17}, + {18, 17, 17, 17}, + }, + { + {17, 17, 17, 17}, + {17, 17, 18, 17}, + {17, 17, 17, 17}, + {18, 17, 18, 17}, + }, + { + {17, 17, 17, 17}, + {18, 17, 18, 17}, + {17, 18, 17, 17}, + {18, 17, 18, 17}, + }, + { + {17, 17, 17, 18}, + {18, 17, 18, 17}, + {17, 18, 17, 18}, + {18, 17, 18, 17}, + }, + { + {17, 18, 17, 18}, + {18, 17, 18, 17}, + {17, 18, 17, 18}, + {18, 17, 18, 17}, + }, + { + {17, 18, 17, 18}, + {18, 17, 18, 18}, + {17, 18, 17, 18}, + {18, 18, 18, 17}, + }, + { + {17, 18, 17, 18}, + {18, 18, 18, 18}, + {17, 18, 17, 18}, + {18, 18, 18, 18}, + }, + { + {17, 18, 18, 18}, + {18, 18, 18, 18}, + {18, 18, 17, 18}, + {18, 18, 18, 18}, + }, + { + {18, 18, 18, 18}, + {18, 18, 18, 18}, + {18, 18, 18, 18}, + {18, 18, 18, 18}, + }, + { + {18, 18, 18, 18}, + {18, 18, 19, 18}, + {18, 18, 18, 18}, + {19, 18, 18, 18}, + }, + { + {18, 18, 18, 18}, + {19, 18, 19, 18}, + {18, 18, 18, 18}, + {19, 18, 19, 18}, + }, + { + {18, 18, 18, 19}, + {19, 18, 19, 18}, + {18, 19, 18, 18}, + {19, 18, 19, 18}, + }, + { + {18, 19, 18, 19}, + {19, 18, 19, 18}, + {18, 19, 18, 19}, + {19, 18, 19, 18}, + }, + { + {18, 19, 18, 19}, + {19, 18, 19, 19}, + {18, 19, 18, 19}, + {19, 19, 19, 18}, + }, + { + {18, 19, 18, 19}, + {19, 19, 19, 19}, + {18, 19, 18, 19}, + {19, 19, 19, 19}, + }, + { + {18, 19, 19, 19}, + {19, 19, 19, 19}, + {19, 19, 18, 19}, + {19, 19, 19, 19}, + }, + { + {19, 19, 19, 19}, + {19, 19, 19, 19}, + {19, 19, 19, 19}, + {19, 19, 19, 19}, + }, + { + {19, 19, 19, 19}, + {19, 19, 20, 19}, + {19, 
19, 19, 19}, + {20, 19, 19, 19}, + }, + { + {19, 19, 19, 19}, + {20, 19, 20, 19}, + {19, 19, 19, 19}, + {20, 19, 20, 19}, + }, + { + {19, 19, 19, 20}, + {20, 19, 20, 19}, + {19, 20, 19, 19}, + {20, 19, 20, 19}, + }, + { + {19, 19, 19, 20}, + {20, 19, 20, 19}, + {19, 20, 19, 20}, + {20, 19, 20, 19}, + }, + { + {19, 20, 19, 20}, + {20, 19, 20, 19}, + {19, 20, 19, 20}, + {20, 20, 20, 19}, + }, + { + {19, 20, 19, 20}, + {20, 19, 20, 20}, + {19, 20, 19, 20}, + {20, 20, 20, 20}, + }, + { + {19, 20, 19, 20}, + {20, 20, 20, 20}, + {20, 20, 19, 20}, + {20, 20, 20, 20}, + }, + { + {19, 20, 20, 20}, + {20, 20, 20, 20}, + {20, 20, 20, 20}, + {20, 20, 20, 20}, + }, + { + {20, 20, 20, 20}, + {20, 20, 20, 20}, + {20, 20, 20, 20}, + {21, 20, 20, 20}, + }, + { + {20, 20, 20, 20}, + {20, 20, 21, 20}, + {20, 20, 20, 20}, + {21, 20, 21, 20}, + }, + { + {20, 20, 20, 20}, + {21, 20, 21, 20}, + {20, 21, 20, 20}, + {21, 20, 21, 20}, + }, + { + {20, 20, 20, 21}, + {21, 20, 21, 20}, + {20, 21, 20, 21}, + {21, 20, 21, 20}, + }, + { + {20, 21, 20, 21}, + {21, 20, 21, 20}, + {20, 21, 20, 21}, + {21, 21, 21, 20}, + }, + { + {20, 21, 20, 21}, + {21, 20, 21, 21}, + {20, 21, 20, 21}, + {21, 21, 21, 21}, + }, + { + {20, 21, 20, 21}, + {21, 21, 21, 21}, + {21, 21, 20, 21}, + {21, 21, 21, 21}, + }, + { + {20, 21, 21, 21}, + {21, 21, 21, 21}, + {21, 21, 21, 21}, + {21, 21, 21, 21}, + }, + { + {21, 21, 21, 21}, + {21, 21, 21, 21}, + {21, 21, 21, 21}, + {22, 21, 21, 21}, + }, + { + {21, 21, 21, 21}, + {21, 21, 22, 21}, + {21, 21, 21, 21}, + {22, 21, 22, 21}, + }, + { + {21, 21, 21, 21}, + {22, 21, 22, 21}, + {21, 22, 21, 21}, + {22, 21, 22, 21}, + }, + { + {21, 21, 21, 22}, + {22, 21, 22, 21}, + {21, 22, 21, 21}, + {22, 21, 22, 21}, + }, + { + {21, 22, 21, 22}, + {22, 21, 22, 21}, + {21, 22, 21, 22}, + {22, 21, 22, 21}, + }, + { + {21, 22, 21, 22}, + {22, 21, 22, 22}, + {21, 22, 21, 22}, + {22, 22, 22, 21}, + }, + { + {21, 22, 21, 22}, + {22, 22, 22, 22}, + {21, 22, 21, 22}, + {22, 22, 22, 22}, + }, + { 
+ {21, 22, 22, 22}, + {22, 22, 22, 22}, + {22, 22, 21, 22}, + {22, 22, 22, 22}, + }, + { + {22, 22, 22, 22}, + {22, 22, 22, 22}, + {22, 22, 22, 22}, + {22, 22, 22, 22}, + }, + { + {22, 22, 22, 22}, + {22, 22, 23, 22}, + {22, 22, 22, 22}, + {23, 22, 22, 22}, + }, + { + {22, 22, 22, 22}, + {23, 22, 23, 22}, + {22, 22, 22, 22}, + {23, 22, 23, 22}, + }, + { + {22, 22, 22, 23}, + {23, 22, 23, 22}, + {22, 23, 22, 22}, + {23, 22, 23, 22}, + }, + { + {22, 23, 22, 23}, + {23, 22, 23, 22}, + {22, 23, 22, 23}, + {23, 22, 23, 22}, + }, + { + {22, 23, 22, 23}, + {23, 22, 23, 23}, + {22, 23, 22, 23}, + {23, 23, 23, 22}, + }, + { + {22, 23, 22, 23}, + {23, 23, 23, 23}, + {22, 23, 22, 23}, + {23, 23, 23, 23}, + }, + { + {22, 23, 23, 23}, + {23, 23, 23, 23}, + {23, 23, 22, 23}, + {23, 23, 23, 23}, + }, + { + {23, 23, 23, 23}, + {23, 23, 23, 23}, + {23, 23, 23, 23}, + {23, 23, 23, 23}, + }, + { + {23, 23, 23, 23}, + {23, 23, 24, 23}, + {23, 23, 23, 23}, + {24, 23, 23, 23}, + }, + { + {23, 23, 23, 23}, + {24, 23, 24, 23}, + {23, 23, 23, 23}, + {24, 23, 24, 23}, + }, + { + {23, 23, 23, 23}, + {24, 23, 24, 23}, + {23, 24, 23, 23}, + {24, 23, 24, 23}, + }, + { + {23, 23, 23, 24}, + {24, 23, 24, 23}, + {23, 24, 23, 24}, + {24, 23, 24, 23}, + }, + { + {23, 24, 23, 24}, + {24, 23, 24, 23}, + {23, 24, 23, 24}, + {24, 24, 24, 23}, + }, + { + {23, 24, 23, 24}, + {24, 23, 24, 24}, + {23, 24, 23, 24}, + {24, 24, 24, 24}, + }, + { + {23, 24, 23, 24}, + {24, 24, 24, 24}, + {24, 24, 23, 24}, + {24, 24, 24, 24}, + }, + { + {23, 24, 24, 24}, + {24, 24, 24, 24}, + {24, 24, 24, 24}, + {24, 24, 24, 24}, + }, + { + {24, 24, 24, 24}, + {24, 24, 24, 24}, + {24, 24, 24, 24}, + {25, 24, 24, 24}, + }, + { + {24, 24, 24, 24}, + {24, 24, 25, 24}, + {24, 24, 24, 24}, + {25, 24, 25, 24}, + }, + { + {24, 24, 24, 24}, + {25, 24, 25, 24}, + {24, 25, 24, 24}, + {25, 24, 25, 24}, + }, + { + {24, 24, 24, 25}, + {25, 24, 25, 24}, + {24, 25, 24, 25}, + {25, 24, 25, 24}, + }, + { + {24, 25, 24, 25}, + {25, 24, 25, 24}, + 
{24, 25, 24, 25}, + {25, 25, 25, 24}, + }, + { + {24, 25, 24, 25}, + {25, 24, 25, 25}, + {24, 25, 24, 25}, + {25, 25, 25, 25}, + }, + { + {24, 25, 24, 25}, + {25, 25, 25, 25}, + {25, 25, 24, 25}, + {25, 25, 25, 25}, + }, + { + {24, 25, 25, 25}, + {25, 25, 25, 25}, + {25, 25, 25, 25}, + {25, 25, 25, 25}, + }, + { + {25, 25, 25, 25}, + {25, 25, 25, 25}, + {25, 25, 25, 25}, + {26, 25, 25, 25}, + }, + { + {25, 25, 25, 25}, + {25, 25, 26, 25}, + {25, 25, 25, 25}, + {26, 25, 26, 25}, + }, + { + {25, 25, 25, 25}, + {26, 25, 26, 25}, + {25, 25, 25, 25}, + {26, 25, 26, 25}, + }, + { + {25, 25, 25, 26}, + {26, 25, 26, 25}, + {25, 26, 25, 25}, + {26, 25, 26, 25}, + }, + { + {25, 26, 25, 26}, + {26, 25, 26, 25}, + {25, 26, 25, 26}, + {26, 25, 26, 25}, + }, + { + {25, 26, 25, 26}, + {26, 25, 26, 26}, + {25, 26, 25, 26}, + {26, 26, 26, 25}, + }, + { + {25, 26, 25, 26}, + {26, 26, 26, 26}, + {25, 26, 25, 26}, + {26, 26, 26, 26}, + }, + { + {25, 26, 26, 26}, + {26, 26, 26, 26}, + {26, 26, 25, 26}, + {26, 26, 26, 26}, + }, + { + {26, 26, 26, 26}, + {26, 26, 26, 26}, + {26, 26, 26, 26}, + {26, 26, 26, 26}, + }, + { + {26, 26, 26, 26}, + {26, 26, 27, 26}, + {26, 26, 26, 26}, + {27, 26, 26, 26}, + }, + { + {26, 26, 26, 26}, + {27, 26, 27, 26}, + {26, 26, 26, 26}, + {27, 26, 27, 26}, + }, + { + {26, 26, 26, 27}, + {27, 26, 27, 26}, + {26, 27, 26, 26}, + {27, 26, 27, 26}, + }, + { + {26, 27, 26, 27}, + {27, 26, 27, 26}, + {26, 27, 26, 27}, + {27, 26, 27, 26}, + }, + { + {26, 27, 26, 27}, + {27, 26, 27, 27}, + {26, 27, 26, 27}, + {27, 27, 27, 26}, + }, + { + {26, 27, 26, 27}, + {27, 27, 27, 27}, + {26, 27, 26, 27}, + {27, 27, 27, 27}, + }, + { + {26, 27, 27, 27}, + {27, 27, 27, 27}, + {27, 27, 26, 27}, + {27, 27, 27, 27}, + }, + { + {27, 27, 27, 27}, + {27, 27, 27, 27}, + {27, 27, 27, 27}, + {27, 27, 27, 27}, + }, + { + {27, 27, 27, 27}, + {27, 27, 28, 27}, + {27, 27, 27, 27}, + {28, 27, 27, 27}, + }, + { + {27, 27, 27, 27}, + {27, 27, 28, 27}, + {27, 27, 27, 27}, + {28, 27, 28, 27}, + 
}, + { + {27, 27, 27, 27}, + {28, 27, 28, 27}, + {27, 28, 27, 27}, + {28, 27, 28, 27}, + }, + { + {27, 27, 27, 28}, + {28, 27, 28, 27}, + {27, 28, 27, 28}, + {28, 27, 28, 27}, + }, + { + {27, 28, 27, 28}, + {28, 27, 28, 27}, + {27, 28, 27, 28}, + {28, 28, 28, 27}, + }, + { + {27, 28, 27, 28}, + {28, 27, 28, 28}, + {27, 28, 27, 28}, + {28, 28, 28, 28}, + }, + { + {27, 28, 27, 28}, + {28, 28, 28, 28}, + {28, 28, 27, 28}, + {28, 28, 28, 28}, + }, + { + {27, 28, 28, 28}, + {28, 28, 28, 28}, + {28, 28, 28, 28}, + {28, 28, 28, 28}, + }, + { + {28, 28, 28, 28}, + {28, 28, 28, 28}, + {28, 28, 28, 28}, + {29, 28, 28, 28}, + }, + { + {28, 28, 28, 28}, + {28, 28, 29, 28}, + {28, 28, 28, 28}, + {29, 28, 29, 28}, + }, + { + {28, 28, 28, 28}, + {29, 28, 29, 28}, + {28, 29, 28, 28}, + {29, 28, 29, 28}, + }, + { + {28, 28, 28, 29}, + {29, 28, 29, 28}, + {28, 29, 28, 29}, + {29, 28, 29, 28}, + }, + { + {28, 29, 28, 29}, + {29, 28, 29, 28}, + {28, 29, 28, 29}, + {29, 29, 29, 28}, + }, + { + {28, 29, 28, 29}, + {29, 28, 29, 29}, + {28, 29, 28, 29}, + {29, 29, 29, 29}, + }, + { + {28, 29, 28, 29}, + {29, 29, 29, 29}, + {29, 29, 28, 29}, + {29, 29, 29, 29}, + }, + { + {28, 29, 29, 29}, + {29, 29, 29, 29}, + {29, 29, 29, 29}, + {29, 29, 29, 29}, + }, + { + {29, 29, 29, 29}, + {29, 29, 29, 29}, + {29, 29, 29, 29}, + {30, 29, 29, 29}, + }, + { + {29, 29, 29, 29}, + {29, 29, 30, 29}, + {29, 29, 29, 29}, + {30, 29, 29, 29}, + }, + { + {29, 29, 29, 29}, + {30, 29, 30, 29}, + {29, 29, 29, 29}, + {30, 29, 30, 29}, + }, + { + {29, 29, 29, 30}, + {30, 29, 30, 29}, + {29, 30, 29, 29}, + {30, 29, 30, 29}, + }, + { + {29, 30, 29, 30}, + {30, 29, 30, 29}, + {29, 30, 29, 30}, + {30, 29, 30, 29}, + }, + { + {29, 30, 29, 30}, + {30, 29, 30, 30}, + {29, 30, 29, 30}, + {30, 30, 30, 29}, + }, + { + {29, 30, 29, 30}, + {30, 30, 30, 30}, + {29, 30, 29, 30}, + {30, 30, 30, 30}, + }, + { + {29, 30, 30, 30}, + {30, 30, 30, 30}, + {30, 30, 29, 30}, + {30, 30, 30, 30}, + }, + { + {30, 30, 30, 30}, + {30, 30, 30, 
30}, + {30, 30, 30, 30}, + {30, 30, 30, 30}, + }, + { + {30, 30, 30, 30}, + {30, 30, 31, 30}, + {30, 30, 30, 30}, + {31, 30, 30, 30}, + }, + { + {30, 30, 30, 30}, + {31, 30, 31, 30}, + {30, 30, 30, 30}, + {31, 30, 31, 30}, + }, + { + {30, 30, 30, 31}, + {31, 30, 31, 30}, + {30, 31, 30, 30}, + {31, 30, 31, 30}, + }, + { + {30, 31, 30, 31}, + {31, 30, 31, 30}, + {30, 31, 30, 31}, + {31, 30, 31, 30}, + }, + { + {30, 31, 30, 31}, + {31, 30, 31, 31}, + {30, 31, 30, 31}, + {31, 31, 31, 30}, + }, + { + {30, 31, 30, 31}, + {31, 31, 31, 31}, + {30, 31, 30, 31}, + {31, 31, 31, 31}, + }, + { + {30, 31, 31, 31}, + {31, 31, 31, 31}, + {31, 31, 30, 31}, + {31, 31, 31, 31}, + }, + { + {31, 31, 31, 31}, + {31, 31, 31, 31}, + {31, 31, 31, 31}, + {31, 31, 31, 31}, + }, +}; + +static const uint8_t dither_g[256][4][4] = +{ + { + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + }, + { + {0, 0, 0, 0}, + {1, 0, 1, 0}, + {0, 0, 0, 0}, + {1, 0, 1, 0}, + }, + { + {0, 1, 0, 1}, + {1, 0, 1, 0}, + {0, 1, 0, 1}, + {1, 0, 1, 0}, + }, + { + {0, 1, 0, 1}, + {1, 1, 1, 1}, + {0, 1, 0, 1}, + {1, 1, 1, 1}, + }, + { + {1, 1, 1, 1}, + {1, 1, 1, 1}, + {1, 1, 1, 1}, + {1, 1, 1, 1}, + }, + { + {1, 1, 1, 1}, + {2, 1, 2, 1}, + {1, 1, 1, 1}, + {2, 1, 2, 1}, + }, + { + {1, 2, 1, 2}, + {2, 1, 2, 1}, + {1, 2, 1, 2}, + {2, 1, 2, 1}, + }, + { + {1, 2, 1, 2}, + {2, 2, 2, 2}, + {1, 2, 1, 2}, + {2, 2, 2, 2}, + }, + { + {2, 2, 2, 2}, + {2, 2, 2, 2}, + {2, 2, 2, 2}, + {2, 2, 2, 2}, + }, + { + {2, 2, 2, 2}, + {3, 2, 3, 2}, + {2, 2, 2, 2}, + {3, 2, 3, 2}, + }, + { + {2, 3, 2, 3}, + {3, 2, 3, 2}, + {2, 3, 2, 3}, + {3, 2, 3, 2}, + }, + { + {2, 3, 2, 3}, + {3, 3, 3, 3}, + {2, 3, 2, 3}, + {3, 3, 3, 3}, + }, + { + {3, 3, 3, 3}, + {3, 3, 3, 3}, + {3, 3, 3, 3}, + {3, 3, 3, 3}, + }, + { + {3, 3, 3, 3}, + {4, 3, 4, 3}, + {3, 3, 3, 3}, + {4, 3, 4, 3}, + }, + { + {3, 4, 3, 4}, + {4, 3, 4, 3}, + {3, 4, 3, 4}, + {4, 3, 4, 3}, + }, + { + {3, 4, 3, 4}, + {4, 4, 4, 4}, + {3, 4, 3, 4}, + {4, 4, 4, 4}, + }, + { + {3, 4, 4, 
4}, + {4, 4, 4, 4}, + {4, 4, 4, 4}, + {4, 4, 4, 4}, + }, + { + {4, 4, 4, 4}, + {4, 4, 5, 4}, + {4, 4, 4, 4}, + {5, 4, 5, 4}, + }, + { + {4, 4, 4, 5}, + {5, 4, 5, 4}, + {4, 5, 4, 5}, + {5, 4, 5, 4}, + }, + { + {4, 5, 4, 5}, + {5, 4, 5, 5}, + {4, 5, 4, 5}, + {5, 5, 5, 5}, + }, + { + {4, 5, 5, 5}, + {5, 5, 5, 5}, + {5, 5, 5, 5}, + {5, 5, 5, 5}, + }, + { + {5, 5, 5, 5}, + {5, 5, 6, 5}, + {5, 5, 5, 5}, + {6, 5, 6, 5}, + }, + { + {5, 5, 5, 6}, + {6, 5, 6, 5}, + {5, 6, 5, 6}, + {6, 5, 6, 5}, + }, + { + {5, 6, 5, 6}, + {6, 5, 6, 6}, + {5, 6, 5, 6}, + {6, 6, 6, 6}, + }, + { + {5, 6, 6, 6}, + {6, 6, 6, 6}, + {6, 6, 6, 6}, + {6, 6, 6, 6}, + }, + { + {6, 6, 6, 6}, + {6, 6, 7, 6}, + {6, 6, 6, 6}, + {7, 6, 7, 6}, + }, + { + {6, 6, 6, 7}, + {7, 6, 7, 6}, + {6, 7, 6, 7}, + {7, 6, 7, 6}, + }, + { + {6, 7, 6, 7}, + {7, 6, 7, 7}, + {6, 7, 6, 7}, + {7, 7, 7, 7}, + }, + { + {6, 7, 7, 7}, + {7, 7, 7, 7}, + {7, 7, 7, 7}, + {7, 7, 7, 7}, + }, + { + {7, 7, 7, 7}, + {7, 7, 8, 7}, + {7, 7, 7, 7}, + {8, 7, 8, 7}, + }, + { + {7, 7, 7, 8}, + {8, 7, 8, 7}, + {7, 8, 7, 8}, + {8, 7, 8, 7}, + }, + { + {7, 8, 7, 8}, + {8, 7, 8, 8}, + {7, 8, 7, 8}, + {8, 8, 8, 8}, + }, + { + {7, 8, 8, 8}, + {8, 8, 8, 8}, + {8, 8, 7, 8}, + {8, 8, 8, 8}, + }, + { + {8, 8, 8, 8}, + {8, 8, 9, 8}, + {8, 8, 8, 8}, + {9, 8, 8, 8}, + }, + { + {8, 8, 8, 9}, + {9, 8, 9, 8}, + {8, 9, 8, 8}, + {9, 8, 9, 8}, + }, + { + {8, 9, 8, 9}, + {9, 8, 9, 9}, + {8, 9, 8, 9}, + {9, 9, 9, 8}, + }, + { + {8, 9, 9, 9}, + {9, 9, 9, 9}, + {9, 9, 8, 9}, + {9, 9, 9, 9}, + }, + { + {9, 9, 9, 9}, + {9, 9, 10, 9}, + {9, 9, 9, 9}, + {10, 9, 9, 9}, + }, + { + {9, 9, 9, 10}, + {10, 9, 10, 9}, + {9, 10, 9, 9}, + {10, 9, 10, 9}, + }, + { + {9, 10, 9, 10}, + {10, 9, 10, 10}, + {9, 10, 9, 10}, + {10, 10, 10, 9}, + }, + { + {9, 10, 10, 10}, + {10, 10, 10, 10}, + {10, 10, 9, 10}, + {10, 10, 10, 10}, + }, + { + {10, 10, 10, 10}, + {10, 10, 11, 10}, + {10, 10, 10, 10}, + {11, 10, 10, 10}, + }, + { + {10, 10, 10, 11}, + {11, 10, 11, 10}, + {10, 11, 10, 10}, + 
{11, 10, 11, 10}, + }, + { + {10, 11, 10, 11}, + {11, 10, 11, 11}, + {10, 11, 10, 11}, + {11, 11, 11, 10}, + }, + { + {10, 11, 11, 11}, + {11, 11, 11, 11}, + {11, 11, 10, 11}, + {11, 11, 11, 11}, + }, + { + {11, 11, 11, 11}, + {11, 11, 12, 11}, + {11, 11, 11, 11}, + {12, 11, 11, 11}, + }, + { + {11, 11, 11, 12}, + {12, 11, 12, 11}, + {11, 12, 11, 11}, + {12, 11, 12, 11}, + }, + { + {11, 12, 11, 12}, + {12, 11, 12, 12}, + {11, 12, 11, 12}, + {12, 12, 12, 11}, + }, + { + {11, 12, 11, 12}, + {12, 12, 12, 12}, + {12, 12, 11, 12}, + {12, 12, 12, 12}, + }, + { + {12, 12, 12, 12}, + {12, 12, 12, 12}, + {12, 12, 12, 12}, + {13, 12, 12, 12}, + }, + { + {12, 12, 12, 12}, + {13, 12, 13, 12}, + {12, 13, 12, 12}, + {13, 12, 13, 12}, + }, + { + {12, 13, 12, 13}, + {13, 12, 13, 12}, + {12, 13, 12, 13}, + {13, 13, 13, 12}, + }, + { + {12, 13, 12, 13}, + {13, 13, 13, 13}, + {13, 13, 12, 13}, + {13, 13, 13, 13}, + }, + { + {13, 13, 13, 13}, + {13, 13, 13, 13}, + {13, 13, 13, 13}, + {14, 13, 13, 13}, + }, + { + {13, 13, 13, 13}, + {14, 13, 14, 13}, + {13, 14, 13, 13}, + {14, 13, 14, 13}, + }, + { + {13, 14, 13, 14}, + {14, 13, 14, 13}, + {13, 14, 13, 14}, + {14, 14, 14, 13}, + }, + { + {13, 14, 13, 14}, + {14, 14, 14, 14}, + {14, 14, 13, 14}, + {14, 14, 14, 14}, + }, + { + {14, 14, 14, 14}, + {14, 14, 14, 14}, + {14, 14, 14, 14}, + {15, 14, 14, 14}, + }, + { + {14, 14, 14, 14}, + {15, 14, 15, 14}, + {14, 15, 14, 14}, + {15, 14, 15, 14}, + }, + { + {14, 15, 14, 15}, + {15, 14, 15, 14}, + {14, 15, 14, 15}, + {15, 15, 15, 14}, + }, + { + {14, 15, 14, 15}, + {15, 15, 15, 15}, + {15, 15, 14, 15}, + {15, 15, 15, 15}, + }, + { + {15, 15, 15, 15}, + {15, 15, 15, 15}, + {15, 15, 15, 15}, + {16, 15, 15, 15}, + }, + { + {15, 15, 15, 15}, + {16, 15, 16, 15}, + {15, 16, 15, 15}, + {16, 15, 16, 15}, + }, + { + {15, 16, 15, 16}, + {16, 15, 16, 15}, + {15, 16, 15, 16}, + {16, 16, 16, 15}, + }, + { + {15, 16, 15, 16}, + {16, 16, 16, 16}, + {16, 16, 15, 16}, + {16, 16, 16, 16}, + }, + { + {16, 16, 16, 
16}, + {16, 16, 16, 16}, + {16, 16, 16, 16}, + {17, 16, 16, 16}, + }, + { + {16, 16, 16, 16}, + {17, 16, 17, 16}, + {16, 17, 16, 16}, + {17, 16, 17, 16}, + }, + { + {16, 17, 16, 17}, + {17, 16, 17, 16}, + {16, 17, 16, 17}, + {17, 17, 17, 16}, + }, + { + {16, 17, 16, 17}, + {17, 17, 17, 17}, + {17, 17, 16, 17}, + {17, 17, 17, 17}, + }, + { + {17, 17, 17, 17}, + {17, 17, 17, 17}, + {17, 17, 17, 17}, + {18, 17, 17, 17}, + }, + { + {17, 17, 17, 17}, + {18, 17, 18, 17}, + {17, 18, 17, 17}, + {18, 17, 18, 17}, + }, + { + {17, 18, 17, 18}, + {18, 17, 18, 17}, + {17, 18, 17, 18}, + {18, 18, 18, 17}, + }, + { + {17, 18, 17, 18}, + {18, 18, 18, 18}, + {18, 18, 17, 18}, + {18, 18, 18, 18}, + }, + { + {18, 18, 18, 18}, + {18, 18, 18, 18}, + {18, 18, 18, 18}, + {19, 18, 18, 18}, + }, + { + {18, 18, 18, 18}, + {19, 18, 19, 18}, + {18, 19, 18, 18}, + {19, 18, 19, 18}, + }, + { + {18, 19, 18, 19}, + {19, 18, 19, 18}, + {18, 19, 18, 19}, + {19, 19, 19, 18}, + }, + { + {18, 19, 18, 19}, + {19, 19, 19, 19}, + {19, 19, 18, 19}, + {19, 19, 19, 19}, + }, + { + {19, 19, 19, 19}, + {19, 19, 19, 19}, + {19, 19, 19, 19}, + {20, 19, 19, 19}, + }, + { + {19, 19, 19, 19}, + {20, 19, 20, 19}, + {19, 20, 19, 19}, + {20, 19, 20, 19}, + }, + { + {19, 20, 19, 20}, + {20, 19, 20, 19}, + {19, 20, 19, 20}, + {20, 20, 20, 19}, + }, + { + {19, 20, 19, 20}, + {20, 20, 20, 20}, + {19, 20, 19, 20}, + {20, 20, 20, 20}, + }, + { + {20, 20, 20, 20}, + {20, 20, 20, 20}, + {20, 20, 20, 20}, + {20, 20, 20, 20}, + }, + { + {20, 20, 20, 20}, + {21, 20, 21, 20}, + {20, 20, 20, 20}, + {21, 20, 21, 20}, + }, + { + {20, 21, 20, 21}, + {21, 20, 21, 20}, + {20, 21, 20, 21}, + {21, 20, 21, 20}, + }, + { + {20, 21, 20, 21}, + {21, 21, 21, 21}, + {20, 21, 20, 21}, + {21, 21, 21, 21}, + }, + { + {21, 21, 21, 21}, + {21, 21, 21, 21}, + {21, 21, 21, 21}, + {21, 21, 21, 21}, + }, + { + {21, 21, 21, 21}, + {22, 21, 22, 21}, + {21, 21, 21, 21}, + {22, 21, 22, 21}, + }, + { + {21, 22, 21, 22}, + {22, 21, 22, 21}, + {21, 22, 21, 
22}, + {22, 21, 22, 21}, + }, + { + {21, 22, 21, 22}, + {22, 22, 22, 22}, + {21, 22, 21, 22}, + {22, 22, 22, 22}, + }, + { + {22, 22, 22, 22}, + {22, 22, 22, 22}, + {22, 22, 22, 22}, + {22, 22, 22, 22}, + }, + { + {22, 22, 22, 22}, + {23, 22, 23, 22}, + {22, 22, 22, 22}, + {23, 22, 23, 22}, + }, + { + {22, 23, 22, 23}, + {23, 22, 23, 22}, + {22, 23, 22, 23}, + {23, 22, 23, 22}, + }, + { + {22, 23, 22, 23}, + {23, 23, 23, 23}, + {22, 23, 22, 23}, + {23, 23, 23, 23}, + }, + { + {23, 23, 23, 23}, + {23, 23, 23, 23}, + {23, 23, 23, 23}, + {23, 23, 23, 23}, + }, + { + {23, 23, 23, 23}, + {24, 23, 24, 23}, + {23, 23, 23, 23}, + {24, 23, 24, 23}, + }, + { + {23, 24, 23, 24}, + {24, 23, 24, 23}, + {23, 24, 23, 24}, + {24, 23, 24, 23}, + }, + { + {23, 24, 23, 24}, + {24, 23, 24, 24}, + {23, 24, 23, 24}, + {24, 24, 24, 24}, + }, + { + {23, 24, 24, 24}, + {24, 24, 24, 24}, + {24, 24, 24, 24}, + {24, 24, 24, 24}, + }, + { + {24, 24, 24, 24}, + {24, 24, 25, 24}, + {24, 24, 24, 24}, + {25, 24, 25, 24}, + }, + { + {24, 24, 24, 25}, + {25, 24, 25, 24}, + {24, 25, 24, 25}, + {25, 24, 25, 24}, + }, + { + {24, 25, 24, 25}, + {25, 24, 25, 25}, + {24, 25, 24, 25}, + {25, 25, 25, 25}, + }, + { + {24, 25, 25, 25}, + {25, 25, 25, 25}, + {25, 25, 25, 25}, + {25, 25, 25, 25}, + }, + { + {25, 25, 25, 25}, + {25, 25, 26, 25}, + {25, 25, 25, 25}, + {26, 25, 26, 25}, + }, + { + {25, 25, 25, 26}, + {26, 25, 26, 25}, + {25, 26, 25, 26}, + {26, 25, 26, 25}, + }, + { + {25, 26, 25, 26}, + {26, 25, 26, 26}, + {25, 26, 25, 26}, + {26, 26, 26, 26}, + }, + { + {25, 26, 26, 26}, + {26, 26, 26, 26}, + {26, 26, 26, 26}, + {26, 26, 26, 26}, + }, + { + {26, 26, 26, 26}, + {26, 26, 27, 26}, + {26, 26, 26, 26}, + {27, 26, 27, 26}, + }, + { + {26, 26, 26, 27}, + {27, 26, 27, 26}, + {26, 27, 26, 27}, + {27, 26, 27, 26}, + }, + { + {26, 27, 26, 27}, + {27, 26, 27, 27}, + {26, 27, 26, 27}, + {27, 27, 27, 27}, + }, + { + {26, 27, 27, 27}, + {27, 27, 27, 27}, + {27, 27, 27, 27}, + {27, 27, 27, 27}, + }, + { + {27, 
27, 27, 27}, + {27, 27, 28, 27}, + {27, 27, 27, 27}, + {28, 27, 28, 27}, + }, + { + {27, 27, 27, 28}, + {28, 27, 28, 27}, + {27, 28, 27, 28}, + {28, 27, 28, 27}, + }, + { + {27, 28, 27, 28}, + {28, 27, 28, 28}, + {27, 28, 27, 28}, + {28, 28, 28, 27}, + }, + { + {27, 28, 28, 28}, + {28, 28, 28, 28}, + {28, 28, 27, 28}, + {28, 28, 28, 28}, + }, + { + {28, 28, 28, 28}, + {28, 28, 29, 28}, + {28, 28, 28, 28}, + {29, 28, 28, 28}, + }, + { + {28, 28, 28, 29}, + {29, 28, 29, 28}, + {28, 29, 28, 28}, + {29, 28, 29, 28}, + }, + { + {28, 29, 28, 29}, + {29, 28, 29, 29}, + {28, 29, 28, 29}, + {29, 29, 29, 28}, + }, + { + {28, 29, 29, 29}, + {29, 29, 29, 29}, + {29, 29, 28, 29}, + {29, 29, 29, 29}, + }, + { + {29, 29, 29, 29}, + {29, 29, 30, 29}, + {29, 29, 29, 29}, + {30, 29, 29, 29}, + }, + { + {29, 29, 29, 30}, + {30, 29, 30, 29}, + {29, 30, 29, 29}, + {30, 29, 30, 29}, + }, + { + {29, 30, 29, 30}, + {30, 29, 30, 30}, + {29, 30, 29, 30}, + {30, 30, 30, 29}, + }, + { + {29, 30, 30, 30}, + {30, 30, 30, 30}, + {30, 30, 29, 30}, + {30, 30, 30, 30}, + }, + { + {30, 30, 30, 30}, + {30, 30, 31, 30}, + {30, 30, 30, 30}, + {31, 30, 30, 30}, + }, + { + {30, 30, 30, 31}, + {31, 30, 31, 30}, + {30, 31, 30, 30}, + {31, 30, 31, 30}, + }, + { + {30, 31, 30, 31}, + {31, 30, 31, 31}, + {30, 31, 30, 31}, + {31, 31, 31, 30}, + }, + { + {30, 31, 31, 31}, + {31, 31, 31, 31}, + {31, 31, 30, 31}, + {31, 31, 31, 31}, + }, + { + {31, 31, 31, 31}, + {31, 31, 32, 31}, + {31, 31, 31, 31}, + {32, 31, 31, 31}, + }, + { + {31, 31, 31, 32}, + {32, 31, 32, 31}, + {31, 32, 31, 31}, + {32, 31, 32, 31}, + }, + { + {31, 32, 31, 32}, + {32, 31, 32, 32}, + {31, 32, 31, 32}, + {32, 32, 32, 31}, + }, + { + {31, 32, 32, 32}, + {32, 32, 32, 32}, + {32, 32, 31, 32}, + {32, 32, 32, 32}, + }, + { + {32, 32, 32, 32}, + {32, 32, 33, 32}, + {32, 32, 32, 32}, + {33, 32, 32, 32}, + }, + { + {32, 32, 32, 33}, + {33, 32, 33, 32}, + {32, 33, 32, 32}, + {33, 32, 33, 32}, + }, + { + {32, 33, 32, 33}, + {33, 32, 33, 33}, + {32, 
33, 32, 33}, + {33, 33, 33, 32}, + }, + { + {32, 33, 33, 33}, + {33, 33, 33, 33}, + {33, 33, 32, 33}, + {33, 33, 33, 33}, + }, + { + {33, 33, 33, 33}, + {33, 33, 34, 33}, + {33, 33, 33, 33}, + {34, 33, 33, 33}, + }, + { + {33, 33, 33, 34}, + {34, 33, 34, 33}, + {33, 34, 33, 33}, + {34, 33, 34, 33}, + }, + { + {33, 34, 33, 34}, + {34, 33, 34, 34}, + {33, 34, 33, 34}, + {34, 34, 34, 33}, + }, + { + {33, 34, 34, 34}, + {34, 34, 34, 34}, + {34, 34, 33, 34}, + {34, 34, 34, 34}, + }, + { + {34, 34, 34, 34}, + {34, 34, 35, 34}, + {34, 34, 34, 34}, + {35, 34, 34, 34}, + }, + { + {34, 34, 34, 35}, + {35, 34, 35, 34}, + {34, 35, 34, 34}, + {35, 34, 35, 34}, + }, + { + {34, 35, 34, 35}, + {35, 34, 35, 35}, + {34, 35, 34, 35}, + {35, 35, 35, 34}, + }, + { + {34, 35, 35, 35}, + {35, 35, 35, 35}, + {35, 35, 34, 35}, + {35, 35, 35, 35}, + }, + { + {35, 35, 35, 35}, + {35, 35, 36, 35}, + {35, 35, 35, 35}, + {36, 35, 35, 35}, + }, + { + {35, 35, 35, 36}, + {36, 35, 36, 35}, + {35, 36, 35, 35}, + {36, 35, 36, 35}, + }, + { + {35, 36, 35, 36}, + {36, 35, 36, 35}, + {35, 36, 35, 36}, + {36, 36, 36, 35}, + }, + { + {35, 36, 35, 36}, + {36, 36, 36, 36}, + {36, 36, 35, 36}, + {36, 36, 36, 36}, + }, + { + {36, 36, 36, 36}, + {36, 36, 36, 36}, + {36, 36, 36, 36}, + {37, 36, 36, 36}, + }, + { + {36, 36, 36, 36}, + {37, 36, 37, 36}, + {36, 37, 36, 36}, + {37, 36, 37, 36}, + }, + { + {36, 37, 36, 37}, + {37, 36, 37, 36}, + {36, 37, 36, 37}, + {37, 37, 37, 36}, + }, + { + {36, 37, 36, 37}, + {37, 37, 37, 37}, + {37, 37, 36, 37}, + {37, 37, 37, 37}, + }, + { + {37, 37, 37, 37}, + {37, 37, 37, 37}, + {37, 37, 37, 37}, + {38, 37, 37, 37}, + }, + { + {37, 37, 37, 37}, + {38, 37, 38, 37}, + {37, 38, 37, 37}, + {38, 37, 38, 37}, + }, + { + {37, 38, 37, 38}, + {38, 37, 38, 37}, + {37, 38, 37, 38}, + {38, 38, 38, 37}, + }, + { + {37, 38, 37, 38}, + {38, 38, 38, 38}, + {38, 38, 37, 38}, + {38, 38, 38, 38}, + }, + { + {38, 38, 38, 38}, + {38, 38, 38, 38}, + {38, 38, 38, 38}, + {39, 38, 38, 38}, + }, + { 
+ {38, 38, 38, 38}, + {39, 38, 39, 38}, + {38, 39, 38, 38}, + {39, 38, 39, 38}, + }, + { + {38, 39, 38, 39}, + {39, 38, 39, 38}, + {38, 39, 38, 39}, + {39, 39, 39, 38}, + }, + { + {38, 39, 38, 39}, + {39, 39, 39, 39}, + {39, 39, 38, 39}, + {39, 39, 39, 39}, + }, + { + {39, 39, 39, 39}, + {39, 39, 39, 39}, + {39, 39, 39, 39}, + {40, 39, 39, 39}, + }, + { + {39, 39, 39, 39}, + {40, 39, 40, 39}, + {39, 40, 39, 39}, + {40, 39, 40, 39}, + }, + { + {39, 40, 39, 40}, + {40, 39, 40, 39}, + {39, 40, 39, 40}, + {40, 39, 40, 39}, + }, + { + {39, 40, 39, 40}, + {40, 40, 40, 40}, + {39, 40, 39, 40}, + {40, 40, 40, 40}, + }, + { + {40, 40, 40, 40}, + {40, 40, 40, 40}, + {40, 40, 40, 40}, + {40, 40, 40, 40}, + }, + { + {40, 40, 40, 40}, + {41, 40, 41, 40}, + {40, 40, 40, 40}, + {41, 40, 41, 40}, + }, + { + {40, 41, 40, 41}, + {41, 40, 41, 40}, + {40, 41, 40, 41}, + {41, 40, 41, 40}, + }, + { + {40, 41, 40, 41}, + {41, 41, 41, 41}, + {40, 41, 40, 41}, + {41, 41, 41, 41}, + }, + { + {41, 41, 41, 41}, + {41, 41, 41, 41}, + {41, 41, 41, 41}, + {41, 41, 41, 41}, + }, + { + {41, 41, 41, 41}, + {42, 41, 42, 41}, + {41, 41, 41, 41}, + {42, 41, 42, 41}, + }, + { + {41, 42, 41, 42}, + {42, 41, 42, 41}, + {41, 42, 41, 42}, + {42, 41, 42, 41}, + }, + { + {41, 42, 41, 42}, + {42, 42, 42, 42}, + {41, 42, 41, 42}, + {42, 42, 42, 42}, + }, + { + {42, 42, 42, 42}, + {42, 42, 42, 42}, + {42, 42, 42, 42}, + {42, 42, 42, 42}, + }, + { + {42, 42, 42, 42}, + {43, 42, 43, 42}, + {42, 42, 42, 42}, + {43, 42, 43, 42}, + }, + { + {42, 43, 42, 43}, + {43, 42, 43, 42}, + {42, 43, 42, 43}, + {43, 42, 43, 42}, + }, + { + {42, 43, 42, 43}, + {43, 43, 43, 43}, + {42, 43, 42, 43}, + {43, 43, 43, 43}, + }, + { + {43, 43, 43, 43}, + {43, 43, 43, 43}, + {43, 43, 43, 43}, + {43, 43, 43, 43}, + }, + { + {43, 43, 43, 43}, + {44, 43, 44, 43}, + {43, 43, 43, 43}, + {44, 43, 44, 43}, + }, + { + {43, 43, 43, 44}, + {44, 43, 44, 43}, + {43, 44, 43, 44}, + {44, 43, 44, 43}, + }, + { + {43, 44, 43, 44}, + {44, 43, 44, 44}, + 
{43, 44, 43, 44}, + {44, 44, 44, 44}, + }, + { + {43, 44, 44, 44}, + {44, 44, 44, 44}, + {44, 44, 44, 44}, + {44, 44, 44, 44}, + }, + { + {44, 44, 44, 44}, + {44, 44, 45, 44}, + {44, 44, 44, 44}, + {45, 44, 45, 44}, + }, + { + {44, 44, 44, 45}, + {45, 44, 45, 44}, + {44, 45, 44, 45}, + {45, 44, 45, 44}, + }, + { + {44, 45, 44, 45}, + {45, 44, 45, 45}, + {44, 45, 44, 45}, + {45, 45, 45, 45}, + }, + { + {44, 45, 45, 45}, + {45, 45, 45, 45}, + {45, 45, 45, 45}, + {45, 45, 45, 45}, + }, + { + {45, 45, 45, 45}, + {45, 45, 46, 45}, + {45, 45, 45, 45}, + {46, 45, 46, 45}, + }, + { + {45, 45, 45, 46}, + {46, 45, 46, 45}, + {45, 46, 45, 46}, + {46, 45, 46, 45}, + }, + { + {45, 46, 45, 46}, + {46, 45, 46, 46}, + {45, 46, 45, 46}, + {46, 46, 46, 46}, + }, + { + {45, 46, 46, 46}, + {46, 46, 46, 46}, + {46, 46, 46, 46}, + {46, 46, 46, 46}, + }, + { + {46, 46, 46, 46}, + {46, 46, 47, 46}, + {46, 46, 46, 46}, + {47, 46, 47, 46}, + }, + { + {46, 46, 46, 47}, + {47, 46, 47, 46}, + {46, 47, 46, 47}, + {47, 46, 47, 46}, + }, + { + {46, 47, 46, 47}, + {47, 46, 47, 47}, + {46, 47, 46, 47}, + {47, 47, 47, 47}, + }, + { + {46, 47, 47, 47}, + {47, 47, 47, 47}, + {47, 47, 47, 47}, + {47, 47, 47, 47}, + }, + { + {47, 47, 47, 47}, + {47, 47, 48, 47}, + {47, 47, 47, 47}, + {48, 47, 48, 47}, + }, + { + {47, 47, 47, 48}, + {48, 47, 48, 47}, + {47, 48, 47, 48}, + {48, 47, 48, 47}, + }, + { + {47, 48, 47, 48}, + {48, 47, 48, 48}, + {47, 48, 47, 48}, + {48, 48, 48, 48}, + }, + { + {47, 48, 48, 48}, + {48, 48, 48, 48}, + {48, 48, 48, 48}, + {48, 48, 48, 48}, + }, + { + {48, 48, 48, 48}, + {48, 48, 49, 48}, + {48, 48, 48, 48}, + {49, 48, 49, 48}, + }, + { + {48, 48, 48, 49}, + {49, 48, 49, 48}, + {48, 49, 48, 49}, + {49, 48, 49, 48}, + }, + { + {48, 49, 48, 49}, + {49, 48, 49, 49}, + {48, 49, 48, 49}, + {49, 49, 49, 49}, + }, + { + {48, 49, 49, 49}, + {49, 49, 49, 49}, + {49, 49, 49, 49}, + {49, 49, 49, 49}, + }, + { + {49, 49, 49, 49}, + {49, 49, 50, 49}, + {49, 49, 49, 49}, + {50, 49, 50, 49}, + 
}, + { + {49, 49, 49, 50}, + {50, 49, 50, 49}, + {49, 50, 49, 50}, + {50, 49, 50, 49}, + }, + { + {49, 50, 49, 50}, + {50, 49, 50, 50}, + {49, 50, 49, 50}, + {50, 50, 50, 50}, + }, + { + {49, 50, 50, 50}, + {50, 50, 50, 50}, + {50, 50, 50, 50}, + {50, 50, 50, 50}, + }, + { + {50, 50, 50, 50}, + {50, 50, 51, 50}, + {50, 50, 50, 50}, + {51, 50, 51, 50}, + }, + { + {50, 50, 50, 51}, + {51, 50, 51, 50}, + {50, 51, 50, 51}, + {51, 50, 51, 50}, + }, + { + {50, 51, 50, 51}, + {51, 50, 51, 51}, + {50, 51, 50, 51}, + {51, 51, 51, 51}, + }, + { + {50, 51, 51, 51}, + {51, 51, 51, 51}, + {51, 51, 51, 51}, + {51, 51, 51, 51}, + }, + { + {51, 51, 51, 51}, + {51, 51, 52, 51}, + {51, 51, 51, 51}, + {52, 51, 52, 51}, + }, + { + {51, 51, 51, 52}, + {52, 51, 52, 51}, + {51, 52, 51, 51}, + {52, 51, 52, 51}, + }, + { + {51, 52, 51, 52}, + {52, 51, 52, 52}, + {51, 52, 51, 52}, + {52, 52, 52, 51}, + }, + { + {51, 52, 52, 52}, + {52, 52, 52, 52}, + {52, 52, 51, 52}, + {52, 52, 52, 52}, + }, + { + {52, 52, 52, 52}, + {52, 52, 53, 52}, + {52, 52, 52, 52}, + {53, 52, 52, 52}, + }, + { + {52, 52, 52, 53}, + {53, 52, 53, 52}, + {52, 53, 52, 52}, + {53, 52, 53, 52}, + }, + { + {52, 53, 52, 53}, + {53, 52, 53, 53}, + {52, 53, 52, 53}, + {53, 53, 53, 52}, + }, + { + {52, 53, 53, 53}, + {53, 53, 53, 53}, + {53, 53, 52, 53}, + {53, 53, 53, 53}, + }, + { + {53, 53, 53, 53}, + {53, 53, 54, 53}, + {53, 53, 53, 53}, + {54, 53, 53, 53}, + }, + { + {53, 53, 53, 54}, + {54, 53, 54, 53}, + {53, 54, 53, 53}, + {54, 53, 54, 53}, + }, + { + {53, 54, 53, 54}, + {54, 53, 54, 54}, + {53, 54, 53, 54}, + {54, 54, 54, 53}, + }, + { + {53, 54, 54, 54}, + {54, 54, 54, 54}, + {54, 54, 53, 54}, + {54, 54, 54, 54}, + }, + { + {54, 54, 54, 54}, + {54, 54, 55, 54}, + {54, 54, 54, 54}, + {55, 54, 54, 54}, + }, + { + {54, 54, 54, 55}, + {55, 54, 55, 54}, + {54, 55, 54, 54}, + {55, 54, 55, 54}, + }, + { + {54, 55, 54, 55}, + {55, 54, 55, 55}, + {54, 55, 54, 55}, + {55, 55, 55, 54}, + }, + { + {54, 55, 55, 55}, + {55, 55, 55, 
55}, + {55, 55, 54, 55}, + {55, 55, 55, 55}, + }, + { + {55, 55, 55, 55}, + {55, 55, 56, 55}, + {55, 55, 55, 55}, + {56, 55, 55, 55}, + }, + { + {55, 55, 55, 55}, + {56, 55, 56, 55}, + {55, 56, 55, 55}, + {56, 55, 56, 55}, + }, + { + {55, 56, 55, 56}, + {56, 55, 56, 55}, + {55, 56, 55, 56}, + {56, 56, 56, 55}, + }, + { + {55, 56, 55, 56}, + {56, 56, 56, 56}, + {56, 56, 55, 56}, + {56, 56, 56, 56}, + }, + { + {56, 56, 56, 56}, + {56, 56, 56, 56}, + {56, 56, 56, 56}, + {57, 56, 56, 56}, + }, + { + {56, 56, 56, 56}, + {57, 56, 57, 56}, + {56, 57, 56, 56}, + {57, 56, 57, 56}, + }, + { + {56, 57, 56, 57}, + {57, 56, 57, 56}, + {56, 57, 56, 57}, + {57, 57, 57, 56}, + }, + { + {56, 57, 56, 57}, + {57, 57, 57, 57}, + {57, 57, 56, 57}, + {57, 57, 57, 57}, + }, + { + {57, 57, 57, 57}, + {57, 57, 57, 57}, + {57, 57, 57, 57}, + {58, 57, 57, 57}, + }, + { + {57, 57, 57, 57}, + {58, 57, 58, 57}, + {57, 58, 57, 57}, + {58, 57, 58, 57}, + }, + { + {57, 58, 57, 58}, + {58, 57, 58, 57}, + {57, 58, 57, 58}, + {58, 58, 58, 57}, + }, + { + {57, 58, 57, 58}, + {58, 58, 58, 58}, + {58, 58, 57, 58}, + {58, 58, 58, 58}, + }, + { + {58, 58, 58, 58}, + {58, 58, 58, 58}, + {58, 58, 58, 58}, + {59, 58, 58, 58}, + }, + { + {58, 58, 58, 58}, + {59, 58, 59, 58}, + {58, 59, 58, 58}, + {59, 58, 59, 58}, + }, + { + {58, 59, 58, 59}, + {59, 58, 59, 58}, + {58, 59, 58, 59}, + {59, 59, 59, 58}, + }, + { + {58, 59, 58, 59}, + {59, 59, 59, 59}, + {59, 59, 58, 59}, + {59, 59, 59, 59}, + }, + { + {59, 59, 59, 59}, + {59, 59, 59, 59}, + {59, 59, 59, 59}, + {60, 59, 59, 59}, + }, + { + {59, 59, 59, 59}, + {60, 59, 60, 59}, + {59, 59, 59, 59}, + {60, 59, 60, 59}, + }, + { + {59, 60, 59, 60}, + {60, 59, 60, 59}, + {59, 60, 59, 60}, + {60, 59, 60, 59}, + }, + { + {59, 60, 59, 60}, + {60, 60, 60, 60}, + {59, 60, 59, 60}, + {60, 60, 60, 60}, + }, + { + {60, 60, 60, 60}, + {60, 60, 60, 60}, + {60, 60, 60, 60}, + {60, 60, 60, 60}, + }, + { + {60, 60, 60, 60}, + {61, 60, 61, 60}, + {60, 60, 60, 60}, + {61, 60, 61, 
60}, + }, + { + {60, 61, 60, 61}, + {61, 60, 61, 60}, + {60, 61, 60, 61}, + {61, 60, 61, 60}, + }, + { + {60, 61, 60, 61}, + {61, 61, 61, 61}, + {60, 61, 60, 61}, + {61, 61, 61, 61}, + }, + { + {61, 61, 61, 61}, + {61, 61, 61, 61}, + {61, 61, 61, 61}, + {61, 61, 61, 61}, + }, + { + {61, 61, 61, 61}, + {62, 61, 62, 61}, + {61, 61, 61, 61}, + {62, 61, 62, 61}, + }, + { + {61, 62, 61, 62}, + {62, 61, 62, 61}, + {61, 62, 61, 62}, + {62, 61, 62, 61}, + }, + { + {61, 62, 61, 62}, + {62, 62, 62, 62}, + {61, 62, 61, 62}, + {62, 62, 62, 62}, + }, + { + {62, 62, 62, 62}, + {62, 62, 62, 62}, + {62, 62, 62, 62}, + {62, 62, 62, 62}, + }, + { + {62, 62, 62, 62}, + {63, 62, 63, 62}, + {62, 62, 62, 62}, + {63, 62, 63, 62}, + }, + { + {62, 63, 62, 63}, + {63, 62, 63, 62}, + {62, 63, 62, 63}, + {63, 62, 63, 62}, + }, + { + {62, 63, 62, 63}, + {63, 63, 63, 63}, + {62, 63, 62, 63}, + {63, 63, 63, 63}, + }, + { + {63, 63, 63, 63}, + {63, 63, 63, 63}, + {63, 63, 63, 63}, + {63, 63, 63, 63}, + }, +}; + +static const uint8_t dither_rb2x2[256][2][2] = +{ + { + {0, 0}, + {0, 0}, + }, + { + {0, 0}, + {1, 0}, + }, + { + {0, 0}, + {1, 0}, + }, + { + {0, 1}, + {1, 0}, + }, + { + {0, 1}, + {1, 0}, + }, + { + {0, 1}, + {1, 1}, + }, + { + {0, 1}, + {1, 1}, + }, + { + {1, 1}, + {1, 1}, + }, + { + {1, 1}, + {1, 1}, + }, + { + {1, 1}, + {2, 1}, + }, + { + {1, 1}, + {2, 1}, + }, + { + {1, 2}, + {2, 1}, + }, + { + {1, 2}, + {2, 1}, + }, + { + {1, 2}, + {2, 2}, + }, + { + {1, 2}, + {2, 2}, + }, + { + {2, 2}, + {2, 2}, + }, + { + {2, 2}, + {2, 2}, + }, + { + {2, 2}, + {2, 2}, + }, + { + {2, 2}, + {3, 2}, + }, + { + {2, 2}, + {3, 2}, + }, + { + {2, 3}, + {3, 2}, + }, + { + {2, 3}, + {3, 2}, + }, + { + {2, 3}, + {3, 3}, + }, + { + {2, 3}, + {3, 3}, + }, + { + {3, 3}, + {3, 3}, + }, + { + {3, 3}, + {3, 3}, + }, + { + {3, 3}, + {4, 3}, + }, + { + {3, 3}, + {4, 3}, + }, + { + {3, 4}, + {4, 3}, + }, + { + {3, 4}, + {4, 3}, + }, + { + {3, 4}, + {4, 4}, + }, + { + {3, 4}, + {4, 4}, + }, + { + {4, 4}, + {4, 4}, + 
}, + { + {4, 4}, + {4, 4}, + }, + { + {4, 4}, + {5, 4}, + }, + { + {4, 4}, + {5, 4}, + }, + { + {4, 5}, + {5, 4}, + }, + { + {4, 5}, + {5, 4}, + }, + { + {4, 5}, + {5, 5}, + }, + { + {4, 5}, + {5, 5}, + }, + { + {5, 5}, + {5, 5}, + }, + { + {5, 5}, + {5, 5}, + }, + { + {5, 5}, + {6, 5}, + }, + { + {5, 5}, + {6, 5}, + }, + { + {5, 6}, + {6, 5}, + }, + { + {5, 6}, + {6, 5}, + }, + { + {5, 6}, + {6, 6}, + }, + { + {5, 6}, + {6, 6}, + }, + { + {5, 6}, + {6, 6}, + }, + { + {6, 6}, + {6, 6}, + }, + { + {6, 6}, + {6, 6}, + }, + { + {6, 6}, + {7, 6}, + }, + { + {6, 6}, + {7, 6}, + }, + { + {6, 7}, + {7, 6}, + }, + { + {6, 7}, + {7, 6}, + }, + { + {6, 7}, + {7, 7}, + }, + { + {6, 7}, + {7, 7}, + }, + { + {7, 7}, + {7, 7}, + }, + { + {7, 7}, + {7, 7}, + }, + { + {7, 7}, + {8, 7}, + }, + { + {7, 7}, + {8, 7}, + }, + { + {7, 8}, + {8, 7}, + }, + { + {7, 8}, + {8, 7}, + }, + { + {7, 8}, + {8, 8}, + }, + { + {7, 8}, + {8, 8}, + }, + { + {8, 8}, + {8, 8}, + }, + { + {8, 8}, + {8, 8}, + }, + { + {8, 8}, + {9, 8}, + }, + { + {8, 8}, + {9, 8}, + }, + { + {8, 9}, + {9, 8}, + }, + { + {8, 9}, + {9, 8}, + }, + { + {8, 9}, + {9, 9}, + }, + { + {8, 9}, + {9, 9}, + }, + { + {9, 9}, + {9, 9}, + }, + { + {9, 9}, + {9, 9}, + }, + { + {9, 9}, + {10, 9}, + }, + { + {9, 9}, + {10, 9}, + }, + { + {9, 10}, + {10, 9}, + }, + { + {9, 10}, + {10, 9}, + }, + { + {9, 10}, + {10, 10}, + }, + { + {9, 10}, + {10, 10}, + }, + { + {9, 10}, + {10, 10}, + }, + { + {10, 10}, + {10, 10}, + }, + { + {10, 10}, + {10, 10}, + }, + { + {10, 10}, + {11, 10}, + }, + { + {10, 10}, + {11, 10}, + }, + { + {10, 11}, + {11, 10}, + }, + { + {10, 11}, + {11, 10}, + }, + { + {10, 11}, + {11, 11}, + }, + { + {10, 11}, + {11, 11}, + }, + { + {11, 11}, + {11, 11}, + }, + { + {11, 11}, + {11, 11}, + }, + { + {11, 11}, + {12, 11}, + }, + { + {11, 11}, + {12, 11}, + }, + { + {11, 12}, + {12, 11}, + }, + { + {11, 12}, + {12, 11}, + }, + { + {11, 12}, + {12, 12}, + }, + { + {11, 12}, + {12, 12}, + }, + { + {12, 12}, + {12, 12}, + }, 
+ { + {12, 12}, + {12, 12}, + }, + { + {12, 12}, + {13, 12}, + }, + { + {12, 12}, + {13, 12}, + }, + { + {12, 13}, + {13, 12}, + }, + { + {12, 13}, + {13, 12}, + }, + { + {12, 13}, + {13, 13}, + }, + { + {12, 13}, + {13, 13}, + }, + { + {13, 13}, + {13, 13}, + }, + { + {13, 13}, + {13, 13}, + }, + { + {13, 13}, + {14, 13}, + }, + { + {13, 13}, + {14, 13}, + }, + { + {13, 14}, + {14, 13}, + }, + { + {13, 14}, + {14, 13}, + }, + { + {13, 14}, + {14, 13}, + }, + { + {13, 14}, + {14, 14}, + }, + { + {13, 14}, + {14, 14}, + }, + { + {14, 14}, + {14, 14}, + }, + { + {14, 14}, + {14, 14}, + }, + { + {14, 14}, + {15, 14}, + }, + { + {14, 14}, + {15, 14}, + }, + { + {14, 15}, + {15, 14}, + }, + { + {14, 15}, + {15, 14}, + }, + { + {14, 15}, + {15, 15}, + }, + { + {14, 15}, + {15, 15}, + }, + { + {15, 15}, + {15, 15}, + }, + { + {15, 15}, + {15, 15}, + }, + { + {15, 15}, + {16, 15}, + }, + { + {15, 15}, + {16, 15}, + }, + { + {15, 16}, + {16, 15}, + }, + { + {15, 16}, + {16, 15}, + }, + { + {15, 16}, + {16, 16}, + }, + { + {15, 16}, + {16, 16}, + }, + { + {16, 16}, + {16, 16}, + }, + { + {16, 16}, + {16, 16}, + }, + { + {16, 16}, + {17, 16}, + }, + { + {16, 16}, + {17, 16}, + }, + { + {16, 17}, + {17, 16}, + }, + { + {16, 17}, + {17, 16}, + }, + { + {16, 17}, + {17, 17}, + }, + { + {16, 17}, + {17, 17}, + }, + { + {17, 17}, + {17, 17}, + }, + { + {17, 17}, + {17, 17}, + }, + { + {17, 17}, + {18, 17}, + }, + { + {17, 17}, + {18, 17}, + }, + { + {17, 18}, + {18, 17}, + }, + { + {17, 18}, + {18, 17}, + }, + { + {17, 18}, + {18, 18}, + }, + { + {17, 18}, + {18, 18}, + }, + { + {18, 18}, + {18, 18}, + }, + { + {18, 18}, + {18, 18}, + }, + { + {18, 18}, + {19, 18}, + }, + { + {18, 18}, + {19, 18}, + }, + { + {18, 19}, + {19, 18}, + }, + { + {18, 19}, + {19, 18}, + }, + { + {18, 19}, + {19, 19}, + }, + { + {18, 19}, + {19, 19}, + }, + { + {19, 19}, + {19, 19}, + }, + { + {19, 19}, + {19, 19}, + }, + { + {19, 19}, + {20, 19}, + }, + { + {19, 19}, + {20, 19}, + }, + { + {19, 20}, + 
{20, 19}, + }, + { + {19, 20}, + {20, 19}, + }, + { + {19, 20}, + {20, 19}, + }, + { + {19, 20}, + {20, 20}, + }, + { + {19, 20}, + {20, 20}, + }, + { + {20, 20}, + {20, 20}, + }, + { + {20, 20}, + {20, 20}, + }, + { + {20, 20}, + {21, 20}, + }, + { + {20, 20}, + {21, 20}, + }, + { + {20, 21}, + {21, 20}, + }, + { + {20, 21}, + {21, 20}, + }, + { + {20, 21}, + {21, 21}, + }, + { + {20, 21}, + {21, 21}, + }, + { + {21, 21}, + {21, 21}, + }, + { + {21, 21}, + {21, 21}, + }, + { + {21, 21}, + {22, 21}, + }, + { + {21, 21}, + {22, 21}, + }, + { + {21, 22}, + {22, 21}, + }, + { + {21, 22}, + {22, 21}, + }, + { + {21, 22}, + {22, 22}, + }, + { + {21, 22}, + {22, 22}, + }, + { + {22, 22}, + {22, 22}, + }, + { + {22, 22}, + {22, 22}, + }, + { + {22, 22}, + {23, 22}, + }, + { + {22, 22}, + {23, 22}, + }, + { + {22, 23}, + {23, 22}, + }, + { + {22, 23}, + {23, 22}, + }, + { + {22, 23}, + {23, 23}, + }, + { + {22, 23}, + {23, 23}, + }, + { + {23, 23}, + {23, 23}, + }, + { + {23, 23}, + {23, 23}, + }, + { + {23, 23}, + {24, 23}, + }, + { + {23, 23}, + {24, 23}, + }, + { + {23, 23}, + {24, 23}, + }, + { + {23, 24}, + {24, 23}, + }, + { + {23, 24}, + {24, 23}, + }, + { + {23, 24}, + {24, 24}, + }, + { + {23, 24}, + {24, 24}, + }, + { + {24, 24}, + {24, 24}, + }, + { + {24, 24}, + {24, 24}, + }, + { + {24, 24}, + {25, 24}, + }, + { + {24, 24}, + {25, 24}, + }, + { + {24, 25}, + {25, 24}, + }, + { + {24, 25}, + {25, 24}, + }, + { + {24, 25}, + {25, 25}, + }, + { + {24, 25}, + {25, 25}, + }, + { + {25, 25}, + {25, 25}, + }, + { + {25, 25}, + {25, 25}, + }, + { + {25, 25}, + {26, 25}, + }, + { + {25, 25}, + {26, 25}, + }, + { + {25, 26}, + {26, 25}, + }, + { + {25, 26}, + {26, 25}, + }, + { + {25, 26}, + {26, 26}, + }, + { + {25, 26}, + {26, 26}, + }, + { + {26, 26}, + {26, 26}, + }, + { + {26, 26}, + {26, 26}, + }, + { + {26, 26}, + {27, 26}, + }, + { + {26, 26}, + {27, 26}, + }, + { + {26, 27}, + {27, 26}, + }, + { + {26, 27}, + {27, 26}, + }, + { + {26, 27}, + {27, 27}, + }, + { 
+ {26, 27}, + {27, 27}, + }, + { + {27, 27}, + {27, 27}, + }, + { + {27, 27}, + {27, 27}, + }, + { + {27, 27}, + {28, 27}, + }, + { + {27, 27}, + {28, 27}, + }, + { + {27, 27}, + {28, 27}, + }, + { + {27, 28}, + {28, 27}, + }, + { + {27, 28}, + {28, 27}, + }, + { + {27, 28}, + {28, 28}, + }, + { + {27, 28}, + {28, 28}, + }, + { + {28, 28}, + {28, 28}, + }, + { + {28, 28}, + {28, 28}, + }, + { + {28, 28}, + {29, 28}, + }, + { + {28, 28}, + {29, 28}, + }, + { + {28, 29}, + {29, 28}, + }, + { + {28, 29}, + {29, 28}, + }, + { + {28, 29}, + {29, 29}, + }, + { + {28, 29}, + {29, 29}, + }, + { + {29, 29}, + {29, 29}, + }, + { + {29, 29}, + {29, 29}, + }, + { + {29, 29}, + {30, 29}, + }, + { + {29, 29}, + {30, 29}, + }, + { + {29, 30}, + {30, 29}, + }, + { + {29, 30}, + {30, 29}, + }, + { + {29, 30}, + {30, 30}, + }, + { + {29, 30}, + {30, 30}, + }, + { + {30, 30}, + {30, 30}, + }, + { + {30, 30}, + {30, 30}, + }, + { + {30, 30}, + {31, 30}, + }, + { + {30, 30}, + {31, 30}, + }, + { + {30, 31}, + {31, 30}, + }, + { + {30, 31}, + {31, 30}, + }, + { + {30, 31}, + {31, 31}, + }, + { + {30, 31}, + {31, 31}, + }, + { + {31, 31}, + {31, 31}, + }, + { + {31, 31}, + {31, 31}, + }, +}; + +static const uint8_t dither_g2x2[256][2][2] = +{ + { + {0, 0}, + {0, 0}, + }, + { + {0, 0}, + {1, 0}, + }, + { + {0, 1}, + {1, 0}, + }, + { + {0, 1}, + {1, 1}, + }, + { + {1, 1}, + {1, 1}, + }, + { + {1, 1}, + {2, 1}, + }, + { + {1, 2}, + {2, 1}, + }, + { + {1, 2}, + {2, 2}, + }, + { + {2, 2}, + {2, 2}, + }, + { + {2, 2}, + {3, 2}, + }, + { + {2, 3}, + {3, 2}, + }, + { + {2, 3}, + {3, 3}, + }, + { + {3, 3}, + {3, 3}, + }, + { + {3, 3}, + {4, 3}, + }, + { + {3, 4}, + {4, 3}, + }, + { + {3, 4}, + {4, 4}, + }, + { + {4, 4}, + {4, 4}, + }, + { + {4, 4}, + {5, 4}, + }, + { + {4, 5}, + {5, 4}, + }, + { + {4, 5}, + {5, 5}, + }, + { + {5, 5}, + {5, 5}, + }, + { + {5, 5}, + {6, 5}, + }, + { + {5, 6}, + {6, 5}, + }, + { + {5, 6}, + {6, 6}, + }, + { + {6, 6}, + {6, 6}, + }, + { + {6, 6}, + {7, 6}, + }, + { + 
{6, 7}, + {7, 6}, + }, + { + {6, 7}, + {7, 7}, + }, + { + {7, 7}, + {7, 7}, + }, + { + {7, 7}, + {8, 7}, + }, + { + {7, 8}, + {8, 7}, + }, + { + {7, 8}, + {8, 8}, + }, + { + {8, 8}, + {8, 8}, + }, + { + {8, 8}, + {9, 8}, + }, + { + {8, 9}, + {9, 8}, + }, + { + {8, 9}, + {9, 9}, + }, + { + {9, 9}, + {9, 9}, + }, + { + {9, 9}, + {10, 9}, + }, + { + {9, 10}, + {10, 9}, + }, + { + {9, 10}, + {10, 10}, + }, + { + {10, 10}, + {10, 10}, + }, + { + {10, 10}, + {11, 10}, + }, + { + {10, 11}, + {11, 10}, + }, + { + {10, 11}, + {11, 11}, + }, + { + {11, 11}, + {11, 11}, + }, + { + {11, 11}, + {12, 11}, + }, + { + {11, 12}, + {12, 11}, + }, + { + {11, 12}, + {12, 12}, + }, + { + {11, 12}, + {12, 12}, + }, + { + {12, 12}, + {12, 12}, + }, + { + {12, 12}, + {13, 12}, + }, + { + {12, 13}, + {13, 12}, + }, + { + {12, 13}, + {13, 13}, + }, + { + {13, 13}, + {13, 13}, + }, + { + {13, 13}, + {14, 13}, + }, + { + {13, 14}, + {14, 13}, + }, + { + {13, 14}, + {14, 14}, + }, + { + {14, 14}, + {14, 14}, + }, + { + {14, 14}, + {15, 14}, + }, + { + {14, 15}, + {15, 14}, + }, + { + {14, 15}, + {15, 15}, + }, + { + {15, 15}, + {15, 15}, + }, + { + {15, 15}, + {16, 15}, + }, + { + {15, 16}, + {16, 15}, + }, + { + {15, 16}, + {16, 16}, + }, + { + {16, 16}, + {16, 16}, + }, + { + {16, 16}, + {17, 16}, + }, + { + {16, 17}, + {17, 16}, + }, + { + {16, 17}, + {17, 17}, + }, + { + {17, 17}, + {17, 17}, + }, + { + {17, 17}, + {18, 17}, + }, + { + {17, 18}, + {18, 17}, + }, + { + {17, 18}, + {18, 18}, + }, + { + {18, 18}, + {18, 18}, + }, + { + {18, 18}, + {19, 18}, + }, + { + {18, 19}, + {19, 18}, + }, + { + {18, 19}, + {19, 19}, + }, + { + {19, 19}, + {19, 19}, + }, + { + {19, 19}, + {20, 19}, + }, + { + {19, 20}, + {20, 19}, + }, + { + {19, 20}, + {20, 20}, + }, + { + {20, 20}, + {20, 20}, + }, + { + {20, 20}, + {21, 20}, + }, + { + {20, 21}, + {21, 20}, + }, + { + {20, 21}, + {21, 21}, + }, + { + {21, 21}, + {21, 21}, + }, + { + {21, 21}, + {22, 21}, + }, + { + {21, 22}, + {22, 21}, + }, + { + 
{21, 22}, + {22, 22}, + }, + { + {22, 22}, + {22, 22}, + }, + { + {22, 22}, + {23, 22}, + }, + { + {22, 23}, + {23, 22}, + }, + { + {22, 23}, + {23, 23}, + }, + { + {23, 23}, + {23, 23}, + }, + { + {23, 23}, + {24, 23}, + }, + { + {23, 24}, + {24, 23}, + }, + { + {23, 24}, + {24, 24}, + }, + { + {24, 24}, + {24, 24}, + }, + { + {24, 24}, + {25, 24}, + }, + { + {24, 25}, + {25, 24}, + }, + { + {24, 25}, + {25, 25}, + }, + { + {25, 25}, + {25, 25}, + }, + { + {25, 25}, + {26, 25}, + }, + { + {25, 26}, + {26, 25}, + }, + { + {25, 26}, + {26, 26}, + }, + { + {26, 26}, + {26, 26}, + }, + { + {26, 26}, + {27, 26}, + }, + { + {26, 27}, + {27, 26}, + }, + { + {26, 27}, + {27, 27}, + }, + { + {27, 27}, + {27, 27}, + }, + { + {27, 27}, + {28, 27}, + }, + { + {27, 28}, + {28, 27}, + }, + { + {27, 28}, + {28, 28}, + }, + { + {28, 28}, + {28, 28}, + }, + { + {28, 28}, + {29, 28}, + }, + { + {28, 29}, + {29, 28}, + }, + { + {28, 29}, + {29, 29}, + }, + { + {29, 29}, + {29, 29}, + }, + { + {29, 29}, + {30, 29}, + }, + { + {29, 30}, + {30, 29}, + }, + { + {29, 30}, + {30, 30}, + }, + { + {30, 30}, + {30, 30}, + }, + { + {30, 30}, + {31, 30}, + }, + { + {30, 31}, + {31, 30}, + }, + { + {30, 31}, + {31, 31}, + }, + { + {31, 31}, + {31, 31}, + }, + { + {31, 31}, + {32, 31}, + }, + { + {31, 32}, + {32, 31}, + }, + { + {31, 32}, + {32, 32}, + }, + { + {32, 32}, + {32, 32}, + }, + { + {32, 32}, + {33, 32}, + }, + { + {32, 33}, + {33, 32}, + }, + { + {32, 33}, + {33, 33}, + }, + { + {33, 33}, + {33, 33}, + }, + { + {33, 33}, + {34, 33}, + }, + { + {33, 34}, + {34, 33}, + }, + { + {33, 34}, + {34, 34}, + }, + { + {34, 34}, + {34, 34}, + }, + { + {34, 34}, + {35, 34}, + }, + { + {34, 35}, + {35, 34}, + }, + { + {34, 35}, + {35, 35}, + }, + { + {35, 35}, + {35, 35}, + }, + { + {35, 35}, + {36, 35}, + }, + { + {35, 36}, + {36, 35}, + }, + { + {35, 36}, + {36, 35}, + }, + { + {35, 36}, + {36, 36}, + }, + { + {36, 36}, + {36, 36}, + }, + { + {36, 36}, + {37, 36}, + }, + { + {36, 37}, + {37, 
36}, + }, + { + {36, 37}, + {37, 37}, + }, + { + {37, 37}, + {37, 37}, + }, + { + {37, 37}, + {38, 37}, + }, + { + {37, 38}, + {38, 37}, + }, + { + {37, 38}, + {38, 38}, + }, + { + {38, 38}, + {38, 38}, + }, + { + {38, 38}, + {39, 38}, + }, + { + {38, 39}, + {39, 38}, + }, + { + {38, 39}, + {39, 39}, + }, + { + {39, 39}, + {39, 39}, + }, + { + {39, 39}, + {40, 39}, + }, + { + {39, 40}, + {40, 39}, + }, + { + {39, 40}, + {40, 40}, + }, + { + {40, 40}, + {40, 40}, + }, + { + {40, 40}, + {41, 40}, + }, + { + {40, 41}, + {41, 40}, + }, + { + {40, 41}, + {41, 41}, + }, + { + {41, 41}, + {41, 41}, + }, + { + {41, 41}, + {42, 41}, + }, + { + {41, 42}, + {42, 41}, + }, + { + {41, 42}, + {42, 42}, + }, + { + {42, 42}, + {42, 42}, + }, + { + {42, 42}, + {43, 42}, + }, + { + {42, 43}, + {43, 42}, + }, + { + {42, 43}, + {43, 43}, + }, + { + {43, 43}, + {43, 43}, + }, + { + {43, 43}, + {44, 43}, + }, + { + {43, 44}, + {44, 43}, + }, + { + {43, 44}, + {44, 44}, + }, + { + {44, 44}, + {44, 44}, + }, + { + {44, 44}, + {45, 44}, + }, + { + {44, 45}, + {45, 44}, + }, + { + {44, 45}, + {45, 45}, + }, + { + {45, 45}, + {45, 45}, + }, + { + {45, 45}, + {46, 45}, + }, + { + {45, 46}, + {46, 45}, + }, + { + {45, 46}, + {46, 46}, + }, + { + {46, 46}, + {46, 46}, + }, + { + {46, 46}, + {47, 46}, + }, + { + {46, 47}, + {47, 46}, + }, + { + {46, 47}, + {47, 47}, + }, + { + {47, 47}, + {47, 47}, + }, + { + {47, 47}, + {48, 47}, + }, + { + {47, 48}, + {48, 47}, + }, + { + {47, 48}, + {48, 48}, + }, + { + {48, 48}, + {48, 48}, + }, + { + {48, 48}, + {49, 48}, + }, + { + {48, 49}, + {49, 48}, + }, + { + {48, 49}, + {49, 49}, + }, + { + {49, 49}, + {49, 49}, + }, + { + {49, 49}, + {50, 49}, + }, + { + {49, 50}, + {50, 49}, + }, + { + {49, 50}, + {50, 50}, + }, + { + {50, 50}, + {50, 50}, + }, + { + {50, 50}, + {51, 50}, + }, + { + {50, 51}, + {51, 50}, + }, + { + {50, 51}, + {51, 51}, + }, + { + {51, 51}, + {51, 51}, + }, + { + {51, 51}, + {52, 51}, + }, + { + {51, 52}, + {52, 51}, + }, + { + 
{51, 52}, + {52, 52}, + }, + { + {52, 52}, + {52, 52}, + }, + { + {52, 52}, + {53, 52}, + }, + { + {52, 53}, + {53, 52}, + }, + { + {52, 53}, + {53, 53}, + }, + { + {53, 53}, + {53, 53}, + }, + { + {53, 53}, + {54, 53}, + }, + { + {53, 54}, + {54, 53}, + }, + { + {53, 54}, + {54, 54}, + }, + { + {54, 54}, + {54, 54}, + }, + { + {54, 54}, + {55, 54}, + }, + { + {54, 55}, + {55, 54}, + }, + { + {54, 55}, + {55, 55}, + }, + { + {55, 55}, + {55, 55}, + }, + { + {55, 55}, + {56, 55}, + }, + { + {55, 55}, + {56, 55}, + }, + { + {55, 56}, + {56, 55}, + }, + { + {55, 56}, + {56, 56}, + }, + { + {56, 56}, + {56, 56}, + }, + { + {56, 56}, + {57, 56}, + }, + { + {56, 57}, + {57, 56}, + }, + { + {56, 57}, + {57, 57}, + }, + { + {57, 57}, + {57, 57}, + }, + { + {57, 57}, + {58, 57}, + }, + { + {57, 58}, + {58, 57}, + }, + { + {57, 58}, + {58, 58}, + }, + { + {58, 58}, + {58, 58}, + }, + { + {58, 58}, + {59, 58}, + }, + { + {58, 59}, + {59, 58}, + }, + { + {58, 59}, + {59, 59}, + }, + { + {59, 59}, + {59, 59}, + }, + { + {59, 59}, + {60, 59}, + }, + { + {59, 60}, + {60, 59}, + }, + { + {59, 60}, + {60, 60}, + }, + { + {60, 60}, + {60, 60}, + }, + { + {60, 60}, + {61, 60}, + }, + { + {60, 61}, + {61, 60}, + }, + { + {60, 61}, + {61, 61}, + }, + { + {61, 61}, + {61, 61}, + }, + { + {61, 61}, + {62, 61}, + }, + { + {61, 62}, + {62, 61}, + }, + { + {61, 62}, + {62, 62}, + }, + { + {62, 62}, + {62, 62}, + }, + { + {62, 62}, + {63, 62}, + }, + { + {62, 63}, + {63, 62}, + }, + { + {62, 63}, + {63, 63}, + }, + { + {63, 63}, + {63, 63}, + }, +}; + diff --git a/pcem/vid_voodoo_fb.cpp b/pcem/vid_voodoo_fb.cpp new file mode 100644 index 00000000..cbf8c172 --- /dev/null +++ b/pcem/vid_voodoo_fb.cpp @@ -0,0 +1,447 @@ +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "thread.h" +#include "video.h" +#include "vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_dither.h" +#include "vid_voodoo_regs.h" +#include "vid_voodoo_render.h" 
+#include "vid_voodoo_fb.h" + +uint16_t voodoo_fb_readw(uint32_t addr, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + int x, y; + uint32_t read_addr; + uint16_t temp; + + if (voodoo->type >= VOODOO_BANSHEE) + { + x = addr & 0xffe; + y = (addr >> 12) & 0x3ff; + } + else + { + x = addr & 0x7fe; + y = (addr >> 11) & 0x3ff; + } + + if (SLI_ENABLED) + { + voodoo_set_t *set = voodoo->set; + + if (y & 1) + voodoo = set->voodoos[1]; + else + voodoo = set->voodoos[0]; + + y >>= 1; + } + + if (voodoo->col_tiled) + read_addr = voodoo->fb_read_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + read_addr = voodoo->fb_read_offset + x + (y * voodoo->row_width); + + if (read_addr > voodoo->fb_mask) + return 0xffff; + + temp = *(uint16_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]); + +// pclog("voodoo_fb_readw : %08X %08X %i %i %08X %08X %08x:%08x %i\n", addr, temp, x, y, read_addr, *(uint32_t *)(&voodoo->fb_mem[4]), cs, pc, fb_reads++); + return temp; +} +uint32_t voodoo_fb_readl(uint32_t addr, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + int x, y; + uint32_t read_addr; + uint32_t temp; + + if (voodoo->type >= VOODOO_BANSHEE) + { + x = addr & 0xffe; + y = (addr >> 12) & 0x3ff; + } + else + { + x = addr & 0x7fe; + y = (addr >> 11) & 0x3ff; + } + + if (SLI_ENABLED) + { + voodoo_set_t *set = voodoo->set; + + if (y & 1) + voodoo = set->voodoos[1]; + else + voodoo = set->voodoos[0]; + + y >>= 1; + } + + if (voodoo->col_tiled) + read_addr = voodoo->fb_read_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + read_addr = voodoo->fb_read_offset + x + (y * voodoo->row_width); + + if (read_addr > voodoo->fb_mask) + return 0xffffffff; + + temp = *(uint32_t *)(&voodoo->fb_mem[read_addr & voodoo->fb_mask]); + +// pclog("voodoo_fb_readl : %08X %08x %08X x=%i y=%i %08X %08X %08x:%08x %i ro=%08x rw=%i\n", addr, read_addr, temp, x, y, read_addr, *(uint32_t *)(&voodoo->fb_mem[4]), cs, pc, 
fb_reads++, voodoo->fb_read_offset, voodoo->row_width); + return temp; +} + +static inline uint16_t do_dither(voodoo_params_t *params, rgba8_t col, int x, int y) +{ + int r, g, b; + + if (dither) + { + if (dither2x2) + { + r = dither_rb2x2[col.r][y & 1][x & 1]; + g = dither_g2x2[col.g][y & 1][x & 1]; + b = dither_rb2x2[col.b][y & 1][x & 1]; + } + else + { + r = dither_rb[col.r][y & 3][x & 3]; + g = dither_g[col.g][y & 3][x & 3]; + b = dither_rb[col.b][y & 3][x & 3]; + } + } + else + { + r = col.r >> 3; + g = col.g >> 2; + b = col.b >> 3; + } + + return b | (g << 5) | (r << 11); +} + +void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + voodoo_params_t *params = &voodoo->params; + int x, y; + uint32_t write_addr, write_addr_aux; + rgba8_t colour_data; + uint16_t depth_data; + uint8_t alpha_data; + int write_mask = 0; + + colour_data.r = colour_data.g = colour_data.b = colour_data.a = 0; + + depth_data = voodoo->params.zaColor & 0xffff; + alpha_data = voodoo->params.zaColor >> 24; + +// while (!RB_EMPTY) +// thread_reset_event(voodoo->not_full_event); + +// pclog("voodoo_fb_writew : %08X %04X\n", addr, val); + + + switch (voodoo->lfbMode & LFB_FORMAT_MASK) + { + case LFB_FORMAT_RGB565: + colour_data = rgb565[val]; + alpha_data = 0xff; + write_mask = LFB_WRITE_COLOUR; + break; + case LFB_FORMAT_RGB555: + colour_data = argb1555[val]; + alpha_data = 0xff; + write_mask = LFB_WRITE_COLOUR; + break; + case LFB_FORMAT_ARGB1555: + colour_data = argb1555[val]; + alpha_data = colour_data.a; + write_mask = LFB_WRITE_COLOUR; + break; + case LFB_FORMAT_DEPTH: + depth_data = val; + write_mask = LFB_WRITE_DEPTH; + break; + + default: + fatal("voodoo_fb_writew : bad LFB format %08X\n", voodoo->lfbMode); + } + + if (voodoo->type >= VOODOO_BANSHEE) + { + x = addr & 0xffe; + y = (addr >> 12) & 0x3ff; + } + else + { + x = addr & 0x7fe; + y = (addr >> 11) & 0x3ff; + } + + if (SLI_ENABLED) + { + if ((!(voodoo->initEnable & 
INITENABLE_SLI_MASTER_SLAVE) && (y & 1)) || + ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(y & 1))) + return; + y >>= 1; + } + + + if (voodoo->fb_write_offset == voodoo->params.front_offset && y < 2048) + voodoo->dirty_line[y] = 1; + + if (voodoo->col_tiled) + write_addr = voodoo->fb_write_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width); + if (voodoo->aux_tiled) + write_addr_aux = voodoo->params.aux_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width); + +// pclog("fb_writew %08x %i %i %i %08x\n", addr, x, y, voodoo->row_width, write_addr); + + if (voodoo->lfbMode & 0x100) + { + { + rgba8_t write_data = colour_data; + uint16_t new_depth = depth_data; + + if (params->fbzMode & FBZ_DEPTH_ENABLE) + { + uint16_t old_depth = *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]); + + DEPTH_TEST(new_depth); + } + + if ((params->fbzMode & FBZ_CHROMAKEY) && + write_data.r == params->chromaKey_r && + write_data.g == params->chromaKey_g && + write_data.b == params->chromaKey_b) + goto skip_pixel; + + if (params->fogMode & FOG_ENABLE) + { + int32_t z = new_depth << 12; + int64_t w_depth = (int64_t)(int32_t)new_depth; + int32_t ia = alpha_data << 12; + + APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth); + } + + if (params->alphaMode & 1) + ALPHA_TEST(alpha_data); + + if (params->alphaMode & (1 << 4)) + { + uint16_t dat = *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]); + int dest_r, dest_g, dest_b, dest_a; + + dest_r = (dat >> 8) & 0xf8; + dest_g = (dat >> 3) & 0xfc; + dest_b = (dat << 3) & 0xf8; + dest_r |= (dest_r >> 5); + dest_g |= (dest_g >> 6); + dest_b |= (dest_b >> 5); + dest_a = 0xff; + + ALPHA_BLEND(write_data.r, write_data.g, write_data.b, alpha_data); + } + + if (params->fbzMode & 
FBZ_RGB_WMASK) + *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, write_data, x >> 1, y); + if (params->fbzMode & FBZ_DEPTH_WMASK) + *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = new_depth; + +skip_pixel: + x = x; + } + } + else + { + if (write_mask & LFB_WRITE_COLOUR) + *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, colour_data, x >> 1, y); + if (write_mask & LFB_WRITE_DEPTH) + *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = depth_data; + } +} + + +void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + voodoo_params_t *params = &voodoo->params; + int x, y; + uint32_t write_addr, write_addr_aux; + rgba8_t colour_data[2]; + uint16_t depth_data[2]; + uint8_t alpha_data[2]; + int write_mask = 0, count = 1; + + depth_data[0] = depth_data[1] = voodoo->params.zaColor & 0xffff; + alpha_data[0] = alpha_data[1] = voodoo->params.zaColor >> 24; +// while (!RB_EMPTY) +// thread_reset_event(voodoo->not_full_event); + +// pclog("voodoo_fb_writel : %08X %08X\n", addr, val); + + switch (voodoo->lfbMode & LFB_FORMAT_MASK) + { + case LFB_FORMAT_RGB565: + colour_data[0] = rgb565[val & 0xffff]; + colour_data[1] = rgb565[val >> 16]; + write_mask = LFB_WRITE_COLOUR; + count = 2; + break; + case LFB_FORMAT_RGB555: + colour_data[0] = argb1555[val & 0xffff]; + colour_data[1] = argb1555[val >> 16]; + write_mask = LFB_WRITE_COLOUR; + count = 2; + break; + case LFB_FORMAT_ARGB1555: + colour_data[0] = argb1555[val & 0xffff]; + alpha_data[0] = colour_data[0].a; + colour_data[1] = argb1555[val >> 16]; + alpha_data[1] = colour_data[1].a; + write_mask = LFB_WRITE_COLOUR; + count = 2; + break; + + case LFB_FORMAT_ARGB8888: + colour_data[0].b = val & 0xff; + colour_data[0].g = (val >> 8) & 0xff; + colour_data[0].r = (val >> 16) & 0xff; + alpha_data[0] = (val >> 24) & 0xff; + write_mask = LFB_WRITE_COLOUR; + addr >>= 1; + break; + + 
case LFB_FORMAT_DEPTH: + depth_data[0] = val; + depth_data[1] = val >> 16; + write_mask = LFB_WRITE_DEPTH; + count = 2; + break; + + default: + fatal("voodoo_fb_writel : bad LFB format %08X\n", voodoo->lfbMode); + } + + if (voodoo->type >= VOODOO_BANSHEE) + { + x = addr & 0xffe; + y = (addr >> 12) & 0x3ff; + } + else + { + x = addr & 0x7fe; + y = (addr >> 11) & 0x3ff; + } + + if (SLI_ENABLED) + { + if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (y & 1)) || + ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(y & 1))) + return; + y >>= 1; + } + + if (voodoo->fb_write_offset == voodoo->params.front_offset && y < 2048) + voodoo->dirty_line[y] = 1; + + if (voodoo->col_tiled) + write_addr = voodoo->fb_write_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + write_addr = voodoo->fb_write_offset + x + (y * voodoo->row_width); + if (voodoo->aux_tiled) + write_addr_aux = voodoo->params.aux_offset + (x & 127) + (x >> 7) * 128*32 + (y & 31) * 128 + (y >> 5) * voodoo->row_width; + else + write_addr_aux = voodoo->params.aux_offset + x + (y * voodoo->row_width); + +// pclog("fb_writel %08x x=%i y=%i rw=%i %08x wo=%08x\n", addr, x, y, voodoo->row_width, write_addr, voodoo->fb_write_offset); + + if (voodoo->lfbMode & 0x100) + { + int c; + + for (c = 0; c < count; c++) + { + rgba8_t write_data = colour_data[c]; + uint16_t new_depth = depth_data[c]; + + if (params->fbzMode & FBZ_DEPTH_ENABLE) + { + uint16_t old_depth = *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]); + + DEPTH_TEST(new_depth); + } + + if ((params->fbzMode & FBZ_CHROMAKEY) && + write_data.r == params->chromaKey_r && + write_data.g == params->chromaKey_g && + write_data.b == params->chromaKey_b) + goto skip_pixel; + + if (params->fogMode & FOG_ENABLE) + { + int32_t z = new_depth << 12; + int64_t w_depth = new_depth; + int32_t ia = alpha_data[c] << 12; + + APPLY_FOG(write_data.r, write_data.g, write_data.b, z, ia, w_depth); + } + + if 
(params->alphaMode & 1) + ALPHA_TEST(alpha_data[c]); + + if (params->alphaMode & (1 << 4)) + { + uint16_t dat = *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]); + int dest_r, dest_g, dest_b, dest_a; + + dest_r = (dat >> 8) & 0xf8; + dest_g = (dat >> 3) & 0xfc; + dest_b = (dat << 3) & 0xf8; + dest_r |= (dest_r >> 5); + dest_g |= (dest_g >> 6); + dest_b |= (dest_b >> 5); + dest_a = 0xff; + + ALPHA_BLEND(write_data.r, write_data.g, write_data.b, alpha_data[c]); + } + + if (params->fbzMode & FBZ_RGB_WMASK) + *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, write_data, (x >> 1) + c, y); + if (params->fbzMode & FBZ_DEPTH_WMASK) + *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = new_depth; + +skip_pixel: + write_addr += 2; + write_addr_aux += 2; + } + } + else + { + int c; + + for (c = 0; c < count; c++) + { + if (write_mask & LFB_WRITE_COLOUR) + *(uint16_t *)(&voodoo->fb_mem[write_addr & voodoo->fb_mask]) = do_dither(&voodoo->params, colour_data[c], (x >> 1) + c, y); + if (write_mask & LFB_WRITE_DEPTH) + *(uint16_t *)(&voodoo->fb_mem[write_addr_aux & voodoo->fb_mask]) = depth_data[c]; + + write_addr += 2; + write_addr_aux += 2; + } + } +} diff --git a/pcem/vid_voodoo_fb.h b/pcem/vid_voodoo_fb.h new file mode 100644 index 00000000..fcb2513b --- /dev/null +++ b/pcem/vid_voodoo_fb.h @@ -0,0 +1,4 @@ +uint16_t voodoo_fb_readw(uint32_t addr, void *p); +uint32_t voodoo_fb_readl(uint32_t addr, void *p); +void voodoo_fb_writew(uint32_t addr, uint16_t val, void *p); +void voodoo_fb_writel(uint32_t addr, uint32_t val, void *p); diff --git a/pcem/vid_voodoo_fifo.cpp b/pcem/vid_voodoo_fifo.cpp new file mode 100644 index 00000000..59861f3f --- /dev/null +++ b/pcem/vid_voodoo_fifo.cpp @@ -0,0 +1,503 @@ +#include +#include +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "thread.h" +#include "video.h" +#include "vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include 
"vid_voodoo_banshee_blitter.h" +#include "vid_voodoo_fb.h" +#include "vid_voodoo_fifo.h" +#include "vid_voodoo_reg.h" +#include "vid_voodoo_regs.h" +#include "vid_voodoo_render.h" +#include "vid_voodoo_texture.h" + +#define WAKE_DELAY (TIMER_USEC * 100) +void voodoo_wake_fifo_thread(voodoo_t *voodoo) +{ + if (!timer_is_enabled(&voodoo->wake_timer)) + { + /*Don't wake FIFO thread immediately - if we do that it will probably + process one word and go back to sleep, requiring it to be woken on + almost every write. Instead, wait a short while so that the CPU + emulation writes more data so we have more batched-up work.*/ + timer_set_delay_u64(&voodoo->wake_timer, WAKE_DELAY); + } +} + +void voodoo_wake_fifo_thread_now(voodoo_t *voodoo) +{ + thread_set_event(voodoo->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ +} + +void voodoo_wake_timer(void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + + thread_set_event(voodoo->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ +} + +void voodoo_queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val) +{ + fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_write_idx & FIFO_MASK]; + + while (FIFO_FULL) + { + thread_reset_event(voodoo->fifo_not_full_event); + if (FIFO_FULL) + { + thread_wait_event(voodoo->fifo_not_full_event, 1); /*Wait for room in ringbuffer*/ + if (FIFO_FULL) + voodoo_wake_fifo_thread_now(voodoo); + } + } + + fifo->val = val; + fifo->addr_type = addr_type; + + voodoo->fifo_write_idx++; + + if (FIFO_ENTRIES > 0xe000) + voodoo_wake_fifo_thread(voodoo); +} + +void voodoo_flush(voodoo_t *voodoo) +{ + voodoo->flush = 1; + while (!FIFO_EMPTY) + { + voodoo_wake_fifo_thread_now(voodoo); + thread_wait_event(voodoo->fifo_not_full_event, 1); + } + voodoo_wait_for_render_thread_idle(voodoo); + voodoo->flush = 0; +} + +void voodoo_wake_fifo_threads(voodoo_set_t *set, voodoo_t *voodoo) +{ + voodoo_wake_fifo_thread(voodoo); + if (SLI_ENABLED && voodoo->type != VOODOO_2 && set->voodoos[0] == 
voodoo) + voodoo_wake_fifo_thread(set->voodoos[1]); +} +/*Wait while voodoo->swap_pending is set, sleeping on wake_fifo_thread between checks. If the swap is still pending while voodoo->flush is set, or the FIFO is full, the swap is performed here and the wait ends.*/ +void voodoo_wait_for_swap_complete(voodoo_t *voodoo) +{ + while (voodoo->swap_pending) + { + thread_wait_event(voodoo->wake_fifo_thread, -1); + thread_reset_event(voodoo->wake_fifo_thread); + + thread_lock_mutex(voodoo->swap_mutex); + if ((voodoo->swap_pending && voodoo->flush) || FIFO_FULL) + { + /*Main thread is waiting for FIFO to empty, so skip vsync wait and just swap*/ + memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line)); + voodoo->front_offset = voodoo->params.front_offset; + if (voodoo->swap_count > 0) + voodoo->swap_count--; + voodoo->swap_pending = 0; + thread_unlock_mutex(voodoo->swap_mutex); + break; + } + else + thread_unlock_mutex(voodoo->swap_mutex); + } +} + +/*Return the next 32-bit command FIFO word, read from fb_mem at cmdfifo_rp (masked with fb_mask), and advance cmdfifo_rp by 4. When cmdfifo_in_sub is clear this first sleeps on wake_fifo_thread until cmdfifo_depth_rd differs from cmdfifo_depth_wr, and increments cmdfifo_depth_rd after the read.*/ +static uint32_t cmdfifo_get(voodoo_t *voodoo) +{ + uint32_t val; + + if (!voodoo->cmdfifo_in_sub) + { + while (voodoo->cmdfifo_depth_rd == voodoo->cmdfifo_depth_wr) + { + thread_wait_event(voodoo->wake_fifo_thread, -1); + thread_reset_event(voodoo->wake_fifo_thread); + } + } + + val = *(uint32_t *)&voodoo->fb_mem[voodoo->cmdfifo_rp & voodoo->fb_mask]; + + if (!voodoo->cmdfifo_in_sub) + voodoo->cmdfifo_depth_rd++; + voodoo->cmdfifo_rp += 4; + +// pclog(" CMDFIFO get %08x\n", val); + return val; +} +/*As cmdfifo_get(), but the word's bit pattern is returned reinterpreted as a float via a union.*/ +static inline float cmdfifo_get_f(voodoo_t *voodoo) +{ + union + { + uint32_t i; + float f; + } tempif; + + tempif.i = cmdfifo_get(voodoo); + return tempif.f; +} +/*Bits of a CMDFIFO packet type 3 header as tested in voodoo_fifo_thread(). The MASK bits select which optional per-vertex values follow sVx/sVy; CMDFIFO3_PC selects colour supplied as one packed ARGB word instead of separate floats.*/ +enum +{ + CMDFIFO3_PC_MASK_RGB = (1 << 10), + CMDFIFO3_PC_MASK_ALPHA = (1 << 11), + CMDFIFO3_PC_MASK_Z = (1 << 12), + CMDFIFO3_PC_MASK_Wb = (1 << 13), + CMDFIFO3_PC_MASK_W0 = (1 << 14), + CMDFIFO3_PC_MASK_S0_T0 = (1 << 15), + CMDFIFO3_PC_MASK_W1 = (1 << 16), + CMDFIFO3_PC_MASK_S1_T1 = (1 << 17), + + CMDFIFO3_PC = (1 << 28) +}; +/*FIFO thread entry point; param is the voodoo_t to service. Never returns: each iteration signals fifo_not_full_event, sleeps on wake_fifo_thread, then with voodoo_busy set drains the write FIFO followed by the command FIFO, accumulating the timer_read() delta of every item into voodoo->time.*/ +void voodoo_fifo_thread(void *param) +{ + voodoo_t *voodoo = (voodoo_t *)param; + + while (1) + { + thread_set_event(voodoo->fifo_not_full_event); + thread_wait_event(voodoo->wake_fifo_thread, -1); + thread_reset_event(voodoo->wake_fifo_thread); 
+ voodoo->voodoo_busy = 1; + while (!FIFO_EMPTY) /*Drain the queued register/framebuffer/texture writes*/ + { + uint64_t start_time = timer_read(); + uint64_t end_time; + fifo_entry_t *fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + + switch (fifo->addr_type & FIFO_TYPE) + { + case FIFO_WRITEL_REG: /*Each case consumes a run of consecutive entries of its own type*/ + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_REG) + { + voodoo_reg_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + case FIFO_WRITEW_FB: /*Framebuffer writes first call voodoo_wait_for_render_thread_idle()*/ + voodoo_wait_for_render_thread_idle(voodoo); + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEW_FB) + { + voodoo_fb_writew(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + case FIFO_WRITEL_FB: + voodoo_wait_for_render_thread_idle(voodoo); + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_FB) + { + voodoo_fb_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + case FIFO_WRITEL_TEX: /*Entries with bit 22 (0x400000) set in addr_type are consumed without being written*/ + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_TEX) + { + if (!(fifo->addr_type & 0x400000)) + voodoo_tex_writel(fifo->addr_type & FIFO_ADDR, fifo->val, voodoo); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + case FIFO_WRITEL_2DREG: + while ((fifo->addr_type & FIFO_TYPE) == FIFO_WRITEL_2DREG) + { + voodoo_2d_reg_writel(voodoo, fifo->addr_type & FIFO_ADDR, fifo->val); + fifo->addr_type = FIFO_INVALID; + voodoo->fifo_read_idx++; + if (FIFO_EMPTY) + break; + fifo = &voodoo->fifo[voodoo->fifo_read_idx & FIFO_MASK]; + } + break; + + default: + fatal("Unknown fifo entry %08x\n", 
fifo->addr_type); + } + + if (FIFO_ENTRIES > 0xe000) /*Signals fifo_not_full_event only while more than 0xe000 entries remain queued (presumably the only time a writer could be blocked - verify against voodoo_queue_command)*/ + thread_set_event(voodoo->fifo_not_full_event); + + end_time = timer_read(); + voodoo->time += end_time - start_time; + } + + while (voodoo->cmdfifo_enabled && (voodoo->cmdfifo_depth_rd != voodoo->cmdfifo_depth_wr || voodoo->cmdfifo_in_sub)) /*Execute command FIFO packets while unread words remain or a subroutine is active*/ + { + uint64_t start_time = timer_read(); + uint64_t end_time; + uint32_t header = cmdfifo_get(voodoo); + uint32_t addr; + uint32_t mask; + int smode; + int num; + int num_verticies; + int v_num; + +// pclog(" CMDFIFO header %08x at %08x\n", header, voodoo->cmdfifo_rp); + + switch (header & 7) /*Low three header bits select the packet type*/ + { + case 0: /*Packet type 0: control; header bits 3-5 select the operation*/ +// pclog("CMDFIFO0\n"); + switch ((header >> 3) & 7) + { + case 0: /*NOP*/ + break; + + case 1: /*JSR*/ +// pclog("JSR %08x\n", (header >> 4) & 0xfffffc); + voodoo->cmdfifo_ret_addr = voodoo->cmdfifo_rp; + voodoo->cmdfifo_rp = (header >> 4) & 0xfffffc; + voodoo->cmdfifo_in_sub = 1; + break; + + case 2: /*RET*/ + voodoo->cmdfifo_rp = voodoo->cmdfifo_ret_addr; + voodoo->cmdfifo_in_sub = 0; + break; + + case 3: /*JMP local frame buffer*/ + voodoo->cmdfifo_rp = (header >> 4) & 0xfffffc; +// pclog("JMP to %08x %04x\n", voodoo->cmdfifo_rp, header); + break; + + default: + fatal("Bad CMDFIFO0 %08x\n", header); + } + break; + + case 1: /*Packet type 1: header bits 16-31 give the word count; each word is written to addr, which advances by 4 per word only when header bit 15 is set*/ + num = header >> 16; + addr = (header & 0x7ff8) >> 1; +// pclog("CMDFIFO1 addr=%08x\n",addr); + while (num--) + { + uint32_t val = cmdfifo_get(voodoo); + if ((addr & (1 << 13)) && voodoo->type >= VOODOO_BANSHEE) + { +// if (voodoo->type != VOODOO_BANSHEE) +// fatal("CMDFIFO1: Not Banshee\n"); +// pclog("CMDFIFO1: write %08x %08x\n", addr, val); + voodoo_2d_reg_writel(voodoo, addr, val); + } + else + { + if ((addr & 0x3ff) == SST_triangleCMD || (addr & 0x3ff) == SST_ftriangleCMD || + (addr & 0x3ff) == SST_fastfillCMD || (addr & 0x3ff) == SST_nopCMD) + voodoo->cmd_written_fifo++; + + if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) + voodoo->cmd_written_fifo++; + voodoo_reg_writel(addr, val, voodoo); + } + + if (header & 
(1 << 15)) + addr += 4; + } + break; + + case 2: /*Packet type 2 (fatal before Banshee): header bits 3-31 form a mask; one word per set bit is written to the 2D registers, with the register offset starting at 8 and advancing by 4 per mask bit*/ + if (voodoo->type < VOODOO_BANSHEE) + fatal("CMDFIFO2: Not Banshee\n"); + mask = (header >> 3); + addr = 8; + while (mask) + { + if (mask & 1) + { + uint32_t val = cmdfifo_get(voodoo); + + voodoo_2d_reg_writel(voodoo, addr, val); + } + + addr += 4; + mask >>= 1; + } + break; + + case 3: /*Packet type 3: vertex list. Each vertex is loaded into verts[3] and submitted through sBeginTriCMD or sDrawTriCMD*/ + num = (header >> 29) & 7; /*NOTE(review): num is assigned but not used in this case*/ + mask = header;//(header >> 10) & 0xff; + smode = (header >> 22) & 0xf; + voodoo_reg_writel(SST_sSetupMode, ((header >> 10) & 0xff) | (smode << 16), voodoo); + num_verticies = (header >> 6) & 0xf; + v_num = 0; + if (((header >> 3) & 7) == 2) /*Command 2 starts with v_num = 1, so its first vertex is sent with sDrawTriCMD rather than sBeginTriCMD*/ + v_num = 1; +// pclog("CMDFIFO3: num=%i verts=%i mask=%02x\n", num, num_verticies, (header >> 10) & 0xff); +// pclog("CMDFIFO3 %02x %i\n", (header >> 10), (header >> 3) & 7); + + while (num_verticies--) + { + voodoo->verts[3].sVx = cmdfifo_get_f(voodoo); + voodoo->verts[3].sVy = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_RGB) + { + if (header & CMDFIFO3_PC) + { + uint32_t val = cmdfifo_get(voodoo); + voodoo->verts[3].sBlue = (float)(val & 0xff); + voodoo->verts[3].sGreen = (float)((val >> 8) & 0xff); + voodoo->verts[3].sRed = (float)((val >> 16) & 0xff); + voodoo->verts[3].sAlpha = (float)((val >> 24) & 0xff); + } + else + { + voodoo->verts[3].sRed = cmdfifo_get_f(voodoo); + voodoo->verts[3].sGreen = cmdfifo_get_f(voodoo); + voodoo->verts[3].sBlue = cmdfifo_get_f(voodoo); + } + } + if ((mask & CMDFIFO3_PC_MASK_ALPHA) && !(header & CMDFIFO3_PC)) + voodoo->verts[3].sAlpha = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_Z) + voodoo->verts[3].sVz = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_Wb) + voodoo->verts[3].sWb = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_W0) + voodoo->verts[3].sW0 = cmdfifo_get_f(voodoo); + if (mask & CMDFIFO3_PC_MASK_S0_T0) + { + voodoo->verts[3].sS0 = cmdfifo_get_f(voodoo); + voodoo->verts[3].sT0 = cmdfifo_get_f(voodoo); + } + if (mask & CMDFIFO3_PC_MASK_W1) + voodoo->verts[3].sW1 = cmdfifo_get_f(voodoo); + 
if (mask & CMDFIFO3_PC_MASK_S1_T1) + { + voodoo->verts[3].sS1 = cmdfifo_get_f(voodoo); + voodoo->verts[3].sT1 = cmdfifo_get_f(voodoo); + } + if (v_num) + voodoo_reg_writel(SST_sDrawTriCMD, 0, voodoo); + else + voodoo_reg_writel(SST_sBeginTriCMD, 0, voodoo); + v_num++; + if (v_num == 3 && ((header >> 3) & 7) == 0) /*Command 0: every third vertex resets v_num so the next one issues sBeginTriCMD*/ + v_num = 0; + } + break; + + case 4: /*Packet type 4: header bits 15-28 mask the registers to write starting at addr (one word per set bit); bits 29-31 count trailing words that are read and discarded*/ + num = (header >> 29) & 7; + mask = (header >> 15) & 0x3fff; + addr = (header & 0x7ff8) >> 1; +// pclog("CMDFIFO4 addr=%08x\n",addr); + while (mask) + { + if (mask & 1) + { + uint32_t val = cmdfifo_get(voodoo); + + if ((addr & (1 << 13)) && voodoo->type >= VOODOO_BANSHEE) + { + if (voodoo->type < VOODOO_BANSHEE) /*NOTE(review): unreachable - the enclosing if already requires type >= VOODOO_BANSHEE; the message also says CMDFIFO1 although this is packet type 4*/ + fatal("CMDFIFO1: Not Banshee\n"); +// pclog("CMDFIFO1: write %08x %08x\n", addr, val); + voodoo_2d_reg_writel(voodoo, addr, val); + } + else + { + if ((addr & 0x3ff) == SST_triangleCMD || (addr & 0x3ff) == SST_ftriangleCMD || + (addr & 0x3ff) == SST_fastfillCMD || (addr & 0x3ff) == SST_nopCMD) + voodoo->cmd_written_fifo++; + + if (voodoo->type >= VOODOO_BANSHEE && (addr & 0x3ff) == SST_swapbufferCMD) + voodoo->cmd_written_fifo++; + voodoo_reg_writel(addr, val, voodoo); + } + } + + addr += 4; + mask >>= 1; + } + while (num--) + cmdfifo_get(voodoo); + break; + + case 5: /*Packet type 5: block write. Header bits 3-21 give the word count (0 is treated as 1), the following word supplies a 24-bit start address and header bits 30-31 select the destination*/ +// if (header & 0x3fc00000) +// fatal("CMDFIFO packet 5 has byte disables set %08x\n", header); + num = (header >> 3) & 0x7ffff; + addr = cmdfifo_get(voodoo) & 0xffffff; + if (!num) + num = 1; +// pclog("CMDFIFO5 addr=%08x num=%i\n", addr, num); + switch (header >> 30) + { + case 0: /*Linear framebuffer (Banshee)*/ + if (voodoo->texture_present[0][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// pclog("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, addr & voodoo->texture_mask, 0); + } + if (voodoo->texture_present[1][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// pclog("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> 
TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, addr & voodoo->texture_mask, 1); + } + while (num--) + { + uint32_t val = cmdfifo_get(voodoo); + if (addr <= voodoo->fb_mask) /*Words addressed beyond fb_mask are still read from the FIFO but not stored*/ + *(uint32_t *)&voodoo->fb_mem[addr] = val; + addr += 4; + } + break; + case 2: /*Framebuffer*/ + while (num--) + { + uint32_t val = cmdfifo_get(voodoo); + voodoo_fb_writel(addr, val, voodoo); + addr += 4; + } + break; + case 3: /*Texture*/ + while (num--) + { + uint32_t val = cmdfifo_get(voodoo); + voodoo_tex_writel(addr, val, voodoo); + addr += 4; + } + break; + + default: + fatal("CMDFIFO packet 5 bad space %08x %08x\n", header, voodoo->cmdfifo_rp); + } + break; + + default: + fatal("Bad CMDFIFO packet %08x %08x\n", header, voodoo->cmdfifo_rp); + } + + end_time = timer_read(); + voodoo->time += end_time - start_time; + } + voodoo->voodoo_busy = 0; + } +} diff --git a/pcem/vid_voodoo_fifo.h b/pcem/vid_voodoo_fifo.h new file mode 100644 index 00000000..c1caebae --- /dev/null +++ b/pcem/vid_voodoo_fifo.h @@ -0,0 +1,8 @@ +void voodoo_wake_fifo_thread(voodoo_t *voodoo); +void voodoo_wake_fifo_thread_now(voodoo_t *voodoo); +void voodoo_wake_timer(void *p); +void voodoo_queue_command(voodoo_t *voodoo, uint32_t addr_type, uint32_t val); +void voodoo_flush(voodoo_t *voodoo); +void voodoo_wake_fifo_threads(voodoo_set_t *set, voodoo_t *voodoo); +void voodoo_wait_for_swap_complete(voodoo_t *voodoo); +void voodoo_fifo_thread(void *param); diff --git a/pcem/vid_voodoo_reg.cpp b/pcem/vid_voodoo_reg.cpp new file mode 100644 index 00000000..63e590b5 --- /dev/null +++ b/pcem/vid_voodoo_reg.cpp @@ -0,0 +1,1321 @@ +#include +#include +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "thread.h" +#include "video.h" +#include "vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_banshee.h" +#include "vid_voodoo_blitter.h" +#include "vid_voodoo_dither.h" +#include "vid_voodoo_fifo.h" +#include "vid_voodoo_reg.h" +#include "vid_voodoo_regs.h" +#include 
"vid_voodoo_render.h" +#include "vid_voodoo_setup.h" +#include "vid_voodoo_texture.h" + +enum +{ + CHIP_FBI = 0x1, + CHIP_TREX0 = 0x2, + CHIP_TREX1 = 0x4, + CHIP_TREX2 = 0x8 +}; + +void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p) +{ + voodoo_t *voodoo = (voodoo_t *)p; + union + { + uint32_t i; + float f; + } tempif; + int ad21 = addr & (1 << 21); + int chip = (addr >> 10) & 0xf; + if (!chip) + chip = 0xf; + + tempif.i = val; +//pclog("voodoo_reg_write_l: addr=%08x val=%08x(%f) chip=%x\n", addr, val, tempif.f, chip); + addr &= 0x3fc; + + if ((voodoo->fbiInit3 & FBIINIT3_REMAP) && addr < 0x100 && ad21) + addr |= 0x400; + switch (addr) + { + case SST_swapbufferCMD: + if (voodoo->type >= VOODOO_BANSHEE) + { +// pclog("swapbufferCMD %08x %08x\n", val, voodoo->leftOverlayBuf); + + voodoo_wait_for_render_thread_idle(voodoo); + if (!(val & 1)) + { + banshee_set_overlay_addr(voodoo->p, voodoo->leftOverlayBuf); + thread_lock_mutex(voodoo->swap_mutex); + if (voodoo->swap_count > 0) + voodoo->swap_count--; + thread_unlock_mutex(voodoo->swap_mutex); + voodoo->frame_count++; + } + else if (TRIPLE_BUFFER) + { + if (voodoo->swap_pending) + voodoo_wait_for_swap_complete(voodoo); + voodoo->swap_interval = (val >> 1) & 0xff; + voodoo->swap_offset = voodoo->leftOverlayBuf; + voodoo->swap_pending = 1; + } + else + { + voodoo->swap_interval = (val >> 1) & 0xff; + voodoo->swap_offset = voodoo->leftOverlayBuf; + voodoo->swap_pending = 1; + + voodoo_wait_for_swap_complete(voodoo); + } + + voodoo->cmd_read++; + break; + } + + if (TRIPLE_BUFFER) + { + voodoo->disp_buffer = (voodoo->disp_buffer + 1) % 3; + voodoo->draw_buffer = (voodoo->draw_buffer + 1) % 3; + } + else + { + voodoo->disp_buffer = !voodoo->disp_buffer; + voodoo->draw_buffer = !voodoo->draw_buffer; + } + voodoo_recalc(voodoo); + + voodoo->params.swapbufferCMD = val; + +// pclog("Swap buffer %08x %d %p %i\n", val, voodoo->swap_count, &voodoo->swap_count, (voodoo == voodoo->set->voodoos[1]) ? 
1 : 0); +// voodoo->front_offset = params->front_offset; + voodoo_wait_for_render_thread_idle(voodoo); + if (!(val & 1)) + { + memset(voodoo->dirty_line, 1, sizeof(voodoo->dirty_line)); + voodoo->front_offset = voodoo->params.front_offset; + thread_lock_mutex(voodoo->swap_mutex); + if (voodoo->swap_count > 0) + voodoo->swap_count--; + thread_unlock_mutex(voodoo->swap_mutex); + } + else if (TRIPLE_BUFFER) + { + if (voodoo->swap_pending) + voodoo_wait_for_swap_complete(voodoo); + + voodoo->swap_interval = (val >> 1) & 0xff; + voodoo->swap_offset = voodoo->params.front_offset; + voodoo->swap_pending = 1; + } + else + { + voodoo->swap_interval = (val >> 1) & 0xff; + voodoo->swap_offset = voodoo->params.front_offset; + voodoo->swap_pending = 1; + + voodoo_wait_for_swap_complete(voodoo); + } + voodoo->cmd_read++; + break; + + case SST_vertexAx: case SST_remap_vertexAx: + voodoo->params.vertexAx = val & 0xffff; + break; + case SST_vertexAy: case SST_remap_vertexAy: + voodoo->params.vertexAy = val & 0xffff; + break; + case SST_vertexBx: case SST_remap_vertexBx: + voodoo->params.vertexBx = val & 0xffff; + break; + case SST_vertexBy: case SST_remap_vertexBy: + voodoo->params.vertexBy = val & 0xffff; + break; + case SST_vertexCx: case SST_remap_vertexCx: + voodoo->params.vertexCx = val & 0xffff; + break; + case SST_vertexCy: case SST_remap_vertexCy: + voodoo->params.vertexCy = val & 0xffff; + break; + + case SST_startR: case SST_remap_startR: + voodoo->params.startR = val & 0xffffff; + break; + case SST_startG: case SST_remap_startG: + voodoo->params.startG = val & 0xffffff; + break; + case SST_startB: case SST_remap_startB: + voodoo->params.startB = val & 0xffffff; + break; + case SST_startZ: case SST_remap_startZ: + voodoo->params.startZ = val; + break; + case SST_startA: case SST_remap_startA: + voodoo->params.startA = val & 0xffffff; + break; + case SST_startS: case SST_remap_startS: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startS = ((int64_t)(int32_t)val) << 14; 
+ if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startS = ((int64_t)(int32_t)val) << 14; + break; + case SST_startT: case SST_remap_startT: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startT = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startT = ((int64_t)(int32_t)val) << 14; + break; + case SST_startW: case SST_remap_startW: + if (chip & CHIP_FBI) + voodoo->params.startW = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startW = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startW = (int64_t)(int32_t)val << 2; + break; + + case SST_dRdX: case SST_remap_dRdX: + voodoo->params.dRdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dGdX: case SST_remap_dGdX: + voodoo->params.dGdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dBdX: case SST_remap_dBdX: + voodoo->params.dBdX = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dZdX: case SST_remap_dZdX: + voodoo->params.dZdX = val; + break; + case SST_dAdX: case SST_remap_dAdX: + voodoo->params.dAdX = (val & 0xffffff) | ((val & 0x800000) ? 
0xff000000 : 0); + break; + case SST_dSdX: case SST_remap_dSdX: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dSdX = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdX = ((int64_t)(int32_t)val) << 14; + break; + case SST_dTdX: case SST_remap_dTdX: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dTdX = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdX = ((int64_t)(int32_t)val) << 14; + break; + case SST_dWdX: case SST_remap_dWdX: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dWdX = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdX = (int64_t)(int32_t)val << 2; + if (chip & CHIP_FBI) + voodoo->params.dWdX = (int64_t)(int32_t)val << 2; + break; + + case SST_dRdY: case SST_remap_dRdY: + voodoo->params.dRdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dGdY: case SST_remap_dGdY: + voodoo->params.dGdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dBdY: case SST_remap_dBdY: + voodoo->params.dBdY = (val & 0xffffff) | ((val & 0x800000) ? 0xff000000 : 0); + break; + case SST_dZdY: case SST_remap_dZdY: + voodoo->params.dZdY = val; + break; + case SST_dAdY: case SST_remap_dAdY: + voodoo->params.dAdY = (val & 0xffffff) | ((val & 0x800000) ? 
0xff000000 : 0); + break; + case SST_dSdY: case SST_remap_dSdY: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dSdY = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdY = ((int64_t)(int32_t)val) << 14; + break; + case SST_dTdY: case SST_remap_dTdY: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dTdY = ((int64_t)(int32_t)val) << 14; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdY = ((int64_t)(int32_t)val) << 14; + break; + case SST_dWdY: case SST_remap_dWdY: + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dWdY = (int64_t)(int32_t)val << 2; + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdY = (int64_t)(int32_t)val << 2; + if (chip & CHIP_FBI) + voodoo->params.dWdY = (int64_t)(int32_t)val << 2; + break; + + case SST_triangleCMD: case SST_remap_triangleCMD: + voodoo->params.sign = val & (1 << 31); + + if (voodoo->ncc_dirty[0]) + voodoo_update_ncc(voodoo, 0); + if (voodoo->ncc_dirty[1]) + voodoo_update_ncc(voodoo, 1); + voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; + + voodoo_queue_triangle(voodoo, &voodoo->params); + + voodoo->cmd_read++; + break; + + case SST_fvertexAx: case SST_remap_fvertexAx: + voodoo->fvertexAx.i = val; + voodoo->params.vertexAx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexAx.f * 16.0f) & 0xffff; + break; + case SST_fvertexAy: case SST_remap_fvertexAy: + voodoo->fvertexAy.i = val; + voodoo->params.vertexAy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexAy.f * 16.0f) & 0xffff; + break; + case SST_fvertexBx: case SST_remap_fvertexBx: + voodoo->fvertexBx.i = val; + voodoo->params.vertexBx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexBx.f * 16.0f) & 0xffff; + break; + case SST_fvertexBy: case SST_remap_fvertexBy: + voodoo->fvertexBy.i = val; + voodoo->params.vertexBy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexBy.f * 16.0f) & 0xffff; + break; + case SST_fvertexCx: case SST_remap_fvertexCx: + voodoo->fvertexCx.i = val; + voodoo->params.vertexCx = (int32_t)(int16_t)(int32_t)(voodoo->fvertexCx.f * 16.0f) 
& 0xffff; + break; + case SST_fvertexCy: case SST_remap_fvertexCy: + voodoo->fvertexCy.i = val; + voodoo->params.vertexCy = (int32_t)(int16_t)(int32_t)(voodoo->fvertexCy.f * 16.0f) & 0xffff; + break; + + case SST_fstartR: case SST_remap_fstartR: + tempif.i = val; + voodoo->params.startR = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartG: case SST_remap_fstartG: + tempif.i = val; + voodoo->params.startG = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartB: case SST_remap_fstartB: + tempif.i = val; + voodoo->params.startB = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartZ: case SST_remap_fstartZ: + tempif.i = val; + voodoo->params.startZ = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartA: case SST_remap_fstartA: + tempif.i = val; + voodoo->params.startA = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fstartS: case SST_remap_fstartS: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startS = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startS = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fstartT: case SST_remap_fstartT: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startT = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startT = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fstartW: case SST_remap_fstartW: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].startW = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].startW = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_FBI) + voodoo->params.startW = (int64_t)(tempif.f * 4294967296.0f); + break; + + case SST_fdRdX: case SST_remap_fdRdX: + tempif.i = val; + voodoo->params.dRdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdGdX: case SST_remap_fdGdX: + tempif.i = val; + voodoo->params.dGdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdBdX: case SST_remap_fdBdX: + 
tempif.i = val; + voodoo->params.dBdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdZdX: case SST_remap_fdZdX: + tempif.i = val; + voodoo->params.dZdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdAdX: case SST_remap_fdAdX: + tempif.i = val; + voodoo->params.dAdX = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdSdX: case SST_remap_fdSdX: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dSdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdX = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fdTdX: case SST_remap_fdTdX: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dTdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdX = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fdWdX: case SST_remap_fdWdX: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dWdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdX = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_FBI) + voodoo->params.dWdX = (int64_t)(tempif.f * 4294967296.0f); + break; + + case SST_fdRdY: case SST_remap_fdRdY: + tempif.i = val; + voodoo->params.dRdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdGdY: case SST_remap_fdGdY: + tempif.i = val; + voodoo->params.dGdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdBdY: case SST_remap_fdBdY: + tempif.i = val; + voodoo->params.dBdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdZdY: case SST_remap_fdZdY: + tempif.i = val; + voodoo->params.dZdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdAdY: case SST_remap_fdAdY: + tempif.i = val; + voodoo->params.dAdY = (int32_t)(tempif.f * 4096.0f); + break; + case SST_fdSdY: case SST_remap_fdSdY: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dSdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dSdY = 
(int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fdTdY: case SST_remap_fdTdY: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dTdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dTdY = (int64_t)(tempif.f * 4294967296.0f); + break; + case SST_fdWdY: case SST_remap_fdWdY: + tempif.i = val; + if (chip & CHIP_TREX0) + voodoo->params.tmu[0].dWdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_TREX1) + voodoo->params.tmu[1].dWdY = (int64_t)(tempif.f * 4294967296.0f); + if (chip & CHIP_FBI) + voodoo->params.dWdY = (int64_t)(tempif.f * 4294967296.0f); + break; + + case SST_ftriangleCMD: + voodoo->params.sign = val & (1 << 31); + + if (voodoo->ncc_dirty[0]) + voodoo_update_ncc(voodoo, 0); + if (voodoo->ncc_dirty[1]) + voodoo_update_ncc(voodoo, 1); + voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; + + voodoo_queue_triangle(voodoo, &voodoo->params); + + voodoo->cmd_read++; + break; + + case SST_fbzColorPath: + voodoo->params.fbzColorPath = val; + voodoo->rgb_sel = val & 3; + break; + + case SST_fogMode: + voodoo->params.fogMode = val; + break; + case SST_alphaMode: + voodoo->params.alphaMode = val; + break; + case SST_fbzMode: + voodoo->params.fbzMode = val; + voodoo_recalc(voodoo); + break; + case SST_lfbMode: + voodoo->lfbMode = val; + voodoo_recalc(voodoo); + break; + + case SST_clipLeftRight: + if (voodoo->type >= VOODOO_2) + { + voodoo->params.clipRight = val & 0xfff; + voodoo->params.clipLeft = (val >> 16) & 0xfff; + } + else + { + voodoo->params.clipRight = val & 0x3ff; + voodoo->params.clipLeft = (val >> 16) & 0x3ff; + } + break; + case SST_clipLowYHighY: + if (voodoo->type >= VOODOO_2) + { + voodoo->params.clipHighY = val & 0xfff; + voodoo->params.clipLowY = (val >> 16) & 0xfff; + } + else + { + voodoo->params.clipHighY = val & 0x3ff; + voodoo->params.clipLowY = (val >> 16) & 0x3ff; + } + break; + + case SST_nopCMD: + voodoo->cmd_read++; + voodoo->fbiPixelsIn = 0; + voodoo->fbiChromaFail = 
0; + voodoo->fbiZFuncFail = 0; + voodoo->fbiAFuncFail = 0; + voodoo->fbiPixelsOut = 0; + break; + case SST_fastfillCMD: + voodoo_wait_for_render_thread_idle(voodoo); + voodoo_fastfill(voodoo, &voodoo->params); + voodoo->cmd_read++; + break; + + case SST_fogColor: + voodoo->params.fogColor.r = (val >> 16) & 0xff; + voodoo->params.fogColor.g = (val >> 8) & 0xff; + voodoo->params.fogColor.b = val & 0xff; + break; + + case SST_zaColor: + voodoo->params.zaColor = val; + break; + case SST_chromaKey: + voodoo->params.chromaKey_r = (val >> 16) & 0xff; + voodoo->params.chromaKey_g = (val >> 8) & 0xff; + voodoo->params.chromaKey_b = val & 0xff; + voodoo->params.chromaKey = val & 0xffffff; + break; + case SST_stipple: + voodoo->params.stipple = val; + break; + case SST_color0: + voodoo->params.color0 = val; + break; + case SST_color1: + voodoo->params.color1 = val; + break; + + case SST_fogTable00: case SST_fogTable01: case SST_fogTable02: case SST_fogTable03: + case SST_fogTable04: case SST_fogTable05: case SST_fogTable06: case SST_fogTable07: + case SST_fogTable08: case SST_fogTable09: case SST_fogTable0a: case SST_fogTable0b: + case SST_fogTable0c: case SST_fogTable0d: case SST_fogTable0e: case SST_fogTable0f: + case SST_fogTable10: case SST_fogTable11: case SST_fogTable12: case SST_fogTable13: + case SST_fogTable14: case SST_fogTable15: case SST_fogTable16: case SST_fogTable17: + case SST_fogTable18: case SST_fogTable19: case SST_fogTable1a: case SST_fogTable1b: + case SST_fogTable1c: case SST_fogTable1d: case SST_fogTable1e: case SST_fogTable1f: + addr = (addr - SST_fogTable00) >> 1; + voodoo->params.fogTable[addr].dfog = val & 0xff; + voodoo->params.fogTable[addr].fog = (val >> 8) & 0xff; + voodoo->params.fogTable[addr+1].dfog = (val >> 16) & 0xff; + voodoo->params.fogTable[addr+1].fog = (val >> 24) & 0xff; + break; + + case SST_clipLeftRight1: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->params.clipRight1 = val & 0xfff; + voodoo->params.clipLeft1 = (val >> 16) & 
0xfff; + } + break; + case SST_clipTopBottom1: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->params.clipHighY1 = val & 0xfff; + voodoo->params.clipLowY1 = (val >> 16) & 0xfff; + } + break; + + case SST_colBufferAddr: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->params.draw_offset = val & 0xfffff0; + voodoo->fb_write_offset = voodoo->params.draw_offset; +// pclog("colorBufferAddr=%06x\n", voodoo->params.draw_offset); + } + break; + case SST_colBufferStride: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->col_tiled = val & (1 << 15); + voodoo->params.col_tiled = voodoo->col_tiled; + if (voodoo->col_tiled) + { + voodoo->row_width = (val & 0x7f) * 128*32; +// pclog("colBufferStride tiled = %i bytes, tiled %08x\n", voodoo->row_width, val); + } + else + { + voodoo->row_width = val & 0x3fff; +// pclog("colBufferStride linear = %i bytes, linear\n", voodoo->row_width); + } + voodoo->params.row_width = voodoo->row_width; + } + break; + case SST_auxBufferAddr: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->params.aux_offset = val & 0xfffff0; +// pclog("auxBufferAddr=%06x\n", voodoo->params.aux_offset); + } + break; + case SST_auxBufferStride: + if (voodoo->type >= VOODOO_BANSHEE) + { + voodoo->aux_tiled = val & (1 << 15); + voodoo->params.aux_tiled = voodoo->aux_tiled; + if (voodoo->aux_tiled) + { + voodoo->aux_row_width = (val & 0x7f) * 128*32; +// pclog("auxBufferStride tiled = %i bytes, tiled\n", voodoo->aux_row_width); + } + else + { + voodoo->aux_row_width = val & 0x3fff; +// pclog("auxBufferStride linear = %i bytes, linear\n", voodoo->aux_row_width); + } + voodoo->params.aux_row_width = voodoo->aux_row_width; + } + break; + + case SST_clutData: + voodoo->clutData[(val >> 24) & 0x3f].b = val & 0xff; + voodoo->clutData[(val >> 24) & 0x3f].g = (val >> 8) & 0xff; + voodoo->clutData[(val >> 24) & 0x3f].r = (val >> 16) & 0xff; + if (val & 0x20000000) + { + voodoo->clutData[(val >> 24) & 0x3f].b = 255; + voodoo->clutData[(val >> 24) & 0x3f].g = 255; + 
voodoo->clutData[(val >> 24) & 0x3f].r = 255; + } + voodoo->clutData_dirty = 1; + break; + + case SST_sSetupMode: + voodoo->sSetupMode = val; + break; + case SST_sVx: + tempif.i = val; + voodoo->verts[3].sVx = tempif.f; +// pclog("sVx[%i]=%f\n", voodoo->vertex_num, tempif.f); + break; + case SST_sVy: + tempif.i = val; + voodoo->verts[3].sVy = tempif.f; +// pclog("sVy[%i]=%f\n", voodoo->vertex_num, tempif.f); + break; + case SST_sARGB: + voodoo->verts[3].sBlue = (float)(val & 0xff); + voodoo->verts[3].sGreen = (float)((val >> 8) & 0xff); + voodoo->verts[3].sRed = (float)((val >> 16) & 0xff); + voodoo->verts[3].sAlpha = (float)((val >> 24) & 0xff); + break; + case SST_sRed: + tempif.i = val; + voodoo->verts[3].sRed = tempif.f; + break; + case SST_sGreen: + tempif.i = val; + voodoo->verts[3].sGreen = tempif.f; + break; + case SST_sBlue: + tempif.i = val; + voodoo->verts[3].sBlue = tempif.f; + break; + case SST_sAlpha: + tempif.i = val; + voodoo->verts[3].sAlpha = tempif.f; + break; + case SST_sVz: + tempif.i = val; + voodoo->verts[3].sVz = tempif.f; + break; + case SST_sWb: + tempif.i = val; + voodoo->verts[3].sWb = tempif.f; + break; + case SST_sW0: + tempif.i = val; + voodoo->verts[3].sW0 = tempif.f; + break; + case SST_sS0: + tempif.i = val; + voodoo->verts[3].sS0 = tempif.f; + break; + case SST_sT0: + tempif.i = val; + voodoo->verts[3].sT0 = tempif.f; + break; + case SST_sW1: + tempif.i = val; + voodoo->verts[3].sW1 = tempif.f; + break; + case SST_sS1: + tempif.i = val; + voodoo->verts[3].sS1 = tempif.f; + break; + case SST_sT1: + tempif.i = val; + voodoo->verts[3].sT1 = tempif.f; + break; + + case SST_sBeginTriCMD: +// pclog("sBeginTriCMD %i %f\n", voodoo->vertex_num, voodoo->verts[4].sVx); + voodoo->verts[0] = voodoo->verts[3]; + voodoo->verts[1] = voodoo->verts[3]; + voodoo->verts[2] = voodoo->verts[3]; + voodoo->vertex_next_age = 0; + voodoo->vertex_ages[0] = voodoo->vertex_next_age++; + + voodoo->num_verticies = 1; + voodoo->cull_pingpong = 0; + break; + case 
SST_sDrawTriCMD: +// pclog("sDrawTriCMD %i %i\n", voodoo->num_verticies, voodoo->sSetupMode & SETUPMODE_STRIP_MODE); + /*I'm not sure this is the vertex selection algorithm actually used in the 3dfx + chips, but this works with a number of games that switch between strip and fan + mode in the middle of a run (eg Black & White, Viper Racing)*/ + if (voodoo->vertex_next_age < 3) + { + /*Fewer than three vertices already written, store in next slot*/ + int vertex_nr = voodoo->vertex_next_age; + + voodoo->verts[vertex_nr] = voodoo->verts[3]; + voodoo->vertex_ages[vertex_nr] = voodoo->vertex_next_age++; + } + else + { + int vertex_nr = 0; + + if (!(voodoo->sSetupMode & SETUPMODE_STRIP_MODE)) + { + /*Strip - find oldest vertex*/ + if ((voodoo->vertex_ages[0] < voodoo->vertex_ages[1]) && + (voodoo->vertex_ages[0] < voodoo->vertex_ages[2])) + vertex_nr = 0; + else if ((voodoo->vertex_ages[1] < voodoo->vertex_ages[0]) && + (voodoo->vertex_ages[1] < voodoo->vertex_ages[2])) + vertex_nr = 1; + else + vertex_nr = 2; + } + else + { + /*Fan - find second oldest vertex (ie pivot around oldest)*/ + if ((voodoo->vertex_ages[1] < voodoo->vertex_ages[0]) && + (voodoo->vertex_ages[0] < voodoo->vertex_ages[2])) + vertex_nr = 0; + else if ((voodoo->vertex_ages[2] < voodoo->vertex_ages[0]) && + (voodoo->vertex_ages[0] < voodoo->vertex_ages[1])) + vertex_nr = 0; + else if ((voodoo->vertex_ages[0] < voodoo->vertex_ages[1]) && + (voodoo->vertex_ages[1] < voodoo->vertex_ages[2])) + vertex_nr = 1; + else if ((voodoo->vertex_ages[2] < voodoo->vertex_ages[1]) && + (voodoo->vertex_ages[1] < voodoo->vertex_ages[0])) + vertex_nr = 1; + else + vertex_nr = 2; + } + voodoo->verts[vertex_nr] = voodoo->verts[3]; + voodoo->vertex_ages[vertex_nr] = voodoo->vertex_next_age++; + } + + voodoo->num_verticies++; + if (voodoo->num_verticies == 3) + { +// pclog("triangle_setup\n"); + voodoo_triangle_setup(voodoo); + voodoo->cull_pingpong = !voodoo->cull_pingpong; + + voodoo->num_verticies = 2; + } + break; + + 
case SST_bltSrcBaseAddr: + voodoo->bltSrcBaseAddr = val & 0x3fffff; + break; + case SST_bltDstBaseAddr: +// pclog("Write bltDstBaseAddr %08x\n", val); + voodoo->bltDstBaseAddr = val & 0x3fffff; + break; + case SST_bltXYStrides: + voodoo->bltSrcXYStride = val & 0xfff; + voodoo->bltDstXYStride = (val >> 16) & 0xfff; +// pclog("Write bltXYStrides %08x\n", val); + break; + case SST_bltSrcChromaRange: + voodoo->bltSrcChromaRange = val; + voodoo->bltSrcChromaMinB = val & 0x1f; + voodoo->bltSrcChromaMinG = (val >> 5) & 0x3f; + voodoo->bltSrcChromaMinR = (val >> 11) & 0x1f; + voodoo->bltSrcChromaMaxB = (val >> 16) & 0x1f; + voodoo->bltSrcChromaMaxG = (val >> 21) & 0x3f; + voodoo->bltSrcChromaMaxR = (val >> 27) & 0x1f; + break; + case SST_bltDstChromaRange: + voodoo->bltDstChromaRange = val; + voodoo->bltDstChromaMinB = val & 0x1f; + voodoo->bltDstChromaMinG = (val >> 5) & 0x3f; + voodoo->bltDstChromaMinR = (val >> 11) & 0x1f; + voodoo->bltDstChromaMaxB = (val >> 16) & 0x1f; + voodoo->bltDstChromaMaxG = (val >> 21) & 0x3f; + voodoo->bltDstChromaMaxR = (val >> 27) & 0x1f; + break; + case SST_bltClipX: + voodoo->bltClipRight = val & 0xfff; + voodoo->bltClipLeft = (val >> 16) & 0xfff; + break; + case SST_bltClipY: + voodoo->bltClipHighY = val & 0xfff; + voodoo->bltClipLowY = (val >> 16) & 0xfff; + break; + + case SST_bltSrcXY: + voodoo->bltSrcX = val & 0x7ff; + voodoo->bltSrcY = (val >> 16) & 0x7ff; + break; + case SST_bltDstXY: +// pclog("Write bltDstXY %08x\n", val); + voodoo->bltDstX = val & 0x7ff; + voodoo->bltDstY = (val >> 16) & 0x7ff; + if (val & (1 << 31)) + voodoo_v2_blit_start(voodoo); + break; + case SST_bltSize: +// pclog("Write bltSize %08x\n", val); + voodoo->bltSizeX = val & 0xfff; + if (voodoo->bltSizeX & 0x800) + voodoo->bltSizeX |= 0xfffff000; + voodoo->bltSizeY = (val >> 16) & 0xfff; + if (voodoo->bltSizeY & 0x800) + voodoo->bltSizeY |= 0xfffff000; + if (val & (1 << 31)) + voodoo_v2_blit_start(voodoo); + break; + case SST_bltRop: + voodoo->bltRop[0] = val & 
0xf; + voodoo->bltRop[1] = (val >> 4) & 0xf; + voodoo->bltRop[2] = (val >> 8) & 0xf; + voodoo->bltRop[3] = (val >> 12) & 0xf; + break; + case SST_bltColor: +// pclog("Write bltColor %08x\n", val); + voodoo->bltColorFg = val & 0xffff; + voodoo->bltColorBg = (val >> 16) & 0xffff; + break; + + case SST_bltCommand: + voodoo->bltCommand = val; +// pclog("Write bltCommand %08x\n", val); + if (val & (1 << 31)) + voodoo_v2_blit_start(voodoo); + break; + case SST_bltData: + voodoo_v2_blit_data(voodoo, val); + break; + + case SST_textureMode: + if (chip & CHIP_TREX0) + { + voodoo->params.textureMode[0] = val; + voodoo->params.tformat[0] = (val >> 8) & 0xf; + } + if (chip & CHIP_TREX1) + { + voodoo->params.textureMode[1] = val; + voodoo->params.tformat[1] = (val >> 8) & 0xf; + } + break; + case SST_tLOD: + if (chip & CHIP_TREX0) + { + voodoo->params.tLOD[0] = val; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + voodoo->params.tLOD[1] = val; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_tDetail: + if (chip & CHIP_TREX0) + { + voodoo->params.detail_max[0] = val & 0xff; + voodoo->params.detail_bias[0] = (val >> 8) & 0x3f; + voodoo->params.detail_scale[0] = (val >> 14) & 7; + } + if (chip & CHIP_TREX1) + { + voodoo->params.detail_max[1] = val & 0xff; + voodoo->params.detail_bias[1] = (val >> 8) & 0x3f; + voodoo->params.detail_scale[1] = (val >> 14) & 7; + } + break; + case SST_texBaseAddr: + if (chip & CHIP_TREX0) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr[0] = val & 0xfffff0; + else + voodoo->params.texBaseAddr[0] = (val & 0x7ffff) << 3; +// pclog("texBaseAddr = %08x %08x\n", voodoo->params.texBaseAddr[0], val); + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr[1] = val & 0xfffff0; + else + voodoo->params.texBaseAddr[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_texBaseAddr1: + if (chip & CHIP_TREX0) + { + 
if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr1[0] = val & 0xfffff0; + else + voodoo->params.texBaseAddr1[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr1[1] = val & 0xfffff0; + else + voodoo->params.texBaseAddr1[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_texBaseAddr2: + if (chip & CHIP_TREX0) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr2[0] = val & 0xfffff0; + else + voodoo->params.texBaseAddr2[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr2[1] = val & 0xfffff0; + else + voodoo->params.texBaseAddr2[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } + break; + case SST_texBaseAddr38: + if (chip & CHIP_TREX0) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr38[0] = val & 0xfffff0; + else + voodoo->params.texBaseAddr38[0] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 0); + } + if (chip & CHIP_TREX1) + { + if (voodoo->type >= VOODOO_BANSHEE) + voodoo->params.texBaseAddr38[1] = val & 0xfffff0; + else + voodoo->params.texBaseAddr38[1] = (val & 0x7ffff) << 3; + voodoo_recalc_tex(voodoo, 1); + } + break; + + case SST_trexInit1: + if (chip & CHIP_TREX0) + voodoo->trexInit1[0] = val; + if (chip & CHIP_TREX1) + voodoo->trexInit1[1] = val; + break; + + case SST_nccTable0_Y0: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable0_Y1: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].y[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].y[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable0_Y2: + if (chip 
& CHIP_TREX0)
+                        {
+                                voodoo->nccTable[0][0].y[2] = val;
+                                voodoo->ncc_dirty[0] = 1;
+                        }
+                        if (chip & CHIP_TREX1)
+                        {
+                                voodoo->nccTable[1][0].y[2] = val;
+                                voodoo->ncc_dirty[1] = 1;
+                        }
+                        break;
+                case SST_nccTable0_Y3:
+                        if (chip & CHIP_TREX0)
+                        {
+                                voodoo->nccTable[0][0].y[3] = val;
+                                voodoo->ncc_dirty[0] = 1;
+                        }
+                        if (chip & CHIP_TREX1)
+                        {
+                                voodoo->nccTable[1][0].y[3] = val;
+                                voodoo->ncc_dirty[1] = 1;
+                        }
+                        break;
+
+                /*
+                 * nccTable0 I0/I2/Q0/Q2 are dual-purpose.  With bit 31 of val
+                 * clear, the write updates the matching NCC table entry of the
+                 * selected TMU(s) and stops.  With bit 31 set, each case falls
+                 * through to the palette write after Q2, which stores val into
+                 * the even palette index taken from bits 24..30.
+                 */
+                case SST_nccTable0_I0:
+                        if (!(val & (1 << 31)))
+                        {
+                                if (chip & CHIP_TREX0)
+                                {
+                                        voodoo->nccTable[0][0].i[0] = val;
+                                        voodoo->ncc_dirty[0] = 1;
+                                }
+                                if (chip & CHIP_TREX1)
+                                {
+                                        voodoo->nccTable[1][0].i[0] = val;
+                                        voodoo->ncc_dirty[1] = 1;
+                                }
+                                break;
+                        }
+                        /* fallthrough: bit 31 set, handled by the palette write below */
+                case SST_nccTable0_I2:
+                        if (!(val & (1 << 31)))
+                        {
+                                if (chip & CHIP_TREX0)
+                                {
+                                        voodoo->nccTable[0][0].i[2] = val;
+                                        voodoo->ncc_dirty[0] = 1;
+                                }
+                                if (chip & CHIP_TREX1)
+                                {
+                                        voodoo->nccTable[1][0].i[2] = val;
+                                        voodoo->ncc_dirty[1] = 1;
+                                }
+                                break;
+                        }
+                        /* fallthrough: bit 31 set, handled by the palette write below */
+                case SST_nccTable0_Q0:
+                        if (!(val & (1 << 31)))
+                        {
+                                if (chip & CHIP_TREX0)
+                                {
+                                        voodoo->nccTable[0][0].q[0] = val;
+                                        voodoo->ncc_dirty[0] = 1;
+                                }
+                                if (chip & CHIP_TREX1)
+                                {
+                                        voodoo->nccTable[1][0].q[0] = val;
+                                        voodoo->ncc_dirty[1] = 1;
+                                }
+                                break;
+                        }
+                        /* fallthrough: bit 31 set, handled by the palette write below */
+                case SST_nccTable0_Q2:
+                        if (!(val & (1 << 31)))
+                        {
+                                /* Q2 must land in q[2]; storing to i[2] would
+                                   overwrite the I2 entry and leave q[2] unset. */
+                                if (chip & CHIP_TREX0)
+                                {
+                                        voodoo->nccTable[0][0].q[2] = val;
+                                        voodoo->ncc_dirty[0] = 1;
+                                }
+                                if (chip & CHIP_TREX1)
+                                {
+                                        voodoo->nccTable[1][0].q[2] = val;
+                                        voodoo->ncc_dirty[1] = 1;
+                                }
+                                break;
+                        }
+                        /* Palette write shared by I0/I2/Q0/Q2: index is forced even,
+                           the top byte of the stored value is forced to 0xff. */
+                        if (val & (1 << 31))
+                        {
+                                int p = (val >> 23) & 0xfe;
+                                if (chip & CHIP_TREX0)
+                                {
+                                        voodoo->palette[0][p].u = val | 0xff000000;
+                                        voodoo->palette_dirty[0] = 1;
+                                }
+                                if (chip & CHIP_TREX1)
+                                {
+                                        voodoo->palette[1][p].u = val | 0xff000000;
+                                        voodoo->palette_dirty[1] = 1;
+                                }
+                        }
+                        break;
+
+                /* Same bit-31 dispatch as SST_nccTable0_I0 above. */
+                case SST_nccTable0_I1:
+                        if (!(val & (1 << 31)))
+                        {
+                                if (chip & CHIP_TREX0)
+                                {
+                                        voodoo->nccTable[0][0].i[1] = val;
+                                        voodoo->ncc_dirty[0] = 1;
+                                }
+                                if (chip & CHIP_TREX1)
+                                {
+                                        voodoo->nccTable[1][0].i[1] = val;
+                                        
voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_I3: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].i[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].i[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_Q1: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].q[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + case SST_nccTable0_Q3: + if (!(val & (1 << 31))) + { + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][0].q[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][0].q[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + } + if (val & (1 << 31)) + { + int p = ((val >> 23) & 0xfe) | 0x01; + if (chip & CHIP_TREX0) + { + voodoo->palette[0][p].u = val | 0xff000000; + voodoo->palette_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->palette[1][p].u = val | 0xff000000; + voodoo->palette_dirty[1] = 1; + } + } + break; + + case SST_nccTable1_Y0: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Y1: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Y2: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].y[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Y3: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].y[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + 
voodoo->nccTable[1][1].y[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_I0: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_I1: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_I2: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_I3: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].i[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].i[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Q0: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[0] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[0] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Q1: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[1] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[1] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Q2: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[2] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[2] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + case SST_nccTable1_Q3: + if (chip & CHIP_TREX0) + { + voodoo->nccTable[0][1].q[3] = val; + voodoo->ncc_dirty[0] = 1; + } + if (chip & CHIP_TREX1) + { + voodoo->nccTable[1][1].q[3] = val; + voodoo->ncc_dirty[1] = 1; + } + break; + + case SST_userIntrCMD: + fatal("userIntrCMD write %08x from FIFO\n", val); + 
break; + + + case SST_leftOverlayBuf: + voodoo->leftOverlayBuf = val; + break; + } +} diff --git a/pcem/vid_voodoo_reg.h b/pcem/vid_voodoo_reg.h new file mode 100644 index 00000000..f3a94187 --- /dev/null +++ b/pcem/vid_voodoo_reg.h @@ -0,0 +1 @@ +void voodoo_reg_writel(uint32_t addr, uint32_t val, void *p); diff --git a/pcem/vid_voodoo_regs.h b/pcem/vid_voodoo_regs.h new file mode 100644 index 00000000..f7ab2089 --- /dev/null +++ b/pcem/vid_voodoo_regs.h @@ -0,0 +1,691 @@ +enum +{ + SST_status = 0x000, + SST_intrCtrl = 0x004, + + SST_vertexAx = 0x008, + SST_vertexAy = 0x00c, + SST_vertexBx = 0x010, + SST_vertexBy = 0x014, + SST_vertexCx = 0x018, + SST_vertexCy = 0x01c, + + SST_startR = 0x0020, + SST_startG = 0x0024, + SST_startB = 0x0028, + SST_startZ = 0x002c, + SST_startA = 0x0030, + SST_startS = 0x0034, + SST_startT = 0x0038, + SST_startW = 0x003c, + + SST_dRdX = 0x0040, + SST_dGdX = 0x0044, + SST_dBdX = 0x0048, + SST_dZdX = 0x004c, + SST_dAdX = 0x0050, + SST_dSdX = 0x0054, + SST_dTdX = 0x0058, + SST_dWdX = 0x005c, + + SST_dRdY = 0x0060, + SST_dGdY = 0x0064, + SST_dBdY = 0x0068, + SST_dZdY = 0x006c, + SST_dAdY = 0x0070, + SST_dSdY = 0x0074, + SST_dTdY = 0x0078, + SST_dWdY = 0x007c, + + SST_triangleCMD = 0x0080, + + SST_fvertexAx = 0x088, + SST_fvertexAy = 0x08c, + SST_fvertexBx = 0x090, + SST_fvertexBy = 0x094, + SST_fvertexCx = 0x098, + SST_fvertexCy = 0x09c, + + SST_fstartR = 0x00a0, + SST_fstartG = 0x00a4, + SST_fstartB = 0x00a8, + SST_fstartZ = 0x00ac, + SST_fstartA = 0x00b0, + SST_fstartS = 0x00b4, + SST_fstartT = 0x00b8, + SST_fstartW = 0x00bc, + + SST_fdRdX = 0x00c0, + SST_fdGdX = 0x00c4, + SST_fdBdX = 0x00c8, + SST_fdZdX = 0x00cc, + SST_fdAdX = 0x00d0, + SST_fdSdX = 0x00d4, + SST_fdTdX = 0x00d8, + SST_fdWdX = 0x00dc, + + SST_fdRdY = 0x00e0, + SST_fdGdY = 0x00e4, + SST_fdBdY = 0x00e8, + SST_fdZdY = 0x00ec, + SST_fdAdY = 0x00f0, + SST_fdSdY = 0x00f4, + SST_fdTdY = 0x00f8, + SST_fdWdY = 0x00fc, + + SST_ftriangleCMD = 0x0100, + + SST_fbzColorPath = 0x104, + 
SST_fogMode = 0x108, + + SST_alphaMode = 0x10c, + SST_fbzMode = 0x110, + SST_lfbMode = 0x114, + + SST_clipLeftRight = 0x118, + SST_clipLowYHighY = 0x11c, + + SST_nopCMD = 0x120, + SST_fastfillCMD = 0x124, + SST_swapbufferCMD = 0x128, + + SST_fogColor = 0x12c, + SST_zaColor = 0x130, + SST_chromaKey = 0x134, + + SST_userIntrCMD = 0x13c, + SST_stipple = 0x140, + SST_color0 = 0x144, + SST_color1 = 0x148, + + SST_fbiPixelsIn = 0x14c, + SST_fbiChromaFail = 0x150, + SST_fbiZFuncFail = 0x154, + SST_fbiAFuncFail = 0x158, + SST_fbiPixelsOut = 0x15c, + + SST_fogTable00 = 0x160, + SST_fogTable01 = 0x164, + SST_fogTable02 = 0x168, + SST_fogTable03 = 0x16c, + SST_fogTable04 = 0x170, + SST_fogTable05 = 0x174, + SST_fogTable06 = 0x178, + SST_fogTable07 = 0x17c, + SST_fogTable08 = 0x180, + SST_fogTable09 = 0x184, + SST_fogTable0a = 0x188, + SST_fogTable0b = 0x18c, + SST_fogTable0c = 0x190, + SST_fogTable0d = 0x194, + SST_fogTable0e = 0x198, + SST_fogTable0f = 0x19c, + SST_fogTable10 = 0x1a0, + SST_fogTable11 = 0x1a4, + SST_fogTable12 = 0x1a8, + SST_fogTable13 = 0x1ac, + SST_fogTable14 = 0x1b0, + SST_fogTable15 = 0x1b4, + SST_fogTable16 = 0x1b8, + SST_fogTable17 = 0x1bc, + SST_fogTable18 = 0x1c0, + SST_fogTable19 = 0x1c4, + SST_fogTable1a = 0x1c8, + SST_fogTable1b = 0x1cc, + SST_fogTable1c = 0x1d0, + SST_fogTable1d = 0x1d4, + SST_fogTable1e = 0x1d8, + SST_fogTable1f = 0x1dc, + + SST_cmdFifoBaseAddr = 0x1e0, + SST_cmdFifoBump = 0x1e4, + SST_cmdFifoRdPtr = 0x1e8, + SST_cmdFifoAMin = 0x1ec, + SST_cmdFifoAMax = 0x1f0, + SST_cmdFifoDepth = 0x1f4, + SST_cmdFifoHoles = 0x1f8, + + SST_colBufferAddr = 0x1ec, /*Banshee*/ + SST_colBufferStride = 0x1f0, /*Banshee*/ + SST_auxBufferAddr = 0x1f4, /*Banshee*/ + SST_auxBufferStride = 0x1f8, /*Banshee*/ + + SST_clipLeftRight1 = 0x200, /*Banshee*/ + SST_clipTopBottom1 = 0x204, /*Banshee*/ + + SST_fbiInit4 = 0x200, + SST_vRetrace = 0x204, + SST_backPorch = 0x208, + SST_videoDimensions = 0x20c, + SST_fbiInit0 = 0x210, + SST_fbiInit1 = 0x214, + 
SST_fbiInit2 = 0x218, + SST_fbiInit3 = 0x21c, + SST_hSync = 0x220, + SST_vSync = 0x224, + SST_clutData = 0x228, + SST_dacData = 0x22c, + + SST_scrFilter = 0x230, + + SST_hvRetrace = 0x240, + SST_fbiInit5 = 0x244, + SST_fbiInit6 = 0x248, + SST_fbiInit7 = 0x24c, + + SST_swapPending = 0x24c, /*Banshee*/ + SST_leftOverlayBuf = 0x250, /*Banshee*/ + + SST_sSetupMode = 0x260, + SST_sVx = 0x264, + SST_sVy = 0x268, + SST_sARGB = 0x26c, + SST_sRed = 0x270, + SST_sGreen = 0x274, + SST_sBlue = 0x278, + SST_sAlpha = 0x27c, + SST_sVz = 0x280, + SST_sWb = 0x284, + SST_sW0 = 0x288, + SST_sS0 = 0x28c, + SST_sT0 = 0x290, + SST_sW1 = 0x294, + SST_sS1 = 0x298, + SST_sT1 = 0x29c, + + SST_sDrawTriCMD = 0x2a0, + SST_sBeginTriCMD = 0x2a4, + + SST_bltSrcBaseAddr = 0x2c0, + SST_bltDstBaseAddr = 0x2c4, + SST_bltXYStrides = 0x2c8, + SST_bltSrcChromaRange = 0x2cc, + SST_bltDstChromaRange = 0x2d0, + SST_bltClipX = 0x2d4, + SST_bltClipY = 0x2d8, + + SST_bltSrcXY = 0x2e0, + SST_bltDstXY = 0x2e4, + SST_bltSize = 0x2e8, + SST_bltRop = 0x2ec, + SST_bltColor = 0x2f0, + + SST_bltCommand = 0x2f8, + SST_bltData = 0x2fc, + + SST_textureMode = 0x300, + SST_tLOD = 0x304, + SST_tDetail = 0x308, + SST_texBaseAddr = 0x30c, + SST_texBaseAddr1 = 0x310, + SST_texBaseAddr2 = 0x314, + SST_texBaseAddr38 = 0x318, + + SST_trexInit1 = 0x320, + + SST_nccTable0_Y0 = 0x324, + SST_nccTable0_Y1 = 0x328, + SST_nccTable0_Y2 = 0x32c, + SST_nccTable0_Y3 = 0x330, + SST_nccTable0_I0 = 0x334, + SST_nccTable0_I1 = 0x338, + SST_nccTable0_I2 = 0x33c, + SST_nccTable0_I3 = 0x340, + SST_nccTable0_Q0 = 0x344, + SST_nccTable0_Q1 = 0x348, + SST_nccTable0_Q2 = 0x34c, + SST_nccTable0_Q3 = 0x350, + + SST_nccTable1_Y0 = 0x354, + SST_nccTable1_Y1 = 0x358, + SST_nccTable1_Y2 = 0x35c, + SST_nccTable1_Y3 = 0x360, + SST_nccTable1_I0 = 0x364, + SST_nccTable1_I1 = 0x368, + SST_nccTable1_I2 = 0x36c, + SST_nccTable1_I3 = 0x370, + SST_nccTable1_Q0 = 0x374, + SST_nccTable1_Q1 = 0x378, + SST_nccTable1_Q2 = 0x37c, + SST_nccTable1_Q3 = 0x380, + + 
SST_remap_status = 0x000 | 0x400, + + SST_remap_vertexAx = 0x008 | 0x400, + SST_remap_vertexAy = 0x00c | 0x400, + SST_remap_vertexBx = 0x010 | 0x400, + SST_remap_vertexBy = 0x014 | 0x400, + SST_remap_vertexCx = 0x018 | 0x400, + SST_remap_vertexCy = 0x01c | 0x400, + + SST_remap_startR = 0x0020 | 0x400, + SST_remap_startG = 0x002c | 0x400, + SST_remap_startB = 0x0038 | 0x400, + SST_remap_startZ = 0x0044 | 0x400, + SST_remap_startA = 0x0050 | 0x400, + SST_remap_startS = 0x005c | 0x400, + SST_remap_startT = 0x0068 | 0x400, + SST_remap_startW = 0x0074 | 0x400, + + SST_remap_dRdX = 0x0024 | 0x400, + SST_remap_dGdX = 0x0030 | 0x400, + SST_remap_dBdX = 0x003c | 0x400, + SST_remap_dZdX = 0x0048 | 0x400, + SST_remap_dAdX = 0x0054 | 0x400, + SST_remap_dSdX = 0x0060 | 0x400, + SST_remap_dTdX = 0x006c | 0x400, + SST_remap_dWdX = 0x0078 | 0x400, + + SST_remap_dRdY = 0x0028 | 0x400, + SST_remap_dGdY = 0x0034 | 0x400, + SST_remap_dBdY = 0x0040 | 0x400, + SST_remap_dZdY = 0x004c | 0x400, + SST_remap_dAdY = 0x0058 | 0x400, + SST_remap_dSdY = 0x0064 | 0x400, + SST_remap_dTdY = 0x0070 | 0x400, + SST_remap_dWdY = 0x007c | 0x400, + + SST_remap_triangleCMD = 0x0080 | 0x400, + + SST_remap_fvertexAx = 0x088 | 0x400, + SST_remap_fvertexAy = 0x08c | 0x400, + SST_remap_fvertexBx = 0x090 | 0x400, + SST_remap_fvertexBy = 0x094 | 0x400, + SST_remap_fvertexCx = 0x098 | 0x400, + SST_remap_fvertexCy = 0x09c | 0x400, + + SST_remap_fstartR = 0x00a0 | 0x400, + SST_remap_fstartG = 0x00ac | 0x400, + SST_remap_fstartB = 0x00b8 | 0x400, + SST_remap_fstartZ = 0x00c4 | 0x400, + SST_remap_fstartA = 0x00d0 | 0x400, + SST_remap_fstartS = 0x00dc | 0x400, + SST_remap_fstartT = 0x00e8 | 0x400, + SST_remap_fstartW = 0x00f4 | 0x400, + + SST_remap_fdRdX = 0x00a4 | 0x400, + SST_remap_fdGdX = 0x00b0 | 0x400, + SST_remap_fdBdX = 0x00bc | 0x400, + SST_remap_fdZdX = 0x00c8 | 0x400, + SST_remap_fdAdX = 0x00d4 | 0x400, + SST_remap_fdSdX = 0x00e0 | 0x400, + SST_remap_fdTdX = 0x00ec | 0x400, + SST_remap_fdWdX = 0x00f8 | 
0x400, + + SST_remap_fdRdY = 0x00a8 | 0x400, + SST_remap_fdGdY = 0x00b4 | 0x400, + SST_remap_fdBdY = 0x00c0 | 0x400, + SST_remap_fdZdY = 0x00cc | 0x400, + SST_remap_fdAdY = 0x00d8 | 0x400, + SST_remap_fdSdY = 0x00e4 | 0x400, + SST_remap_fdTdY = 0x00f0 | 0x400, + SST_remap_fdWdY = 0x00fc | 0x400, +}; + +enum +{ + LFB_WRITE_FRONT = 0x0000, + LFB_WRITE_BACK = 0x0010, + LFB_WRITE_MASK = 0x0030 +}; + +enum +{ + LFB_READ_FRONT = 0x0000, + LFB_READ_BACK = 0x0040, + LFB_READ_AUX = 0x0080, + LFB_READ_MASK = 0x00c0 +}; + +enum +{ + LFB_FORMAT_RGB565 = 0, + LFB_FORMAT_RGB555 = 1, + LFB_FORMAT_ARGB1555 = 2, + LFB_FORMAT_ARGB8888 = 5, + LFB_FORMAT_DEPTH = 15, + LFB_FORMAT_MASK = 15 +}; + +enum +{ + LFB_WRITE_COLOUR = 1, + LFB_WRITE_DEPTH = 2 +}; + +enum +{ + FBZ_CHROMAKEY = (1 << 1), + FBZ_W_BUFFER = (1 << 3), + FBZ_DEPTH_ENABLE = (1 << 4), + + FBZ_DITHER = (1 << 8), + FBZ_RGB_WMASK = (1 << 9), + FBZ_DEPTH_WMASK = (1 << 10), + FBZ_DITHER_2x2 = (1 << 11), + + FBZ_DRAW_FRONT = 0x0000, + FBZ_DRAW_BACK = 0x4000, + FBZ_DRAW_MASK = 0xc000, + + FBZ_DEPTH_BIAS = (1 << 16), + + FBZ_DEPTH_SOURCE = (1 << 20), + + FBZ_PARAM_ADJUST = (1 << 26) +}; + +enum +{ + TEX_RGB332 = 0x0, + TEX_Y4I2Q2 = 0x1, + TEX_A8 = 0x2, + TEX_I8 = 0x3, + TEX_AI8 = 0x4, + TEX_PAL8 = 0x5, + TEX_APAL8 = 0x6, + TEX_ARGB8332 = 0x8, + TEX_A8Y4I2Q2 = 0x9, + TEX_R5G6B5 = 0xa, + TEX_ARGB1555 = 0xb, + TEX_ARGB4444 = 0xc, + TEX_A8I8 = 0xd, + TEX_APAL88 = 0xe +}; + +enum +{ + TEXTUREMODE_NCC_SEL = (1 << 5), + TEXTUREMODE_TCLAMPS = (1 << 6), + TEXTUREMODE_TCLAMPT = (1 << 7), + TEXTUREMODE_TRILINEAR = (1 << 30) +}; + +enum +{ + FBIINIT0_VGA_PASS = 1, + FBIINIT0_GRAPHICS_RESET = (1 << 1) +}; + +enum +{ + FBIINIT1_MULTI_SST = (1 << 2), /*Voodoo Graphics only*/ + FBIINIT1_VIDEO_RESET = (1 << 8), + FBIINIT1_SLI_ENABLE = (1 << 23) +}; + +enum +{ + FBIINIT2_SWAP_ALGORITHM_MASK = (3 << 9) +}; + +enum +{ + FBIINIT2_SWAP_ALGORITHM_DAC_VSYNC = (0 << 9), + FBIINIT2_SWAP_ALGORITHM_DAC_DATA = (1 << 9), + 
FBIINIT2_SWAP_ALGORITHM_PCI_FIFO_STALL = (2 << 9), + FBIINIT2_SWAP_ALGORITHM_SLI_SYNC = (3 << 9) +}; + +enum +{ + FBIINIT3_REMAP = 1 +}; + +enum +{ + FBIINIT5_MULTI_CVG = (1 << 14) +}; + +enum +{ + FBIINIT7_CMDFIFO_ENABLE = (1 << 8) +}; + +enum +{ + CC_LOCALSELECT_ITER_RGB = 0, + CC_LOCALSELECT_TEX = 1, + CC_LOCALSELECT_COLOR1 = 2, + CC_LOCALSELECT_LFB = 3 +}; + +enum +{ + CCA_LOCALSELECT_ITER_A = 0, + CCA_LOCALSELECT_COLOR0 = 1, + CCA_LOCALSELECT_ITER_Z = 2 +}; + +enum +{ + C_SEL_ITER_RGB = 0, + C_SEL_TEX = 1, + C_SEL_COLOR1 = 2, + C_SEL_LFB = 3 +}; + +enum +{ + A_SEL_ITER_A = 0, + A_SEL_TEX = 1, + A_SEL_COLOR1 = 2, + A_SEL_LFB = 3 +}; + +enum +{ + CC_MSELECT_ZERO = 0, + CC_MSELECT_CLOCAL = 1, + CC_MSELECT_AOTHER = 2, + CC_MSELECT_ALOCAL = 3, + CC_MSELECT_TEX = 4, + CC_MSELECT_TEXRGB = 5 +}; + +enum +{ + CCA_MSELECT_ZERO = 0, + CCA_MSELECT_ALOCAL = 1, + CCA_MSELECT_AOTHER = 2, + CCA_MSELECT_ALOCAL2 = 3, + CCA_MSELECT_TEX = 4 +}; + +enum +{ + TC_MSELECT_ZERO = 0, + TC_MSELECT_CLOCAL = 1, + TC_MSELECT_AOTHER = 2, + TC_MSELECT_ALOCAL = 3, + TC_MSELECT_DETAIL = 4, + TC_MSELECT_LOD_FRAC = 5 +}; + +enum +{ + TCA_MSELECT_ZERO = 0, + TCA_MSELECT_CLOCAL = 1, + TCA_MSELECT_AOTHER = 2, + TCA_MSELECT_ALOCAL = 3, + TCA_MSELECT_DETAIL = 4, + TCA_MSELECT_LOD_FRAC = 5 +}; + +enum +{ + CC_ADD_CLOCAL = 1, + CC_ADD_ALOCAL = 2 +}; + +enum +{ + CCA_ADD_CLOCAL = 1, + CCA_ADD_ALOCAL = 2 +}; + +enum +{ + AFUNC_AZERO = 0x0, + AFUNC_ASRC_ALPHA = 0x1, + AFUNC_A_COLOR = 0x2, + AFUNC_ADST_ALPHA = 0x3, + AFUNC_AONE = 0x4, + AFUNC_AOMSRC_ALPHA = 0x5, + AFUNC_AOM_COLOR = 0x6, + AFUNC_AOMDST_ALPHA = 0x7, + AFUNC_ASATURATE = 0xf +}; + +enum +{ + AFUNC_ACOLORBEFOREFOG = 0xf +}; + +enum +{ + AFUNC_NEVER = 0, + AFUNC_LESSTHAN = 1, + AFUNC_EQUAL = 2, + AFUNC_LESSTHANEQUAL = 3, + AFUNC_GREATERTHAN = 4, + AFUNC_NOTEQUAL = 5, + AFUNC_GREATERTHANEQUAL = 6, + AFUNC_ALWAYS = 7 +}; + +enum +{ + DEPTHOP_NEVER = 0, + DEPTHOP_LESSTHAN = 1, + DEPTHOP_EQUAL = 2, + DEPTHOP_LESSTHANEQUAL = 3, + DEPTHOP_GREATERTHAN 
= 4, + DEPTHOP_NOTEQUAL = 5, + DEPTHOP_GREATERTHANEQUAL = 6, + DEPTHOP_ALWAYS = 7 +}; + +enum +{ + FOG_ENABLE = 0x01, + FOG_ADD = 0x02, + FOG_MULT = 0x04, + FOG_ALPHA = 0x08, + FOG_Z = 0x10, + FOG_W = 0x18, + FOG_CONSTANT = 0x20 +}; + +enum +{ + LOD_ODD = (1 << 18), + LOD_SPLIT = (1 << 19), + LOD_S_IS_WIDER = (1 << 20), + LOD_TMULTIBASEADDR = (1 << 24), + LOD_TMIRROR_S = (1 << 28), + LOD_TMIRROR_T = (1 << 29) +}; +enum +{ + CMD_INVALID = 0, + CMD_DRAWTRIANGLE, + CMD_FASTFILL, + CMD_SWAPBUF +}; + +enum +{ + FBZCP_TEXTURE_ENABLED = (1 << 27) +}; + +enum +{ + BLTCMD_SRC_TILED = (1 << 14), + BLTCMD_DST_TILED = (1 << 15) +}; + +enum +{ + INITENABLE_SLI_MASTER_SLAVE = (1 << 11) +}; + +enum +{ + SETUPMODE_RGB = (1 << 0), + SETUPMODE_ALPHA = (1 << 1), + SETUPMODE_Z = (1 << 2), + SETUPMODE_Wb = (1 << 3), + SETUPMODE_W0 = (1 << 4), + SETUPMODE_S0_T0 = (1 << 5), + SETUPMODE_W1 = (1 << 6), + SETUPMODE_S1_T1 = (1 << 7), + + SETUPMODE_STRIP_MODE = (1 << 16), + SETUPMODE_CULLING_ENABLE = (1 << 17), + SETUPMODE_CULLING_SIGN = (1 << 18), + SETUPMODE_DISABLE_PINGPONG = (1 << 19) +}; + +#define TEXTUREMODE_MASK 0x3ffff000 +#define TEXTUREMODE_PASSTHROUGH 0 + +#define TEXTUREMODE_LOCAL_MASK 0x00643000 +#define TEXTUREMODE_LOCAL 0x00241000 + + +#define SLI_ENABLED (voodoo->fbiInit1 & FBIINIT1_SLI_ENABLE) +#define TRIPLE_BUFFER ((voodoo->fbiInit2 & 0x10) || (voodoo->fbiInit5 & 0x600) == 0x400) + + +#define _rgb_sel ( params->fbzColorPath & 3) +#define a_sel ( (params->fbzColorPath >> 2) & 3) +#define cc_localselect ( params->fbzColorPath & (1 << 4)) +#define cca_localselect ( (params->fbzColorPath >> 5) & 3) +#define cc_localselect_override ( params->fbzColorPath & (1 << 7)) +#define cc_zero_other ( params->fbzColorPath & (1 << 8)) +#define cc_sub_clocal ( params->fbzColorPath & (1 << 9)) +#define cc_mselect ( (params->fbzColorPath >> 10) & 7) +#define cc_reverse_blend ( params->fbzColorPath & (1 << 13)) +#define cc_add ( (params->fbzColorPath >> 14) & 3) +#define cc_add_alocal ( 
params->fbzColorPath & (1 << 15)) +#define cc_invert_output ( params->fbzColorPath & (1 << 16)) +#define cca_zero_other ( params->fbzColorPath & (1 << 17)) +#define cca_sub_clocal ( params->fbzColorPath & (1 << 18)) +#define cca_mselect ( (params->fbzColorPath >> 19) & 7) +#define cca_reverse_blend ( params->fbzColorPath & (1 << 22)) +#define cca_add ( (params->fbzColorPath >> 23) & 3) +#define cca_invert_output ( params->fbzColorPath & (1 << 25)) +#define tc_zero_other (params->textureMode[0] & (1 << 12)) +#define tc_sub_clocal (params->textureMode[0] & (1 << 13)) +#define tc_mselect ((params->textureMode[0] >> 14) & 7) +#define tc_reverse_blend (params->textureMode[0] & (1 << 17)) +#define tc_add_clocal (params->textureMode[0] & (1 << 18)) +#define tc_add_alocal (params->textureMode[0] & (1 << 19)) +#define tc_invert_output (params->textureMode[0] & (1 << 20)) +#define tca_zero_other (params->textureMode[0] & (1 << 21)) +#define tca_sub_clocal (params->textureMode[0] & (1 << 22)) +#define tca_mselect ((params->textureMode[0] >> 23) & 7) +#define tca_reverse_blend (params->textureMode[0] & (1 << 26)) +#define tca_add_clocal (params->textureMode[0] & (1 << 27)) +#define tca_add_alocal (params->textureMode[0] & (1 << 28)) +#define tca_invert_output (params->textureMode[0] & (1 << 29)) + +#define tc_sub_clocal_1 (params->textureMode[1] & (1 << 13)) +#define tc_mselect_1 ((params->textureMode[1] >> 14) & 7) +#define tc_reverse_blend_1 (params->textureMode[1] & (1 << 17)) +#define tc_add_clocal_1 (params->textureMode[1] & (1 << 18)) +#define tc_add_alocal_1 (params->textureMode[1] & (1 << 19)) +#define tca_sub_clocal_1 (params->textureMode[1] & (1 << 22)) +#define tca_mselect_1 ((params->textureMode[1] >> 23) & 7) +#define tca_reverse_blend_1 (params->textureMode[1] & (1 << 26)) +#define tca_add_clocal_1 (params->textureMode[1] & (1 << 27)) +#define tca_add_alocal_1 (params->textureMode[1] & (1 << 28)) + +#define src_afunc ( (params->alphaMode >> 8) & 0xf) +#define 
dest_afunc ( (params->alphaMode >> 12) & 0xf) +#define alpha_func ( (params->alphaMode >> 1) & 7) +#define a_ref ( params->alphaMode >> 24) +#define depth_op ( (params->fbzMode >> 5) & 7) +#define dither ( params->fbzMode & FBZ_DITHER) +#define dither2x2 (params->fbzMode & FBZ_DITHER_2x2) diff --git a/pcem/vid_voodoo_render.cpp b/pcem/vid_voodoo_render.cpp new file mode 100644 index 00000000..21161807 --- /dev/null +++ b/pcem/vid_voodoo_render.cpp @@ -0,0 +1,1640 @@ +#include +#include +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "thread.h" +#include "video.h" +#include "vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_dither.h" +#include "vid_voodoo_regs.h" +#include "vid_voodoo_render.h" +#include "vid_voodoo_texture.h" + +typedef struct voodoo_state_t +{ + int xstart, xend, xdir; + uint32_t base_r, base_g, base_b, base_a, base_z; + struct + { + int64_t base_s, base_t, base_w; + int lod; + } tmu[2]; + int64_t base_w; + int lod; + int lod_min[2], lod_max[2]; + int dx1, dx2; + int y, yend, ydir; + int32_t dxAB, dxAC, dxBC; + int tex_b[2], tex_g[2], tex_r[2], tex_a[2]; + int tex_s, tex_t; + int clamp_s[2], clamp_t[2]; + + int32_t vertexAx, vertexAy, vertexBx, vertexBy, vertexCx, vertexCy; + + uint32_t *tex[2][LOD_MAX+1]; + int tformat; + + int *tex_w_mask[2]; + int *tex_h_mask[2]; + int *tex_shift[2]; + int *tex_lod[2]; + + uint16_t *fb_mem, *aux_mem; + + int32_t ib, ig, ir, ia; + int32_t z; + + int32_t new_depth; + + int64_t tmu0_s, tmu0_t; + int64_t tmu0_w; + int64_t tmu1_s, tmu1_t; + int64_t tmu1_w; + int64_t w; + + int pixel_count, texel_count; + int x, x2, x_tiled; + + uint32_t w_depth; + + float log_temp; + uint32_t ebp_store; + uint32_t texBaseAddr; + + int lod_frac[2]; +} voodoo_state_t; + +static int voodoo_output = 0; + +static uint8_t logtable[256] = +{ + 0x00,0x01,0x02,0x04,0x05,0x07,0x08,0x09,0x0b,0x0c,0x0e,0x0f,0x10,0x12,0x13,0x15, + 
0x16,0x17,0x19,0x1a,0x1b,0x1d,0x1e,0x1f,0x21,0x22,0x23,0x25,0x26,0x27,0x28,0x2a,
+        0x2b,0x2c,0x2e,0x2f,0x30,0x31,0x33,0x34,0x35,0x36,0x38,0x39,0x3a,0x3b,0x3d,0x3e,
+        0x3f,0x40,0x41,0x43,0x44,0x45,0x46,0x47,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x50,0x51,
+        0x52,0x53,0x54,0x55,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x60,0x61,0x62,0x63,
+        0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,
+        0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,0x80,0x81,0x83,0x84,0x85,
+        0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8c,0x8d,0x8e,0x8f,0x90,0x91,0x92,0x93,0x94,
+        0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,0xa0,0xa1,0xa2,0xa2,0xa3,
+        0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
+        0xb3,0xb4,0xb5,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbc,0xbd,0xbe,0xbf,0xc0,
+        0xc1,0xc2,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xcd,
+        0xce,0xcf,0xd0,0xd1,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd6,0xd7,0xd8,0xd9,0xda,0xda,
+        0xdb,0xdc,0xdd,0xde,0xde,0xdf,0xe0,0xe1,0xe1,0xe2,0xe3,0xe4,0xe5,0xe5,0xe6,0xe7,
+        0xe8,0xe8,0xe9,0xea,0xeb,0xeb,0xec,0xed,0xee,0xef,0xef,0xf0,0xf1,0xf2,0xf2,0xf3,
+        0xf4,0xf5,0xf5,0xf6,0xf7,0xf7,0xf8,0xf9,0xfa,0xfa,0xfb,0xfc,0xfd,0xfd,0xfe,0xff
+};
+
+/*
+ * Table-driven logarithm of a 64-bit value.
+ *
+ * The result packs the position of the highest set bit (0..62) into bits 8
+ * and up, and logtable[] indexed by the eight bits directly below that
+ * leading bit into bits 0..7.
+ *
+ * Returns 0x80000000 when val is zero or has bit 63 set.
+ */
+static inline int fastlog(uint64_t val)
+{
+        const uint64_t input = val;
+        int msb = 63;
+        int mantissa;
+        int step;
+
+        if (val == 0 || (val >> 63) != 0)
+                return 0x80000000;
+
+        /* Binary search for the leading one: whenever the top 'step' bits are
+           all clear, shift them out and lower the bit position accordingly. */
+        for (step = 32; step >= 1; step >>= 1)
+        {
+                if ((val >> (64 - step)) == 0)
+                {
+                        msb -= step;
+                        val <<= step;
+                }
+        }
+
+        /* Align the eight bits below the leading one to bits 0..7. */
+        if (msb < 8)
+                mantissa = (input << (8 - msb)) & 0xff;
+        else
+                mantissa = (input >> (msb - 8)) & 0xff;
+
+        return (msb << 8) | logtable[mantissa];
+}
+
+static inline int 
voodoo_fls(uint16_t val) +{ + int num = 0; + +//pclog("fls(%04x) = ", val); + if (!(val & 0xff00)) + { + num += 8; + val <<= 8; + } + if (!(val & 0xf000)) + { + num += 4; + val <<= 4; + } + if (!(val & 0xc000)) + { + num += 2; + val <<= 2; + } + if (!(val & 0x8000)) + { + num += 1; + val <<= 1; + } +//pclog("%i %04x\n", num, val); + return num; +} + +typedef struct voodoo_texture_state_t +{ + int s, t; + int w_mask, h_mask; + int tex_shift; +} voodoo_texture_state_t; + +static inline void tex_read(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int tmu) +{ + uint32_t dat; + + if (texture_state->s & ~texture_state->w_mask) + { + if (state->clamp_s[tmu]) + { + if (texture_state->s < 0) + texture_state->s = 0; + if (texture_state->s > texture_state->w_mask) + texture_state->s = texture_state->w_mask; + } + else + texture_state->s &= texture_state->w_mask; + } + if (texture_state->t & ~texture_state->h_mask) + { + if (state->clamp_t[tmu]) + { + if (texture_state->t < 0) + texture_state->t = 0; + if (texture_state->t > texture_state->h_mask) + texture_state->t = texture_state->h_mask; + } + else + texture_state->t &= texture_state->h_mask; + } + + dat = state->tex[tmu][state->lod][texture_state->s + (texture_state->t << texture_state->tex_shift)]; + + state->tex_b[tmu] = dat & 0xff; + state->tex_g[tmu] = (dat >> 8) & 0xff; + state->tex_r[tmu] = (dat >> 16) & 0xff; + state->tex_a[tmu] = (dat >> 24) & 0xff; +} + +#define LOW4(x) ((x & 0x0f) | ((x & 0x0f) << 4)) +#define HIGH4(x) ((x & 0xf0) | ((x & 0xf0) >> 4)) + +static inline void tex_read_4(voodoo_state_t *state, voodoo_texture_state_t *texture_state, int s, int t, int *d, int tmu, int x) +{ + rgba_u dat[4]; + + if (((s | (s + 1)) & ~texture_state->w_mask) || ((t | (t + 1)) & ~texture_state->h_mask)) + { + int c; + for (c = 0; c < 4; c++) + { + int _s = s + (c & 1); + int _t = t + ((c & 2) >> 1); + + if (_s & ~texture_state->w_mask) + { + if (state->clamp_s[tmu]) + { + if (_s < 0) + _s = 0; + if (_s > 
texture_state->w_mask)
+                                                _s = texture_state->w_mask;
+                                }
+                                else
+                                        _s &= texture_state->w_mask;
+                        }
+                        if (_t & ~texture_state->h_mask)
+                        {
+                                if (state->clamp_t[tmu])
+                                {
+                                        if (_t < 0)
+                                                _t = 0;
+                                        if (_t > texture_state->h_mask)
+                                                _t = texture_state->h_mask;
+                                }
+                                else
+                                        _t &= texture_state->h_mask;
+                        }
+                        dat[c].u = state->tex[tmu][state->lod][_s + (_t << texture_state->tex_shift)];
+                }
+        }
+        else
+        {
+                dat[0].u = state->tex[tmu][state->lod][s + (t << texture_state->tex_shift)];
+                dat[1].u = state->tex[tmu][state->lod][s + 1 + (t << texture_state->tex_shift)];
+                dat[2].u = state->tex[tmu][state->lod][s + ((t + 1) << texture_state->tex_shift)];
+                dat[3].u = state->tex[tmu][state->lod][s + 1 + ((t + 1) << texture_state->tex_shift)];
+        }
+
+        state->tex_r[tmu] = (dat[0].rgba.r * d[0] + dat[1].rgba.r * d[1] + dat[2].rgba.r * d[2] + dat[3].rgba.r * d[3]) >> 8;
+        state->tex_g[tmu] = (dat[0].rgba.g * d[0] + dat[1].rgba.g * d[1] + dat[2].rgba.g * d[2] + dat[3].rgba.g * d[3]) >> 8;
+        state->tex_b[tmu] = (dat[0].rgba.b * d[0] + dat[1].rgba.b * d[1] + dat[2].rgba.b * d[2] + dat[3].rgba.b * d[3]) >> 8;
+        state->tex_a[tmu] = (dat[0].rgba.a * d[0] + dat[1].rgba.a * d[1] + dat[2].rgba.a * d[2] + dat[3].rgba.a * d[3]) >> 8;
+}
+
+/*
+ * Sample the texture of one TMU at state->tex_s / state->tex_t for the
+ * LOD currently selected in state->lod, leaving the texel colour in
+ * state->tex_r/g/b/a[tmu].
+ *
+ * When bilinear filtering is enabled (voodoo->bilinear_enabled and either of
+ * bits 1..2 of textureMode set) a 2x2 block is blended via tex_read_4();
+ * otherwise a single texel is fetched via tex_read().  state->tex_s/tex_t
+ * are modified in place by mirroring and by the bilinear half-texel offset.
+ */
+static inline void voodoo_get_texture(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x)
+{
+        voodoo_texture_state_t ts;
+        int weights[4];
+        int frac_s, frac_t;
+        int s, t;
+        const int lod_shift = state->tex_lod[tmu][state->lod];
+
+        ts.w_mask = state->tex_w_mask[tmu][state->lod];
+        ts.h_mask = state->tex_h_mask[tmu][state->lod];
+        ts.tex_shift = 8 - lod_shift;
+
+        /* Mirroring: invert the coordinate when bit 12 of it is set. */
+        if ((params->tLOD[tmu] & LOD_TMIRROR_S) && (state->tex_s & 0x1000))
+                state->tex_s = ~state->tex_s;
+        if ((params->tLOD[tmu] & LOD_TMIRROR_T) && (state->tex_t & 0x1000))
+                state->tex_t = ~state->tex_t;
+
+        if (!(voodoo->bilinear_enabled && (params->textureMode[tmu] & 6)))
+        {
+                /* Point sampling: drop the four fractional bits and the LOD scale. */
+                ts.s = state->tex_s >> (4 + lod_shift);
+                ts.t = state->tex_t >> (4 + lod_shift);
+                tex_read(state, &ts, tmu);
+                return;
+        }
+
+        /* Bilinear: shift the sample point back by half a texel first. */
+        state->tex_s -= 1 << (3 + lod_shift);
+        state->tex_t -= 1 << (3 + lod_shift);
+
+        s = state->tex_s >> lod_shift;
+        t = state->tex_t >> lod_shift;
+
+        /* Low four bits are the sub-texel position, the rest the texel index. */
+        frac_s = s & 0xf;
+        frac_t = t & 0xf;
+        s >>= 4;
+        t >>= 4;
+
+        /* The four weights always sum to 256, matching the >> 8 in tex_read_4(). */
+        weights[0] = (16 - frac_s) * (16 - frac_t);
+        weights[1] = frac_s * (16 - frac_t);
+        weights[2] = (16 - frac_s) * frac_t;
+        weights[3] = frac_s * frac_t;
+
+        tex_read_4(state, &ts, s, t, weights, tmu, x);
+}
+
+static inline void voodoo_tmu_fetch(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int tmu, int x)
+{
+        if (params->textureMode[tmu] & 1)
+        {
+                int64_t _w = 0;
+
+                if (tmu)
+                {
+                        if 
(state->tmu1_w) + _w = (int64_t)((1ULL << 48) / state->tmu1_w); + state->tex_s = (int32_t)(((((state->tmu1_s + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); + state->tex_t = (int32_t)(((((state->tmu1_t + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); + } + else + { + if (state->tmu0_w) + _w = (int64_t)((1ULL << 48) / state->tmu0_w); + state->tex_s = (int32_t)(((((state->tmu0_s + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); + state->tex_t = (int32_t)(((((state->tmu0_t + (1 << 13)) >> 14) * _w) + (1 << 29)) >> 30); + } + + state->lod = state->tmu[tmu].lod + (fastlog(_w) - (19 << 8)); + } + else + { + if (tmu) + { + state->tex_s = (int32_t)(state->tmu1_s >> (14+14)); + state->tex_t = (int32_t)(state->tmu1_t >> (14+14)); + } + else + { + state->tex_s = (int32_t)(state->tmu0_s >> (14+14)); + state->tex_t = (int32_t)(state->tmu0_t >> (14+14)); + } + state->lod = state->tmu[tmu].lod; + } + + if (state->lod < state->lod_min[tmu]) + state->lod = state->lod_min[tmu]; + else if (state->lod > state->lod_max[tmu]) + state->lod = state->lod_max[tmu]; + state->lod_frac[tmu] = state->lod & 0xff; + state->lod >>= 8; + + voodoo_get_texture(voodoo, params, state, tmu, x); +} + + +/*Perform texture fetch and blending for both TMUs*/ +static inline void voodoo_tmu_fetch_and_blend(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int x) +{ + int r,g,b,a; + int c_reverse, a_reverse; +// int c_reverse1, a_reverse1; + int factor_r = 0, factor_g = 0, factor_b = 0, factor_a = 0; + + voodoo_tmu_fetch(voodoo, params, state, 1, x); + + if ((params->textureMode[1] & TEXTUREMODE_TRILINEAR) && (state->lod & 1)) + { + c_reverse = tc_reverse_blend; + a_reverse = tca_reverse_blend; + } + else + { + c_reverse = !tc_reverse_blend; + a_reverse = !tca_reverse_blend; + } +/* c_reverse1 = c_reverse; + a_reverse1 = a_reverse;*/ + if (tc_sub_clocal_1) + { + switch (tc_mselect_1) + { + case TC_MSELECT_ZERO: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_CLOCAL: + factor_r = 
state->tex_r[1]; + factor_g = state->tex_g[1]; + factor_b = state->tex_b[1]; + break; + case TC_MSELECT_AOTHER: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_ALOCAL: + factor_r = factor_g = factor_b = state->tex_a[1]; + break; + case TC_MSELECT_DETAIL: + factor_r = (params->detail_bias[1] - state->lod) << params->detail_scale[1]; + if (factor_r > params->detail_max[1]) + factor_r = params->detail_max[1]; + factor_g = factor_b = factor_r; + break; + case TC_MSELECT_LOD_FRAC: + factor_r = factor_g = factor_b = state->lod_frac[1]; + break; + } + if (!c_reverse) + { + r = (-state->tex_r[1] * (factor_r + 1)) >> 8; + g = (-state->tex_g[1] * (factor_g + 1)) >> 8; + b = (-state->tex_b[1] * (factor_b + 1)) >> 8; + } + else + { + r = (-state->tex_r[1] * ((factor_r^0xff) + 1)) >> 8; + g = (-state->tex_g[1] * ((factor_g^0xff) + 1)) >> 8; + b = (-state->tex_b[1] * ((factor_b^0xff) + 1)) >> 8; + } + if (tc_add_clocal_1) + { + r += state->tex_r[1]; + g += state->tex_g[1]; + b += state->tex_b[1]; + } + else if (tc_add_alocal_1) + { + r += state->tex_a[1]; + g += state->tex_a[1]; + b += state->tex_a[1]; + } + state->tex_r[1] = CLAMP(r); + state->tex_g[1] = CLAMP(g); + state->tex_b[1] = CLAMP(b); + } + if (tca_sub_clocal_1) + { + switch (tca_mselect_1) + { + case TCA_MSELECT_ZERO: + factor_a = 0; + break; + case TCA_MSELECT_CLOCAL: + factor_a = state->tex_a[1]; + break; + case TCA_MSELECT_AOTHER: + factor_a = 0; + break; + case TCA_MSELECT_ALOCAL: + factor_a = state->tex_a[1]; + break; + case TCA_MSELECT_DETAIL: + factor_a = (params->detail_bias[1] - state->lod) << params->detail_scale[1]; + if (factor_a > params->detail_max[1]) + factor_a = params->detail_max[1]; + break; + case TCA_MSELECT_LOD_FRAC: + factor_a = state->lod_frac[1]; + break; + } + if (!a_reverse) + a = (-state->tex_a[1] * ((factor_a ^ 0xff) + 1)) >> 8; + else + a = (-state->tex_a[1] * (factor_a + 1)) >> 8; + if (tca_add_clocal_1 || tca_add_alocal_1) + a += state->tex_a[1]; + state->tex_a[1] = 
CLAMP(a); + } + + + voodoo_tmu_fetch(voodoo, params, state, 0, x); + + if ((params->textureMode[0] & TEXTUREMODE_TRILINEAR) && (state->lod & 1)) + { + c_reverse = tc_reverse_blend; + a_reverse = tca_reverse_blend; + } + else + { + c_reverse = !tc_reverse_blend; + a_reverse = !tca_reverse_blend; + } + + if (!tc_zero_other) + { + r = state->tex_r[1]; + g = state->tex_g[1]; + b = state->tex_b[1]; + } + else + r = g = b = 0; + if (tc_sub_clocal) + { + r -= state->tex_r[0]; + g -= state->tex_g[0]; + b -= state->tex_b[0]; + } + switch (tc_mselect) + { + case TC_MSELECT_ZERO: + factor_r = factor_g = factor_b = 0; + break; + case TC_MSELECT_CLOCAL: + factor_r = state->tex_r[0]; + factor_g = state->tex_g[0]; + factor_b = state->tex_b[0]; + break; + case TC_MSELECT_AOTHER: + factor_r = factor_g = factor_b = state->tex_a[1]; + break; + case TC_MSELECT_ALOCAL: + factor_r = factor_g = factor_b = state->tex_a[0]; + break; + case TC_MSELECT_DETAIL: + factor_r = (params->detail_bias[0] - state->lod) << params->detail_scale[0]; + if (factor_r > params->detail_max[0]) + factor_r = params->detail_max[0]; + factor_g = factor_b = factor_r; + break; + case TC_MSELECT_LOD_FRAC: + factor_r = factor_g = factor_b = state->lod_frac[0]; + break; + } + if (!c_reverse) + { + r = (r * (factor_r + 1)) >> 8; + g = (g * (factor_g + 1)) >> 8; + b = (b * (factor_b + 1)) >> 8; + } + else + { + r = (r * ((factor_r^0xff) + 1)) >> 8; + g = (g * ((factor_g^0xff) + 1)) >> 8; + b = (b * ((factor_b^0xff) + 1)) >> 8; + } + if (tc_add_clocal) + { + r += state->tex_r[0]; + g += state->tex_g[0]; + b += state->tex_b[0]; + } + else if (tc_add_alocal) + { + r += state->tex_a[0]; + g += state->tex_a[0]; + b += state->tex_a[0]; + } + + if (!tca_zero_other) + a = state->tex_a[1]; + else + a = 0; + if (tca_sub_clocal) + a -= state->tex_a[0]; + switch (tca_mselect) + { + case TCA_MSELECT_ZERO: + factor_a = 0; + break; + case TCA_MSELECT_CLOCAL: + factor_a = state->tex_a[0]; + break; + case TCA_MSELECT_AOTHER: + factor_a 
= state->tex_a[1]; + break; + case TCA_MSELECT_ALOCAL: + factor_a = state->tex_a[0]; + break; + case TCA_MSELECT_DETAIL: + factor_a = (params->detail_bias[0] - state->lod) << params->detail_scale[0]; + if (factor_a > params->detail_max[0]) + factor_a = params->detail_max[0]; + break; + case TCA_MSELECT_LOD_FRAC: + factor_a = state->lod_frac[0]; + break; + } + if (a_reverse) + a = (a * ((factor_a ^ 0xff) + 1)) >> 8; + else + a = (a * (factor_a + 1)) >> 8; + if (tca_add_clocal || tca_add_alocal) + a += state->tex_a[0]; + + + state->tex_r[0] = CLAMP(r); + state->tex_g[0] = CLAMP(g); + state->tex_b[0] = CLAMP(b); + state->tex_a[0] = CLAMP(a); + + if (tc_invert_output) + { + state->tex_r[0] ^= 0xff; + state->tex_g[0] ^= 0xff; + state->tex_b[0] ^= 0xff; + } + if (tca_invert_output) + state->tex_a[0] ^= 0xff; +} + +#if (defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined WIN32 || defined _WIN32 || defined _WIN32) && !(defined __amd64__) +#include "vid_voodoo_codegen_x86.h" +#elif (defined __amd64__) +#include "vid_voodoo_codegen_x86-64.h" +#else +int voodoo_recomp = 0; +#endif + +static void voodoo_half_triangle(voodoo_t *voodoo, voodoo_params_t *params, voodoo_state_t *state, int ystart, int yend, int odd_even) +{ +/* int rgb_sel = params->fbzColorPath & 3; + int a_sel = (params->fbzColorPath >> 2) & 3; + int cc_localselect = params->fbzColorPath & (1 << 4); + int cca_localselect = (params->fbzColorPath >> 5) & 3; + int cc_localselect_override = params->fbzColorPath & (1 << 7); + int cc_zero_other = params->fbzColorPath & (1 << 8); + int cc_sub_clocal = params->fbzColorPath & (1 << 9); + int cc_mselect = (params->fbzColorPath >> 10) & 7; + int cc_reverse_blend = params->fbzColorPath & (1 << 13); + int cc_add = (params->fbzColorPath >> 14) & 3; + int cc_add_alocal = params->fbzColorPath & (1 << 15); + int cc_invert_output = params->fbzColorPath & (1 << 16); + int cca_zero_other = params->fbzColorPath & (1 << 17); + int cca_sub_clocal = 
params->fbzColorPath & (1 << 18); + int cca_mselect = (params->fbzColorPath >> 19) & 7; + int cca_reverse_blend = params->fbzColorPath & (1 << 22); + int cca_add = (params->fbzColorPath >> 23) & 3; + int cca_invert_output = params->fbzColorPath & (1 << 25); + int src_afunc = (params->alphaMode >> 8) & 0xf; + int dest_afunc = (params->alphaMode >> 12) & 0xf; + int alpha_func = (params->alphaMode >> 1) & 7; + int a_ref = params->alphaMode >> 24; + int depth_op = (params->fbzMode >> 5) & 7; + int dither = params->fbzMode & FBZ_DITHER;*/ + int texels; + int c; +#ifndef NO_CODEGEN + uint8_t (*voodoo_draw)(voodoo_state_t *state, voodoo_params_t *params, int x, int real_y); +#endif + int y_diff = SLI_ENABLED ? 2 : 1; + + if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH || + (params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL) + texels = 1; + else + texels = 2; + + state->clamp_s[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPS; + state->clamp_t[0] = params->textureMode[0] & TEXTUREMODE_TCLAMPT; + state->clamp_s[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPS; + state->clamp_t[1] = params->textureMode[1] & TEXTUREMODE_TCLAMPT; +// int last_x; +// pclog("voodoo_triangle : bottom-half %X %X %X %X %X %i %i %i %i\n", xstart, xend, dx1, dx2, dx2 * 36, xdir, y, yend, ydir); + + for (c = 0; c <= LOD_MAX; c++) + { + state->tex[0][c] = &voodoo->texture_cache[0][params->tex_entry[0]].data[texture_offset[c]]; + state->tex[1][c] = &voodoo->texture_cache[1][params->tex_entry[1]].data[texture_offset[c]]; + } + + state->tformat = params->tformat[0]; + + state->tex_w_mask[0] = params->tex_w_mask[0]; + state->tex_h_mask[0] = params->tex_h_mask[0]; + state->tex_shift[0] = params->tex_shift[0]; + state->tex_lod[0] = params->tex_lod[0]; + state->tex_w_mask[1] = params->tex_w_mask[1]; + state->tex_h_mask[1] = params->tex_h_mask[1]; + state->tex_shift[1] = params->tex_shift[1]; + state->tex_lod[1] = params->tex_lod[1]; + + if ((params->fbzMode & 
1) && (ystart < params->clipLowY)) + { + int dy = params->clipLowY - ystart; + + state->base_r += params->dRdY*dy; + state->base_g += params->dGdY*dy; + state->base_b += params->dBdY*dy; + state->base_a += params->dAdY*dy; + state->base_z += params->dZdY*dy; + state->tmu[0].base_s += params->tmu[0].dSdY*dy; + state->tmu[0].base_t += params->tmu[0].dTdY*dy; + state->tmu[0].base_w += params->tmu[0].dWdY*dy; + state->tmu[1].base_s += params->tmu[1].dSdY*dy; + state->tmu[1].base_t += params->tmu[1].dTdY*dy; + state->tmu[1].base_w += params->tmu[1].dWdY*dy; + state->base_w += params->dWdY*dy; + state->xstart += state->dx1*dy; + state->xend += state->dx2*dy; + + ystart = params->clipLowY; + } + + if ((params->fbzMode & 1) && (yend >= params->clipHighY)) + yend = params->clipHighY; + + state->y = ystart; +// yend--; + + if (SLI_ENABLED) + { + int test_y; + + if (params->fbzMode & (1 << 17)) + test_y = (voodoo->v_disp-1) - state->y; + else + test_y = state->y; + + if ((!(voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && (test_y & 1)) || + ((voodoo->initEnable & INITENABLE_SLI_MASTER_SLAVE) && !(test_y & 1))) + { + state->y++; + + state->base_r += params->dRdY; + state->base_g += params->dGdY; + state->base_b += params->dBdY; + state->base_a += params->dAdY; + state->base_z += params->dZdY; + state->tmu[0].base_s += params->tmu[0].dSdY; + state->tmu[0].base_t += params->tmu[0].dTdY; + state->tmu[0].base_w += params->tmu[0].dWdY; + state->tmu[1].base_s += params->tmu[1].dSdY; + state->tmu[1].base_t += params->tmu[1].dTdY; + state->tmu[1].base_w += params->tmu[1].dWdY; + state->base_w += params->dWdY; + state->xstart += state->dx1; + state->xend += state->dx2; + } + } +#ifndef NO_CODEGEN + if (voodoo->use_recompiler) + voodoo_draw = voodoo_get_block(voodoo, params, state, odd_even); + else + voodoo_draw = NULL; +#endif + + if (voodoo_output) + pclog("dxAB=%08x dxBC=%08x dxAC=%08x\n", state->dxAB, state->dxBC, state->dxAC); +// pclog("Start %i %i\n", ystart, voodoo->fbzMode 
& (1 << 17)); + + for (; state->y < yend; state->y += y_diff) + { + int x, x2; + int real_y = (state->y << 4) + 8; + int start_x, start_x2; + int dx; + uint16_t *fb_mem, *aux_mem; + + state->ir = state->base_r; + state->ig = state->base_g; + state->ib = state->base_b; + state->ia = state->base_a; + state->z = state->base_z; + state->tmu0_s = state->tmu[0].base_s; + state->tmu0_t = state->tmu[0].base_t; + state->tmu0_w = state->tmu[0].base_w; + state->tmu1_s = state->tmu[1].base_s; + state->tmu1_t = state->tmu[1].base_t; + state->tmu1_w = state->tmu[1].base_w; + state->w = state->base_w; + + x = (state->vertexAx << 12) + ((state->dxAC * (real_y - state->vertexAy)) >> 4); + + if (real_y < state->vertexBy) + x2 = (state->vertexAx << 12) + ((state->dxAB * (real_y - state->vertexAy)) >> 4); + else + x2 = (state->vertexBx << 12) + ((state->dxBC * (real_y - state->vertexBy)) >> 4); + + if (params->fbzMode & (1 << 17)) + real_y = (voodoo->v_disp-1) - (real_y >> 4); + else + real_y >>= 4; + + if (SLI_ENABLED) + { + if (((real_y >> 1) & voodoo->odd_even_mask) != odd_even) + goto next_line; + } + else + { + if ((real_y & voodoo->odd_even_mask) != odd_even) + goto next_line; + } + + start_x = x; + + if (state->xdir > 0) + x2 -= (1 << 16); + else + x -= (1 << 16); + dx = ((x + 0x7000) >> 16) - (((state->vertexAx << 12) + 0x7000) >> 16); + start_x2 = x + 0x7000; + x = (x + 0x7000) >> 16; + x2 = (x2 + 0x7000) >> 16; + + if (voodoo_output) + pclog("%03i:%03i : Ax=%08x start_x=%08x dSdX=%016llx dx=%08x s=%08x -> ", x, state->y, state->vertexAx << 8, start_x, params->tmu[0].dTdX, dx, state->tmu0_t); + + state->ir += (params->dRdX * dx); + state->ig += (params->dGdX * dx); + state->ib += (params->dBdX * dx); + state->ia += (params->dAdX * dx); + state->z += (params->dZdX * dx); + state->tmu0_s += (params->tmu[0].dSdX * dx); + state->tmu0_t += (params->tmu[0].dTdX * dx); + state->tmu0_w += (params->tmu[0].dWdX * dx); + state->tmu1_s += (params->tmu[1].dSdX * dx); + state->tmu1_t += 
(params->tmu[1].dTdX * dx); + state->tmu1_w += (params->tmu[1].dWdX * dx); + state->w += (params->dWdX * dx); + + if (voodoo_output) + pclog("%08llx %lli %lli\n", state->tmu0_t, state->tmu0_t >> (18+state->lod), (state->tmu0_t + (1 << (17+state->lod))) >> (18+state->lod)); + + if (params->fbzMode & 1) + { + if (state->xdir > 0) + { + if (x < params->clipLeft) + { + int dx = params->clipLeft - x; + + state->ir += params->dRdX*dx; + state->ig += params->dGdX*dx; + state->ib += params->dBdX*dx; + state->ia += params->dAdX*dx; + state->z += params->dZdX*dx; + state->tmu0_s += params->tmu[0].dSdX*dx; + state->tmu0_t += params->tmu[0].dTdX*dx; + state->tmu0_w += params->tmu[0].dWdX*dx; + state->tmu1_s += params->tmu[1].dSdX*dx; + state->tmu1_t += params->tmu[1].dTdX*dx; + state->tmu1_w += params->tmu[1].dWdX*dx; + state->w += params->dWdX*dx; + + x = params->clipLeft; + } + if (x2 >= params->clipRight) + x2 = params->clipRight-1; + } + else + { + if (x >= params->clipRight) + { + int dx = (params->clipRight-1) - x; + + state->ir += params->dRdX*dx; + state->ig += params->dGdX*dx; + state->ib += params->dBdX*dx; + state->ia += params->dAdX*dx; + state->z += params->dZdX*dx; + state->tmu0_s += params->tmu[0].dSdX*dx; + state->tmu0_t += params->tmu[0].dTdX*dx; + state->tmu0_w += params->tmu[0].dWdX*dx; + state->tmu1_s += params->tmu[1].dSdX*dx; + state->tmu1_t += params->tmu[1].dTdX*dx; + state->tmu1_w += params->tmu[1].dWdX*dx; + state->w += params->dWdX*dx; + + x = params->clipRight-1; + } + if (x2 < params->clipLeft) + x2 = params->clipLeft; + } + } + + if (x2 < x && state->xdir > 0) + goto next_line; + if (x2 > x && state->xdir < 0) + goto next_line; + + if (SLI_ENABLED) + { + state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + ((real_y >> 1) * params->row_width)]; + state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + ((real_y >> 1) * params->row_width)) & voodoo->fb_mask]; + } + else + { + if (params->col_tiled) + 
state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y >> 5) * params->row_width + (real_y & 31) * 128]; + else + state->fb_mem = fb_mem = (uint16_t *)&voodoo->fb_mem[params->draw_offset + (real_y * params->row_width)]; + if (params->aux_tiled) + state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y >> 5) * params->aux_row_width + (real_y & 31) * 128) & voodoo->fb_mask]; + else + state->aux_mem = aux_mem = (uint16_t *)&voodoo->fb_mem[(params->aux_offset + (real_y * params->row_width)) & voodoo->fb_mask]; + } + + if (voodoo_output) + pclog("%03i: x=%08x x2=%08x xstart=%08x xend=%08x dx=%08x start_x2=%08x\n", state->y, x, x2, state->xstart, state->xend, dx, start_x2); + + state->pixel_count = 0; + state->texel_count = 0; + state->x = x; + state->x2 = x2; +#ifndef NO_CODEGEN + if (voodoo->use_recompiler) + { + voodoo_draw(state, params, x, real_y); + } + else +#endif + do + { + int x_tiled = (x & 63) | ((x >> 6) * 128*32/2); + start_x = x; + state->x = x; + voodoo->pixel_count[odd_even]++; + voodoo->texel_count[odd_even] += texels; + voodoo->fbiPixelsIn++; + + if (voodoo_output) + pclog(" X=%03i T=%08x\n", x, state->tmu0_t); +// if (voodoo->fbzMode & FBZ_RGB_WMASK) + { + int update = 1; + uint8_t cother_r, cother_g, cother_b, aother; + uint8_t clocal_r, clocal_g, clocal_b, alocal; + int src_r = 0, src_g = 0, src_b = 0, src_a = 0; + int msel_r, msel_g, msel_b, msel_a; + uint8_t dest_r, dest_g, dest_b, dest_a; + uint16_t dat; + int sel; + int32_t new_depth, w_depth; + + if (state->w & 0xffff00000000) + w_depth = 0; + else if (!(state->w & 0xffff0000)) + w_depth = 0xf001; + else + { + int exp = voodoo_fls((uint16_t)((uint32_t)state->w >> 16)); + int mant = ((~(uint32_t)state->w >> (19 - exp))) & 0xfff; + w_depth = (exp << 12) + mant + 1; + if (w_depth > 0xffff) + w_depth = 0xffff; + } + +// w_depth = CLAMP16(w_depth); + + if (params->fbzMode & FBZ_W_BUFFER) + new_depth = w_depth; + else + new_depth = 
CLAMP16(state->z >> 12); + + if (params->fbzMode & FBZ_DEPTH_BIAS) + new_depth = CLAMP16(new_depth + (int16_t)params->zaColor); + + if (params->fbzMode & FBZ_DEPTH_ENABLE) + { + uint16_t old_depth = voodoo->params.aux_tiled ? aux_mem[x_tiled] : aux_mem[x]; + + DEPTH_TEST((params->fbzMode & FBZ_DEPTH_SOURCE) ? (params->zaColor & 0xffff) : new_depth); + } + + dat = voodoo->params.col_tiled ? fb_mem[x_tiled] : fb_mem[x]; + dest_r = (dat >> 8) & 0xf8; + dest_g = (dat >> 3) & 0xfc; + dest_b = (dat << 3) & 0xf8; + dest_r |= (dest_r >> 5); + dest_g |= (dest_g >> 6); + dest_b |= (dest_b >> 5); + dest_a = 0xff; + + if (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) + { + if ((params->textureMode[0] & TEXTUREMODE_LOCAL_MASK) == TEXTUREMODE_LOCAL || !voodoo->dual_tmus) + { + /*TMU0 only sampling local colour or only one TMU, only sample TMU0*/ + voodoo_tmu_fetch(voodoo, params, state, 0, x); + } + else if ((params->textureMode[0] & TEXTUREMODE_MASK) == TEXTUREMODE_PASSTHROUGH) + { + /*TMU0 in pass-through mode, only sample TMU1*/ + voodoo_tmu_fetch(voodoo, params, state, 1, x); + + state->tex_r[0] = state->tex_r[1]; + state->tex_g[0] = state->tex_g[1]; + state->tex_b[0] = state->tex_b[1]; + state->tex_a[0] = state->tex_a[1]; + } + else + { + voodoo_tmu_fetch_and_blend(voodoo, params, state, x); + } + + if ((params->fbzMode & FBZ_CHROMAKEY) && + state->tex_r[0] == params->chromaKey_r && + state->tex_g[0] == params->chromaKey_g && + state->tex_b[0] == params->chromaKey_b) + { + voodoo->fbiChromaFail++; + goto skip_pixel; + } + } + + if (voodoo->trexInit1[0] & (1 << 18)) + { + state->tex_r[0] = state->tex_g[0] = 0; + state->tex_b[0] = voodoo->tmuConfig; + } + + if (cc_localselect_override) + sel = (state->tex_a[0] & 0x80) ? 
1 : 0; + else + sel = cc_localselect; + + if (sel) + { + clocal_r = (params->color0 >> 16) & 0xff; + clocal_g = (params->color0 >> 8) & 0xff; + clocal_b = params->color0 & 0xff; + } + else + { + clocal_r = CLAMP(state->ir >> 12); + clocal_g = CLAMP(state->ig >> 12); + clocal_b = CLAMP(state->ib >> 12); + } + + switch (_rgb_sel) + { + case CC_LOCALSELECT_ITER_RGB: /*Iterated RGB*/ + cother_r = CLAMP(state->ir >> 12); + cother_g = CLAMP(state->ig >> 12); + cother_b = CLAMP(state->ib >> 12); + break; + + case CC_LOCALSELECT_TEX: /*TREX Color Output*/ + cother_r = state->tex_r[0]; + cother_g = state->tex_g[0]; + cother_b = state->tex_b[0]; + break; + + case CC_LOCALSELECT_COLOR1: /*Color1 RGB*/ + cother_r = (params->color1 >> 16) & 0xff; + cother_g = (params->color1 >> 8) & 0xff; + cother_b = params->color1 & 0xff; + break; + + case CC_LOCALSELECT_LFB: /*Linear Frame Buffer*/ + cother_r = src_r; + cother_g = src_g; + cother_b = src_b; + break; + } + + switch (cca_localselect) + { + case CCA_LOCALSELECT_ITER_A: + alocal = CLAMP(state->ia >> 12); + break; + + case CCA_LOCALSELECT_COLOR0: + alocal = (params->color0 >> 24) & 0xff; + break; + + case CCA_LOCALSELECT_ITER_Z: + alocal = CLAMP(state->z >> 20); + break; + + default: + fatal("Bad cca_localselect %i\n", cca_localselect); + alocal = 0xff; + break; + } + + switch (a_sel) + { + case A_SEL_ITER_A: + aother = CLAMP(state->ia >> 12); + break; + case A_SEL_TEX: + aother = state->tex_a[0]; + break; + case A_SEL_COLOR1: + aother = (params->color1 >> 24) & 0xff; + break; + default: + fatal("Bad a_sel %i\n", a_sel); + aother = 0; + break; + } + + if (cc_zero_other) + { + src_r = 0; + src_g = 0; + src_b = 0; + } + else + { + src_r = cother_r; + src_g = cother_g; + src_b = cother_b; + } + + if (cca_zero_other) + src_a = 0; + else + src_a = aother; + + if (cc_sub_clocal) + { + src_r -= clocal_r; + src_g -= clocal_g; + src_b -= clocal_b; + } + + if (cca_sub_clocal) + src_a -= alocal; + + switch (cc_mselect) + { + case 
CC_MSELECT_ZERO: + msel_r = 0; + msel_g = 0; + msel_b = 0; + break; + case CC_MSELECT_CLOCAL: + msel_r = clocal_r; + msel_g = clocal_g; + msel_b = clocal_b; + break; + case CC_MSELECT_AOTHER: + msel_r = aother; + msel_g = aother; + msel_b = aother; + break; + case CC_MSELECT_ALOCAL: + msel_r = alocal; + msel_g = alocal; + msel_b = alocal; + break; + case CC_MSELECT_TEX: + msel_r = state->tex_a[0]; + msel_g = state->tex_a[0]; + msel_b = state->tex_a[0]; + break; + case CC_MSELECT_TEXRGB: + msel_r = state->tex_r[0]; + msel_g = state->tex_g[0]; + msel_b = state->tex_b[0]; + break; + + default: + fatal("Bad cc_mselect %i\n", cc_mselect); + msel_r = 0; + msel_g = 0; + msel_b = 0; + break; + } + + switch (cca_mselect) + { + case CCA_MSELECT_ZERO: + msel_a = 0; + break; + case CCA_MSELECT_ALOCAL: + msel_a = alocal; + break; + case CCA_MSELECT_AOTHER: + msel_a = aother; + break; + case CCA_MSELECT_ALOCAL2: + msel_a = alocal; + break; + case CCA_MSELECT_TEX: + msel_a = state->tex_a[0]; + break; + + default: + fatal("Bad cca_mselect %i\n", cca_mselect); + msel_a = 0; + break; + } + + if (!cc_reverse_blend) + { + msel_r ^= 0xff; + msel_g ^= 0xff; + msel_b ^= 0xff; + } + msel_r++; + msel_g++; + msel_b++; + + if (!cca_reverse_blend) + msel_a ^= 0xff; + msel_a++; + + src_r = (src_r * msel_r) >> 8; + src_g = (src_g * msel_g) >> 8; + src_b = (src_b * msel_b) >> 8; + src_a = (src_a * msel_a) >> 8; + + switch (cc_add) + { + case CC_ADD_CLOCAL: + src_r += clocal_r; + src_g += clocal_g; + src_b += clocal_b; + break; + case CC_ADD_ALOCAL: + src_r += alocal; + src_g += alocal; + src_b += alocal; + break; + case 0: + break; + default: + fatal("Bad cc_add %i\n", cc_add); + } + + if (cca_add) + src_a += alocal; + + src_r = CLAMP(src_r); + src_g = CLAMP(src_g); + src_b = CLAMP(src_b); + src_a = CLAMP(src_a); + + if (cc_invert_output) + { + src_r ^= 0xff; + src_g ^= 0xff; + src_b ^= 0xff; + } + if (cca_invert_output) + src_a ^= 0xff; + + if (params->fogMode & FOG_ENABLE) + APPLY_FOG(src_r, 
src_g, src_b, state->z, state->ia, state->w); + + if (params->alphaMode & 1) + ALPHA_TEST(src_a); + + if (params->alphaMode & (1 << 4)) + ALPHA_BLEND(src_r, src_g, src_b, src_a); + + if (update) + { + if (dither) + { + if (dither2x2) + { + src_r = dither_rb2x2[src_r][real_y & 1][x & 1]; + src_g = dither_g2x2[src_g][real_y & 1][x & 1]; + src_b = dither_rb2x2[src_b][real_y & 1][x & 1]; + } + else + { + src_r = dither_rb[src_r][real_y & 3][x & 3]; + src_g = dither_g[src_g][real_y & 3][x & 3]; + src_b = dither_rb[src_b][real_y & 3][x & 3]; + } + } + else + { + src_r >>= 3; + src_g >>= 2; + src_b >>= 3; + } + + if (params->fbzMode & FBZ_RGB_WMASK) + { + if (voodoo->params.col_tiled) + fb_mem[x_tiled] = src_b | (src_g << 5) | (src_r << 11); + else + fb_mem[x] = src_b | (src_g << 5) | (src_r << 11); + } + if ((params->fbzMode & (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) == (FBZ_DEPTH_WMASK | FBZ_DEPTH_ENABLE)) + { + if (voodoo->params.aux_tiled) + aux_mem[x_tiled] = new_depth; + else + aux_mem[x] = new_depth; + } + } + } + voodoo_output &= ~2; + voodoo->fbiPixelsOut++; +skip_pixel: + if (state->xdir > 0) + { + state->ir += params->dRdX; + state->ig += params->dGdX; + state->ib += params->dBdX; + state->ia += params->dAdX; + state->z += params->dZdX; + state->tmu0_s += params->tmu[0].dSdX; + state->tmu0_t += params->tmu[0].dTdX; + state->tmu0_w += params->tmu[0].dWdX; + state->tmu1_s += params->tmu[1].dSdX; + state->tmu1_t += params->tmu[1].dTdX; + state->tmu1_w += params->tmu[1].dWdX; + state->w += params->dWdX; + } + else + { + state->ir -= params->dRdX; + state->ig -= params->dGdX; + state->ib -= params->dBdX; + state->ia -= params->dAdX; + state->z -= params->dZdX; + state->tmu0_s -= params->tmu[0].dSdX; + state->tmu0_t -= params->tmu[0].dTdX; + state->tmu0_w -= params->tmu[0].dWdX; + state->tmu1_s -= params->tmu[1].dSdX; + state->tmu1_t -= params->tmu[1].dTdX; + state->tmu1_w -= params->tmu[1].dWdX; + state->w -= params->dWdX; + } + + x += state->xdir; + } while (start_x 
!= x2); + + voodoo->pixel_count[odd_even] += state->pixel_count; + voodoo->texel_count[odd_even] += state->texel_count; + voodoo->fbiPixelsIn += state->pixel_count; + + if (voodoo->params.draw_offset == voodoo->params.front_offset && (real_y >> 1) < 2048) + voodoo->dirty_line[real_y >> 1] = 1; + +next_line: + if (SLI_ENABLED) + { + state->base_r += params->dRdY; + state->base_g += params->dGdY; + state->base_b += params->dBdY; + state->base_a += params->dAdY; + state->base_z += params->dZdY; + state->tmu[0].base_s += params->tmu[0].dSdY; + state->tmu[0].base_t += params->tmu[0].dTdY; + state->tmu[0].base_w += params->tmu[0].dWdY; + state->tmu[1].base_s += params->tmu[1].dSdY; + state->tmu[1].base_t += params->tmu[1].dTdY; + state->tmu[1].base_w += params->tmu[1].dWdY; + state->base_w += params->dWdY; + state->xstart += state->dx1; + state->xend += state->dx2; + } + state->base_r += params->dRdY; + state->base_g += params->dGdY; + state->base_b += params->dBdY; + state->base_a += params->dAdY; + state->base_z += params->dZdY; + state->tmu[0].base_s += params->tmu[0].dSdY; + state->tmu[0].base_t += params->tmu[0].dTdY; + state->tmu[0].base_w += params->tmu[0].dWdY; + state->tmu[1].base_s += params->tmu[1].dSdY; + state->tmu[1].base_t += params->tmu[1].dTdY; + state->tmu[1].base_w += params->tmu[1].dWdY; + state->base_w += params->dWdY; + state->xstart += state->dx1; + state->xend += state->dx2; + } + + voodoo->texture_cache[0][params->tex_entry[0]].refcount_r[odd_even]++; + voodoo->texture_cache[1][params->tex_entry[1]].refcount_r[odd_even]++; +} + +void voodoo_triangle(voodoo_t *voodoo, voodoo_params_t *params, int odd_even) +{ + voodoo_state_t state; + int vertexAy_adjusted; + int vertexCy_adjusted; + int dx, dy; + + uint64_t tempdx, tempdy; + uint64_t tempLOD; + int LOD; + int lodbias; + + voodoo->tri_count++; + + dx = 8 - (params->vertexAx & 0xf); + if ((params->vertexAx & 0xf) > 8) + dx += 16; + dy = 8 - (params->vertexAy & 0xf); + if ((params->vertexAy & 0xf) > 
8) + dy += 16; + +/* pclog("voodoo_triangle %i %i %i : vA %f, %f vB %f, %f vC %f, %f f %i,%i %08x %08x %08x,%08x tex=%i,%i fogMode=%08x\n", odd_even, voodoo->params_read_idx[odd_even], voodoo->params_read_idx[odd_even] & PARAM_MASK, (float)params->vertexAx / 16.0, (float)params->vertexAy / 16.0, + (float)params->vertexBx / 16.0, (float)params->vertexBy / 16.0, + (float)params->vertexCx / 16.0, (float)params->vertexCy / 16.0, + (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[0] : 0, + (params->fbzColorPath & FBZCP_TEXTURE_ENABLED) ? params->tformat[1] : 0, params->fbzColorPath, params->alphaMode, params->textureMode[0],params->textureMode[1], params->tex_entry[0],params->tex_entry[1], params->fogMode);*/ + + state.base_r = params->startR; + state.base_g = params->startG; + state.base_b = params->startB; + state.base_a = params->startA; + state.base_z = params->startZ; + state.tmu[0].base_s = params->tmu[0].startS; + state.tmu[0].base_t = params->tmu[0].startT; + state.tmu[0].base_w = params->tmu[0].startW; + state.tmu[1].base_s = params->tmu[1].startS; + state.tmu[1].base_t = params->tmu[1].startT; + state.tmu[1].base_w = params->tmu[1].startW; + state.base_w = params->startW; + + if (params->fbzColorPath & FBZ_PARAM_ADJUST) + { + state.base_r += (dx*params->dRdX + dy*params->dRdY) >> 4; + state.base_g += (dx*params->dGdX + dy*params->dGdY) >> 4; + state.base_b += (dx*params->dBdX + dy*params->dBdY) >> 4; + state.base_a += (dx*params->dAdX + dy*params->dAdY) >> 4; + state.base_z += (dx*params->dZdX + dy*params->dZdY) >> 4; + state.tmu[0].base_s += (dx*params->tmu[0].dSdX + dy*params->tmu[0].dSdY) >> 4; + state.tmu[0].base_t += (dx*params->tmu[0].dTdX + dy*params->tmu[0].dTdY) >> 4; + state.tmu[0].base_w += (dx*params->tmu[0].dWdX + dy*params->tmu[0].dWdY) >> 4; + state.tmu[1].base_s += (dx*params->tmu[1].dSdX + dy*params->tmu[1].dSdY) >> 4; + state.tmu[1].base_t += (dx*params->tmu[1].dTdX + dy*params->tmu[1].dTdY) >> 4; + state.tmu[1].base_w += 
(dx*params->tmu[1].dWdX + dy*params->tmu[1].dWdY) >> 4; + state.base_w += (dx*params->dWdX + dy*params->dWdY) >> 4; + } + + tris++; + + state.vertexAy = params->vertexAy & ~0xffff0000; + if (state.vertexAy & 0x8000) + state.vertexAy |= 0xffff0000; + state.vertexBy = params->vertexBy & ~0xffff0000; + if (state.vertexBy & 0x8000) + state.vertexBy |= 0xffff0000; + state.vertexCy = params->vertexCy & ~0xffff0000; + if (state.vertexCy & 0x8000) + state.vertexCy |= 0xffff0000; + + state.vertexAx = params->vertexAx & ~0xffff0000; + if (state.vertexAx & 0x8000) + state.vertexAx |= 0xffff0000; + state.vertexBx = params->vertexBx & ~0xffff0000; + if (state.vertexBx & 0x8000) + state.vertexBx |= 0xffff0000; + state.vertexCx = params->vertexCx & ~0xffff0000; + if (state.vertexCx & 0x8000) + state.vertexCx |= 0xffff0000; + + vertexAy_adjusted = (state.vertexAy+7) >> 4; + vertexCy_adjusted = (state.vertexCy+7) >> 4; + + if (state.vertexBy - state.vertexAy) + state.dxAB = (int)((((int64_t)state.vertexBx << 12) - ((int64_t)state.vertexAx << 12)) << 4) / (int)(state.vertexBy - state.vertexAy); + else + state.dxAB = 0; + if (state.vertexCy - state.vertexAy) + state.dxAC = (int)((((int64_t)state.vertexCx << 12) - ((int64_t)state.vertexAx << 12)) << 4) / (int)(state.vertexCy - state.vertexAy); + else + state.dxAC = 0; + if (state.vertexCy - state.vertexBy) + state.dxBC = (int)((((int64_t)state.vertexCx << 12) - ((int64_t)state.vertexBx << 12)) << 4) / (int)(state.vertexCy - state.vertexBy); + else + state.dxBC = 0; + + state.lod_min[0] = (params->tLOD[0] & 0x3f) << 6; + state.lod_max[0] = ((params->tLOD[0] >> 6) & 0x3f) << 6; + if (state.lod_max[0] > 0x800) + state.lod_max[0] = 0x800; + state.lod_min[1] = (params->tLOD[1] & 0x3f) << 6; + state.lod_max[1] = ((params->tLOD[1] >> 6) & 0x3f) << 6; + if (state.lod_max[1] > 0x800) + state.lod_max[1] = 0x800; + + state.xstart = state.xend = state.vertexAx << 8; + state.xdir = params->sign ? 
-1 : 1; + + state.y = (state.vertexAy + 8) >> 4; + state.ydir = 1; + + + tempdx = (params->tmu[0].dSdX >> 14) * (params->tmu[0].dSdX >> 14) + (params->tmu[0].dTdX >> 14) * (params->tmu[0].dTdX >> 14); + tempdy = (params->tmu[0].dSdY >> 14) * (params->tmu[0].dSdY >> 14) + (params->tmu[0].dTdY >> 14) * (params->tmu[0].dTdY >> 14); + + if (tempdx > tempdy) + tempLOD = tempdx; + else + tempLOD = tempdy; + + LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256); + LOD >>= 2; + + lodbias = (params->tLOD[0] >> 12) & 0x3f; + if (lodbias & 0x20) + lodbias |= ~0x3f; + state.tmu[0].lod = LOD + (lodbias << 6); + + + tempdx = (params->tmu[1].dSdX >> 14) * (params->tmu[1].dSdX >> 14) + (params->tmu[1].dTdX >> 14) * (params->tmu[1].dTdX >> 14); + tempdy = (params->tmu[1].dSdY >> 14) * (params->tmu[1].dSdY >> 14) + (params->tmu[1].dTdY >> 14) * (params->tmu[1].dTdY >> 14); + + if (tempdx > tempdy) + tempLOD = tempdx; + else + tempLOD = tempdy; + + LOD = (int)(log2((double)tempLOD / (double)(1ULL << 36)) * 256); + LOD >>= 2; + + lodbias = (params->tLOD[1] >> 12) & 0x3f; + if (lodbias & 0x20) + lodbias |= ~0x3f; + state.tmu[1].lod = LOD + (lodbias << 6); + + + voodoo_half_triangle(voodoo, params, &state, vertexAy_adjusted, vertexCy_adjusted, odd_even); +} + + +static void render_thread(void *param, int odd_even) +{ + voodoo_t *voodoo = (voodoo_t *)param; + + while (1) + { + thread_set_event(voodoo->render_not_full_event[odd_even]); + thread_wait_event(voodoo->wake_render_thread[odd_even], -1); + thread_reset_event(voodoo->wake_render_thread[odd_even]); + voodoo->render_voodoo_busy[odd_even] = 1; + + while (!PARAM_EMPTY(odd_even)) + { + uint64_t start_time = timer_read(); + uint64_t end_time; + voodoo_params_t *params = &voodoo->params_buffer[voodoo->params_read_idx[odd_even] & PARAM_MASK]; + + voodoo_triangle(voodoo, params, odd_even); + + voodoo->params_read_idx[odd_even]++; + + if (PARAM_ENTRIES(odd_even) > (PARAM_SIZE - 10)) + 
thread_set_event(voodoo->render_not_full_event[odd_even]); + + end_time = timer_read(); + voodoo->render_time[odd_even] += end_time - start_time; + } + + voodoo->render_voodoo_busy[odd_even] = 0; + } +} + +void voodoo_render_thread_1(void *param) +{ + render_thread(param, 0); +} +void voodoo_render_thread_2(void *param) +{ + render_thread(param, 1); +} +void voodoo_render_thread_3(void *param) +{ + render_thread(param, 2); +} +void voodoo_render_thread_4(void *param) +{ + render_thread(param, 3); +} + +void voodoo_queue_triangle(voodoo_t *voodoo, voodoo_params_t *params) +{ + voodoo_params_t *params_new = &voodoo->params_buffer[voodoo->params_write_idx & PARAM_MASK]; + + while (PARAM_FULL(0) || (voodoo->render_threads >= 2 && PARAM_FULL(1)) || + (voodoo->render_threads == 4 && (PARAM_FULL(2) || PARAM_FULL(3)))) + { + thread_reset_event(voodoo->render_not_full_event[0]); + if (voodoo->render_threads >= 2) + thread_reset_event(voodoo->render_not_full_event[1]); + if (voodoo->render_threads == 4) + { + thread_reset_event(voodoo->render_not_full_event[2]); + thread_reset_event(voodoo->render_not_full_event[3]); + } + if (PARAM_FULL(0)) + thread_wait_event(voodoo->render_not_full_event[0], -1); /*Wait for room in ringbuffer*/ + if (voodoo->render_threads >= 2 && PARAM_FULL(1)) + thread_wait_event(voodoo->render_not_full_event[1], -1); /*Wait for room in ringbuffer*/ + if (voodoo->render_threads == 4 && PARAM_FULL(2)) + thread_wait_event(voodoo->render_not_full_event[2], -1); /*Wait for room in ringbuffer*/ + if (voodoo->render_threads == 4 && PARAM_FULL(3)) + thread_wait_event(voodoo->render_not_full_event[3], -1); /*Wait for room in ringbuffer*/ + } + + voodoo_use_texture(voodoo, params, 0); + if (voodoo->dual_tmus) + voodoo_use_texture(voodoo, params, 1); + + memcpy(params_new, params, sizeof(voodoo_params_t)); + + voodoo->params_write_idx++; + + if (PARAM_ENTRIES(0) < 4 || (voodoo->render_threads >= 2 && PARAM_ENTRIES(1) < 4) || + (voodoo->render_threads == 4 && 
(PARAM_ENTRIES(2) < 4 || PARAM_ENTRIES(3) < 4))) + voodoo_wake_render_thread(voodoo); +} diff --git a/pcem/vid_voodoo_render.h b/pcem/vid_voodoo_render.h new file mode 100644 index 00000000..9ba73dc6 --- /dev/null +++ b/pcem/vid_voodoo_render.h @@ -0,0 +1,338 @@ +#if !(defined i386 || defined __i386 || defined __i386__ || defined _X86_ || defined WIN32 || defined _WIN32 || defined _WIN32) && !(defined __amd64__) +#define NO_CODEGEN +#endif + +#ifndef NO_CODEGEN +void voodoo_codegen_init(voodoo_t *voodoo); +void voodoo_codegen_close(voodoo_t *voodoo); +#endif + +#define DEPTH_TEST(comp_depth) \ + do \ + { \ + switch (depth_op) \ + { \ + case DEPTHOP_NEVER: \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + case DEPTHOP_LESSTHAN: \ + if (!(comp_depth < old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_EQUAL: \ + if (!(comp_depth == old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_LESSTHANEQUAL: \ + if (!(comp_depth <= old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_GREATERTHAN: \ + if (!(comp_depth > old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_NOTEQUAL: \ + if (!(comp_depth != old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_GREATERTHANEQUAL: \ + if (!(comp_depth >= old_depth)) \ + { \ + voodoo->fbiZFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case DEPTHOP_ALWAYS: \ + break; \ + } \ + } while (0) + +#define APPLY_FOG(src_r, src_g, src_b, z, ia, w) \ + do \ + { \ + if (params->fogMode & FOG_CONSTANT) \ + { \ + src_r += params->fogColor.r; \ + src_g += params->fogColor.g; \ + src_b += params->fogColor.b; \ + } \ + else \ + { \ + int fog_r, fog_g, fog_b, fog_a = 0; \ + int fog_idx; \ + \ + if (!(params->fogMode & FOG_ADD)) \ + { \ + fog_r = params->fogColor.r; \ + fog_g = params->fogColor.g; \ + 
fog_b = params->fogColor.b; \ + } \ + else \ + fog_r = fog_g = fog_b = 0; \ + \ + if (!(params->fogMode & FOG_MULT)) \ + { \ + fog_r -= src_r; \ + fog_g -= src_g; \ + fog_b -= src_b; \ + } \ + \ + switch (params->fogMode & (FOG_Z|FOG_ALPHA)) \ + { \ + case 0: \ + fog_idx = (w_depth >> 10) & 0x3f; \ + \ + fog_a = params->fogTable[fog_idx].fog; \ + fog_a += (params->fogTable[fog_idx].dfog * ((w_depth >> 2) & 0xff)) >> 10; \ + break; \ + case FOG_Z: \ + fog_a = (z >> 20) & 0xff; \ + break; \ + case FOG_ALPHA: \ + fog_a = CLAMP(ia >> 12); \ + break; \ + case FOG_W: \ + fog_a = CLAMP((w >> 32) & 0xff); \ + break; \ + } \ + fog_a++; \ + \ + fog_r = (fog_r * fog_a) >> 8; \ + fog_g = (fog_g * fog_a) >> 8; \ + fog_b = (fog_b * fog_a) >> 8; \ + \ + if (params->fogMode & FOG_MULT) \ + { \ + src_r = fog_r; \ + src_g = fog_g; \ + src_b = fog_b; \ + } \ + else \ + { \ + src_r += fog_r; \ + src_g += fog_g; \ + src_b += fog_b; \ + } \ + } \ + \ + src_r = CLAMP(src_r); \ + src_g = CLAMP(src_g); \ + src_b = CLAMP(src_b); \ + } while (0) + +#define ALPHA_TEST(src_a) \ + do \ + { \ + switch (alpha_func) \ + { \ + case AFUNC_NEVER: \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + case AFUNC_LESSTHAN: \ + if (!(src_a < a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_EQUAL: \ + if (!(src_a == a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_LESSTHANEQUAL: \ + if (!(src_a <= a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_GREATERTHAN: \ + if (!(src_a > a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_NOTEQUAL: \ + if (!(src_a != a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_GREATERTHANEQUAL: \ + if (!(src_a >= a_ref)) \ + { \ + voodoo->fbiAFuncFail++; \ + goto skip_pixel; \ + } \ + break; \ + case AFUNC_ALWAYS: \ + break; \ + } \ + } while (0) + +#define 
ALPHA_BLEND(src_r, src_g, src_b, src_a) \ + do \ + { \ + int _a; \ + int newdest_r = 0, newdest_g = 0, newdest_b = 0; \ + \ + switch (dest_afunc) \ + { \ + case AFUNC_AZERO: \ + newdest_r = newdest_g = newdest_b = 0; \ + break; \ + case AFUNC_ASRC_ALPHA: \ + newdest_r = (dest_r * src_a) / 255; \ + newdest_g = (dest_g * src_a) / 255; \ + newdest_b = (dest_b * src_a) / 255; \ + break; \ + case AFUNC_A_COLOR: \ + newdest_r = (dest_r * src_r) / 255; \ + newdest_g = (dest_g * src_g) / 255; \ + newdest_b = (dest_b * src_b) / 255; \ + break; \ + case AFUNC_ADST_ALPHA: \ + newdest_r = (dest_r * dest_a) / 255; \ + newdest_g = (dest_g * dest_a) / 255; \ + newdest_b = (dest_b * dest_a) / 255; \ + break; \ + case AFUNC_AONE: \ + newdest_r = dest_r; \ + newdest_g = dest_g; \ + newdest_b = dest_b; \ + break; \ + case AFUNC_AOMSRC_ALPHA: \ + newdest_r = (dest_r * (255-src_a)) / 255; \ + newdest_g = (dest_g * (255-src_a)) / 255; \ + newdest_b = (dest_b * (255-src_a)) / 255; \ + break; \ + case AFUNC_AOM_COLOR: \ + newdest_r = (dest_r * (255-src_r)) / 255; \ + newdest_g = (dest_g * (255-src_g)) / 255; \ + newdest_b = (dest_b * (255-src_b)) / 255; \ + break; \ + case AFUNC_AOMDST_ALPHA: \ + newdest_r = (dest_r * (255-dest_a)) / 255; \ + newdest_g = (dest_g * (255-dest_a)) / 255; \ + newdest_b = (dest_b * (255-dest_a)) / 255; \ + break; \ + case AFUNC_ASATURATE: \ + _a = MIN(src_a, 1-dest_a); \ + newdest_r = (dest_r * _a) / 255; \ + newdest_g = (dest_g * _a) / 255; \ + newdest_b = (dest_b * _a) / 255; \ + break; \ + } \ + \ + switch (src_afunc) \ + { \ + case AFUNC_AZERO: \ + src_r = src_g = src_b = 0; \ + break; \ + case AFUNC_ASRC_ALPHA: \ + src_r = (src_r * src_a) / 255; \ + src_g = (src_g * src_a) / 255; \ + src_b = (src_b * src_a) / 255; \ + break; \ + case AFUNC_A_COLOR: \ + src_r = (src_r * dest_r) / 255; \ + src_g = (src_g * dest_g) / 255; \ + src_b = (src_b * dest_b) / 255; \ + break; \ + case AFUNC_ADST_ALPHA: \ + src_r = (src_r * dest_a) / 255; \ + src_g = (src_g * dest_a) 
/ 255; \ + src_b = (src_b * dest_a) / 255; \ + break; \ + case AFUNC_AONE: \ + break; \ + case AFUNC_AOMSRC_ALPHA: \ + src_r = (src_r * (255-src_a)) / 255; \ + src_g = (src_g * (255-src_a)) / 255; \ + src_b = (src_b * (255-src_a)) / 255; \ + break; \ + case AFUNC_AOM_COLOR: \ + src_r = (src_r * (255-dest_r)) / 255; \ + src_g = (src_g * (255-dest_g)) / 255; \ + src_b = (src_b * (255-dest_b)) / 255; \ + break; \ + case AFUNC_AOMDST_ALPHA: \ + src_r = (src_r * (255-dest_a)) / 255; \ + src_g = (src_g * (255-dest_a)) / 255; \ + src_b = (src_b * (255-dest_a)) / 255; \ + break; \ + case AFUNC_ACOLORBEFOREFOG: \ + fatal("AFUNC_ACOLORBEFOREFOG\n"); \ + break; \ + } \ + \ + src_r += newdest_r; \ + src_g += newdest_g; \ + src_b += newdest_b; \ + \ + src_r = CLAMP(src_r); \ + src_g = CLAMP(src_g); \ + src_b = CLAMP(src_b); \ + } while(0) + + + +void voodoo_render_thread_1(void *param); +void voodoo_render_thread_2(void *param); +void voodoo_render_thread_3(void *param); +void voodoo_render_thread_4(void *param); +void voodoo_queue_triangle(voodoo_t *voodoo, voodoo_params_t *params); + +extern int voodoo_recomp; +extern int tris; + +static inline void voodoo_wake_render_thread(voodoo_t *voodoo) +{ + thread_set_event(voodoo->wake_render_thread[0]); /*Wake up render thread if moving from idle*/ + if (voodoo->render_threads >= 2) + thread_set_event(voodoo->wake_render_thread[1]); /*Wake up render thread if moving from idle*/ + if (voodoo->render_threads == 4) + { + thread_set_event(voodoo->wake_render_thread[2]); /*Wake up render thread if moving from idle*/ + thread_set_event(voodoo->wake_render_thread[3]); /*Wake up render thread if moving from idle*/ + } +} + +static inline void voodoo_wait_for_render_thread_idle(voodoo_t *voodoo) +{ + while (!PARAM_EMPTY(0) || (voodoo->render_threads >= 2 && !PARAM_EMPTY(1)) || + (voodoo->render_threads == 4 && (!PARAM_EMPTY(2) || !PARAM_EMPTY(3))) || + voodoo->render_voodoo_busy[0] || (voodoo->render_threads >= 2 && 
voodoo->render_voodoo_busy[1]) || + (voodoo->render_threads == 4 && (voodoo->render_voodoo_busy[2] || voodoo->render_voodoo_busy[3]))) + { + voodoo_wake_render_thread(voodoo); + if (!PARAM_EMPTY(0) || voodoo->render_voodoo_busy[0]) + thread_wait_event(voodoo->render_not_full_event[0], 1); + if (voodoo->render_threads >= 2 && (!PARAM_EMPTY(1) || voodoo->render_voodoo_busy[1])) + thread_wait_event(voodoo->render_not_full_event[1], 1); + if (voodoo->render_threads == 4 && (!PARAM_EMPTY(2) || voodoo->render_voodoo_busy[2])) + thread_wait_event(voodoo->render_not_full_event[2], 1); + if (voodoo->render_threads == 4 && (!PARAM_EMPTY(3) || voodoo->render_voodoo_busy[3])) + thread_wait_event(voodoo->render_not_full_event[3], 1); + } +} diff --git a/pcem/vid_voodoo_setup.cpp b/pcem/vid_voodoo_setup.cpp new file mode 100644 index 00000000..360d14a8 --- /dev/null +++ b/pcem/vid_voodoo_setup.cpp @@ -0,0 +1,216 @@ +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "thread.h" +#include "video.h" +#include "vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_regs.h" +#include "vid_voodoo_render.h" +#include "vid_voodoo_setup.h" + +void voodoo_triangle_setup(voodoo_t *voodoo) +{ + float dxAB, dxBC, dyAB, dyBC; + float area; + int va = 0, vb = 1, vc = 2; + vert_t verts[3]; + + verts[0] = voodoo->verts[0]; + verts[1] = voodoo->verts[1]; + verts[2] = voodoo->verts[2]; + + if (verts[0].sVy < verts[1].sVy) + { + if (verts[1].sVy < verts[2].sVy) + { + /* V1>V0, V2>V1, V2>V1>V0*/ + va = 0; /*OK*/ + vb = 1; + vc = 2; + } + else + { + /* V1>V0, V1>V2*/ + if (verts[0].sVy < verts[2].sVy) + { + /* V1>V0, V1>V2, V2>V0, V1>V2>V0*/ + va = 0; + vb = 2; + vc = 1; + } + else + { + /* V1>V0, V1>V2, V0>V2, V1>V0>V2*/ + va = 2; + vb = 0; + vc = 1; + } + } + } + else + { + if (verts[1].sVy < verts[2].sVy) + { + /* V0>V1, V2>V1*/ + if (verts[0].sVy < verts[2].sVy) + { + /* V0>V1, V2>V1, V2>V0, V2>V0>V1*/ + va = 1; + vb = 0; + vc = 2; + } + else + { 
+ /* V0>V1, V2>V1, V0>V2, V0>V2>V1*/ + va = 1; + vb = 2; + vc = 0; + } + } + else + { + /*V0>V1>V2*/ + va = 2; + vb = 1; + vc = 0; + } + } + + dxAB = verts[0].sVx - verts[1].sVx; + dxBC = verts[1].sVx - verts[2].sVx; + dyAB = verts[0].sVy - verts[1].sVy; + dyBC = verts[1].sVy - verts[2].sVy; + + area = dxAB * dyBC - dxBC * dyAB; + + if (area == 0.0) + return; + + if (voodoo->sSetupMode & SETUPMODE_CULLING_ENABLE) + { + int cull_sign = voodoo->sSetupMode & SETUPMODE_CULLING_SIGN; + int sign = (area < 0.0); + + if ((voodoo->sSetupMode & (SETUPMODE_CULLING_ENABLE | SETUPMODE_DISABLE_PINGPONG)) + == SETUPMODE_CULLING_ENABLE && voodoo->cull_pingpong) + cull_sign = !cull_sign; + + if (cull_sign && sign) + return; + if (!cull_sign && !sign) + return; + } + + + dxAB = verts[va].sVx - verts[vb].sVx; + dxBC = verts[vb].sVx - verts[vc].sVx; + dyAB = verts[va].sVy - verts[vb].sVy; + dyBC = verts[vb].sVy - verts[vc].sVy; + + area = dxAB * dyBC - dxBC * dyAB; + + dxAB /= area; + dxBC /= area; + dyAB /= area; + dyBC /= area; + + + + voodoo->params.vertexAx = (int32_t)(int16_t)((int32_t)(verts[va].sVx * 16.0f) & 0xffff); + voodoo->params.vertexAy = (int32_t)(int16_t)((int32_t)(verts[va].sVy * 16.0f) & 0xffff); + voodoo->params.vertexBx = (int32_t)(int16_t)((int32_t)(verts[vb].sVx * 16.0f) & 0xffff); + voodoo->params.vertexBy = (int32_t)(int16_t)((int32_t)(verts[vb].sVy * 16.0f) & 0xffff); + voodoo->params.vertexCx = (int32_t)(int16_t)((int32_t)(verts[vc].sVx * 16.0f) & 0xffff); + voodoo->params.vertexCy = (int32_t)(int16_t)((int32_t)(verts[vc].sVy * 16.0f) & 0xffff); + + if (voodoo->params.vertexAy > voodoo->params.vertexBy || voodoo->params.vertexBy > voodoo->params.vertexCy) + { + pclog("triangle_setup wrong order %d %d %d\n", voodoo->params.vertexAy, voodoo->params.vertexBy, voodoo->params.vertexCy); + return; + } + + if (voodoo->sSetupMode & SETUPMODE_RGB) + { + voodoo->params.startR = (int32_t)(verts[va].sRed * 4096.0f); + voodoo->params.dRdX = (int32_t)(((verts[va].sRed - 
verts[vb].sRed) * dyBC - (verts[vb].sRed - verts[vc].sRed) * dyAB) * 4096.0f); + voodoo->params.dRdY = (int32_t)(((verts[vb].sRed - verts[vc].sRed) * dxAB - (verts[va].sRed - verts[vb].sRed) * dxBC) * 4096.0f); + voodoo->params.startG = (int32_t)(verts[va].sGreen * 4096.0f); + voodoo->params.dGdX = (int32_t)(((verts[va].sGreen - verts[vb].sGreen) * dyBC - (verts[vb].sGreen - verts[vc].sGreen) * dyAB) * 4096.0f); + voodoo->params.dGdY = (int32_t)(((verts[vb].sGreen - verts[vc].sGreen) * dxAB - (verts[va].sGreen - verts[vb].sGreen) * dxBC) * 4096.0f); + voodoo->params.startB = (int32_t)(verts[va].sBlue * 4096.0f); + voodoo->params.dBdX = (int32_t)(((verts[va].sBlue - verts[vb].sBlue) * dyBC - (verts[vb].sBlue - verts[vc].sBlue) * dyAB) * 4096.0f); + voodoo->params.dBdY = (int32_t)(((verts[vb].sBlue - verts[vc].sBlue) * dxAB - (verts[va].sBlue - verts[vb].sBlue) * dxBC) * 4096.0f); + } + if (voodoo->sSetupMode & SETUPMODE_ALPHA) + { + voodoo->params.startA = (int32_t)(verts[va].sAlpha * 4096.0f); + voodoo->params.dAdX = (int32_t)(((verts[va].sAlpha - verts[vb].sAlpha) * dyBC - (verts[vb].sAlpha - verts[vc].sAlpha) * dyAB) * 4096.0f); + voodoo->params.dAdY = (int32_t)(((verts[vb].sAlpha - verts[vc].sAlpha) * dxAB - (verts[va].sAlpha - verts[vb].sAlpha) * dxBC) * 4096.0f); + } + if (voodoo->sSetupMode & SETUPMODE_Z) + { + voodoo->params.startZ = (int32_t)(verts[va].sVz * 4096.0f); + voodoo->params.dZdX = (int32_t)(((verts[va].sVz - verts[vb].sVz) * dyBC - (verts[vb].sVz - verts[vc].sVz) * dyAB) * 4096.0f); + voodoo->params.dZdY = (int32_t)(((verts[vb].sVz - verts[vc].sVz) * dxAB - (verts[va].sVz - verts[vb].sVz) * dxBC) * 4096.0f); + } + if (voodoo->sSetupMode & SETUPMODE_Wb) + { + voodoo->params.startW = (int64_t)(verts[va].sWb * 4294967296.0f); + voodoo->params.dWdX = (int64_t)(((verts[va].sWb - verts[vb].sWb) * dyBC - (verts[vb].sWb - verts[vc].sWb) * dyAB) * 4294967296.0f); + voodoo->params.dWdY = (int64_t)(((verts[vb].sWb - verts[vc].sWb) * dxAB - (verts[va].sWb - 
verts[vb].sWb) * dxBC) * 4294967296.0f); + voodoo->params.tmu[0].startW = voodoo->params.tmu[1].startW = voodoo->params.startW; + voodoo->params.tmu[0].dWdX = voodoo->params.tmu[1].dWdX = voodoo->params.dWdX; + voodoo->params.tmu[0].dWdY = voodoo->params.tmu[1].dWdY = voodoo->params.dWdY; + } + if (voodoo->sSetupMode & SETUPMODE_W0) + { + voodoo->params.tmu[0].startW = (int64_t)(verts[va].sW0 * 4294967296.0f); + voodoo->params.tmu[0].dWdX = (int64_t)(((verts[va].sW0 - verts[vb].sW0) * dyBC - (verts[vb].sW0 - verts[vc].sW0) * dyAB) * 4294967296.0f); + voodoo->params.tmu[0].dWdY = (int64_t)(((verts[vb].sW0 - verts[vc].sW0) * dxAB - (verts[va].sW0 - verts[vb].sW0) * dxBC) * 4294967296.0f); + voodoo->params.tmu[1].startW = voodoo->params.tmu[0].startW; + voodoo->params.tmu[1].dWdX = voodoo->params.tmu[0].dWdX; + voodoo->params.tmu[1].dWdY = voodoo->params.tmu[0].dWdY; + } + if (voodoo->sSetupMode & SETUPMODE_S0_T0) + { + voodoo->params.tmu[0].startS = (int64_t)(verts[va].sS0 * 4294967296.0f); + voodoo->params.tmu[0].dSdX = (int64_t)(((verts[va].sS0 - verts[vb].sS0) * dyBC - (verts[vb].sS0 - verts[vc].sS0) * dyAB) * 4294967296.0f); + voodoo->params.tmu[0].dSdY = (int64_t)(((verts[vb].sS0 - verts[vc].sS0) * dxAB - (verts[va].sS0 - verts[vb].sS0) * dxBC) * 4294967296.0f); + voodoo->params.tmu[0].startT = (int64_t)(verts[va].sT0 * 4294967296.0f); + voodoo->params.tmu[0].dTdX = (int64_t)(((verts[va].sT0 - verts[vb].sT0) * dyBC - (verts[vb].sT0 - verts[vc].sT0) * dyAB) * 4294967296.0f); + voodoo->params.tmu[0].dTdY = (int64_t)(((verts[vb].sT0 - verts[vc].sT0) * dxAB - (verts[va].sT0 - verts[vb].sT0) * dxBC) * 4294967296.0f); + voodoo->params.tmu[1].startS = voodoo->params.tmu[0].startS; + voodoo->params.tmu[1].dSdX = voodoo->params.tmu[0].dSdX; + voodoo->params.tmu[1].dSdY = voodoo->params.tmu[0].dSdY; + voodoo->params.tmu[1].startT = voodoo->params.tmu[0].startT; + voodoo->params.tmu[1].dTdX = voodoo->params.tmu[0].dTdX; + voodoo->params.tmu[1].dTdY = 
voodoo->params.tmu[0].dTdY; + } + if (voodoo->sSetupMode & SETUPMODE_W1) + { + voodoo->params.tmu[1].startW = (int64_t)(verts[va].sW1 * 4294967296.0f); + voodoo->params.tmu[1].dWdX = (int64_t)(((verts[va].sW1 - verts[vb].sW1) * dyBC - (verts[vb].sW1 - verts[vc].sW1) * dyAB) * 4294967296.0f); + voodoo->params.tmu[1].dWdY = (int64_t)(((verts[vb].sW1 - verts[vc].sW1) * dxAB - (verts[va].sW1 - verts[vb].sW1) * dxBC) * 4294967296.0f); + } + if (voodoo->sSetupMode & SETUPMODE_S1_T1) + { + voodoo->params.tmu[1].startS = (int64_t)(verts[va].sS1 * 4294967296.0f); + voodoo->params.tmu[1].dSdX = (int64_t)(((verts[va].sS1 - verts[vb].sS1) * dyBC - (verts[vb].sS1 - verts[vc].sS1) * dyAB) * 4294967296.0f); + voodoo->params.tmu[1].dSdY = (int64_t)(((verts[vb].sS1 - verts[vc].sS1) * dxAB - (verts[va].sS1 - verts[vb].sS1) * dxBC) * 4294967296.0f); + voodoo->params.tmu[1].startT = (int64_t)(verts[va].sT1 * 4294967296.0f); + voodoo->params.tmu[1].dTdX = (int64_t)(((verts[va].sT1 - verts[vb].sT1) * dyBC - (verts[vb].sT1 - verts[vc].sT1) * dyAB) * 4294967296.0f); + voodoo->params.tmu[1].dTdY = (int64_t)(((verts[vb].sT1 - verts[vc].sT1) * dxAB - (verts[va].sT1 - verts[vb].sT1) * dxBC) * 4294967296.0f); + } + + voodoo->params.sign = (area < 0.0); + + if (voodoo->ncc_dirty[0]) + voodoo_update_ncc(voodoo, 0); + if (voodoo->ncc_dirty[1]) + voodoo_update_ncc(voodoo, 1); + voodoo->ncc_dirty[0] = voodoo->ncc_dirty[1] = 0; + + voodoo_queue_triangle(voodoo, &voodoo->params); +} diff --git a/pcem/vid_voodoo_setup.h b/pcem/vid_voodoo_setup.h new file mode 100644 index 00000000..06d2f8d4 --- /dev/null +++ b/pcem/vid_voodoo_setup.h @@ -0,0 +1 @@ +void voodoo_triangle_setup(voodoo_t *voodoo); diff --git a/pcem/vid_voodoo_texture.cpp b/pcem/vid_voodoo_texture.cpp new file mode 100644 index 00000000..f1d5c78d --- /dev/null +++ b/pcem/vid_voodoo_texture.cpp @@ -0,0 +1,583 @@ +#include +#include +#include "ibm.h" +#include "device.h" +#include "mem.h" +#include "thread.h" +#include "video.h" +#include 
"vid_svga.h" +#include "vid_voodoo.h" +#include "vid_voodoo_common.h" +#include "vid_voodoo_dither.h" +#include "vid_voodoo_regs.h" +#include "vid_voodoo_render.h" +#include "vid_voodoo_texture.h" + +void voodoo_recalc_tex(voodoo_t *voodoo, int tmu) +{ + int aspect = (voodoo->params.tLOD[tmu] >> 21) & 3; + int width = 256, height = 256; + int shift = 8; + int lod; + uint32_t base = voodoo->params.texBaseAddr[tmu]; + uint32_t offset = 0; + int tex_lod = 0; + uint32_t offsets[LOD_MAX+3]; + int widths[LOD_MAX+3], heights[LOD_MAX+3], shifts[LOD_MAX+3]; + + if (voodoo->params.tLOD[tmu] & LOD_S_IS_WIDER) + height >>= aspect; + else + { + width >>= aspect; + shift -= aspect; + } + + for (lod = 0; lod <= LOD_MAX + 2; lod++) + { + offsets[lod] = offset; + widths[lod] = width >> lod; + heights[lod] = height >> lod; + shifts[lod] = shift - lod; + + if (!widths[lod]) + widths[lod] = 1; + if (!heights[lod]) + heights[lod] = 1; + if (shifts[lod] < 0) + shifts[lod] = 0; + + if (!(voodoo->params.tLOD[tmu] & LOD_SPLIT) || + ((lod & 1) && (voodoo->params.tLOD[tmu] & LOD_ODD)) || + (!(lod & 1) && !(voodoo->params.tLOD[tmu] & LOD_ODD))) + { + if (voodoo->params.tformat[tmu] & 8) + offset += (width >> lod) * (height >> lod) * 2; + else + offset += (width >> lod) * (height >> lod); + } + } + + + if ((voodoo->params.textureMode[tmu] & TEXTUREMODE_TRILINEAR) && (voodoo->params.tLOD[tmu] & LOD_ODD)) + tex_lod++; /*Skip LOD 0*/ + +// pclog("TMU %i: %08x\n", tmu, voodoo->params.textureMode[tmu]); + for (lod = 0; lod <= LOD_MAX+1; lod++) + { + if (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR) + { + switch (tex_lod) + { + case 0: + base = voodoo->params.texBaseAddr[tmu]; + break; + case 1: + base = voodoo->params.texBaseAddr1[tmu]; + break; + case 2: + base = voodoo->params.texBaseAddr2[tmu]; + break; + default: + base = voodoo->params.texBaseAddr38[tmu]; + break; + } + } + + voodoo->params.tex_base[tmu][lod] = base + offsets[tex_lod]; + if (voodoo->params.tformat[tmu] & 8) + 
voodoo->params.tex_end[tmu][lod] = base + offsets[tex_lod] + (widths[tex_lod] * heights[tex_lod] * 2); + else + voodoo->params.tex_end[tmu][lod] = base + offsets[tex_lod] + (widths[tex_lod] * heights[tex_lod]); + voodoo->params.tex_w_mask[tmu][lod] = widths[tex_lod] - 1; + voodoo->params.tex_w_nmask[tmu][lod] = ~(widths[tex_lod] - 1); + voodoo->params.tex_h_mask[tmu][lod] = heights[tex_lod] - 1; + voodoo->params.tex_shift[tmu][lod] = shifts[tex_lod]; + voodoo->params.tex_lod[tmu][lod] = tex_lod; + + if (!(voodoo->params.textureMode[tmu] & TEXTUREMODE_TRILINEAR) || + ((lod & 1) && (voodoo->params.tLOD[tmu] & LOD_ODD)) || + (!(lod & 1) && !(voodoo->params.tLOD[tmu] & LOD_ODD))) + { + if (!(voodoo->params.tLOD[tmu] & LOD_ODD) || lod != 0) + { + if (voodoo->params.textureMode[tmu] & TEXTUREMODE_TRILINEAR) + tex_lod += 2; + else + tex_lod++; + } + } + } + + voodoo->params.tex_width[tmu] = width; +} + +#define makergba(r, g, b, a) ((b) | ((g) << 8) | ((r) << 16) | ((a) << 24)) + +void voodoo_use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu) +{ + int c, d; + int lod; + int lod_min, lod_max; + uint32_t addr = 0, addr_end; + uint32_t palette_checksum; + + lod_min = (params->tLOD[tmu] >> 2) & 15; + lod_max = (params->tLOD[tmu] >> 8) & 15; + + if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL8 || params->tformat[tmu] == TEX_APAL88) + { + if (voodoo->palette_dirty[tmu]) + { + palette_checksum = 0; + + for (c = 0; c < 256; c++) + palette_checksum ^= voodoo->palette[tmu][c].u; + + voodoo->palette_checksum[tmu] = palette_checksum; + voodoo->palette_dirty[tmu] = 0; + } + else + palette_checksum = voodoo->palette_checksum[tmu]; + } + else + palette_checksum = 0; + + if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD) && (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR)) + addr = params->texBaseAddr1[tmu]; + else + addr = params->texBaseAddr[tmu]; + + /*Try to find texture in cache*/ + for (c = 0; c < TEX_CACHE_MAX; 
c++) + { + if (voodoo->texture_cache[tmu][c].base == addr && + voodoo->texture_cache[tmu][c].tLOD == (params->tLOD[tmu] & 0xf00fff) && + voodoo->texture_cache[tmu][c].palette_checksum == palette_checksum) + { + params->tex_entry[tmu] = c; + voodoo->texture_cache[tmu][c].refcount++; + return; + } + } + + /*Texture not found, search for unused texture*/ + do + { + for (c = 0; c < TEX_CACHE_MAX; c++) + { + voodoo->texture_last_removed++; + voodoo->texture_last_removed &= (TEX_CACHE_MAX-1); + if (voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[0] && + (voodoo->render_threads == 1 || voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount == voodoo->texture_cache[tmu][voodoo->texture_last_removed].refcount_r[1])) + break; + } + if (c == TEX_CACHE_MAX) + voodoo_wait_for_render_thread_idle(voodoo); + } while (c == TEX_CACHE_MAX); + if (c == TEX_CACHE_MAX) + fatal("Texture cache full!\n"); + + c = voodoo->texture_last_removed; + + + if ((voodoo->params.tLOD[tmu] & LOD_SPLIT) && (voodoo->params.tLOD[tmu] & LOD_ODD) && (voodoo->params.tLOD[tmu] & LOD_TMULTIBASEADDR)) + voodoo->texture_cache[tmu][c].base = params->texBaseAddr1[tmu]; + else + voodoo->texture_cache[tmu][c].base = params->texBaseAddr[tmu]; + voodoo->texture_cache[tmu][c].tLOD = params->tLOD[tmu] & 0xf00fff; + + lod_min = (params->tLOD[tmu] >> 2) & 15; + lod_max = (params->tLOD[tmu] >> 8) & 15; +// pclog(" add new texture to %i tformat=%i %08x LOD=%i-%i tmu=%i\n", c, voodoo->params.tformat[tmu], params->texBaseAddr[tmu], lod_min, lod_max, tmu); + lod_min = MIN(lod_min, 8); + lod_max = MIN(lod_max, 8); + for (lod = lod_min; lod <= lod_max; lod++) + { + uint32_t *base = &voodoo->texture_cache[tmu][c].data[texture_offset[lod]]; + uint32_t tex_addr = params->tex_base[tmu][lod] & voodoo->texture_mask; + int x, y; + int shift = 8 - params->tex_lod[tmu][lod]; + rgba_u *pal; + + //pclog(" LOD %i : %08x - %08x %i %i,%i\n", 
lod, params->tex_base[tmu][lod] & voodoo->texture_mask, addr, voodoo->params.tformat[tmu], voodoo->params.tex_w_mask[tmu][lod],voodoo->params.tex_h_mask[tmu][lod]); + + + switch (params->tformat[tmu]) + { + case TEX_RGB332: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(rgb332[dat].r, rgb332[dat].g, rgb332[dat].b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_Y4I2Q2: + pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_A8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(dat, dat, dat, dat); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_I8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(dat, dat, dat, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_AI8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + 
uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba((dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0x0f) | ((dat << 4) & 0xf0), (dat & 0xf0) | ((dat >> 4) & 0x0f)); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_PAL8: + pal = voodoo->palette[tmu]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat].rgba.r, pal[dat].rgba.g, pal[dat].rgba.b, 0xff); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_APAL8: + pal = voodoo->palette[tmu]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint8_t dat = voodoo->tex_mem[tmu][(tex_addr+x) & voodoo->texture_mask]; + + int r = ((pal[dat].rgba.r & 3) << 6) | ((pal[dat].rgba.g & 0xf0) >> 2) | (pal[dat].rgba.r & 3); + int g = ((pal[dat].rgba.g & 0xf) << 4) | ((pal[dat].rgba.b & 0xc0) >> 4) | ((pal[dat].rgba.g & 0xf) >> 2); + int b = ((pal[dat].rgba.b & 0x3f) << 2) | ((pal[dat].rgba.b & 0x30) >> 4); + int a = (pal[dat].rgba.r & 0xfc) | ((pal[dat].rgba.r & 0xc0) >> 6); + + base[x] = makergba(r, g, b, a); + } + tex_addr += (1 << voodoo->params.tex_shift[tmu][lod]); + base += (1 << shift); + } + break; + + case TEX_ARGB8332: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(rgb332[dat & 0xff].r, rgb332[dat & 0xff].g, rgb332[dat & 0xff].b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_A8Y4I2Q2: + 
pal = voodoo->ncc_lookup[tmu][(voodoo->params.textureMode[tmu] & TEXTUREMODE_NCC_SEL) ? 1 : 0]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_R5G6B5: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(rgb565[dat].r, rgb565[dat].g, rgb565[dat].b, 0xff); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_ARGB1555: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(argb1555[dat].r, argb1555[dat].g, argb1555[dat].b, argb1555[dat].a); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_ARGB4444: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(argb4444[dat].r, argb4444[dat].g, argb4444[dat].b, argb4444[dat].a); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_A8I8: + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + 
uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(dat & 0xff, dat & 0xff, dat & 0xff, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + case TEX_APAL88: + pal = voodoo->palette[tmu]; + for (y = 0; y < voodoo->params.tex_h_mask[tmu][lod]+1; y++) + { + for (x = 0; x < voodoo->params.tex_w_mask[tmu][lod]+1; x++) + { + uint16_t dat = *(uint16_t *)&voodoo->tex_mem[tmu][(tex_addr + x*2) & voodoo->texture_mask]; + + base[x] = makergba(pal[dat & 0xff].rgba.r, pal[dat & 0xff].rgba.g, pal[dat & 0xff].rgba.b, dat >> 8); + } + tex_addr += (1 << (voodoo->params.tex_shift[tmu][lod]+1)); + base += (1 << shift); + } + break; + + default: + fatal("Unknown texture format %i\n", params->tformat[tmu]); + } + } + + voodoo->texture_cache[tmu][c].is16 = voodoo->params.tformat[tmu] & 8; + + if (params->tformat[tmu] == TEX_PAL8 || params->tformat[tmu] == TEX_APAL8 || params->tformat[tmu] == TEX_APAL88) + voodoo->texture_cache[tmu][c].palette_checksum = palette_checksum; + else + voodoo->texture_cache[tmu][c].palette_checksum = 0; + + if (lod_min == 0) + { + voodoo->texture_cache[tmu][c].addr_start[0] = voodoo->params.tex_base[tmu][0]; + voodoo->texture_cache[tmu][c].addr_end[0] = voodoo->params.tex_end[tmu][0]; + } + else + voodoo->texture_cache[tmu][c].addr_start[0] = voodoo->texture_cache[tmu][c].addr_end[0] = 0; + + if (lod_min <= 1 && lod_max >= 1) + { + voodoo->texture_cache[tmu][c].addr_start[1] = voodoo->params.tex_base[tmu][1]; + voodoo->texture_cache[tmu][c].addr_end[1] = voodoo->params.tex_end[tmu][1]; + } + else + voodoo->texture_cache[tmu][c].addr_start[1] = voodoo->texture_cache[tmu][c].addr_end[1] = 0; + + if (lod_min <= 2 && lod_max >= 2) + { + voodoo->texture_cache[tmu][c].addr_start[2] = voodoo->params.tex_base[tmu][2]; + voodoo->texture_cache[tmu][c].addr_end[2] = voodoo->params.tex_end[tmu][2]; + } + else + 
voodoo->texture_cache[tmu][c].addr_start[2] = voodoo->texture_cache[tmu][c].addr_end[2] = 0; + + if (lod_max >= 3) + { + voodoo->texture_cache[tmu][c].addr_start[3] = voodoo->params.tex_base[tmu][(lod_min > 3) ? lod_min : 3]; + voodoo->texture_cache[tmu][c].addr_end[3] = voodoo->params.tex_end[tmu][(lod_max < 8) ? lod_max : 8]; + } + else + voodoo->texture_cache[tmu][c].addr_start[3] = voodoo->texture_cache[tmu][c].addr_end[3] = 0; + + + for (d = 0; d < 4; d++) + { + addr = voodoo->texture_cache[tmu][c].addr_start[d]; + addr_end = voodoo->texture_cache[tmu][c].addr_end[d]; + + if (addr_end != 0) + { + for (; addr <= addr_end; addr += (1 << TEX_DIRTY_SHIFT)) + voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1; + } + } + + params->tex_entry[tmu] = c; + voodoo->texture_cache[tmu][c].refcount++; +} + +void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu) +{ + int wait_for_idle = 0; + int c; + + memset(voodoo->texture_present[tmu], 0, sizeof(voodoo->texture_present[0])); +// pclog("Evict %08x %i\n", dirty_addr, sizeof(voodoo->texture_present)); + for (c = 0; c < TEX_CACHE_MAX; c++) + { + if (voodoo->texture_cache[tmu][c].base != -1) + { + int d; + + for (d = 0; d < 4; d++) + { + int addr_start = voodoo->texture_cache[tmu][c].addr_start[d]; + int addr_end = voodoo->texture_cache[tmu][c].addr_end[d]; + + if (addr_end != 0) + { + int addr_start_masked = addr_start & voodoo->texture_mask & ~0x3ff; + int addr_end_masked = ((addr_end & voodoo->texture_mask) + 0x3ff) & ~0x3ff; + + if (addr_end_masked < addr_start_masked) + addr_end_masked = voodoo->texture_mask+1; + if (dirty_addr >= addr_start_masked && dirty_addr < addr_end_masked) + { +// pclog(" Evict texture %i %08x\n", c, voodoo->texture_cache[tmu][c].base); + + if (voodoo->texture_cache[tmu][c].refcount != voodoo->texture_cache[tmu][c].refcount_r[0] || + (voodoo->render_threads == 2 && voodoo->texture_cache[tmu][c].refcount != 
voodoo->texture_cache[tmu][c].refcount_r[1])) + wait_for_idle = 1; + + voodoo->texture_cache[tmu][c].base = -1; + } + else + { + for (; addr_start <= addr_end; addr_start += (1 << TEX_DIRTY_SHIFT)) + voodoo->texture_present[tmu][(addr_start & voodoo->texture_mask) >> TEX_DIRTY_SHIFT] = 1; + } + } + } + } + } + if (wait_for_idle) + voodoo_wait_for_render_thread_idle(voodoo); +} + +void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p) +{ + int lod, s, t; + voodoo_t *voodoo = (voodoo_t *)p; + int tmu; + + if (addr & 0x400000) + return; /*TREX != 0*/ + + tmu = (addr & 0x200000) ? 1 : 0; + + if (tmu && !voodoo->dual_tmus) + return; + + if (voodoo->type < VOODOO_BANSHEE) + { + if (!(voodoo->params.tformat[tmu] & 8) && voodoo->type >= VOODOO_BANSHEE) + { + lod = (addr >> 16) & 0xf; + t = (addr >> 8) & 0xff; + } + else + { + lod = (addr >> 17) & 0xf; + t = (addr >> 9) & 0xff; + } + if (voodoo->params.tformat[tmu] & 8) + s = (addr >> 1) & 0xfe; + else + { + if ((voodoo->params.textureMode[tmu] & (1 << 31)) || voodoo->type >= VOODOO_BANSHEE) + s = addr & 0xfc; + else + s = (addr >> 1) & 0xfc; + } + if (lod > LOD_MAX) + return; + +// if (addr >= 0x200000) +// return; + + if (voodoo->params.tformat[tmu] & 8) + addr = voodoo->params.tex_base[tmu][lod] + s*2 + (t << voodoo->params.tex_shift[tmu][lod])*2; + else + addr = voodoo->params.tex_base[tmu][lod] + s + (t << voodoo->params.tex_shift[tmu][lod]); + } + else + addr = (addr & 0x1ffffc) + voodoo->params.tex_base[tmu][0]; + + if (voodoo->texture_present[tmu][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// pclog("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, addr & voodoo->texture_mask, tmu); + } + if (voodoo->type == VOODOO_3 && voodoo->texture_present[tmu^1][(addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT]) + { +// pclog("texture_present at %08x %i\n", addr, (addr & voodoo->texture_mask) >> TEX_DIRTY_SHIFT); + flush_texture_cache(voodoo, 
/*
 * Declarations from pcem/vid_voodoo_texture.h.
 *
 * texture_offset[] holds running totals of the squares 256*256, 128*128,
 * ... 1*1: entry 0 is 0 and entry i is the sum of the first i squares.  The
 * last entry adds one more 1 on top of the full sum.  Eleven initialisers are
 * given for the LOD_MAX+3 slots (LOD_MAX is defined elsewhere).
 * NOTE(review): presumably these are the texel offsets of each mip level
 * inside a full 256x256 mip chain - confirm against the users of the table.
 */
static const uint32_t texture_offset[LOD_MAX+3] =
{
        0,
        256*256,
        256*256 + 128*128,
        256*256 + 128*128 + 64*64,
        256*256 + 128*128 + 64*64 + 32*32,
        256*256 + 128*128 + 64*64 + 32*32 + 16*16,
        256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8,
        256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4,
        256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2,
        256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1,
        256*256 + 128*128 + 64*64 + 32*32 + 16*16 + 8*8 + 4*4 + 2*2 + 1*1 + 1 /* extra trailing entry: full sum plus 1 */
};

/* NOTE(review): body not visible here; by its name it recomputes per-TMU texture parameters - confirm. */
void voodoo_recalc_tex(voodoo_t *voodoo, int tmu);
/* Selects a texture cache entry for the given TMU, stores its index in params->tex_entry[tmu] and increments its refcount. */
void voodoo_use_texture(voodoo_t *voodoo, voodoo_params_t *params, int tmu);
/* 32-bit write into texture memory; p is the voodoo_t instance. Flushes affected cache entries before storing val. */
void voodoo_tex_writel(uint32_t addr, uint32_t val, void *p);
/* Evicts cached textures of the given TMU whose address ranges contain dirty_addr and rebuilds texture_present[tmu]. */
void flush_texture_cache(voodoo_t *voodoo, uint32_t dirty_addr, int tmu);