From: Toni Wilen Date: Fri, 14 Feb 2025 15:59:40 +0000 (+0200) Subject: Custom chipset bitplane change optimization X-Git-Url: https://git.unchartedbackwaters.co.uk/w/?a=commitdiff_plain;h=f8e50838a6b5f2a81a79aaac683b00d1eee1a4b9;p=francis%2Fwinuae.git Custom chipset bitplane change optimization --- diff --git a/custom.cpp b/custom.cpp index db9d5814..d7d38dd5 100644 --- a/custom.cpp +++ b/custom.cpp @@ -60,12 +60,8 @@ #define CYCLE_CONFLICT_LOGGING 0 -#define SPEEDUP 1 - #define CUSTOM_DEBUG 0 #define SPRITE_DEBUG 0 -#define SPRITE_DEBUG_MINY 0 -#define SPRITE_DEBUG_MAXY 0x30 #define REFRESH_FIRST_HPOS 3 #define COPPER_CYCLE_POLARITY 1 @@ -110,15 +106,15 @@ struct pipeline_reg #define LINETYPE_BPL 3 struct linestate { - int rga_index; int type; - bool needdraw; uae_u32 cnt; uae_u16 ddfstrt, ddfstop; uae_u16 diwstrt, diwstop, diwhigh; uae_u16 bplcon0, bplcon1, bplcon2, bplcon3, bplcon4; uae_u16 fmode; uae_u32 color0; + uae_u8 *linedatastate; + int bpllen; }; static uae_u32 displayresetcnt; uae_u8 agnus_hpos; @@ -128,9 +124,7 @@ static uae_u32 dmal_shifter; static uae_u16 pipelined_write_addr; static uae_u16 pipelined_write_value; static struct rgabuf rga_pipe[RGA_SLOT_TOTAL + 1]; -struct denise_rga rga_denise[DENISE_RGA_SLOT_MAX_TOTAL]; -int denise_rga_slot_size_mask = DENISE_RGA_SLOT_MIN_TOTAL - 1; -static int previous_field_rga_index; +struct denise_rga rga_denise[DENISE_RGA_SLOT_TOTAL]; static struct linestate *current_line_state; static struct linestate lines[MAX_SCANDOUBLED_LINES][2]; static int rga_denise_cycle, rga_denise_cycle_start, rga_denise_cycle_start_prev, rga_denise_cycle_count; @@ -388,7 +382,7 @@ static bool nosignal_trigger; static bool syncs_stopped; int display_reset; static bool initial_frame; -static int custom_fastmode_line_update; +static int custom_fastmode_exit; static int plffirstline, plflastline; /* Stupid genlock-detection prevention hack. @@ -648,8 +642,6 @@ enum class diw_states int plffirstline_total, plflastline_total; static int autoscale_bordercolors; -static int plfstrt, plfstop; -static int sprite_minx; static int first_bpl_vpos; static int last_decide_line_hpos; static int last_fetch_hpos, last_decide_sprite_hpos; @@ -1136,7 +1128,6 @@ static void update_mirrors(void) if (currprefs.gfx_overscanmode >= OVERSCANMODE_ULTRA) { hsyncdebug = currprefs.gfx_overscanmode - OVERSCANMODE_ULTRA + 1; } - denise_rga_slot_size_mask = currprefs.cpu_memory_cycle_exact ? (DENISE_RGA_SLOT_MIN_TOTAL - 1) : (DENISE_RGA_SLOT_MAX_TOTAL - 1); } void notice_new_xcolors(void) @@ -6339,7 +6330,7 @@ void custom_reset(bool hardreset, bool keyboardreset) rga_slot_in_offset = 1; rga_slot_out_offset = 2; - for(int i = 0; i < DENISE_RGA_SLOT_MAX_TOTAL; i++) { + for(int i = 0; i < DENISE_RGA_SLOT_TOTAL; i++) { struct denise_rga *r = &rga_denise[i]; memset(r, 0, sizeof(struct denise_rga)); } @@ -10157,7 +10148,7 @@ static void do_scandouble(void) int vp = linear_vpos; struct rgabuf rga = { 0 }; for (int i = 0; i < rga_denise_cycle_count; i++) { - int idx = (i + rga_denise_cycle_start) & denise_rga_slot_size_mask; + int idx = (i + rga_denise_cycle_start) & DENISE_RGA_SLOT_MASK; struct denise_rga *rd = &rga_denise[idx]; if (rd->rga >= 0x110 && rd->rga < 0x120) { int plane = (rd->rga - 0x110) / 2; @@ -10182,11 +10173,11 @@ static void next_denise_rga(void) rga_denise_cycle_start_prev = rga_denise_cycle_start; rga_denise_cycle_start = rga_denise_cycle; rga_denise_cycle_count = 0; - rga_denise[(rga_denise_cycle - 1) & denise_rga_slot_size_mask].line++; - rga_denise[(rga_denise_cycle - 2) & denise_rga_slot_size_mask].line++; - rga_denise[(rga_denise_cycle - 3) & denise_rga_slot_size_mask].line++; - rga_denise[(rga_denise_cycle - 4) & denise_rga_slot_size_mask].line++; - rga_denise[(rga_denise_cycle - 5) & denise_rga_slot_size_mask].line++; + rga_denise[(rga_denise_cycle - 1) & DENISE_RGA_SLOT_MASK].line++; + rga_denise[(rga_denise_cycle - 2) & DENISE_RGA_SLOT_MASK].line++; + rga_denise[(rga_denise_cycle - 3) & DENISE_RGA_SLOT_MASK].line++; + rga_denise[(rga_denise_cycle - 4) & DENISE_RGA_SLOT_MASK].line++; + rga_denise[(rga_denise_cycle - 5) & DENISE_RGA_SLOT_MASK].line++; rga_denise[rga_denise_cycle].line++; rga_denise_cycle_line++; } @@ -10218,13 +10209,78 @@ static int getlinetype(void) return type; } -static void marklinedrawn(int line) +static int getcolorcount(int planes) { - if (line >= MAX_SCANDOUBLED_LINES) { - return; + bool ham = (bplcon0 & 0x800) != 0; + if (aga_mode) { + if (ham) { + if (planes < 8) { + return 16; + } + return 64; + } + return 1 << planes; + } + if (ham) { + return 16; + } + if (planes > 5) { + planes = 5; + } + return 1 << planes; +} + +static bool checkprevfieldlinestateequalbpl(struct linestate *l) +{ + if (l->bplcon0 == bplcon0 && l->bplcon1 == bplcon1 && + l->bplcon2 == bplcon2 && l->ddfstrt == ddfstrt && + l->ddfstop == ddfstop && l->diwstrt == diwstrt && + l->diwstop == diwstop && l->bplcon3 == bplcon3 && + l->bplcon4 == bplcon4 && l->diwhigh == diwhigh && + l->fmode == fmode && l->bpllen > 0) + { + // compare bpl data + uae_u8 *dpt = l->linedatastate; + int planes = GET_PLANES(bplcon0); + int len = l->bpllen; + for (int i = 0; i < planes; i++) { + uaecptr apt = bplpt[i]; + if (!valid_address(apt, len)) { + return false; + } + uae_u8 *pt = get_real_address(apt); + if (memcmp(dpt, pt, len)) { + return false; + } + dpt += len; + } + int colors = getcolorcount(planes); + if (aga_mode) { + if (memcmp(dpt, agnus_colors.color_regs_aga, colors * sizeof(uae_u32))) { + return false; + } + } else { + if (memcmp(dpt, agnus_colors.color_regs_ecs, colors * sizeof(uae_u16))) { + return false; + } + } + // advance bpl pointers + for (int i = 0; i < planes; i++) { + int mod; + if (fmode & 0x4000) { + if (((diwstrt >> 8) ^ (vpos ^ 1)) & 1) { + mod = bpl1mod; + } else { + mod = bpl2mod; + } + } else { + mod = (i & 1) ? bpl2mod : bpl1mod; + } + bplpt[i] += mod + len; + } + return true; } - struct linestate *l = &lines[line][lof_display]; - l->needdraw = false; + return false; } static bool checkprevfieldlinestateequal(void) @@ -10232,24 +10288,40 @@ static bool checkprevfieldlinestateequal(void) if (linear_vpos >= MAX_SCANDOUBLED_LINES) { return false; } + bool ret = false; int lof = interlace_seen ? 1 - lof_display : lof_display; struct linestate *l = &lines[linear_vpos][lof]; int type = getlinetype(); - if (type != l->type) { - l->type = 0; - } - if (type == l->type && displayresetcnt == l->cnt && !l->needdraw) { - if (1 && type == LINETYPE_BLANK) { - return true; + if (type == l->type && displayresetcnt == l->cnt) { + if (type == LINETYPE_BLANK) { + if (1) { + ret = true; + } } else if (type == LINETYPE_BORDER) { - uae_u32 c = aga_mode ? agnus_colors.color_regs_aga[0] : agnus_colors.color_regs_ecs[0]; - if (1 && c == l->color0) { - return true; + if (1) { + uae_u32 c = aga_mode ? agnus_colors.color_regs_aga[0] : agnus_colors.color_regs_ecs[0]; + if (c == l->color0) { + ret = true; + } + } + } else if (type == LINETYPE_BPL) { + if (1) { + ret = checkprevfieldlinestateequalbpl(l); } } } - return false; + return ret; +} + +static void resetlinestate(void) +{ + if (linear_vpos >= MAX_SCANDOUBLED_LINES) { + return; + } + struct linestate *l = &lines[linear_vpos][lof_display]; + l->type = 0; + } static void storelinestate(void) @@ -10259,10 +10331,9 @@ static void storelinestate(void) } struct linestate *l = &lines[linear_vpos][lof_display]; - l->rga_index = rga_denise_cycle_start; l->type = getlinetype(); + l->bpllen = -1; l->cnt = displayresetcnt; - l->needdraw = l->type != LINETYPE_BLANK; l->bplcon0 = bplcon0; l->bplcon1 = bplcon1; l->bplcon2 = bplcon2; @@ -10277,10 +10348,38 @@ static void storelinestate(void) l->diwhigh = diwhigh; l->fmode = fmode; - -// if (l->drawcnt == 0) { -// write_log("%d ", linear_vpos); -// } + if (l->type == LINETYPE_BPL) { + if (!l->linedatastate) { + l->linedatastate = xmalloc(uae_u8, MAXHPOS * sizeof(uae_u16) + (256 * sizeof(uae_u32))); + } + int stop = ddfstop > 0xd8 ? 0xd8 : ddfstop; + int len = ((stop - ddfstrt) + fetchunit - 1) / fetchunit + 1; + len = len * fetchunit / fetchstart; + len <<= 1; + len <<= fetchmode; + l->bpllen = len; + uae_u8 *dpt = l->linedatastate; + int planes = GET_PLANES(bplcon0); + for (int i = 0; i < planes; i++) { + if (!valid_address(bplpt[i], len)) { + l->type = 0; + return; + } + uae_u8 *pt = get_real_address(bplpt[i]); + memcpy(dpt, pt, len); + dpt += len; + } + if (aga_mode) { + int colors = 1 << planes; + memcpy(dpt, agnus_colors.color_regs_aga, colors * sizeof(uae_u32)); + } else { + if (planes > 5) { + planes = 5; + } + int colors = 1 << planes; + memcpy(dpt, agnus_colors.color_regs_ecs, colors * sizeof(uae_u16)); + } + } } @@ -10304,8 +10403,6 @@ static void draw_line(void) draw_denise_line(dvp, nextline_how, rga_denise_cycle_line, rga_denise_cycle_start, rga_denise_cycle_count, display_hstart_cyclewait_skip, display_hstart_cyclewait_skip2, wclks, cs, cslen); - - marklinedrawn(custom_fastmode_line_update); } static void dmal_fast(void) @@ -10475,8 +10572,26 @@ static void draw_line_fast(void) static int display_hstart_fastmode; +static void quick_denise_rga(void) +{ + for (int i = 0; i < rga_denise_cycle_count; i++) { + int off = i + rga_denise_cycle_start; + struct denise_rga *rd = &rga_denise[off & DENISE_RGA_SLOT_MASK]; + if (rd->line == rga_denise_cycle_line && rd->rga != 0x1fe) { + denise_update_reg(rd->rga, rd->v); + } + } +} + static void do_draw_line(void) { + if (custom_fastmode_exit) { + custom_fastmode_exit = 0; + quick_denise_rga(); + next_denise_rga(); + decide_line_end(); + return; + } if (!custom_disabled) { if (custom_fastmode >= 0) { if (doflickerfix_active() && scandoubled_bpl_ena[linear_vpos]) { @@ -10530,7 +10645,7 @@ static void handle_pipelined_write(void) static int can_fast_custom(void) { - if (currprefs.chipset_hr) { + if (1 && currprefs.chipset_hr) { return 0; } if (not_safe_mode || syncs_stopped || agnus_pos_change > -2) { @@ -10547,8 +10662,8 @@ static int can_fast_custom(void) if (agnus_vb_active_end_line) { return 0; } - // skip sprite lines if bordersprite enabled - if ((bplcon0 & 1) && (bplcon3 & 2) && !agnus_vb && getlinetype() == LINETYPE_BORDER) { + int type = getlinetype(); + if (type == LINETYPE_BPL || (type == LINETYPE_BORDER && (bplcon0 & 1) && (bplcon3 & 0x02) && !(bplcon3 & 0x20))) { for (int i = 0; i < MAX_SPRITES; i++) { struct sprite *s = &spr[i]; if (s->vstart == vpos || s->vstop == vpos) { @@ -10560,12 +10675,52 @@ static int can_fast_custom(void) } } } - if (0 || 1) return 1; return 0; } +static void do_imm_dmal(void) +{ + process_dmal(0); + uae_u32 dmal_d = dmal; + + // "immediate" audio DMA + if (dmaen(DMA_AUD0 | DMA_AUD1 | DMA_AUD2 | DMA_AUD3) && ((dmal_d >> (3 * 2)) & 255)) { + for (int nr = 0; nr < 4; nr++) { + uae_u32 dmalbits = (dmal_d >> ((3 + nr) * 2)) & 3; + if (dmalbits) { + struct rgabuf r = { 0 }; + uaecptr *pt = audio_getpt(nr); + r.pv = *pt; + dmal_emu_audio(&r, nr); + *pt += 2; + if (dmalbits & 1) { + *pt = audio_getloadpt(nr); + } + } + } + } + + // "immediate" disk DMA + if (dmaen(DMA_DISK) && (((dmal_d >> 0) & 63))) { + for (int nr = 0; nr < 3; nr++) { + uae_u32 dmalbits = (dmal_d >> (0 + nr * 2)) & 3; + if (dmalbits) { + int w = (dmalbits & 3) == 3; + struct rgabuf r = { 0 }; + uaecptr *pt = disk_getpt(); + r.pv = *pt; + dmal_emu_disk(&r, nr, w); + *pt += 2; + } + } + } + + dmal = 0; + dmal_shifter = 0; +} + static void sync_equalline_handler(void); static void start_sync_equalline_handler(void) { @@ -10623,7 +10778,6 @@ static void custom_trigger_start(void) if (linear_vpos < MAX_SCANDOUBLED_LINES) { current_line_state = &lines[linear_vpos][lof_display]; - previous_field_rga_index = current_line_state->rga_index; } if (vpos == vsync_startline) { @@ -10746,16 +10900,21 @@ static void custom_trigger_start(void) fast_lines_cnt = 0; } int canline = can_fast_custom(); - if (canline && checkprevfieldlinestateequal()) { - start_sync_equalline_handler(); - if (custom_fastmode > 0) { - custom_fastmode = 2; + if (canline) { + bool same = checkprevfieldlinestateequal(); + if (same) { + start_sync_equalline_handler(); + if (custom_fastmode > 0) { + custom_fastmode = 2; + } else { + custom_fastmode = 1; + do_imm_dmal(); + } } else { - custom_fastmode = 1; + storelinestate(); } } else { - storelinestate(); - custom_fastmode_line_update = linear_vpos; + resetlinestate(); } } @@ -10919,9 +11078,7 @@ static void inc_cck(void) set_fakehsync_handler(); } else { rga_denise_cycle++; - rga_denise_cycle &= denise_rga_slot_size_mask; - previous_field_rga_index++; - previous_field_rga_index &= denise_rga_slot_size_mask; + rga_denise_cycle &= DENISE_RGA_SLOT_MASK; rga_denise_cycle_count++; } if (beamcon0_dual) { @@ -11308,7 +11465,6 @@ static void handle_rga_out(void) if (check_rga_out()) { int hp = agnus_hpos; struct rgabuf *r = read_rga_out(); - struct denise_rga *prga = &rga_denise[previous_field_rga_index]; bool done = false; bool disinc = false; @@ -11649,47 +11805,6 @@ static void do_cck(bool docycles) } -static void do_imm_dmal(void) -{ - process_dmal(0); - uae_u32 dmal_d = dmal; - - // "immediate" audio DMA - if (dmaen(DMA_AUD0 | DMA_AUD1 | DMA_AUD2 | DMA_AUD3) && ((dmal_d >> (3 * 2)) & 255)) { - for (int nr = 0; nr < 4; nr++) { - uae_u32 dmalbits = (dmal_d >> ((3 + nr) * 2)) & 3; - if (dmalbits) { - struct rgabuf r = { 0 }; - uaecptr *pt = audio_getpt(nr); - r.pv = *pt; - dmal_emu_audio(&r, nr); - *pt += 2; - if (dmalbits & 1) { - *pt = audio_getloadpt(nr); - } - } - } - } - - // "immediate" disk DMA - if (dmaen(DMA_DISK) && (((dmal_d >> 0) & 63))) { - for (int nr = 0; nr < 3; nr++) { - uae_u32 dmalbits = (dmal_d >> (0 + nr * 2)) & 3; - if (dmalbits) { - int w = (dmalbits & 3) == 3; - struct rgabuf r = { 0 }; - uaecptr *pt = disk_getpt(); - r.pv = *pt; - dmal_emu_disk(&r, nr, w); - *pt += 2; - } - } - } - - dmal = 0; - dmal_shifter = 0; -} - #if 0 // quick single line static void sync_fast_evhandler(void) @@ -11742,6 +11857,17 @@ static void sync_fast_evhandler(void) } #endif +static void fast_strobe(void) +{ + uae_u16 str = get_strobe_reg(0); + write_drga(str, NULL, 0); + if (prev_strobe == 0x3c && str != 0x3c) { + INTREQ_INT(5, 0); + } + denise_handle_quick_strobe(str); + prev_strobe = str; +} + // horizontal sync callback when line not changed + fast cpu static void sync_equalline_handler(void) { @@ -11755,21 +11881,15 @@ static void sync_equalline_handler(void) int rdc_offset = REFRESH_FIRST_HPOS - hpos_delta; rga_denise_cycle += rdc_offset; - rga_denise_cycle &= denise_rga_slot_size_mask; - uae_u16 str = get_strobe_reg(0); - write_drga(str, NULL, 0); - if (prev_strobe == 0x3c && str != 0x3c) { - INTREQ_INT(5, 0); - } - denise_handle_quick_strobe(str); - prev_strobe = str; + rga_denise_cycle &= DENISE_RGA_SLOT_MASK; + fast_strobe(); int diff = display_hstart_fastmode - hpos_delta; linear_hpos += diff; currcycle_cck += diff; rga_denise_cycle_count += diff; rga_denise_cycle += diff - rdc_offset; - rga_denise_cycle &= denise_rga_slot_size_mask; + rga_denise_cycle &= DENISE_RGA_SLOT_MASK; if (custom_fastmode == 1) { int rdc = rga_denise_cycle; @@ -11790,18 +11910,18 @@ static void sync_equalline_handler(void) currcycle_cck += diff; rga_denise_cycle_count += diff; rga_denise_cycle += diff; - rga_denise_cycle &= denise_rga_slot_size_mask; + rga_denise_cycle &= DENISE_RGA_SLOT_MASK; fast_lines_cnt++; - custom_fastmode_line_update = linear_vpos; - custom_trigger_start(); + if (eventtab[ev_sync].active) { check_extra(); do_imm_dmal(); } else { custom_fastmode = -1; + custom_fastmode_exit = 1; } } diff --git a/drawing.cpp b/drawing.cpp index ac893c38..5f7abf74 100644 --- a/drawing.cpp +++ b/drawing.cpp @@ -4258,11 +4258,11 @@ bool denise_update_reg_queued(uae_u16 reg, uae_u16 v, uae_u32 cycle) static void do_denise_cck(int linecnt, int startpos, int i) { - int idxp = (i + startpos + 1) & denise_rga_slot_size_mask; - int idx0 = (i + startpos - 0) & denise_rga_slot_size_mask; - int idx1 = (i + startpos - 1) & denise_rga_slot_size_mask; - int idx2 = (i + startpos - 1) & denise_rga_slot_size_mask; - int idx3 = (i + startpos - 2) & denise_rga_slot_size_mask; + int idxp = (i + startpos + 1) & DENISE_RGA_SLOT_MASK; + int idx0 = (i + startpos - 0) & DENISE_RGA_SLOT_MASK; + int idx1 = (i + startpos - 1) & DENISE_RGA_SLOT_MASK; + int idx2 = (i + startpos - 1) & DENISE_RGA_SLOT_MASK; + int idx3 = (i + startpos - 2) & DENISE_RGA_SLOT_MASK; denise_hcounter_new = denise_hcounter + 2; denise_hcounter_new &= 511; @@ -4321,7 +4321,7 @@ static void do_denise_cck(int linecnt, int startpos, int i) } if (!aga_mode && ecs_denise) { - int idbe = (i + startpos - 3) & denise_rga_slot_size_mask; + int idbe = (i + startpos - 3) & DENISE_RGA_SLOT_MASK; rd = &rga_denise[idbe]; if (rd->line == linecnt) { expand_drga_blanken(rd); diff --git a/include/custom.h b/include/custom.h index 72ef3046..0995872d 100644 --- a/include/custom.h +++ b/include/custom.h @@ -287,10 +287,9 @@ struct denise_rga #endif }; -#define DENISE_RGA_SLOT_MIN_TOTAL 512 -#define DENISE_RGA_SLOT_MAX_TOTAL (512 * 1024) -extern int denise_rga_slot_size_mask; -extern struct denise_rga rga_denise[DENISE_RGA_SLOT_MAX_TOTAL]; +#define DENISE_RGA_SLOT_TOTAL 512 +#define DENISE_RGA_SLOT_MASK (DENISE_RGA_SLOT_TOTAL - 1) +extern struct denise_rga rga_denise[DENISE_RGA_SLOT_TOTAL]; struct denise_fastsprite {