From 3a622c44babcf566ad4e7a1070efabfbf08a9539 Mon Sep 17 00:00:00 2001 From: Toni Wilen Date: Sat, 25 Jul 2009 10:07:09 +0300 Subject: [PATCH] imported winuaesrc1620b2.zip --- archivers/dms/pfile.c | 32 +- archivers/dms/pfile.h | 2 +- audio.c | 12 +- blitter.c | 508 ++++++++++++++---------- custom.c | 736 ++++++++++++++++++++--------------- debug.c | 178 +++++++-- drawing.c | 1 + gencpu.c | 225 ++++++----- include/blitter.h | 5 +- include/custom.h | 14 +- include/debug.h | 30 +- include/events.h | 1 + include/newcpu.h | 10 +- include/options.h | 4 +- include/uae.h | 2 + main.c | 1 + newcpu.c | 87 +++-- od-win32/sounddep/sound.c | 6 +- od-win32/win32.c | 323 ++++++++++++--- od-win32/win32.h | 4 +- od-win32/winuaechangelog.txt | 63 ++- zfile.c | 24 +- 22 files changed, 1465 insertions(+), 803 deletions(-) diff --git a/archivers/dms/pfile.c b/archivers/dms/pfile.c index c2f4ef75..5641c3f5 100644 --- a/archivers/dms/pfile.c +++ b/archivers/dms/pfile.c @@ -41,7 +41,7 @@ static void printbandiz(UCHAR *, USHORT); static int passfound, passretries; -static char modes[7][7]={"NOCOMP","SIMPLE","QUICK ","MEDIUM","DEEP ","HEAVY1","HEAVY2"}; +static TCHAR modes[7][7]={L"NOCOMP",L"SIMPLE",L"QUICK ",L"MEDIUM",L"DEEP ",L"HEAVY1",L"HEAVY2"}; static USHORT PWDCRC; UCHAR *text; @@ -51,7 +51,8 @@ static void log_error(int track) write_log (L"DMS: Ignored error on track %d!\n", track); } -USHORT DMS_Process_File(struct zfile *fi, struct zfile *fo, USHORT cmd, USHORT opt, USHORT PCRC, USHORT pwd){ +USHORT DMS_Process_File(struct zfile *fi, struct zfile *fo, USHORT cmd, USHORT opt, USHORT PCRC, USHORT pwd, int part) +{ USHORT from, to, geninfo, c_version, cmode, hcrc, disktype, pv, ret; ULONG pkfsize, unpkfsize; UCHAR *b1, *b2; @@ -106,6 +107,13 @@ USHORT DMS_Process_File(struct zfile *fi, struct zfile *fo, USHORT cmd, USHORT o from = (USHORT) ((b1[16]<<8) | b1[17]); /* Lowest track in archive. 
May be incorrect if archive is "appended" */ to = (USHORT) ((b1[18]<<8) | b1[19]); /* Highest track in archive. May be incorrect if archive is "appended" */ + if (part && from < 30) { + free(b1); + free(b2); + free(text); + return DMS_FILE_END; + } + pkfsize = (ULONG) ((((ULONG)b1[21])<<16) | (((ULONG)b1[22])<<8) | (ULONG)b1[23]); /* Length of total packed data as in archive */ unpkfsize = (ULONG) ((((ULONG)b1[25])<<16) | (((ULONG)b1[26])<<8) | (ULONG)b1[27]); /* Length of unpacked data. Usually 901120 bytes */ @@ -211,6 +219,7 @@ USHORT DMS_Process_File(struct zfile *fi, struct zfile *fo, USHORT cmd, USHORT o if (cmd == CMD_SHOWBANNER) /* Banner is in the first track */ ret = Process_Track(fi,NULL,b1,b2,cmd,opt,(geninfo & 2)); else { + zfile_fseek (fo, from * 512 * 22, SEEK_SET); while ( (ret=Process_Track(fi,fo,b1,b2,cmd,opt,(geninfo & 2))) == NO_PROBLEM ) ; } } @@ -450,15 +459,18 @@ static USHORT Unpack_Track(UCHAR *b1, UCHAR *b2, USHORT pklen2, USHORT unpklen, static void printbandiz(UCHAR *m, USHORT len){ UCHAR *i,*j; - i=j=m; - while (irequest_word >= 2) handle2 = 1; if (chan_ena) { - alloc_cycle_ext (13 + nr * 2, CYCLE_MISC); + if (dmaaction) { + alloc_cycle_ext (13 + nr * 2, CYCLE_MISC); +#ifdef DEBUGGER + if (debug_dma) + record_dma (0xaa + nr * 16, cdp->dat2, cdp->pt, 13 + nr * 2, vpos); +#endif + } if (cdp->request_word == 1 || cdp->request_word == 2) cdp->pt += 2; } diff --git a/blitter.c b/blitter.c index 99b11c4d..d3179165 100644 --- a/blitter.c +++ b/blitter.c @@ -7,8 +7,8 @@ * (c) 2002 - 2005 Toni Wilen */ +//#define BLITTER_DEBUG_NOWAIT //#define BLITTER_DEBUG -//#define BLITTER_SLOWDOWNDEBUG 4 //#define BLITTER_DEBUG_NO_D #define SPEEDUP @@ -32,6 +32,7 @@ static int blitter_cycle_exact; uae_u16 bltcon0, bltcon1; uae_u32 bltapt, bltbpt, bltcpt, bltdpt; +int blitter_nasty; static int blinea_shift; static uae_u16 blinea, blineb; @@ -55,7 +56,7 @@ static uae_u8 blit_filltable[256][4][2]; uae_u32 blit_masktable[BLITTER_MAX_WORDS]; enum blitter_states 
bltstate; -static int blit_cyclecounter, blit_maxcyclecounter, blit_slowdown; +static int blit_cyclecounter, blit_maxcyclecounter, blit_slowdown, blit_totalcyclecounter; static int blit_linecyclecounter, blit_misscyclecounter; #ifdef CPUEMU_12 @@ -67,132 +68,106 @@ static long blit_first_cycle; static int blit_last_cycle, blit_dmacount, blit_dmacount2; static int blit_linecycles, blit_extracycles, blit_nod; static const int *blit_diag; +static int blit_line_pixel; static uae_u16 ddat1, ddat2; static int ddat1use, ddat2use; -static int blit_last_hpos; +int blit_interrupt; + +static int last_blitter_hpos; /* -Confirmed blitter information by Toni Wilen -(order of channels or position of idle cycles are not confirmed) - -1=BLTCON0 channel mask -2=total cycles per blitted word -[3=steals all cycles if BLTNASTY=1 (always if A-channel is enabled. this is illogical..)] -4=total cycles per blitted word in fillmode -5=cycle diagram (first cycle) -6=main cycle diagram (ABCD=channels,-=idle cycle,x=idle cycle but bus allocated) - -1 234 5 6 - -F 4*4*ABC- ABCD -E 4*4*ABC- ABC- -D 3*4 AB- ABD -C 3*3 AB- AB- -B 3*3*AC- ACD -A 2*2*AC AC -9 2*3 A- AD -8 2*2 A- A- -7 4 4 -BC- -BCD -6 4 4 -BC- -BC- -5 3 4 -B- -BD -4 3 3 -B- -B- -3 3 3 -C- -CD -2 3 3 -C- -C- -1 2 3 -D -D -0 2 3 -- -- - -NOTES: (BLTNASTY=1) - -- Blitter ALWAYS needs free bus cycle, even if it is running an "idle" cycle. - Exception: possible extra fill mode idle cycle is "real" idle cycle. - Can someone explain this? Why does idle cycles need bus cycles? -- Fill mode may add one extra real idle cycle.(depends on channel mask) -- All blits with channel A enabled use all available bus cycles - (stops CPU accesses to Agnus bus if BLTNASTY=1) WTF? I did another test - and this can't be true... Maybe I am becoming crazy.. -- idle cycles (no A-channel enabled) are not "used" by blitter, they are freely - available for CPU. - -BLTNASTY=0 makes things even more interesting.. 
- -- even zero channel blits get slower if BLTNASTY=0 depending on the number of - active bitplanes. ALSO "2 cycle" blits with one real cycle and one idle cycle - have the exact same speed as zero channel blit in all situations -> only the - total number of cycles count, number of active channels does not matter. + idle cycles are free cycles (available for CPU) + but for some reason they still require free bus cycle -*/ + basically every blitter cycle requires free bus cycle, + "real" cycles are used for blitter DMA, idle cycles + are free for CPU (if CPU needs bus) + same in both block and line modes -/* -1 = idle cycle and allocate bus */ + number of cycles, initial cycle, main cycle +*/ static const int blit_cycle_diagram[][10] = { - { 0, 2, 0,0 }, /* 0 */ - { 0, 2, 0,4 }, /* 1 */ - { 0, 3, 0,3,0 }, /* 2 */ - { 2, 3, 0,3,4, 3,0 }, /* 3 */ - { 0, 3, 0,2,0 }, /* 4 */ - { 2, 3, 0,2,4, 2,0 }, /* 5 */ - { 0, 4, 0,2,3,0 }, /* 6 */ - { 3, 4, 0,2,3,4, 2,3,0 }, /* 7 */ - { 0, 2, 1,0 }, /* 8 */ - { 2, 2, 1,4, 1,0 }, /* 9 */ - { 0, 2, 1,3 }, /* A */ - { 3, 3, 1,3,4, 1,3,0 }, /* B */ - { 2, 3, 1,2,0, 1,2 }, /* C */ - { 3, 3, 1,2,4, 1,2,0 }, /* D */ - { 0, 3, 1,2,3 }, /* E */ - { 4, 4, 1,2,3,4, 1,2,3,0 } /* F */ + { 2, 0,0, 0,0 }, /* 0 */ + { 2, 0,0, 0,4 }, /* 1 */ + { 2, 0,3, 0,3 }, /* 2 */ + { 3, 0,3,0, 0,3,4 }, /* 3 */ + { 3, 0,2,0, 0,2,0 }, /* 4 */ + { 3, 0,2,0, 0,2,4 }, /* 5 */ + { 3, 0,2,3, 0,2,3 }, /* 6 */ + { 4, 0,2,3,0, 0,2,3,4 }, /* 7 */ + { 2, 1,0, 1,0 }, /* 8 */ + { 2, 1,0, 1,4 }, /* 9 */ + { 2, 1,3, 1,3 }, /* A */ + { 3, 1,3,0, 1,3,4, }, /* B */ + { 3, 1,2,0, 1,2,0 }, /* C */ + { 3, 1,2,0, 1,2,4 }, /* D */ + { 3, 1,2,3, 1,2,3 }, /* E */ + { 4, 1,2,3,0, 1,2,3,4 } /* F */ }; -/* 5 = fill mode idle cycle ("real" idle cycle) */ +/* + + following 4 channel combinations in fill mode have extra + idle cycle added (still requires free bus cycle) + +*/ static const int blit_cycle_diagram_fill[][10] = { - { 0, 3, 0,5,0 }, /* 0 */ - { 0, 3, 0,5,4 }, /* 1 */ - { 0, 3, 
0,3,0 }, /* 2 */ - { 2, 3, 3,5,4, 3,0 }, /* 3 */ - { 0, 3, 0,2,5 }, /* 4 */ - { 3, 4, 0,2,5,4, 2,0,0 }, /* 5 */ - { 0, 4, 2,3,5,0 }, /* 6 */ - { 3, 4, 2,3,5,4, 2,3,0 }, /* 7 */ - { 0, 2, 1,5 }, /* 8 */ - { 2, 3, 1,5,4, 1,0}, /* 9 */ - { 0, 2, 1,3 }, /* A */ - { 3, 3, 1,3,4, 1,3,0 }, /* B */ - { 2, 3, 1,2,5, 1,2 }, /* C */ - { 3, 4, 1,2,5,4, 1,2,0 }, /* D */ - { 0, 3, 1,2,3 }, /* E */ - { 4, 4, 1,2,3,4, 1,2,3,0 } /* F */ + { 0 }, /* 0 */ + { 3, 0,0,0, 0,4,0 }, /* 1 */ + { 0 }, /* 2 */ + { 0 }, /* 3 */ + { 0 }, /* 4 */ + { 4, 0,2,0,0, 0,2,4,0 }, /* 5 */ + { 0 }, /* 6 */ + { 0 }, /* 7 */ + { 0 }, /* 8 */ + { 3, 1,0,0, 1,4,0 }, /* 9 */ + { 0 }, /* A */ + { 0 }, /* B */ + { 0 }, /* C */ + { 4, 1,2,0,0, 1,2,4,0 }, /* D */ + { 0 }, /* E */ + { 0 }, /* F */ }; /* - - line draw takes 4 cycles (-X-X) - it also have real idle cycles and only 2 dma fetches + -C-D -C-D ... -C-D -- (? difficult to confirm in logic analyzer) + + line draw takes 4 cycles (-C-D) + idle cycles do the same as above, 2 dma fetches (read from C, write to D, but see below) Oddities: - first word is written to address pointed by BLTDPT but all following writes go to address pointed by BLTCPT! + (some kind of internal copy because all bus cyles are + using normal BLTDDAT) - BLTDMOD is ignored by blitter (BLTCMOD is used) - state of D-channel enable bit does not matter! 
- disabling A-channel freezes the content of BPLAPT + - C-channel disabled: nothing is written */ +// 5 = internal "processing cycle" static const int blit_cycle_diagram_line[] = { - 0, 4, 0,3,0,4, 0,0,0,0,0,0,0,0,0,0 + 4, 0,3,5,4, 0,3,5,4 }; static const int blit_cycle_diagram_finald[] = - { 0, 2, 0,4 }; +{ + 2, 0,4, 0, 4 +}; void build_blitfilltable (void) { @@ -222,12 +197,20 @@ void build_blitfilltable (void) } } +STATIC_INLINE void record_dma_blit (uae_u16 reg, uae_u16 dat, uae_u32 addr, int hpos) +{ +#ifdef DEBUGGER + if (debug_dma) + record_dma (reg, dat, addr, hpos, vpos); +#endif +} + static void blitter_dump (void) { - write_log (L"APT=%08X BPT=%08X CPT=%08X DPT=%08X\n", bltapt, bltbpt, bltcpt, bltdpt); - write_log (L"CON0=%04X CON1=%04X ADAT=%04X BDAT=%04X CDAT=%04X\n", + write_log (L"PT A=%08X B=%08X C=%08X D=%08X\n", bltapt, bltbpt, bltcpt, bltdpt); + write_log (L"CON0=%04X CON1=%04X DAT A=%04X B=%04X C=%04X\n", bltcon0, bltcon1, blt_info.bltadat, blt_info.bltbdat, blt_info.bltcdat); - write_log (L"AFWM=%04X ALWM=%04X AMOD=%04X BMOD=%04X CMOD=%04X DMOD=%04X\n", + write_log (L"AFWM=%04X ALWM=%04X MOD A=%04X B=%04X C=%04X D=%04X\n", blt_info.bltafwm, blt_info.bltalwm, blt_info.bltamod & 0xffff, blt_info.bltbmod & 0xffff, blt_info.bltcmod & 0xffff, blt_info.bltdmod & 0xffff); } @@ -237,8 +220,18 @@ STATIC_INLINE int channel_state (int cycles) if (cycles < 0) return 0; if (cycles < blit_diag[0]) - return blit_diag[blit_diag[1] + 2 + cycles]; - return blit_diag[((cycles - blit_diag[0]) % blit_diag[1]) + 2]; + return blit_diag[1 + cycles]; + cycles -= blit_diag[0]; + cycles %= blit_diag[0]; + return blit_diag[1 + blit_diag[0] + cycles]; +} +STATIC_INLINE int channel_pos (int cycles) +{ + if (cycles < blit_diag[0]) + return cycles; + cycles -= blit_diag[0]; + cycles %= blit_diag[0]; + return cycles; } extern int is_bitplane_dma (int hpos); @@ -246,25 +239,37 @@ STATIC_INLINE int canblit (int hpos) { if (is_bitplane_dma (hpos)) return 0; - if (cycle_line[hpos] 
== 0) - return 1; - if (cycle_line[hpos] & CYCLE_REFRESH) - return -1; - return 0; + if (cycle_line[hpos]) + return 0; + return 1; +} + +// blitter interrupt is set when last "main" cycle +// has been finished, any non-linedraw D-channel blit +// still needs 2 more cycles before final D is written +static void blitter_interrupt (int hpos) +{ + if (blit_interrupt) + return; + blit_interrupt = 1; + INTREQ (0x8040); + if (debug_dma) + record_dma_event (DMA_EVENT_BLITIRQ, hpos, vpos); } -static void blitter_done (void) +static void blitter_done (int hpos) { ddat1use = ddat2use = 0; bltstate = BLT_done; - blitter_done_notify (); - INTREQ (0x8040); + blitter_interrupt (hpos); + blitter_done_notify (hpos); + if (debug_dma) + record_dma_event (DMA_EVENT_BLITFINISHED, hpos, vpos); event2_remevent (ev2_blitter); unset_special (&regs, SPCFLAG_BLTNASTY); - blit_last_hpos = 0; #ifdef BLITTER_DEBUG - write_log (L"vpos=%d, cycles %d, missed %d, total %d\n", - vpos, blit_cyclecounter, blit_misscyclecounter, blit_cyclecounter + blit_misscyclecounter); + write_log (L"cycles %d, missed %d, total %d\n", + blit_totalcyclecounter, blit_misscyclecounter, blit_totalcyclecounter + blit_misscyclecounter); #endif } @@ -480,7 +485,7 @@ STATIC_INLINE void blitter_read (void) if (bltcon0 & 0x200) { if (!dmaen (DMA_BLITTER)) return; - blt_info.bltcdat = chipmem_bank.wget(bltcpt); + blt_info.bltcdat = chipmem_bank.wget (bltcpt); } bltstate = BLT_work; } @@ -578,6 +583,7 @@ STATIC_INLINE void blitter_nxline (void) blineb = (blineb << 1) | (blineb >> 15); blt_info.vblitsize--; bltstate = BLT_read; + blit_line_pixel = 0; } #ifdef CPUEMU_12 @@ -586,31 +592,48 @@ static int blitter_cyclecounter; static int blitter_hcounter1, blitter_hcounter2; static int blitter_vcounter1, blitter_vcounter2; -static void decide_blitter_line (int hpos) +static void decide_blitter_line (int hsync, int hpos) { + if (dmaen (DMA_BLITTER)) { - while (blit_last_hpos <= hpos) { - int c = blit_cyclecounter % 4; + + while
(last_blitter_hpos < hpos) { + int c = channel_state (blit_cyclecounter); + + if (blit_linecyclecounter > 0) { + blit_linecyclecounter--; + break; + } + + for (;;) { - if (c == 1 || c == 3) { - /* onedot mode and no pixel = bus write access is skipped */ - if (c == 3 && blitsing && blitonedot > 1) { - blit_cyclecounter++; - if (blt_info.vblitsize == 0) { - bltdpt = bltcpt; - blitter_done (); - return; - } - break; - } - if (canblit (blit_last_hpos) <= 0) - break; + + if (!canblit (last_blitter_hpos)) { + blit_misscyclecounter++; + break; } + blit_cyclecounter++; - if (c == 1) { + blit_totalcyclecounter++; + + /* onedot mode and no pixel = bus write access is skipped */ + if (c == 4 && blitsing && blitonedot > 1) { + if (blt_info.vblitsize == 0) { + bltdpt = bltcpt; + blitter_done (last_blitter_hpos); + return; + } + break; + } + + if (c == 3) { + blitter_read (); - alloc_cycle_ext (blit_last_hpos, CYCLE_BLITTER); - } else if (c == 2) { + alloc_cycle_ext (last_blitter_hpos, CYCLE_BLITTER); + record_dma_blit (0x70, blt_info.bltcdat, bltcpt, last_blitter_hpos); + + } else if (c == 5) { + if (ddat1use) { bltdpt = bltcpt; } @@ -618,24 +641,29 @@ static void decide_blitter_line (int hpos) blitter_line (); blitter_line_proc (); blitter_nxline (); - } else if (c == 3) { + + } else if (c == 4) { + blitter_write (); - alloc_cycle_ext (blit_last_hpos, CYCLE_BLITTER); + alloc_cycle_ext (last_blitter_hpos, CYCLE_BLITTER); + record_dma_blit (0x00, blt_info.bltddat, bltdpt, last_blitter_hpos); if (blt_info.vblitsize == 0) { bltdpt = bltcpt; - blitter_done (); + blitter_done (last_blitter_hpos); return; } + } + break; } - blit_last_hpos++; + last_blitter_hpos++; } } else { - blit_last_hpos = hpos + 1; + last_blitter_hpos = hpos; } - if (blit_last_hpos > maxhpos) - blit_last_hpos = 0; + if (hsync) + last_blitter_hpos = 0; } #endif @@ -691,7 +719,7 @@ void blitter_handler (uae_u32 data) #else actually_do_blit (); #endif - blitter_done (); + blitter_done (current_hpos ()); } #ifdef 
CPUEMU_12 @@ -732,7 +760,7 @@ STATIC_INLINE uae_u16 blitter_doblit (void) } -STATIC_INLINE int blitter_doddma (void) +STATIC_INLINE int blitter_doddma (int hpos) { int wd; uae_u16 d; @@ -751,6 +779,8 @@ STATIC_INLINE int blitter_doddma (void) wd = 1; } if (wd) { + alloc_cycle_ext (hpos, CYCLE_BLITTER); + record_dma_blit (0x00, d, bltdpt, hpos); chipmem_agnus_wput2 (bltdpt, d); bltdpt += blit_add; blitter_hcounter2++; @@ -761,34 +791,52 @@ STATIC_INLINE int blitter_doddma (void) if (blitter_vcounter2 > blitter_vcounter1) blitter_vcounter1 = blitter_vcounter2; } +#if 0 + if (blitter_hcounter1 == 0 && blitter_vcounter1 == blt_info.vblitsize) { + if (blit_diag != blit_cycle_diagram_finald) { + blit_cyclecounter = -1; + blit_diag = blit_cycle_diagram_finald; + } + } +#endif if (blit_ch == 1) blitter_hcounter1 = blitter_hcounter2; } return wd; } -STATIC_INLINE void blitter_dodma (int ch) +STATIC_INLINE void blitter_dodma (int ch, int hpos) { + uae_u16 dat, reg; + uae_u32 addr; switch (ch) { case 1: - blt_info.bltadat = chipmem_agnus_wget (bltapt); + blt_info.bltadat = dat = chipmem_agnus_wget (bltapt); + addr = bltapt; bltapt += blit_add; + reg = 0x74; break; case 2: - blt_info.bltbdat = chipmem_agnus_wget (bltbpt); + blt_info.bltbdat = dat = chipmem_agnus_wget (bltbpt); + addr = bltbpt; bltbpt += blit_add; if (blitdesc) blt_info.bltbhold = (((uae_u32)blt_info.bltbdat << 16) | prevb) >> blt_info.blitdownbshift; else blt_info.bltbhold = (((uae_u32)prevb << 16) | blt_info.bltbdat) >> blt_info.blitbshift; prevb = blt_info.bltbdat; + reg = 0x72; break; case 3: - blt_info.bltcdat = chipmem_agnus_wget (bltcpt); + blt_info.bltcdat = dat = chipmem_agnus_wget (bltcpt); + addr = bltcpt; bltcpt += blit_add; + reg = 0x70; break; + default: + abort (); } blitter_cyclecounter++; @@ -813,10 +861,25 @@ STATIC_INLINE void blitter_dodma (int ch) blitfc = !!(bltcon1 & 0x4); } } + alloc_cycle_ext (hpos, CYCLE_BLITTER); + record_dma_blit (reg, dat, addr, hpos); +} + +int blitter_need (int 
hpos) +{ + int c; + if (bltstate == BLT_done) + return 0; + if (!dmaen (DMA_BLITTER)) + return 0; + c = channel_state (blit_cyclecounter); + return c; } void decide_blitter (int hpos) { + int hsync = hpos < 0; + if (bltstate == BLT_done) return; #ifdef BLITTER_DEBUG @@ -828,96 +891,97 @@ void decide_blitter (int hpos) if (!blitter_cycle_exact) return; - if (blit_linecyclecounter > 0) { - while (blit_linecyclecounter > 0 && blit_last_hpos <= hpos) { - blit_linecyclecounter--; - blit_last_hpos++; - } - if (blit_last_hpos > maxhpos) - blit_last_hpos = 0; - } - if (blit_linecyclecounter > 0) { - blit_last_hpos = hpos + 1; - return; - } + if (hpos < 0) + hpos = maxhpos; if (blitline) { blt_info.got_cycle = 1; - decide_blitter_line (hpos); + decide_blitter_line (hsync, hpos); return; } if (dmaen (DMA_BLITTER)) { - while (blit_last_hpos <= hpos) { - int c = channel_state (blit_cyclecounter); -#ifdef BLITTER_SLOWDOWNDEBUG - blitter_slowdowndebug--; - if (blitter_slowdowndebug < 0) { - cycle_line[blit_last_hpos] |= CYCLE_BLITTER; - blitter_slowdowndebug = BLITTER_SLOWDOWNDEBUG; - } -#endif + while (last_blitter_hpos < hpos) { + int c; + + c = channel_state (blit_cyclecounter); + for (;;) { int v; - if (c == 5) { /* real idle cycle */ - blit_cyclecounter++; + if (blit_linecyclecounter > 0) { + blit_linecyclecounter--; break; } - /* all cycles need free bus, even idle cycles (except fillmode idle) */ - v = canblit (blit_last_hpos); - if (v < 0 && c == 0) { + v = canblit (last_blitter_hpos); + + // idle cycles require free bus.. 
+ // (CPU can still use this cycle) + if (c == 0 && v == 0) { + blitter_nasty++; + blit_misscyclecounter++; + break; + } + + if (c == 0) { + blt_info.got_cycle = 1; blit_cyclecounter++; + blit_totalcyclecounter++; + /* check if blit with zero channels has ended */ + if (blit_ch == 0 && blit_cyclecounter >= blit_maxcyclecounter) { + blitter_done (last_blitter_hpos); + return; + } break; } - if (v <= 0) { + + blitter_nasty++; + + if (v == 0) { blit_misscyclecounter++; break; } blt_info.got_cycle = 1; - if (c < 0) { /* no channel but bus still needs to be allocated.. */ - alloc_cycle_ext (blit_last_hpos, CYCLE_BLITTER); - blit_cyclecounter++; - } else if (c == 4) { - if (blitter_doddma ()) { - alloc_cycle_ext (blit_last_hpos, CYCLE_BLITTER); + if (c == 4) { + if (blitter_doddma (last_blitter_hpos)) { blit_cyclecounter++; + blit_totalcyclecounter++; } - } else if (c) { + } else { if (blitter_vcounter1 < blt_info.vblitsize) { - alloc_cycle_ext (blit_last_hpos, CYCLE_BLITTER); - blitter_dodma (c); + blitter_dodma (c, last_blitter_hpos); } blit_cyclecounter++; - } else { - blit_cyclecounter++; - /* check if blit with zero channels has ended */ - if (blit_cyclecounter >= blit_maxcyclecounter) { - blitter_done (); - return; - } + blit_totalcyclecounter++; } + if (blitter_vcounter1 >= blt_info.vblitsize && blitter_vcounter2 >= blt_info.vblitsize) { if (!ddat1use && !ddat2use) { - blitter_done (); + blitter_done (last_blitter_hpos); return; } - if (blit_diag != blit_cycle_diagram_finald) { - blit_cyclecounter = 0; - blit_diag = blit_cycle_diagram_finald; - } } break; } - blit_last_hpos++; + + if (blitter_vcounter1 == blt_info.vblitsize && channel_pos (blit_cyclecounter - 1) == blit_diag[0] - 1) { + if (blit_diag != blit_cycle_diagram_finald) { + blitter_interrupt (last_blitter_hpos); + blit_cyclecounter = 0; + blit_diag = blit_cycle_diagram_finald; + } + } + last_blitter_hpos++; + } } else { - blit_last_hpos = hpos + 1; + last_blitter_hpos = hpos; } - if (blit_last_hpos > 
maxhpos) - blit_last_hpos = 0; + + if (hsync) + last_blitter_hpos = 0; } #else void decide_blitter (int hpos) { } @@ -947,7 +1011,7 @@ static void blitter_force_finish (void) } else { actually_do_blit (); } - blitter_done (); + blitter_done (current_hpos ()); dmacon = odmacon; } } @@ -986,16 +1050,16 @@ static void blit_bltset (int con) } if (blitfill && !blitdesc) debugtest (DEBUGTEST_BLITTER, L"fill without desc\n"); - blit_diag = blitfill ? blit_cycle_diagram_fill[blit_ch] : blit_cycle_diagram[blit_ch]; + blit_diag = blitfill && blit_cycle_diagram_fill[blit_ch][0] ? blit_cycle_diagram_fill[blit_ch] : blit_cycle_diagram[blit_ch]; } if ((bltcon1 & 0x80) && (currprefs.chipset_mask & CSMASK_ECS_AGNUS)) debugtest (DEBUGTEST_BLITTER, L"ECS BLTCON1 DOFF-bit set\n"); blit_dmacount = blit_dmacount2 = 0; blit_nod = 1; - for (i = 0; i < blit_diag[1]; i++) { - int v = blit_diag[2 + i]; - if (v) + for (i = 0; i < blit_diag[0]; i++) { + int v = blit_diag[1 + blit_diag[0] + i]; + if (v <= 4) blit_dmacount++; if (v > 0 && v < 4) blit_dmacount2++; @@ -1031,8 +1095,11 @@ void reset_blit (int bltcon) void do_blitter (int hpos) { int cycles; -#ifdef BLITTER_DEBUG - int oldstate = bltstate; + +#ifdef BLITTER_DEBUG_NOWAIT + if (bltstate != BLT_done) { + write_log (L"blitter was already active! 
PC=%08x\n", M68K_GETPC); + } #endif blitter_cycle_exact = currprefs.blitter_cycle_exact; @@ -1046,12 +1113,14 @@ void do_blitter (int hpos) blit_misscyclecounter = 0; blit_last_cycle = 0; blit_maxcyclecounter = 0; - blit_last_hpos = hpos; + last_blitter_hpos = hpos; blit_cyclecounter = 0; + blit_totalcyclecounter = 0; blit_bltset (1|2); blit_modset (); ddat1use = ddat2use = 0; + blit_interrupt = 0; if (blitline) { blitsing = bltcon1 & 0x2; @@ -1061,7 +1130,7 @@ void do_blitter (int hpos) blitonedot = 0; cycles = blt_info.vblitsize; } else { - blit_firstline_cycles = blit_first_cycle + (blit_diag[1] * blt_info.hblitsize + cpu_cycles) * CYCLE_UNIT; + blit_firstline_cycles = blit_first_cycle + (blit_diag[0] * blt_info.hblitsize + cpu_cycles) * CYCLE_UNIT; cycles = blt_info.vblitsize * blt_info.hblitsize; } @@ -1069,8 +1138,6 @@ void do_blitter (int hpos) blitter_dontdo = 0; if (1) { int ch = 0; - if (oldstate != BLT_done) - write_log (L"blitter was already active!\n"); if (blit_ch & 1) ch++; if (blit_ch & 2) @@ -1079,8 +1146,8 @@ void do_blitter (int hpos) ch++; if (blit_ch & 8) ch++; - write_log (L"blitstart: v=%03d h=%03d %dx%d ch=%d %d*%d=%d d=%d f=%02X n=%d pc=%p l=%d dma=%04X\n", - vpos, hpos, blt_info.hblitsize, blt_info.vblitsize, ch, blit_diag[1], cycles, blit_diag[1] * cycles, + write_log (L"blitstart: %dx%d ch=%d %d*%d=%d d=%d f=%02X n=%d pc=%p l=%d dma=%04X\n", + blt_info.hblitsize, blt_info.vblitsize, ch, blit_diag[0], cycles, blit_diag[0] * cycles, blitdesc ? 1 : 0, blitfill, dmaen (DMA_BLITPRI) ? 
1 : 0, M68K_GETPC, blitline, dmacon); blitter_dump (); } @@ -1092,7 +1159,7 @@ void do_blitter (int hpos) set_special (&regs, SPCFLAG_BLTNASTY); if (blt_info.vblitsize == 0 || (blitline && blt_info.hblitsize != 2)) { - blitter_done (); + blitter_done (hpos); return; } @@ -1116,9 +1183,9 @@ void do_blitter (int hpos) blitter_vcounter1 = blitter_vcounter2 = 0; if (blit_nod) blitter_vcounter2 = blt_info.vblitsize; - blit_linecyclecounter = 2; - if (blit_ch == 0) - blit_maxcyclecounter = blt_info.hblitsize * blt_info.vblitsize; + blit_linecyclecounter = 3; // delay before blitter starts + blitter_cyclecounter = 0; + blit_maxcyclecounter = blt_info.hblitsize * blt_info.vblitsize + 2; return; } @@ -1126,7 +1193,7 @@ void do_blitter (int hpos) if (currprefs.immediate_blits) cycles = 1; - blit_cyclecounter = cycles * blit_diag[1]; + blit_cyclecounter = cycles * blit_diag[0]; event2_newevent (ev2_blitter, blit_cyclecounter); } @@ -1139,12 +1206,18 @@ void maybe_blit (int hpos, int hack) if (savestate_state) return; - if (!warned && dmaen (DMA_BLITTER)) { -#ifndef BLITTER_DEBUG + if (!warned && dmaen (DMA_BLITTER) && blt_info.got_cycle) { warned = 1; + debugtest (DEBUGTEST_BLITTER, L"program does not wait for blitter tc=%d\n", + blit_cyclecounter); +#ifdef BLITTER_DEBUG + warned = 0; +#endif +#ifdef BLITTER_DEBUG_NOWAIT + warned = 0; + write_log (L"program does not wait for blitter PC=%08x\n", M68K_GETPC); + //activate_debugger (); #endif - debugtest (DEBUGTEST_BLITTER, L"program does not wait for blitter vpos=%d tc=%d\n", - vpos, blit_cyclecounter); } if (blitter_cycle_exact) { @@ -1169,7 +1242,7 @@ int blitnasty (void) return 0; if (!dmaen (DMA_BLITTER)) return 0; - if (blit_last_cycle >= blit_diag[0] && blit_dmacount == blit_diag[1]) + if (blit_last_cycle >= blit_diag[0] && blit_dmacount == blit_diag[0]) return 0; cycles = (get_cycles () - blit_first_cycle) / CYCLE_UNIT; ccnt = 0; @@ -1192,7 +1265,7 @@ void blitter_slowdown (int ddfstrt, int ddfstop, int totalcycles, int
freecycles if (ddfstrt != oddfstrt || ddfstop != oddfstop || totalcycles != ototal || ofree != freecycles) { int linecycles = ((ddfstop - ddfstrt + totalcycles - 1) / totalcycles) * totalcycles; int freelinecycles = ((ddfstop - ddfstrt + totalcycles - 1) / totalcycles) * freecycles; - int dmacycles = (linecycles * blit_dmacount) / blit_diag[1]; + int dmacycles = (linecycles * blit_dmacount) / blit_diag[0]; oddfstrt = ddfstrt; oddfstop = ddfstop; ototal = totalcycles; @@ -1223,6 +1296,9 @@ uae_u8 *restore_blitter (uae_u8 *src) void restore_blitter_finish (void) { + record_dma_reset (); + record_dma_reset (); + blit_interrupt = 1; if (bltstate == BLT_init) { write_log (L"blitter was started but DMA was inactive during save\n"); //do_blitter (0); diff --git a/custom.c b/custom.c index 70144caf..88ea26b6 100644 --- a/custom.c +++ b/custom.c @@ -61,9 +61,6 @@ #define SPEEDUP #define AUTOSCALE_SPRITES 1 -#define NEW_BPL 1 -#define NEW_BPLX 0 - #define SPRBORDER 0 STATIC_INLINE int nocustom(void) @@ -135,7 +132,7 @@ extern uae_u8* compiled_code; int vpos; int hack_vpos; -static int lof; +static int lof, lol; static int next_lineno, prev_lineno; static enum nln_how nextline_how; static int lof_changed = 0; @@ -227,7 +224,7 @@ static uae_u16 bplxdat[8]; static int bpl1dat_written; static uae_s16 bpl1mod, bpl2mod; static uaecptr prevbpl[2][MAXVPOS][8]; -static uaecptr bplpt[8], bplptx[8], f_bplpt[8]; +static uaecptr bplpt[8], bplptx[8]; /* Used as a debugging aid, to offset any bitplane temporarily. 
*/ int bpl_off[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -263,6 +260,8 @@ int first_planes_vpos, last_planes_vpos; int diwfirstword_total, diwlastword_total; int firstword_bplcon1; +static int last_copper_hpos; + /* Sprite collisions */ static unsigned int clxdat, clxcon, clxcon2, clxcon_bpl_enable, clxcon_bpl_match; @@ -277,7 +276,7 @@ enum copper_states { COP_wait, COP_skip1, COP_strobe_delay1, - COP_strobe_delay2 + COP_strobe_delay2, }; struct copper { @@ -365,7 +364,7 @@ enum plfstate plf_passed_stop, plf_passed_stop2, plf_end -} plfstate; +} plf_state; enum fetchstate { fetch_not_started, @@ -754,7 +753,7 @@ static void estimate_last_fetch_cycle (int hpos) { int fetchunit = fetchunits[fetchmode * 4 + bplcon0_res]; - if (plfstate < plf_passed_stop) { + if (plf_state < plf_passed_stop) { int stop = plfstop < hpos || plfstop > HARD_DDF_STOP ? HARD_DDF_STOP : plfstop; /* We know that fetching is up-to-date up until hpos, so we can use fetch_cycle. */ int fetch_cycle_at_stop = fetch_cycle + (stop - hpos); @@ -763,7 +762,7 @@ static void estimate_last_fetch_cycle (int hpos) estimated_last_fetch_cycle = hpos + (starting_last_block_at - fetch_cycle) + fetchunit; } else { int starting_last_block_at = (fetch_cycle + fetchunit - 1) & ~(fetchunit - 1); - if (plfstate == plf_passed_stop2) + if (plf_state == plf_passed_stop2) starting_last_block_at -= fetchunit; estimated_last_fetch_cycle = hpos + (starting_last_block_at - fetch_cycle) + fetchunit; @@ -781,7 +780,7 @@ static uae_u32 fetched_aga1[MAX_PLANES]; /* Expansions from bplcon0/bplcon1. */ static int toscr_res, toscr_nr_planes, toscr_nr_planes2, fetchwidth; -static int toscr_delay1x, toscr_delay2x, toscr_delay1, toscr_delay2; +static int toscr_delay1, toscr_delay2; /* The number of bits left from the last fetched words. 
This is an optimization - conceptually, we have to make sure the result is @@ -887,10 +886,10 @@ static void compute_toscr_delay_1 (void) delay1 += delayoffset; delay2 += delayoffset; delaymask = (fetchwidth - 1) >> toscr_res; - toscr_delay1x = (delay1 & delaymask) << toscr_res; - toscr_delay1x |= shdelay1 >> (RES_MAX - toscr_res); - toscr_delay2x = (delay2 & delaymask) << toscr_res; - toscr_delay2x |= shdelay2 >> (RES_MAX - toscr_res); + toscr_delay1 = (delay1 & delaymask) << toscr_res; + toscr_delay1 |= shdelay1 >> (RES_MAX - toscr_res); + toscr_delay2 = (delay2 & delaymask) << toscr_res; + toscr_delay2 |= shdelay2 >> (RES_MAX - toscr_res); } static void compute_toscr_delay (int hpos) @@ -913,11 +912,12 @@ STATIC_INLINE void fetch (int nr, int fm, int hpos) if (nr < bplcon0_planes_limit) { uaecptr p = bplpt[nr] + bpl_off[nr]; bplpt[nr] += 2 << fm; + bplptx[nr] += 2 << fm; if (nr == 0) bpl1dat_written = 1; #ifdef DEBUGGER - if (debug_copper) - record_copper_otherdma (0x110 + nr * 2, chipmem_agnus_wget (p), hpos, vpos); + if (debug_dma) + record_dma (0x110 + nr * 2, chipmem_agnus_wget (p), p, hpos, vpos); #endif switch (fm) { @@ -936,7 +936,7 @@ STATIC_INLINE void fetch (int nr, int fm, int hpos) break; #endif } - if (plfstate == plf_passed_stop2 && fetch_cycle >= (fetch_cycle & ~fetchunit_mask) + fetch_modulo_cycle) { + if (plf_state == plf_passed_stop2 && fetch_cycle >= (fetch_cycle & ~fetchunit_mask) + fetch_modulo_cycle) { int mod; if (fmode & 0x4000) { if (((diwstrt >> 8) ^ vpos) & 1) @@ -1197,8 +1197,6 @@ STATIC_INLINE void beginning_of_plane_block (int hpos, int fm) #endif update_denise (hpos); maybe_first_bpl1dat (hpos); - toscr_delay1 = toscr_delay1x; - toscr_delay2 = toscr_delay2x; compute_toscr_delay (hpos); } @@ -1405,9 +1403,9 @@ static void finish_final_fetch (int pos, int fm) { if (thisline_decision.plfleft == -1) return; - if (plfstate == plf_end) + if (plf_state == plf_end) return; - plfstate = plf_end; + plf_state = plf_end; ddfstate = 
DIW_waiting_start; pos += flush_plane_data (fm); thisline_decision.plfright = pos; @@ -1417,16 +1415,16 @@ static void finish_final_fetch (int pos, int fm) STATIC_INLINE int one_fetch_cycle_0 (int pos, int ddfstop_to_test, int dma, int fm) { - if (plfstate < plf_passed_stop && pos == ddfstop_to_test) - plfstate = plf_passed_stop; + if (plf_state < plf_passed_stop && pos == ddfstop_to_test) + plf_state = plf_passed_stop; if ((fetch_cycle & fetchunit_mask) == 0) { - if (plfstate == plf_passed_stop2) { + if (plf_state == plf_passed_stop2) { finish_final_fetch (pos, fm); return 1; } - if (plfstate >= plf_passed_stop) - plfstate++; + if (plf_state >= plf_passed_stop) + plf_state++; } if (dma) { @@ -1467,7 +1465,7 @@ STATIC_INLINE int one_fetch_cycle_0 (int pos, int ddfstop_to_test, int dma, int if (bpl1dat_written) { // do this here because if program plays with BPLCON0 during scanline // it is possible that one DMA BPL1DAT write is completely missed - // -> do not draw anything at all in next dma block + // and we must not draw anything at all in next dma block if this happens // (Disposable Hero titlescreen) fetch_state = fetch_was_plane0; bpl1dat_written = 0; @@ -1509,7 +1507,7 @@ STATIC_INLINE void update_fetch (int until, int fm) int ddfstop_to_test; - if (nodraw() || plfstate == plf_end) + if (nodraw() || plf_state == plf_end) return; /* We need an explicit test against HARD_DDF_STOP here to guard against @@ -1547,10 +1545,10 @@ STATIC_INLINE void update_fetch (int until, int fm) #ifdef SPEEDUP /* Unrolled version of the for loop below. 
*/ - if (plfstate < plf_passed_stop && ddf_change != vpos && ddf_change + 1 != vpos + if (plf_state < plf_passed_stop && ddf_change != vpos && ddf_change + 1 != vpos && dma && (fetch_cycle & fetchstart_mask) == (fm_maxplane & fetchstart_mask) - && toscr_delay1 == toscr_delay1x && toscr_delay2 == toscr_delay2x && !badmode + && !badmode # if 0 /* @@@ We handle this case, but the code would be simpler if we * disallowed it - it may even be possible to guarantee that @@ -1583,9 +1581,9 @@ STATIC_INLINE void update_fetch (int until, int fm) maybe_first_bpl1dat (pos); if (pos <= ddfstop_to_test && pos + count > ddfstop_to_test) - plfstate = plf_passed_stop; + plf_state = plf_passed_stop; if (pos <= ddfstop_to_test && pos + count > ddf2) - plfstate = plf_passed_stop2; + plf_state = plf_passed_stop2; if (pos <= ddf2 && pos + count >= ddf2 + fm_maxplane) add_modulos (); pos += count; @@ -1628,8 +1626,8 @@ STATIC_INLINE void decide_fetch (int hpos) #endif default: uae_abort (L"fetchmode corrupt"); } + last_fetch_hpos = hpos; } - last_fetch_hpos = hpos; } static void start_bpl_dma (int hpos, int hstart) @@ -1689,7 +1687,7 @@ static void maybe_start_bpl_dma (int hpos) if (hpos > plfstop - fetchunit) return; if (ddfstate != DIW_waiting_start) - plfstate = plf_passed_stop; + plf_state = plf_passed_stop; start_bpl_dma (hpos, hpos); } @@ -1710,13 +1708,13 @@ STATIC_INLINE void decide_line (int hpos) if (diwstate == DIW_waiting_stop) { int ok = 0; if (last_decide_line_hpos < plfstrt_start && hpos >= plfstrt_start) { - if (plfstate == plf_idle) - plfstate = plf_start; + if (plf_state == plf_idle) + plf_state = plf_start; } if (last_decide_line_hpos < plfstrt && hpos >= plfstrt) { - if (plfstate == plf_start) - plfstate = plf_active; - if (plfstate == plf_active) + if (plf_state == plf_start) + plf_state = plf_active; + if (plf_state == plf_active) ok = 1; /* hack warning.. 
Writing to DDFSTRT when DMA should start must be ignored * (correct fix would be emulate this delay for every custom register, but why bother..) */ @@ -2425,10 +2423,10 @@ static void reset_decisions (void) last_sprite_point = 0; fetch_state = fetch_not_started; - if (plfstate > plf_active) - plfstate = plf_idle; - if (plfstate == plf_active && !(currprefs.chipset_mask & CSMASK_ECS_AGNUS)) - plfstate = plf_idle; + if (plf_state > plf_active) + plf_state = plf_idle; + if (plf_state == plf_active && !(currprefs.chipset_mask & CSMASK_ECS_AGNUS)) + plf_state = plf_idle; memset (todisplay, 0, sizeof todisplay); memset (fetched, 0, sizeof fetched); @@ -2513,7 +2511,7 @@ static void dumpsync (void) void init_hz (void) { int isntsc; - int odbl = doublescan; + int odbl = doublescan, omaxvpos = maxvpos; int hzc = 0; if ((currprefs.chipset_refreshrate == 50 && !currprefs.ntscmode) || @@ -2583,7 +2581,7 @@ void init_hz (void) } if (currprefs.gfx_scandoubler && doublescan == 0) doublescan = -1; - if (doublescan != odbl) + if (doublescan != odbl || maxvpos != omaxvpos) hzc = 1; /* limit to sane values */ if (vblank_hz < 10) @@ -2645,7 +2643,7 @@ static void calcdiw (void) plfstrt = ddfstrt; plfstop = ddfstop; - /* probably not the correct place.. should use plfstate instead */ + /* probably not the correct place.. should use plf_state instead */ if (currprefs.chipset_mask & CSMASK_ECS_AGNUS) { /* ECS/AGA and ddfstop > maxhpos == always-on display */ if (plfstop > maxhpos) @@ -2706,13 +2704,15 @@ STATIC_INLINE uae_u16 DENISEID (void) return 0xFC; return 0xffff; } -STATIC_INLINE uae_u16 DMACONR (void) +STATIC_INLINE uae_u16 DMACONR (int hpos) { - uae_u16 v; - decide_blitter (current_hpos ()); - v = dmacon | (bltstate == BLT_done || (bltstate != BLT_done && currprefs.cs_agnusbltbusybug && !blt_info.got_cycle) ? 
0 : 0x4000) + decide_line (hpos); + decide_fetch (hpos); + decide_blitter (hpos); + dmacon &= ~(0x4000 | 0x2000); + dmacon |= (blit_interrupt || (!blit_interrupt && currprefs.cs_agnusbltbusybug && !blt_info.got_cycle) ? 0 : 0x4000) | (blt_info.blitzero ? 0x2000 : 0); - return v; + return dmacon; } STATIC_INLINE uae_u16 INTENAR (void) { @@ -2739,7 +2739,7 @@ STATIC_INLINE int GETHPOS (void) STATIC_INLINE uae_u16 VPOSR (void) { unsigned int csbit = 0; - int vp = (GETVPOS() >> 8) & 7; + int vp = (GETVPOS () >> 8) & 7; if (currprefs.cs_agnusrev >= 0) { csbit |= currprefs.cs_agnusrev << 8; @@ -2756,7 +2756,7 @@ STATIC_INLINE uae_u16 VPOSR (void) if (!(currprefs.chipset_mask & CSMASK_ECS_AGNUS)) vp &= 1; - vp = vp | (lof ? 0x8000 : 0) | csbit; + vp = vp | (lof ? 0x8000 : 0) | (lol ? 0x80 : 0) | csbit; #if 0 write_log (L"VPOSR %04x at %08x\n", vp, M68K_GETPC); #endif @@ -2767,15 +2767,19 @@ STATIC_INLINE uae_u16 VPOSR (void) static void VPOSW (uae_u16 v) { #if 0 - write_log (L"VPOSW %d PC=%08x\n", v, M68K_GETPC); + write_log (L"VPOSW %04X PC=%08x\n", v, M68K_GETPC); #endif if (lof != ((v & 0x8000) ? 1 : 0)) lof_changed = 1; lof = (v & 0x8000) ? 1 : 0; + if (currprefs.chipset_mask & CSMASK_ECS_AGNUS) { + lol = (v & 0x0080) ? 1 : 0; + } if ((v & 1) && vpos > 0) { hack_vpos = vpos + 1; if (hack_vpos > maxvpos) hack_vpos = maxvpos; + hack_vpos &= ~1; } } @@ -2783,19 +2787,24 @@ STATIC_INLINE uae_u16 VHPOSR (void) { uae_u16 vp = GETVPOS (); uae_u16 hp = GETHPOS (); - hp++; // hack.. 
+ + hp += 2; if (hp >= maxhpos) { hp -= maxhpos; vp++; if (vp >= maxvpos) vp = 0; } + hp += 2; + if (hp >= maxhpos) + hp -= maxhpos; + vp <<= 8; vp |= hp; if (currprefs.cpu_model >= 68020) hsyncdelay (); #if 0 - write_log (L"VPOS %04x %04x at %08x\n", VPOSR(), vp, M68K_GETPC); + write_log (L"VPOS %04x %04x at %08x\n", VPOSR (), vp, M68K_GETPC); if (M68K_GETPC == 0x40e6) { activate_debugger(); } @@ -2860,14 +2869,28 @@ static void immediate_copper (int num) unset_special (®s, SPCFLAG_COPPER); } -STATIC_INLINE void COP1LCH (uae_u16 v) { cop1lc = (cop1lc & 0xffff) | ((uae_u32)v << 16); } -STATIC_INLINE void COP1LCL (uae_u16 v) { cop1lc = (cop1lc & ~0xffff) | (v & 0xfffe); } -STATIC_INLINE void COP2LCH (uae_u16 v) { cop2lc = (cop2lc & 0xffff) | ((uae_u32)v << 16); } -STATIC_INLINE void COP2LCL (uae_u16 v) { cop2lc = (cop2lc & ~0xffff) | (v & 0xfffe); } +STATIC_INLINE void COP1LCH (uae_u16 v) +{ + cop1lc = (cop1lc & 0xffff) | ((uae_u32)v << 16); +} +STATIC_INLINE void COP1LCL (uae_u16 v) +{ + cop1lc = (cop1lc & ~0xffff) | (v & 0xfffe); +} +STATIC_INLINE void COP2LCH (uae_u16 v) +{ + cop2lc = (cop2lc & 0xffff) | ((uae_u32)v << 16); +} +STATIC_INLINE void COP2LCL (uae_u16 v) +{ + cop2lc = (cop2lc & ~0xffff) | (v & 0xfffe); +} -static void compute_spcflag_copper (void); +static void compute_spcflag_copper (int hpos); -static void COPJMP (int num) +// vblank = copper starts at hpos=2 +// normal COPJMP write: takes 2 more cycles +static void COPJMP (int num, int vblank) { int oldstrobe = cop_state.strobe; @@ -2875,7 +2898,7 @@ static void COPJMP (int num) cop_state.ignore_next = 0; if (!oldstrobe) cop_state.state_prev = cop_state.state; - cop_state.state = COP_strobe_delay1; + cop_state.state = vblank ? 
COP_strobe_delay2 : COP_strobe_delay1; cop_state.vpos = vpos; cop_state.hpos = current_hpos () & ~1; copper_enabled_thisline = 0; @@ -2887,9 +2910,9 @@ static void COPJMP (int num) } if (dmaen (DMA_COPPER)) { - compute_spcflag_copper (); + compute_spcflag_copper (current_hpos ()); } else if (oldstrobe > 0 && oldstrobe != num && cop_state.state_prev == COP_wait) { - /* dma disabled, copper idle and accessing both COPxJMPs -> copper stops! */ + /* dma disabled, copper idle and accessed both COPxJMPs -> copper stops! */ cop_state.state = COP_stop; } } @@ -2920,24 +2943,24 @@ static void DMACON (int hpos, uae_u16 v) if (oldcop != newcop) { if (newcop && !oldcop) { - compute_spcflag_copper (); + compute_spcflag_copper (hpos); } else if (!newcop) { copper_enabled_thisline = 0; unset_special (®s, SPCFLAG_COPPER); } } - if ((dmacon & DMA_BLITPRI) > (oldcon & DMA_BLITPRI) && bltstate != BLT_done) { - decide_blitter (hpos); + if ((dmacon & DMA_BLITPRI) > (oldcon & DMA_BLITPRI) && bltstate != BLT_done) set_special (®s, SPCFLAG_BLTNASTY); - } + if (dmaen (DMA_BLITTER) && bltstate == BLT_init) bltstate = BLT_work; - if ((dmacon & (DMA_BLITPRI | DMA_BLITTER | DMA_MASTER)) != (DMA_BLITPRI | DMA_BLITTER | DMA_MASTER)) { - decide_blitter (hpos); + + if ((dmacon & (DMA_BLITPRI | DMA_BLITTER | DMA_MASTER)) != (DMA_BLITPRI | DMA_BLITTER | DMA_MASTER)) unset_special (®s, SPCFLAG_BLTNASTY); - } + if (changed & (DMA_MASTER | 0x0f)) - audio_hsync (0); + audio_hsync (hpos); + if (changed & (DMA_MASTER | DMA_BITPLANE)) { ddf_change = vpos; if (dmaen (DMA_BITPLANE)) @@ -3099,9 +3122,9 @@ static void varsync (void) int is_bitplane_dma (int hpos) { - if (fetch_state == fetch_not_started || hpos < thisline_decision.plfleft) + if (fetch_state == fetch_not_started || hpos < plfstrt) return 0; - if ((plfstate == plf_end && hpos >= thisline_decision.plfright) + if ((plf_state == plf_end && hpos >= thisline_decision.plfright) || hpos >= estimated_last_fetch_cycle) return 0; return curr_diagram[(hpos 
- cycle_diagram_shift) & fetchstart_mask]; @@ -3109,9 +3132,9 @@ int is_bitplane_dma (int hpos) STATIC_INLINE int is_bitplane_dma_inline (int hpos) { - if (fetch_state == fetch_not_started || hpos < thisline_decision.plfleft) + if (fetch_state == fetch_not_started || hpos < plfstrt) return 0; - if ((plfstate == plf_end && hpos >= thisline_decision.plfright) + if ((plf_state == plf_end && hpos >= thisline_decision.plfright) || hpos >= estimated_last_fetch_cycle) return 0; return curr_diagram[(hpos - cycle_diagram_shift) & fetchstart_mask]; @@ -3129,7 +3152,7 @@ static void BPLxPTL (int hpos, uae_u16 v, int num) { decide_line (hpos); decide_fetch (hpos); - f_bplpt[num] = bplpt[num] = (bplpt[num] & 0xffff0000) | (v & 0x0000fffe); + bplpt[num] = (bplpt[num] & 0xffff0000) | (v & 0x0000fffe); bplptx[num] = (bplptx[num] & 0xffff0000) | (v & 0x0000fffe); //write_log (L"%d:%d:BPL%dPTL %08X COP=%08x\n", hpos, vpos, num, bplpt[num], cop_state.ip); } @@ -3289,26 +3312,16 @@ static void BPL2MOD (int hpos, uae_u16 v) } /* needed in special OCS/ECS "7-plane" mode. 
*/ -static void BPL5DAT (int hpos, uae_u16 v) -{ - decide_line (hpos); - decide_fetch (hpos); - bplxdat[4] = v; -} -static void BPL6DAT (int hpos, uae_u16 v) +/* (in reality only BPL5DAT and BPL6DAT needed) */ +static void BPLxDAT (int hpos, int num, uae_u16 v) { decide_line (hpos); decide_fetch (hpos); - bplxdat[5] = v; -} - -STATIC_INLINE void BPL1DAT (int hpos, uae_u16 v) -{ - decide_line (hpos); - decide_fetch (hpos); - bpl1dat_written = 1; - bplxdat[0] = v; - maybe_first_bpl1dat (hpos); + bplxdat[num] = v; + if (num == 0) { + bpl1dat_written = 1; + maybe_first_bpl1dat (hpos); + } } static void DIWSTRT (int hpos, uae_u16 v) @@ -3349,6 +3362,8 @@ static void DDFSTRT (int hpos, uae_u16 v) v &= 0xfe; if (!(currprefs.chipset_mask & CSMASK_ECS_AGNUS)) v &= 0xfc; + if (ddfstrt == v) + return; ddf_change = vpos; decide_line (hpos); ddfstrt_old_hpos = hpos; @@ -3370,6 +3385,7 @@ static void DDFSTOP (int hpos, uae_u16 v) v &= 0xfc; if (ddfstop == v) return; + ddf_change = vpos; decide_line (hpos); decide_fetch (hpos); decide_blitter (hpos); @@ -3413,9 +3429,9 @@ static void FNULL (uae_u16 v) } -static void BLTADAT (uae_u16 v) +static void BLTADAT (int hpos, uae_u16 v) { - maybe_blit (current_hpos (), 0); + maybe_blit (hpos, 0); blt_info.bltadat = v; } @@ -3424,9 +3440,9 @@ static void BLTADAT (uae_u16 v) * be true for BLTBDAT, but not for BLTADAT - it appears the A data must be * loaded for every word so that AFWM and ALWM can be applied. 
*/ -static void BLTBDAT (uae_u16 v) +static void BLTBDAT (int hpos, uae_u16 v) { - maybe_blit (current_hpos (), 0); + maybe_blit (hpos, 0); if (bltcon1 & 2) blt_info.bltbhold = v << (bltcon1 >> 12); @@ -3434,40 +3450,42 @@ static void BLTBDAT (uae_u16 v) blt_info.bltbhold = v >> (bltcon1 >> 12); blt_info.bltbdat = v; } -static void BLTCDAT (uae_u16 v) { maybe_blit (current_hpos (), 0); blt_info.bltcdat = v; reset_blit (0); } +static void BLTCDAT (int hpos, uae_u16 v) { maybe_blit (hpos, 0); blt_info.bltcdat = v; reset_blit (0); } -static void BLTAMOD (uae_u16 v) { maybe_blit (current_hpos (), 1); blt_info.bltamod = (uae_s16)(v & 0xFFFE); reset_blit (0); } -static void BLTBMOD (uae_u16 v) { maybe_blit (current_hpos (), 1); blt_info.bltbmod = (uae_s16)(v & 0xFFFE); reset_blit (0); } -static void BLTCMOD (uae_u16 v) { maybe_blit (current_hpos (), 1); blt_info.bltcmod = (uae_s16)(v & 0xFFFE); reset_blit (0); } -static void BLTDMOD (uae_u16 v) { maybe_blit (current_hpos (), 1); blt_info.bltdmod = (uae_s16)(v & 0xFFFE); reset_blit (0); } +static void BLTAMOD (int hpos, uae_u16 v) { maybe_blit (hpos, 1); blt_info.bltamod = (uae_s16)(v & 0xFFFE); reset_blit (0); } +static void BLTBMOD (int hpos, uae_u16 v) { maybe_blit (hpos, 1); blt_info.bltbmod = (uae_s16)(v & 0xFFFE); reset_blit (0); } +static void BLTCMOD (int hpos, uae_u16 v) { maybe_blit (hpos, 1); blt_info.bltcmod = (uae_s16)(v & 0xFFFE); reset_blit (0); } +static void BLTDMOD (int hpos, uae_u16 v) { maybe_blit (hpos, 1); blt_info.bltdmod = (uae_s16)(v & 0xFFFE); reset_blit (0); } -static void BLTCON0 (uae_u16 v) { maybe_blit (current_hpos(), 2); bltcon0 = v; reset_blit (1); } +static void BLTCON0 (int hpos, uae_u16 v) { maybe_blit (hpos, 2); bltcon0 = v; reset_blit (1); } /* The next category is "Most useless hardware register". * And the winner is... */ -static void BLTCON0L (uae_u16 v) +static void BLTCON0L (int hpos, uae_u16 v) { if (! 
(currprefs.chipset_mask & CSMASK_ECS_AGNUS)) return; - maybe_blit (current_hpos (), 2); bltcon0 = (bltcon0 & 0xFF00) | (v & 0xFF); + maybe_blit (hpos, 2); bltcon0 = (bltcon0 & 0xFF00) | (v & 0xFF); reset_blit (1); } -static void BLTCON1 (uae_u16 v) { maybe_blit (current_hpos (), 2); bltcon1 = v; reset_blit (2); } +static void BLTCON1 (int hpos, uae_u16 v) { maybe_blit (hpos, 2); bltcon1 = v; reset_blit (2); } -static void BLTAFWM (uae_u16 v) { maybe_blit (current_hpos (), 2); blt_info.bltafwm = v; reset_blit (0); } -static void BLTALWM (uae_u16 v) { maybe_blit (current_hpos (), 2); blt_info.bltalwm = v; reset_blit (0); } +static void BLTAFWM (int hpos, uae_u16 v) { maybe_blit (hpos, 2); blt_info.bltafwm = v; reset_blit (0); } +static void BLTALWM (int hpos, uae_u16 v) { maybe_blit (hpos, 2); blt_info.bltalwm = v; reset_blit (0); } -static void BLTAPTH (uae_u16 v) { maybe_blit (current_hpos (), 0); bltapt = (bltapt & 0xffff) | ((uae_u32)v << 16); } -static void BLTAPTL (uae_u16 v) { maybe_blit (current_hpos (), 0); bltapt = (bltapt & ~0xffff) | (v & 0xFFFE); } -static void BLTBPTH (uae_u16 v) { maybe_blit (current_hpos (), 0); bltbpt = (bltbpt & 0xffff) | ((uae_u32)v << 16); } -static void BLTBPTL (uae_u16 v) { maybe_blit (current_hpos (), 0); bltbpt = (bltbpt & ~0xffff) | (v & 0xFFFE); } -static void BLTCPTH (uae_u16 v) { maybe_blit (current_hpos (), 0); bltcpt = (bltcpt & 0xffff) | ((uae_u32)v << 16); } -static void BLTCPTL (uae_u16 v) { maybe_blit (current_hpos (), 0); bltcpt = (bltcpt & ~0xffff) | (v & 0xFFFE); } -static void BLTDPTH (uae_u16 v) { maybe_blit (current_hpos (), 0); bltdpt = (bltdpt & 0xffff) | ((uae_u32)v << 16); } -static void BLTDPTL (uae_u16 v) { maybe_blit (current_hpos (), 0); bltdpt = (bltdpt & ~0xffff) | (v & 0xFFFE); } +static void BLTAPTH (int hpos, uae_u16 v) { maybe_blit (hpos, 0); bltapt = (bltapt & 0xffff) | ((uae_u32)v << 16); } +static void BLTAPTL (int hpos, uae_u16 v) { maybe_blit (hpos, 0); bltapt = (bltapt & ~0xffff) | (v & 
0xFFFE); } +static void BLTBPTH (int hpos, uae_u16 v) { maybe_blit (hpos, 0); bltbpt = (bltbpt & 0xffff) | ((uae_u32)v << 16); } +static void BLTBPTL (int hpos, uae_u16 v) { maybe_blit (hpos, 0); bltbpt = (bltbpt & ~0xffff) | (v & 0xFFFE); } +static void BLTCPTH (int hpos, uae_u16 v) { maybe_blit (hpos, 0); bltcpt = (bltcpt & 0xffff) | ((uae_u32)v << 16); } +static void BLTCPTL (int hpos, uae_u16 v) { maybe_blit (hpos, 0); bltcpt = (bltcpt & ~0xffff) | (v & 0xFFFE); } +static void BLTDPTH (int hpos, uae_u16 v) { maybe_blit (hpos, 0); bltdpt = (bltdpt & 0xffff) | ((uae_u32)v << 16); } +static void BLTDPTL (int hpos, uae_u16 v) { maybe_blit (hpos, 0); bltdpt = (bltdpt & ~0xffff) | (v & 0xFFFE); } -static void BLTSIZE (uae_u16 v) +// copper writing to BLTSIZE = 5 cycle delay before copper starts (6th cycle is first copper cycle) +// CPU writing to BLTSIZE = 3 cycle delay +static void BLTSIZE (int hpos, uae_u16 v) { - maybe_blit (current_hpos (), 0); + maybe_blit (hpos, 0); blt_info.vblitsize = v >> 6; blt_info.hblitsize = v & 0x3F; @@ -3475,28 +3493,28 @@ static void BLTSIZE (uae_u16 v) blt_info.vblitsize = 1024; if (!blt_info.hblitsize) blt_info.hblitsize = 64; - do_blitter (current_hpos ()); + do_blitter (hpos); } -static void BLTSIZV (uae_u16 v) +static void BLTSIZV (int hpos, uae_u16 v) { if (! (currprefs.chipset_mask & CSMASK_ECS_AGNUS)) return; - maybe_blit (current_hpos (), 0); + maybe_blit (hpos, 0); blt_info.vblitsize = v & 0x7FFF; } -static void BLTSIZH (uae_u16 v) +static void BLTSIZH (int hpos, uae_u16 v) { if (! 
(currprefs.chipset_mask & CSMASK_ECS_AGNUS)) return; - maybe_blit (current_hpos (), 0); + maybe_blit (hpos, 0); blt_info.hblitsize = v & 0x7FF; if (!blt_info.vblitsize) blt_info.vblitsize = 32768; if (!blt_info.hblitsize) blt_info.hblitsize = 0x800; - do_blitter (current_hpos ()); + do_blitter (hpos); } STATIC_INLINE void spr_arm (int num, int state) @@ -3804,45 +3822,12 @@ STATIC_INLINE int copper_cant_read (int hpos) return is_bitplane_dma_inline (hpos); } -STATIC_INLINE int dangerous_reg (int reg) -{ - /* Safe: - * Bitplane pointers, control registers, modulos and data. - * Sprite pointers, control registers, and data. - * Color registers. */ - if (reg >= 0xE0 && reg < 0x1C0) - return 0; - return 1; -} - static int custom_wput_copper (int hpos, uaecptr addr, uae_u32 value, int noget) { debug_wputpeek (0xdff000 + (cop_state.saved_i1 & 0x1fe), cop_state.saved_i2); return custom_wput_1 (hpos, addr, value, noget); } -static void perform_copper_write (int hpos, int address, int data) -{ - if (test_copper_dangerous (address)) - return; - if (address == 0x88) { - cop_state.ip = cop1lc; - cop_state.state = COP_strobe_delay1; - } else if (address == 0x8A) { - cop_state.ip = cop2lc; - cop_state.state = COP_strobe_delay1; - } else { - custom_wput_copper (hpos, address, data, 0); - cop_state.last_write = address; - cop_state.last_write_hpos = hpos; - hpos++; - if (!nocustom () && address >= 0x140 && address < 0x180 && hpos >= SPR0_HPOS && hpos < SPR0_HPOS + 4 * MAX_SPRITES) { - //write_log (L"%d:%d %04X:%04X\n", vpos, hpos, cop_state.saved_i1, cop_state.saved_i2); - do_sprites (hpos); - } - } -} - static void dump_copper (TCHAR *error, int until_hpos) { write_log (L"%s: vpos=%d until_hpos=%d\n", @@ -3853,16 +3838,18 @@ static void dump_copper (TCHAR *error, int until_hpos) cop_state.state, cop_state.ip, regs.spcflags); } +// "emulate" chip internal delays, not the right place but fast and 99.9% programs +// use only copper to write BPLCON1 etc.. 
(exception is HulkaMania/TSP..) static int customdelay[]= { 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* 32 0x00 - 0x3e */ - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 27 0x40 - 0x74 */ - - 0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0, /* 21 */ + 0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0, /* 0x40 - 0x5e */ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60 - 0x7e */ + 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, /* 0x80 - 0x9e */ 1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0, /* 32 0xa0 - 0xde */ /* BPLxPTH/BPLxPTL */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 16 */ /* BPLCON0-3,BPLMOD1-2 */ - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 16 */ + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 16 */ /* SPRxPTH/SPRxPTL */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 16 */ /* SPRxPOS/SPRxCTL/SPRxDATA/SPRxDATB */ @@ -3889,11 +3876,12 @@ static void update_copper (int until_hpos) return; } - until_hpos &= ~1; + if (until_hpos <= last_copper_hpos) + return; + if (until_hpos > (maxhpos & ~1)) until_hpos = maxhpos & ~1; - until_hpos += 2; for (;;) { int old_hpos = c_hpos; int hp; @@ -3901,6 +3889,7 @@ static void update_copper (int until_hpos) if (c_hpos >= until_hpos) break; + /* So we know about the fetch state. */ decide_line (c_hpos); decide_fetch (c_hpos); @@ -3912,9 +3901,7 @@ static void update_copper (int until_hpos) BPLCON0_Denise (old_hpos, cop_state.movedata); } if (cop_state.movedelay == 0) { - perform_copper_write (old_hpos, cop_state.moveaddr, cop_state.movedata); - if (! 
copper_enabled_thisline) - goto out; + custom_wput_copper (old_hpos, cop_state.moveaddr, cop_state.movedata, 0); } } @@ -3928,7 +3915,6 @@ static void update_copper (int until_hpos) switch (cop_state.state) { - case COP_wait_in2: if (copper_cant_read (old_hpos)) continue; @@ -3940,14 +3926,26 @@ static void update_copper (int until_hpos) cop_state.state = COP_skip1; break; case COP_strobe_delay1: + // first cycle after COPJMP is just like normal first read cycle + if (copper_cant_read (old_hpos)) + continue; cop_state.state = COP_strobe_delay2; - alloc_cycle_maybe (old_hpos, CYCLE_COPPER); + alloc_cycle (old_hpos, CYCLE_COPPER); +#ifdef DEBUGGER + if (debug_dma) + record_dma (0x8c, chipmem_agnus_wget (cop_state.ip), cop_state.ip, old_hpos, vpos); +#endif break; case COP_strobe_delay2: + // second cycle after COPJMP is like second read cycle except + // there is 0x1FE in logic analyzer as a target register + // (next word is still read normally and tossed away) if (copper_cant_read (old_hpos)) continue; - alloc_cycle (old_hpos, CYCLE_COPPER); cop_state.state = COP_read1; + alloc_cycle (old_hpos, CYCLE_COPPER); + if (debug_dma) + record_dma (0x1fe, chipmem_agnus_wget (cop_state.ip + 2), cop_state.ip + 2, old_hpos, vpos); break; case COP_read1: @@ -3955,6 +3953,10 @@ static void update_copper (int until_hpos) continue; cop_state.i1 = chipmem_agnus_wget (cop_state.ip); alloc_cycle (old_hpos, CYCLE_COPPER); +#ifdef DEBUGGER + if (debug_dma) + record_dma (0x8c, cop_state.i1, cop_state.ip, old_hpos, vpos); +#endif cop_state.ip += 2; cop_state.state = COP_read2; break; @@ -3965,30 +3967,40 @@ static void update_copper (int until_hpos) cop_state.i2 = chipmem_agnus_wget (cop_state.ip); alloc_cycle (old_hpos, CYCLE_COPPER); cop_state.ip += 2; - cop_state.saved_i1 = cop_state.i1; cop_state.saved_i2 = cop_state.i2; cop_state.saved_ip = cop_state.ip; - if (cop_state.i1 & 1) { + if (cop_state.i1 & 1) { // WAIT or SKIP cop_state.ignore_next = 0; if (cop_state.i2 & 1) 
cop_state.state = COP_skip_in2; else cop_state.state = COP_wait_in2; - } else { +#ifdef DEBUGGER + if (debug_dma) + record_dma (0x8c, cop_state.i2, cop_state.ip - 2, old_hpos, vpos); +#endif + } else { // MOVE unsigned int reg = cop_state.i1 & 0x1FE; cop_state.state = COP_read1; cop_state.movedata = cop_state.i2; +#ifdef DEBUGGER + if (debug_dma) + record_dma (reg, cop_state.i2, cop_state.ip - 2, old_hpos, vpos); +#endif + test_copper_dangerous (reg); + if (! copper_enabled_thisline) + goto out; // was "dangerous" register -> copper stopped if (cop_state.ignore_next) { - test_copper_dangerous (reg); - if (! copper_enabled_thisline) - goto out; reg = 0x1fe; cop_state.ignore_next = 0; } + cop_state.moveaddr = reg; cop_state.movedelay = 0; + cop_state.last_write = reg; + cop_state.last_write_hpos = old_hpos; if (reg == 0x88) { cop_state.ip = cop1lc; cop_state.state = COP_strobe_delay1; @@ -3997,12 +4009,17 @@ static void update_copper (int until_hpos) cop_state.state = COP_strobe_delay1; } else { if (cop_state.moveaddr == 0x100) { - // special case BPLCON0 BPL DMA sequency delay + // special case BPLCON0 BPL DMA sequence delay cop_state.movedelay = 2; } else if (customdelay[cop_state.moveaddr / 2]) { cop_state.movedelay = customdelay[cop_state.moveaddr / 2]; } else { - perform_copper_write (old_hpos, cop_state.moveaddr, cop_state.movedata); + int hpos2 = old_hpos; + custom_wput_copper (hpos2, reg, cop_state.movedata, 0); + hpos2++; + if (!nocustom () && reg >= 0x140 && reg < 0x180 && hpos2 >= SPR0_HPOS && hpos2 < SPR0_HPOS + 4 * MAX_SPRITES) { + do_sprites (hpos2); + } } } #ifdef DEBUGGER @@ -4060,7 +4077,7 @@ static void update_copper (int until_hpos) /* Now we know that the comparisons were successful. We might still have to wait for the blitter though. */ - if ((cop_state.saved_i2 & 0x8000) == 0 && (DMACONR() & 0x4000)) { + if ((cop_state.saved_i2 & 0x8000) == 0 && (DMACONR (old_hpos) & 0x4000)) { /* We need to wait for the blitter. 
*/ cop_state.state = COP_bltwait; copper_enabled_thisline = 0; @@ -4090,7 +4107,7 @@ static void update_copper (int until_hpos) vp1 = vpos & (((cop_state.saved_i2 >> 8) & 0x7F) | 0x80); hp1 = c_hpos & (cop_state.saved_i2 & 0xFE); - if ((vp1 > vcmp || (vp1 == vcmp && hp1 >= hcmp)) && ((cop_state.saved_i2 & 0x8000) != 0 || ! (DMACONR() & 0x4000))) + if ((vp1 > vcmp || (vp1 == vcmp && hp1 >= hcmp)) && ((cop_state.saved_i2 & 0x8000) != 0 || ! (DMACONR (old_hpos) & 0x4000))) cop_state.ignore_next = 1; cop_state.state = COP_read1; @@ -4109,10 +4126,13 @@ static void update_copper (int until_hpos) out: cop_state.hpos = c_hpos; + last_copper_hpos = until_hpos; } -static void compute_spcflag_copper (void) +static void compute_spcflag_copper (int hpos) { + int wasenabled = copper_enabled_thisline; + copper_enabled_thisline = 0; unset_special (®s, SPCFLAG_COPPER); if (!dmaen (DMA_COPPER) || cop_state.state == COP_stop || cop_state.state == COP_bltwait || nocustom ()) @@ -4124,19 +4144,29 @@ static void compute_spcflag_copper (void) if (vp < cop_state.vcmp) return; } + // do not use past cycles if starting for the first time in this line + // (write to DMACON for example) + if (!wasenabled && cop_state.hpos < hpos && hpos < maxhpos) { + hpos = (hpos + 2) & ~1; + if (hpos > (maxhpos & ~1)) + hpos = maxhpos & ~1; + cop_state.hpos = hpos; + } copper_enabled_thisline = 1; set_special (®s, SPCFLAG_COPPER); } -void blitter_done_notify (void) +void blitter_done_notify (int hpos) { if (cop_state.state != COP_bltwait) return; - - cop_state.hpos = current_hpos () & ~1; + cop_state.hpos = (hpos + 2) & ~1; cop_state.vpos = vpos; cop_state.state = COP_read1; - compute_spcflag_copper (); + if (dmaen (DMA_COPPER)) { + copper_enabled_thisline = 1; + set_special (®s, SPCFLAG_COPPER); + } } void do_copper (void) @@ -4191,6 +4221,19 @@ STATIC_INLINE uae_u16 sprite_fetch (struct sprite *s, int dma, int hpos, int cyc if (dma) { data = last_custom_value = chipmem_agnus_wget (s->pt); alloc_cycle 
(hpos, CYCLE_SPRITE); +#ifdef DEBUGGER + if (debug_dma) + record_dma ((s - &spr[0]) * 2 + 0x120, data, s->pt, hpos, vpos); +#endif + } + s->pt += 2; + return data; +} +STATIC_INLINE uae_u16 sprite_fetch2 (struct sprite *s, int dma, int hpos, int cycle, int mode) +{ + uae_u16 data = last_custom_value; + if (dma) { + data = last_custom_value = chipmem_agnus_wget (s->pt); } s->pt += 2; return data; @@ -4251,10 +4294,10 @@ STATIC_INLINE void do_sprites_1 (int num, int cycle, int hpos) switch (sprite_width) { case 64: - sprite_fetch (s, dma, hpos, cycle, 0); - sprite_fetch (s, dma, hpos, cycle, 0); + sprite_fetch2 (s, dma, hpos, cycle, 0); + sprite_fetch2 (s, dma, hpos, cycle, 0); case 32: - sprite_fetch (s, dma, hpos, cycle, 0); + sprite_fetch2 (s, dma, hpos, cycle, 0); break; } } else { @@ -4296,9 +4339,9 @@ STATIC_INLINE void do_sprites_1 (int num, int cycle, int hpos) { case 64: { - uae_u16 data32 = sprite_fetch (s, dma, hpos, cycle, 1); - uae_u16 data641 = sprite_fetch (s, dma, hpos, cycle, 1); - uae_u16 data642 = sprite_fetch (s, dma, hpos, cycle, 1); + uae_u16 data32 = sprite_fetch2 (s, dma, hpos, cycle, 1); + uae_u16 data641 = sprite_fetch2 (s, dma, hpos, cycle, 1); + uae_u16 data642 = sprite_fetch2 (s, dma, hpos, cycle, 1); if (dma) { if (cycle == 0) { sprdata[num][3] = data642; @@ -4314,7 +4357,7 @@ STATIC_INLINE void do_sprites_1 (int num, int cycle, int hpos) break; case 32: { - uae_u16 data32 = sprite_fetch (s, dma, hpos, cycle, 1); + uae_u16 data32 = sprite_fetch2 (s, dma, hpos, cycle, 1); if (dma) { if (cycle == 0) sprdata[num][1] = data32; @@ -4640,6 +4683,8 @@ static void vsync_handler (void) if (debug_copper) record_copper_reset (); + if (debug_dma) + record_dma_reset (); vsync_handle_redraw (lof, lof_changed); @@ -4650,7 +4695,7 @@ static void vsync_handler (void) lof_changed = 0; - COPJMP (1); + COPJMP (1, 1); if (timehack_alive > 0) timehack_alive--; @@ -4754,6 +4799,7 @@ static void hsync_scandoubler (void) next_lineno++; scandoubled_line = 1; + 
debug_dma = 0; for (i = 0; i < 8; i++) { int diff; @@ -4775,7 +4821,7 @@ static void hsync_scandoubler (void) } reset_decisions (); - plfstate = plf_idle; + plf_state = plf_idle; // copy color changes dip1 = curr_drawinfo + next_lineno - 1; @@ -4817,6 +4863,7 @@ static void hsync_handler (void) if (!nocustom ()) { sync_copper_with_cpu (maxhpos, 0); + last_copper_hpos = 0; finish_decisions (hpos); if (thisline_decision.plfleft != -1) { if (currprefs.collision_level > 1) @@ -4844,19 +4891,8 @@ static void hsync_handler (void) #endif #ifdef CPUEMU_12 if (currprefs.cpu_cycle_exact || currprefs.blitter_cycle_exact) { - decide_blitter (hpos); + decide_blitter (-1); memset (cycle_line, 0, sizeof cycle_line); -#if 1 - alloc_cycle (1, CYCLE_REFRESH); /* strobe */ - alloc_cycle (3, CYCLE_REFRESH); - alloc_cycle (5, CYCLE_REFRESH); - alloc_cycle (7, CYCLE_REFRESH); -#else - alloc_cycle (maxhpos - 1, CYCLE_REFRESH); /* strobe */ - alloc_cycle (1, CYCLE_REFRESH); - alloc_cycle (3, CYCLE_REFRESH); - alloc_cycle (5, CYCLE_REFRESH); -#endif } #endif @@ -4886,9 +4922,6 @@ static void hsync_handler (void) else last_custom_value = uaerand (); - if (currprefs.produce_sound) - audio_hsync (1); - if (!nocustom()) { if (!currprefs.blitter_cycle_exact && bltstate != BLT_done && dmaen (DMA_BITPLANE) && diwstate == DIW_waiting_stop) { blitter_slowdown (thisline_decision.plfleft, thisline_decision.plfright - (16 << fetchmode), @@ -4900,6 +4933,10 @@ static void hsync_handler (void) hsync_scandoubler (); } + if (!(beamcon0 & 0x0800) && !(beamcon0 & 0x0020) && (currprefs.chipset_mask & CSMASK_ECS_AGNUS)) { + lol ^= 1; // NTSC and !LOLDIS = LOL toggles every line + } + /* In theory only an equality test is needed here - but if a program goes haywire with the VPOSW register, it can cause us to miss this, with vpos going into the thousands (and all the nasty consequences @@ -4932,7 +4969,26 @@ static void hsync_handler (void) CIA_vsync_prehandler (!(bplcon0 & 2) || ((bplcon0 & 2) && 
currprefs.genlock)); } +#ifdef CPUEMU_12 + if (currprefs.cpu_cycle_exact || currprefs.blitter_cycle_exact) { + int hp = maxhpos - 1, i; + for (i = 0; i < 4; i++) { + alloc_cycle (hp, i == 0 ? CYCLE_STROBE : CYCLE_REFRESH); /* strobe */ +#ifdef DEBUGGER + if (debug_dma) + record_dma (i == 0 ? (vpos + 1 == maxvpos + lof ? 0x3a : 0x3c) : 0x1fe, 0xffff, 0xffffffff, hp, vpos); +#endif + hp += 2; + if (hp >= maxhpos) + hp -= maxhpos; + } + } +#endif + + DISK_hsync (maxhpos); + if (currprefs.produce_sound) + audio_hsync (-1); #ifdef JIT if (compiled_code) { @@ -4999,7 +5055,7 @@ static void hsync_handler (void) /* See if there's a chance of a copper wait ending this line. */ cop_state.hpos = 0; cop_state.last_write = 0; - compute_spcflag_copper (); + compute_spcflag_copper (hpos); serial_hsynchandler (); #ifdef CUSTOM_SIMPLE do_sprites (0); @@ -5070,7 +5126,7 @@ static void hsync_handler (void) #endif } -static void MISC_handler(void) +static void MISC_handler (void) { int i, recheck; evt mintime; @@ -5297,7 +5353,7 @@ void customreset (int hardreset) audio_update_adkmasks (); INTENA_f (0); INTREQ_f (0); - COPJMP (1); + COPJMP (1, 1); v = bplcon0; BPLCON0 (0, 0); BPLCON0 (0, v); @@ -5352,7 +5408,7 @@ void customreset (int hardreset) void dumpcustom (void) { - console_out_f (L"DMACON: %x INTENA: %x INTREQ: %x VPOS: %x HPOS: %x\n", DMACONR(), + console_out_f (L"DMACON: %x INTENA: %x INTREQ: %x VPOS: %x HPOS: %x\n", DMACONR (current_hpos ()), (unsigned int)intena, (unsigned int)intreq, (unsigned int)vpos, (unsigned int)current_hpos()); console_out_f (L"COP1LC: %08lx, COP2LC: %08lx COPPTR: %08lx\n", (unsigned long)cop1lc, (unsigned long)cop2lc, cop_state.ip); console_out_f (L"DIWSTRT: %04x DIWSTOP: %04x DDFSTRT: %04x DDFSTOP: %04x\n", @@ -5489,17 +5545,17 @@ addrbank custom_bank = { static uae_u32 REGPARAM2 custom_wgeti (uaecptr addr) { if (currprefs.cpu_model >= 68020) - return dummy_wgeti(addr); - return custom_wget(addr); + return dummy_wgeti (addr); + return custom_wget 
(addr); } static uae_u32 REGPARAM2 custom_lgeti (uaecptr addr) { if (currprefs.cpu_model >= 68020) - return dummy_lgeti(addr); - return custom_lget(addr); + return dummy_lgeti (addr); + return custom_lget (addr); } -STATIC_INLINE uae_u32 REGPARAM2 custom_wget_1 (uaecptr addr, int noput) +STATIC_INLINE uae_u32 REGPARAM2 custom_wget_1 (int hpos, uaecptr addr, int noput) { uae_u16 v; #ifdef JIT @@ -5510,7 +5566,7 @@ STATIC_INLINE uae_u32 REGPARAM2 custom_wget_1 (uaecptr addr, int noput) write_log (L"%d:%d:wget: %04X=%04X pc=%p\n", current_hpos(), vpos, addr, addr & 0x1fe, m68k_getpc ()); #endif switch (addr & 0x1fe) { - case 0x002: v = DMACONR (); break; + case 0x002: v = DMACONR (hpos); break; case 0x004: v = VPOSR (); break; case 0x006: v = VHPOSR (); break; @@ -5523,7 +5579,7 @@ STATIC_INLINE uae_u32 REGPARAM2 custom_wget_1 (uaecptr addr, int noput) case 0x014: v = POT1DAT (); break; case 0x016: v = POTGOR (); break; case 0x018: v = SERDATR (); break; - case 0x01A: v = DSKBYTR (current_hpos ()); break; + case 0x01A: v = DSKBYTR (hpos); break; case 0x01C: v = INTENAR (); break; case 0x01E: v = INTREQR (); break; case 0x07C: v = DENISEID (); break; @@ -5552,7 +5608,6 @@ STATIC_INLINE uae_u32 REGPARAM2 custom_wget_1 (uaecptr addr, int noput) v = last_custom_value; if (!noput) { int r; - int hpos = current_hpos (); decide_line (hpos); decide_fetch (hpos); decide_blitter (hpos); @@ -5573,8 +5628,10 @@ STATIC_INLINE uae_u32 REGPARAM2 custom_wget_1 (uaecptr addr, int noput) STATIC_INLINE uae_u32 custom_wget2 (uaecptr addr) { uae_u32 v; - sync_copper_with_cpu (current_hpos (), 1); - v = custom_wget_1 (addr, 0); + int hpos = current_hpos (); + + sync_copper_with_cpu (hpos, 1); + v = custom_wget_1 (hpos, addr, 0); #ifdef ACTION_REPLAY #ifdef ACTION_REPLAY_COMMON addr &= 0x1ff; @@ -5638,31 +5695,32 @@ static int REGPARAM2 custom_wput_1 (int hpos, uaecptr addr, uae_u32 value, int n case 0x030: SERDAT (value); break; case 0x032: SERPER (value); break; case 0x034: POTGO (value); 
break; - case 0x040: BLTCON0 (value); break; - case 0x042: BLTCON1 (value); break; - case 0x044: BLTAFWM (value); break; - case 0x046: BLTALWM (value); break; + case 0x040: BLTCON0 (hpos, value); break; + case 0x042: BLTCON1 (hpos, value); break; + + case 0x044: BLTAFWM (hpos, value); break; + case 0x046: BLTALWM (hpos, value); break; - case 0x050: BLTAPTH (value); break; - case 0x052: BLTAPTL (value); break; - case 0x04C: BLTBPTH (value); break; - case 0x04E: BLTBPTL (value); break; - case 0x048: BLTCPTH (value); break; - case 0x04A: BLTCPTL (value); break; - case 0x054: BLTDPTH (value); break; - case 0x056: BLTDPTL (value); break; + case 0x050: BLTAPTH (hpos, value); break; + case 0x052: BLTAPTL (hpos, value); break; + case 0x04C: BLTBPTH (hpos, value); break; + case 0x04E: BLTBPTL (hpos, value); break; + case 0x048: BLTCPTH (hpos, value); break; + case 0x04A: BLTCPTL (hpos, value); break; + case 0x054: BLTDPTH (hpos, value); break; + case 0x056: BLTDPTL (hpos, value); break; - case 0x058: BLTSIZE (value); break; + case 0x058: BLTSIZE (hpos, value); break; - case 0x064: BLTAMOD (value); break; - case 0x062: BLTBMOD (value); break; - case 0x060: BLTCMOD (value); break; - case 0x066: BLTDMOD (value); break; + case 0x064: BLTAMOD (hpos, value); break; + case 0x062: BLTBMOD (hpos, value); break; + case 0x060: BLTCMOD (hpos, value); break; + case 0x066: BLTDMOD (hpos, value); break; - case 0x070: BLTCDAT (value); break; - case 0x072: BLTBDAT (value); break; - case 0x074: BLTADAT (value); break; + case 0x070: BLTCDAT (hpos, value); break; + case 0x072: BLTBDAT (hpos, value); break; + case 0x074: BLTADAT (hpos, value); break; case 0x07E: DSKSYNC (hpos, value); break; @@ -5671,8 +5729,8 @@ static int REGPARAM2 custom_wput_1 (int hpos, uaecptr addr, uae_u32 value, int n case 0x084: COP2LCH (value); break; case 0x086: COP2LCL (value); break; - case 0x088: COPJMP (1); break; - case 0x08A: COPJMP (2); break; + case 0x088: COPJMP (1, 0); break; + case 0x08A: COPJMP (2, 0); 
break; case 0x08E: DIWSTRT (hpos, value); break; case 0x090: DIWSTOP (hpos, value); break; @@ -5743,9 +5801,14 @@ static int REGPARAM2 custom_wput_1 (int hpos, uaecptr addr, uae_u32 value, int n case 0x10E: CLXCON2 (value); break; #endif - case 0x110: BPL1DAT (hpos, value); break; - case 0x118: BPL5DAT (hpos, value); break; - case 0x11A: BPL6DAT (hpos, value); break; + case 0x110: BPLxDAT (hpos, 0, value); break; + case 0x112: BPLxDAT (hpos, 1, value); break; + case 0x114: BPLxDAT (hpos, 2, value); break; + case 0x116: BPLxDAT (hpos, 3, value); break; + case 0x118: BPLxDAT (hpos, 4, value); break; + case 0x11A: BPLxDAT (hpos, 5, value); break; + case 0x11C: BPLxDAT (hpos, 6, value); break; + case 0x11E: BPLxDAT (hpos, 7, value); break; case 0x180: case 0x182: case 0x184: case 0x186: case 0x188: case 0x18A: case 0x18C: case 0x18E: case 0x190: case 0x192: case 0x194: case 0x196: @@ -5781,9 +5844,9 @@ static int REGPARAM2 custom_wput_1 (int hpos, uaecptr addr, uae_u32 value, int n break; case 0x36: JOYTEST (value); break; - case 0x5A: BLTCON0L (value); break; - case 0x5C: BLTSIZV (value); break; - case 0x5E: BLTSIZH (value); break; + case 0x5A: BLTCON0L (hpos, value); break; + case 0x5C: BLTSIZV (hpos, value); break; + case 0x5E: BLTSIZH (hpos, value); break; case 0x1E4: DIWHIGH (hpos, value); break; #ifdef AGA case 0x10C: BPLCON4 (hpos, value); break; @@ -5814,7 +5877,7 @@ static int REGPARAM2 custom_wput_1 (int hpos, uaecptr addr, uae_u32 value, int n /* writing to read-only register causes read access */ default: if (!noget) - custom_wget_1 (addr, 1); + custom_wget_1 (hpos, addr, 1); return 1; } return 0; @@ -5841,8 +5904,14 @@ static void REGPARAM2 custom_wput (uaecptr addr, uae_u32 value) static void REGPARAM2 custom_bput (uaecptr addr, uae_u32 value) { - uae_u16 rval = (value << 8) | (value & 0xFF); static int warned; + uae_u16 rval; + + if (addr & 1) { + rval = value & 0xff; + } else { + rval = (value << 8) | (value & 0xFF); + } #ifdef JIT special_mem |= 
S_WRITE; @@ -5899,8 +5968,8 @@ uae_u8 *restore_custom (uae_u8 *src) audio_reset (); changed_prefs.chipset_mask = currprefs.chipset_mask = RL; - update_mirrors(); - RW; /* 000 ? */ + update_mirrors (); + RW; /* 000 BLTDDAT */ RW; /* 002 DMACONR */ RW; /* 004 VPOSR */ RW; /* 006 VHPOSR */ @@ -5920,45 +5989,45 @@ uae_u8 *restore_custom (uae_u8 *src) dsklen = RW; /* 024 DSKLEN */ RW; /* 026 DSKDAT */ RW; /* 028 REFPTR */ - lof = RW ? 1 : 0; /* 02A VPOSW */ + i = RW; lof = (i & 0x8000) ? 1 : 0; lol = (i & 0x0080); /* 02A VPOSW */ RW; /* 02C VHPOSW */ - COPCON(RW); /* 02E COPCON */ + COPCON (RW); /* 02E COPCON */ RW; /* 030 SERDAT* */ RW; /* 032 SERPER* */ - POTGO(RW); /* 034 POTGO */ + POTGO (RW); /* 034 POTGO */ RW; /* 036 JOYTEST* */ RW; /* 038 STREQU */ RW; /* 03A STRVHBL */ RW; /* 03C STRHOR */ RW; /* 03E STRLONG */ - BLTCON0(RW); /* 040 BLTCON0 */ - BLTCON1(RW); /* 042 BLTCON1 */ - BLTAFWM(RW); /* 044 BLTAFWM */ - BLTALWM(RW); /* 046 BLTALWM */ - BLTCPTH(RL); /* 048-04B BLTCPT */ - BLTBPTH(RL); /* 04C-04F BLTBPT */ - BLTAPTH(RL); /* 050-053 BLTAPT */ - BLTDPTH(RL); /* 054-057 BLTDPT */ + BLTCON0 (0, RW); /* 040 BLTCON0 */ + BLTCON1 (0, RW); /* 042 BLTCON1 */ + BLTAFWM (0, RW); /* 044 BLTAFWM */ + BLTALWM (0, RW); /* 046 BLTALWM */ + BLTCPTH (0, RL); /* 048-04B BLTCPT */ + BLTBPTH (0, RL); /* 04C-04F BLTBPT */ + BLTAPTH (0, RL); /* 050-053 BLTAPT */ + BLTDPTH (0, RL); /* 054-057 BLTDPT */ RW; /* 058 BLTSIZE */ RW; /* 05A BLTCON0L */ blt_info.vblitsize = RW; /* 05C BLTSIZV */ blt_info.hblitsize = RW; /* 05E BLTSIZH */ - BLTCMOD(RW); /* 060 BLTCMOD */ - BLTBMOD(RW); /* 062 BLTBMOD */ - BLTAMOD(RW); /* 064 BLTAMOD */ - BLTDMOD(RW); /* 066 BLTDMOD */ + BLTCMOD (0, RW); /* 060 BLTCMOD */ + BLTBMOD (0, RW); /* 062 BLTBMOD */ + BLTAMOD (0, RW); /* 064 BLTAMOD */ + BLTDMOD (0, RW); /* 066 BLTDMOD */ RW; /* 068 ? */ RW; /* 06A ? */ RW; /* 06C ? */ RW; /* 06E ? 
*/ - BLTCDAT(RW); /* 070 BLTCDAT */ - BLTBDAT(RW); /* 072 BLTBDAT */ - BLTADAT(RW); /* 074 BLTADAT */ + BLTCDAT (0, RW); /* 070 BLTCDAT */ + BLTBDAT (0, RW); /* 072 BLTBDAT */ + BLTADAT (0, RW); /* 074 BLTADAT */ RW; /* 076 ? */ RW; /* 078 ? */ RW; /* 07A ? */ RW; /* 07C LISAID */ - DSKSYNC(-1, RW); /* 07E DSKSYNC */ + DSKSYNC (-1, RW); /* 07E DSKSYNC */ cop1lc = RL; /* 080/082 COP1LC */ cop2lc = RL; /* 084/086 COP2LC */ RW; /* 088 ? */ @@ -5969,7 +6038,7 @@ uae_u8 *restore_custom (uae_u8 *src) ddfstrt = RW; /* 092 DDFSTRT */ ddfstop = RW; /* 094 DDFSTOP */ dmacon = RW & ~(0x2000|0x4000); /* 096 DMACON */ - CLXCON(RW); /* 098 CLXCON */ + CLXCON (RW); /* 098 CLXCON */ intena = RW; /* 09A INTENA */ intreq = intreqr = RW | 0x20; /* 09C INTREQ */ adkcon = RW; /* 09E ADKCON */ @@ -6049,30 +6118,30 @@ uae_u8 *save_custom (int *len, uae_u8 *dstptr, int full) if (dstptr) dstbak = dst = dstptr; else - dstbak = dst = (uae_u8*)malloc (8 + 256 * 2); + dstbak = dst = malloc (8 + 256 * 2); SL (currprefs.chipset_mask); - SW (0); /* 000 ? 
*/ + SW (0); /* 000 BLTDDAT */ SW (dmacon); /* 002 DMACONR */ - SW (VPOSR()); /* 004 VPOSR */ - SW (VHPOSR()); /* 006 VHPOSR */ + SW (VPOSR ()); /* 004 VPOSR */ + SW (VHPOSR ()); /* 006 VHPOSR */ SW (0); /* 008 DSKDATR */ - SW (JOY0DAT()); /* 00A JOY0DAT */ - SW (JOY1DAT()); /* 00C JOY1DAT */ - SW (clxdat); /* 00E CLXDAT */ - SW (ADKCONR()); /* 010 ADKCONR */ - SW (POT0DAT()); /* 012 POT0DAT */ - SW (POT0DAT()); /* 014 POT1DAT */ + SW (JOY0DAT ()); /* 00A JOY0DAT */ + SW (JOY1DAT ()); /* 00C JOY1DAT */ + SW (clxdat | 0x8000); /* 00E CLXDAT */ + SW (ADKCONR ()); /* 010 ADKCONR */ + SW (POT0DAT ()); /* 012 POT0DAT */ + SW (POT0DAT ()); /* 014 POT1DAT */ SW (0) ; /* 016 POTINP * */ SW (0); /* 018 SERDATR * */ SW (dskbytr); /* 01A DSKBYTR */ - SW (INTENAR()); /* 01C INTENAR */ - SW (INTREQR()); /* 01E INTREQR */ + SW (INTENAR ()); /* 01C INTENAR */ + SW (INTREQR ()); /* 01E INTREQR */ SL (dskpt); /* 020-023 DSKPT */ SW (dsklen); /* 024 DSKLEN */ SW (0); /* 026 DSKDAT */ SW (0); /* 028 REFPTR */ - SW (lof ? 0x8001 : 0); /* 02A VPOSW */ + SW ((lof ? 0x8001 : 0) | (lol ? 0x0080 : 0));/* 02A VPOSW */ SW (0); /* 02C VHPOSW */ SW (copcon); /* 02E COPCON */ SW (serper); /* 030 SERDAT * */ @@ -6109,7 +6178,7 @@ uae_u8 *save_custom (int *len, uae_u8 *dstptr, int full) SW (0); /* 076 ? */ SW (0); /* 078 ? */ SW (0); /* 07A ? */ - SW (DENISEID()); /* 07C DENISEID/LISAID */ + SW (DENISEID ()); /* 07C DENISEID/LISAID */ SW (dsksync); /* 07E DSKSYNC */ SL (cop1lc); /* 080-083 COP1LC */ SL (cop2lc); /* 084-087 COP2LC */ @@ -6346,46 +6415,71 @@ STATIC_INLINE void decide_fetch_ce (int hpos) decide_fetch (hpos); } -STATIC_INLINE void dma_cycle (void) +#define BLIT_NASTY 4 + +// blitter not in nasty mode = CPU gets one cycle if it has been waiting +// at least 4 cycles (all DMA cycles count, not just blitter cycles, even +// blitter idle cycles do count!) 
+ +STATIC_INLINE int dma_cycle (void) { - int hpos; - static int bnasty; + int hpos, hpos_old; + blitter_nasty = 1; for (;;) { int bpldma; - int blitpri = dmaen (DMA_BLITPRI); - do_cycles (1 * CYCLE_UNIT); - hpos = current_hpos (); + int blitpri = dmacon & DMA_BLITPRI; + hpos_old = current_hpos (); + hpos = hpos_old + 1; sync_copper (hpos); decide_line (hpos); decide_fetch_ce (hpos); - bpldma = is_bitplane_dma (hpos); + bpldma = is_bitplane_dma (hpos_old); if (bltstate != BLT_done) { - if (!blitpri && bnasty >= 3 && !cycle_line[hpos] && !bpldma) { - bnasty = 0; + if (!blitpri && blitter_nasty >= BLIT_NASTY && cycle_line[hpos_old] == 0 && !bpldma) break; - } decide_blitter (hpos); - if (dmaen (DMA_BLITTER)) - bnasty++; + // copper may have been waiting for the blitter + sync_copper (hpos); } - if (cycle_line[hpos] == 0 && !bpldma) + if (cycle_line[hpos_old] == 0 && !bpldma) break; + do_cycles (1 * CYCLE_UNIT); /* bus was allocated to dma channel, wait for next cycle.. */ } - bnasty = 0; - alloc_cycle (hpos, CYCLE_CPU); + alloc_cycle (hpos_old, CYCLE_CPU); + return hpos_old; +} + +STATIC_INLINE void checknasty (int hpos, int vpos) +{ + if (blitter_nasty >= BLIT_NASTY && !(dmacon & DMA_BLITPRI)) + record_dma_event (DMA_EVENT_BLITNASTY, hpos, vpos); } uae_u32 wait_cpu_cycle_read (uaecptr addr, int mode) { uae_u32 v = 0; - dma_cycle (); + int hpos; + struct dma_rec *dr; + + hpos = dma_cycle (); +#ifdef DEBUGGER + if (debug_dma) { + dr = record_dma (0x1000, v, addr, hpos, vpos); + checknasty (hpos, vpos); + } +#endif + do_cycles_ce (1 * CYCLE_UNIT); if (mode > 0) v = get_word (addr); else if (mode == 0) v = get_byte (addr); - do_cycles (1 * CYCLE_UNIT); +#ifdef DEBUGGER + if (debug_dma) + dr->dat = v; +#endif + do_cycles_ce (1 * CYCLE_UNIT); return v; } @@ -6405,26 +6499,42 @@ uae_u32 wait_cpu_cycle_read_cycles (uaecptr addr, int mode, int *cycles) void wait_cpu_cycle_write (uaecptr addr, int mode, uae_u32 v) { - dma_cycle (); + int hpos; + + hpos = dma_cycle (); +#ifdef 
DEBUGGER + if (debug_dma) { + record_dma (0x1001, v, addr, hpos, vpos); + checknasty (hpos, vpos); + } +#endif + do_cycles_ce (1 * CYCLE_UNIT); if (mode > 0) put_word (addr, v); else if (mode == 0) put_byte (addr, v); - do_cycles (1 * CYCLE_UNIT); + do_cycles_ce (1 * CYCLE_UNIT); } void do_cycles_ce (long cycles) { int hpos; while (cycles > 0) { - do_cycles (1 * CYCLE_UNIT); - cycles -= CYCLE_UNIT; - hpos = current_hpos (); + hpos = current_hpos () + 1; sync_copper (hpos); decide_line (hpos); decide_fetch_ce (hpos); - decide_blitter (hpos); + if (bltstate != BLT_done) + decide_blitter (hpos); + do_cycles (1 * CYCLE_UNIT); + cycles -= CYCLE_UNIT; } } +int is_cycle_ce (void) +{ + int hpos = current_hpos (); + return cycle_line[hpos]; +} + #endif diff --git a/debug.c b/debug.c index a4bca941..65e801d9 100644 --- a/debug.c +++ b/debug.c @@ -44,6 +44,7 @@ static uae_u16 sr_bpmask, sr_bpvalue; int debugging; int exception_debugging; int debug_copper = 0; +int debug_dma = 0; int debug_sprite_mask = 0xff; static uaecptr processptr; @@ -133,6 +134,7 @@ static TCHAR help[] = { L" dj [] Enable joystick/mouse input debugging\n" L" smc [<0-1>] Enable self-modifying code detector. 1 = enable break.\n" L" dm Dump current address space map\n" + L" v [] Show DMA data (accurate only in cycle-exact mode)\n" L" ? Hex/Bin/Dec converter\n" #ifdef _WIN32 L" x Close debugger.\n" @@ -684,37 +686,140 @@ static void disassemble_wait (FILE *file, unsigned long insn) vp, ve, hp, he, bfd); } -#define NR_COPPER_RECORDS 1000000 +#define NR_COPPER_RECORDS 100000 /* Record copper activity for the debugger. 
*/ struct cop_rec { int hpos, vpos; - uae_u16 reg, dat; uaecptr addr; }; static struct cop_rec *cop_record[2]; static int nr_cop_records[2], curr_cop_set; +#define NR_DMA_REC_HPOS 256 +#define NR_DMA_REC_VPOS 1000 +static struct dma_rec *dma_record[2]; +static int dma_record_toggle; + +void record_dma_reset (void) +{ + int v, h; + struct dma_rec *dr, *dr2; + + if (!dma_record[0]) + return; + dma_record_toggle ^= 1; + dr = dma_record[dma_record_toggle]; + for (v = 0; v < NR_DMA_REC_VPOS; v++) { + for (h = 0; h < NR_DMA_REC_HPOS; h++) { + dr2 = &dr[v * NR_DMA_REC_HPOS + h]; + memset (dr2, 0, sizeof (struct dma_rec)); + dr2->reg = 0xffff; + dr2->addr = 0xffffffff; + } + } +} + void record_copper_reset (void) { -/* Start a new set of copper records. */ + /* Start a new set of copper records. */ curr_cop_set ^= 1; nr_cop_records[curr_cop_set] = 0; } -void record_copper_otherdma (uae_u16 bpl, uae_u16 dat, int hpos, int vpos) +void record_dma_event (int evt, int hpos, int vpos) { - int t = nr_cop_records[curr_cop_set]; - if (!cop_record[0]) + struct dma_rec *dr; + + if (!dma_record[0]) return; - if (t >= NR_COPPER_RECORDS) + if (hpos >= NR_DMA_REC_HPOS || vpos >= NR_DMA_REC_VPOS) + return ; + dr = &dma_record[dma_record_toggle][vpos * NR_DMA_REC_HPOS + hpos]; + dr->evt |= evt; +} + +struct dma_rec *record_dma (uae_u16 reg, uae_u16 dat, uae_u32 addr, int hpos, int vpos) +{ + struct dma_rec *dr; + + if (!dma_record[0]) { + dma_record[0] = xmalloc (NR_DMA_REC_HPOS * NR_DMA_REC_VPOS * sizeof (struct dma_rec)); + dma_record[1] = xmalloc (NR_DMA_REC_HPOS * NR_DMA_REC_VPOS * sizeof (struct dma_rec)); + dma_record_toggle = 0; + record_dma_reset (); + } + if (hpos >= NR_DMA_REC_HPOS || vpos >= NR_DMA_REC_VPOS) + return NULL; + dr = &dma_record[dma_record_toggle][vpos * NR_DMA_REC_HPOS + hpos]; + if (dr->reg != 0xffff) + write_log (L"DMA conflict: v=%d h=%d OREG=%04X NREG=%04X\n", vpos, hpos, dr->reg, reg); + dr->reg = reg; + dr->dat = dat; + dr->addr = addr; + return dr; +} + 
+static void decode_dma_record (int hpos, int vpos, int toggle) +{ + struct dma_rec *dr; + int h, i, maxh; + + if (!dma_record[0]) return; - cop_record[curr_cop_set][t].addr = 0xffffffff; - cop_record[curr_cop_set][t].hpos = hpos; - cop_record[curr_cop_set][t].vpos = vpos; - cop_record[curr_cop_set][t].reg = bpl; - cop_record[curr_cop_set][t].dat = dat; - nr_cop_records[curr_cop_set] = t + 1; + dr = &dma_record[dma_record_toggle ^ toggle][vpos * NR_DMA_REC_HPOS]; + console_out_f (L"Line: %02X %3d HPOS %02X %3d:\n", vpos, vpos, hpos, hpos); + h = hpos; + dr += hpos; + maxh = hpos + 80; + if (maxh > maxhpos) + maxh = maxhpos; + while (h < maxh) { + int col = 9; + int cols = 8; + TCHAR l1[81]; + TCHAR l2[81]; + TCHAR l3[81]; + TCHAR l4[81]; + for (i = 0; i < cols && h < maxh; i++, h++, dr++) { + int cl = i * col, cl2; + + _stprintf (l1 + cl, L"[%02X %3d]", h, h); + _tcscpy (l4 + cl, L" "); + if (dr->reg != 0xffff) { + if ((dr->reg & 0x1001) == 0x1000) + _tcscpy (l2 + cl, L" CPU-R "); + else if ((dr->reg & 0x1001) == 0x1001) + _tcscpy (l2 + cl, L" CPU-W "); + else + _stprintf (l2 + cl, L" %03X", dr->reg); + _stprintf (l3 + cl, L" %04X", dr->dat); + if (dr->addr != 0xffffffff) + _stprintf (l4 + cl, L"%08X", dr->addr & 0x00ffffff); + } else { + _tcscpy (l2 + cl, L" "); + _tcscpy (l3 + cl, L" "); + } + cl2 = cl; + if (dr->evt & DMA_EVENT_BLITNASTY) + l2[cl2++] = 'N'; + if (dr->evt & DMA_EVENT_BLITFINISHED) + l2[cl2++] = 'B'; + if (dr->evt & DMA_EVENT_BLITIRQ) + l2[cl2++] = 'b'; + if (i < cols - 1 && h < maxh - 1) { + l1[cl + col - 1] = 32; + l2[cl + col - 1] = 32; + l3[cl + col - 1] = 32; + l4[cl + col - 1] = 32; + } + } + console_out_f (L"%s\n", l1); + console_out_f (L"%s\n", l2); + console_out_f (L"%s\n", l3); + console_out_f (L"%s\n", l4); + console_out_f (L"\n"); + } } void record_copper (uaecptr addr, int hpos, int vpos) @@ -728,8 +833,6 @@ void record_copper (uaecptr addr, int hpos, int vpos) cop_record[curr_cop_set][t].addr = addr; cop_record[curr_cop_set][t].hpos 
= hpos; cop_record[curr_cop_set][t].vpos = vpos; - cop_record[curr_cop_set][t].reg = 0xffff; - cop_record[curr_cop_set][t].dat = 0xffff; nr_cop_records[curr_cop_set] = t + 1; } if (debug_copper & 2) { /* trace */ @@ -761,7 +864,6 @@ static void decode_copper_insn (FILE* file, unsigned long insn, unsigned long ad uae_u32 insn_type = insn & 0x00010001; TCHAR here = ' '; TCHAR record[] = L" "; - int cnt; if ((cr = find_copper_records (addr))) { _stprintf (record, L" [%03x %03x]", cr->vpos, cr->hpos); @@ -808,32 +910,6 @@ static void decode_copper_insn (FILE* file, unsigned long insn, unsigned long ad abort (); } - if (!cr) - return; - cr++; - cnt = 0; - while (cr->addr == 0xffffffff) { - int addr = cr->reg; - int i = 0; - while (custd[i].name) { - if (custd[i].adr == addr + 0xdff000) - break; - i++; - } - _stprintf (record, L" [%03x %03x]", cr->vpos, cr->hpos); - console_out_f (L" %04lx %04lx%s\t; ", addr, cr->dat, record); - if (custd[i].name) - console_out_f (L"%s := 0x%04lx\n", custd[i].name, cr->dat); - else - console_out_f (L"%04x := 0x%04lx\n", addr, cr->dat); - cr++; - if (cnt++ >= 10) { - console_out_f (L" ...\n"); - break; - } - - } - } static uaecptr decode_copperlist (FILE* file, uaecptr address, int nolines) @@ -2962,6 +3038,22 @@ static void debug_1 (void) dumpmem (maddr, &nxmem, lines); } break; + case 'v': + case 'V': + { + int v1 = vpos, v2 = 0; + if (more_params (&inptr)) + v1 = readint (&inptr); + if (more_params (&inptr)) + v2 = readint (&inptr); + if (debug_dma) { + decode_dma_record (v2, v1, cmd == 'v'); + } else { + debug_dma = 1; + console_out_f (L"DMA debugger enabled.\n"); + } + } + break; case 'o': { if (copper_debugger (&inptr)) { @@ -3167,7 +3259,7 @@ void debug (void) do_skip = 1; if (do_skip) { set_special (&regs, SPCFLAG_BRK); - unset_special (&regs, SPCFLAG_STOP); + m68k_resumestopped (&regs); debugging = 1; } resume_sound (); diff --git a/drawing.c b/drawing.c index b32a0b3c..2261ded7 100644 --- a/drawing.c +++ b/drawing.c @@ -2683,6 +2683,7 @@
void finish_drawing_frame (void) unlockscr (); return; #endif + for (i = 0; i < max_ypos_thisframe; i++) { int i1 = i + min_ypos_for_screen; int line = i + thisframe_y_adjust_real; diff --git a/gencpu.c b/gencpu.c index d7d3be91..6e3f867b 100644 --- a/gencpu.c +++ b/gencpu.c @@ -802,6 +802,7 @@ static void genmovemel (uae_u16 opcode) printf ("\tuae_u16 mask = %s;\n", gen_nextiword (0)); printf ("\tunsigned int dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n"); genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1, 0); + fill_prefetch_next (); start_brace (); printf ("\twhile (dmask) { m68k_dreg (regs, movem_index1[dmask]) = %s; srca += %d; dmask = movem_next[dmask]; }\n", getcode, size); @@ -819,6 +820,7 @@ static void genmovemel_ce (uae_u16 opcode) printf ("\tunsigned int dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n"); printf ("\tuae_u32 v;\n"); genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1, GF_AA); + fill_prefetch_next (); if (table68k[opcode].dmode == Ad8r || table68k[opcode].dmode == PC8r) addcycles (2); start_brace (); @@ -833,7 +835,7 @@ static void genmovemel_ce (uae_u16 opcode) printf ("\twhile (amask) { m68k_areg (regs, movem_index1[amask]) = (uae_s32)(uae_s16)get_word_ce(srca); srca += %d; amask = movem_next[amask]; }\n", size); } - printf ("\tget_word_ce (srca);\n"); + printf ("\tget_word_ce (srca);\n"); // and final extra word fetch that goes nowhere.. 
if (table68k[opcode].dmode == Aipi) printf ("\tm68k_areg (regs, dstreg) = srca;\n"); } @@ -859,8 +861,10 @@ static void genmovemle (uae_u16 opcode) printf ("\tuae_u16 mask = %s;\n", gen_nextiword (0)); genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1, 0); - if (using_prefetch) + if (using_prefetch) { sync_m68k_pc (); + fill_prefetch_next (); + } start_brace (); if (table68k[opcode].dmode == Apdi) { @@ -892,6 +896,7 @@ static void genmovemle_ce (uae_u16 opcode) if (table68k[opcode].dmode == Ad8r || table68k[opcode].dmode == PC8r) addcycles (2); + fill_prefetch_next (); start_brace (); if (table68k[opcode].size == sz_long) { if (table68k[opcode].dmode == Apdi) { @@ -1201,17 +1206,36 @@ static void shift_ce (amodes dmode, int size) { if (using_ce && isreg (dmode)) { printf ("\t{\n"); - printf ("\t\tint cycles = %d * CYCLE_UNIT / 2;\n", size == sz_long ? 8 : 6); + printf ("\t\tint cycles = %d * CYCLE_UNIT / 2;\n", size == sz_long ? 4 : 2); printf ("\t\tcycles += 2 * CYCLE_UNIT / 2 * ccnt;\n"); addcycles3 ("\t\t"); printf ("\t}\n"); } } +// BCHG/BSET/BCLR Dx,Dx or #xx,Dx adds 2 cycles if bit number > 15 +static void bsetcycles (struct instr *curi) +{ + if (curi->size == sz_byte) { + printf ("\tsrc &= 7;\n"); + } else { + printf ("\tsrc &= 31;\n"); + if (isreg (curi->dmode)) { + addcycles (2); + if (curi->mnemo != i_BTST) + printf ("\tif (src > 15) do_cycles_ce (2 * CYCLE_UNIT / 2);\n"); + } + } +} + +static int islongimm (struct instr *curi) +{ + return (curi->size == sz_long && (isreg (curi->smode) || curi->smode == imm || curi->smode == immi)); +} + static void gen_opcode (unsigned long int opcode) { struct instr *curi = table68k + opcode; - int tmpc = 0; insn_n_cycles = using_prefetch ? 0 : 4; start_brace (); @@ -1247,8 +1271,13 @@ static void gen_opcode (unsigned long int opcode) genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); printf ("\tsrc %c= dst;\n", curi->mnemo == i_OR ? '|' : curi->mnemo == i_AND ? 
'&' : '^'); genflags (flag_logical, curi->size, "src", "", ""); - if (curi->size == sz_long && isreg (curi->dmode)) - addcycles (curi->mnemo == i_AND ? 2 : 4); + if (curi->size == sz_long) { + int c = (curi->mnemo == i_EOR ? 4 : 2); + if (curi->mnemo != i_EOR && islongimm (curi)) + c += 2; + if (c > 0) + addcycles (c); + } fill_prefetch_next (); genastore ("src", curi->dmode, "dstreg", curi->size, "dst"); break; @@ -1279,8 +1308,14 @@ static void gen_opcode (unsigned long int opcode) genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); if (isreg (curi->dmode)) { - if (curi->dmode == Dreg && curi->size == sz_long) - addcycles ((curi->smode == imm || curi->smode == immi) ? 4 : 2); + int c = 0; + if (curi->size == sz_long) { + c += 2; + if (islongimm (curi)) + c += 2; + } + if (c > 0) + addcycles (c); } fill_prefetch_next (); start_brace (); @@ -1290,11 +1325,12 @@ static void gen_opcode (unsigned long int opcode) case i_SUBA: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0, 0); - if (isreg (curi->dmode) && curi->dmode == Areg) { - tmpc += curi->size == sz_long ? 2 : 4; - if (curi->size == sz_long) - tmpc += (isreg (curi->smode) || curi->smode == imm) ? 2 : 0; - addcycles (4); + if (isreg (curi->dmode)) { + int c = curi->size == sz_long ? 
2 : 4; + if (islongimm (curi)) + c += 2; + if (c > 0) + addcycles (c); } fill_prefetch_next (); start_brace (); @@ -1304,8 +1340,12 @@ static void gen_opcode (unsigned long int opcode) case i_SUBX: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, GF_AA); genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, GF_AA); - if ((isreg (curi->smode) && curi->size == sz_long) || !isreg (curi->smode)) - addcycles (2); + if (curi->size == sz_long) { + if (isreg (curi->smode)) + addcycles (4); + else + addcycles (2); + } fill_prefetch_next (); start_brace (); printf ("\tuae_u32 newv = dst - src - (GET_XFLG (&regs->ccrflags) ? 1 : 0);\n"); @@ -1343,8 +1383,14 @@ static void gen_opcode (unsigned long int opcode) genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); if (isreg (curi->dmode)) { - if (curi->dmode == Dreg && curi->size == sz_long) - addcycles ((curi->smode == imm || curi->smode == immi) ? 4 : 2); + int c = 0; + if (curi->size == sz_long) { + c += 2; + if (islongimm (curi)) + c += 2; + } + if (c > 0) + addcycles (c); } fill_prefetch_next (); start_brace (); @@ -1354,11 +1400,12 @@ static void gen_opcode (unsigned long int opcode) case i_ADDA: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0, 0); - if (isreg (curi->dmode) && curi->dmode == Areg) { - tmpc += curi->size == sz_long ? 2 : 4; - if (curi->size == sz_long) - tmpc += (isreg (curi->smode) || curi->smode == imm) ? 2 : 0; - addcycles (tmpc); + if (isreg (curi->dmode)) { + int c = curi->size == sz_long ?
2 : 4; + if (islongimm (curi)) + c += 2; + if (c > 0) + addcycles (c); } fill_prefetch_next (); start_brace (); @@ -1368,8 +1415,12 @@ static void gen_opcode (unsigned long int opcode) case i_ADDX: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, GF_AA); genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, GF_AA); - if ((isreg (curi->smode) && curi->size == sz_long) || !isreg (curi->smode)) - addcycles (2); + if (curi->size == sz_long) { + if (isreg (curi->smode)) + addcycles (4); + else + addcycles (2); + } fill_prefetch_next (); start_brace (); printf ("\tuae_u32 newv = dst + src + (GET_XFLG (&regs->ccrflags) ? 1 : 0);\n"); @@ -1408,7 +1459,7 @@ static void gen_opcode (unsigned long int opcode) case i_NEG: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); if (isreg (curi->smode) && curi->size == sz_long) - addcycles (2); + addcycles (2); fill_prefetch_next (); start_brace (); genflags (flag_sub, curi->size, "dst", "src", "0"); @@ -1417,7 +1468,7 @@ static void gen_opcode (unsigned long int opcode) case i_NEGX: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); if (isreg (curi->smode) && curi->size == sz_long) - addcycles (2); + addcycles (2); fill_prefetch_next (); start_brace (); printf ("\tuae_u32 newv = 0 - src - (GET_XFLG (&regs->ccrflags) ? 1 : 0);\n"); @@ -1428,7 +1479,7 @@ static void gen_opcode (unsigned long int opcode) case i_NBCD: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); if (isreg (curi->smode)) - addcycles (2); + addcycles (2); fill_prefetch_next (); start_brace (); printf ("\tuae_u16 newv_lo = - (src & 0xF) - (GET_XFLG (&regs->ccrflags) ?
1 : 0);\n"); @@ -1478,55 +1529,29 @@ static void gen_opcode (unsigned long int opcode) case i_BTST: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); - if (isreg (curi->dmode)) - addcycles (2); fill_prefetch_next (); - if (curi->size == sz_byte) - printf ("\tsrc &= 7;\n"); - else - printf ("\tsrc &= 31;\n"); + bsetcycles (curi); printf ("\tSET_ZFLG (&regs->ccrflags, 1 ^ ((dst >> src) & 1));\n"); break; case i_BCHG: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); - if (isreg (curi->dmode)) - addcycles (4); - fill_prefetch_next (); - if (curi->size == sz_byte) - printf ("\tsrc &= 7;\n"); - else - printf ("\tsrc &= 31;\n"); - printf ("\tdst ^= (1 << src);\n"); - printf ("\tSET_ZFLG (&regs->ccrflags, ((uae_u32)dst & (1 << src)) >> src);\n"); - genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); - break; case i_BCLR: - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); - genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); - if (isreg (curi->dmode)) - addcycles (4); - fill_prefetch_next (); - if (curi->size == sz_byte) - printf ("\tsrc &= 7;\n"); - else - printf ("\tsrc &= 31;\n"); - printf ("\tSET_ZFLG (&regs->ccrflags, 1 ^ ((dst >> src) & 1));\n"); - printf ("\tdst &= ~(1 << src);\n"); - genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); - break; case i_BSET: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); - if (isreg (curi->dmode)) - addcycles (4); + bsetcycles (curi); + if (curi->mnemo == i_BCLR) + addcycles (2); fill_prefetch_next (); - if (curi->size == sz_byte) - printf ("\tsrc &= 7;\n"); - else - printf ("\tsrc &= 31;\n"); - printf ("\tSET_ZFLG (&regs->ccrflags, 1 ^ ((dst >> src) & 1));\n"); - printf ("\tdst |= (1 << src);\n"); + if (curi->mnemo == i_BCHG) { + printf ("\tdst ^= (1 << src);\n"); + printf
("\tSET_ZFLG (&regs->ccrflags, ((uae_u32)dst & (1 << src)) >> src);\n"); + } else if (curi->mnemo == i_BCLR) { + printf ("\tSET_ZFLG (&regs->ccrflags, 1 ^ ((dst >> src) & 1));\n"); + printf ("\tdst &= ~(1 << src);\n"); + } else if (curi->mnemo == i_BSET) { + printf ("\tSET_ZFLG (&regs->ccrflags, 1 ^ ((dst >> src) & 1));\n"); + printf ("\tdst |= (1 << src);\n"); + } genastore ("dst", curi->dmode, "dstreg", curi->size, "dst"); break; case i_CMPM: @@ -1541,7 +1566,7 @@ static void gen_opcode (unsigned long int opcode) genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); if (isreg (curi->dmode)) { if (curi->dmode == Areg || (curi->dmode == Dreg && curi->size == sz_long)) - addcycles (2); + addcycles (2); } fill_prefetch_next (); start_brace (); @@ -1580,7 +1605,7 @@ static void gen_opcode (unsigned long int opcode) } fill_prefetch_next (); break; - case i_MVPMR: + case i_MVPMR: // MOVEP printf ("\tuaecptr memp = m68k_areg (regs, srcreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword (0)); genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0, 0); if (using_ce) { @@ -1604,10 +1629,9 @@ static void gen_opcode (unsigned long int opcode) case i_MOVE: case i_MOVEA: { - /* moves have special prefetch sequences: - * - MOVE ,-(An) = prefetch is before writes - * - MOVE ,(xxx).L, the most stupid ever. 2 prefetches after write - * if is not register or immediate + /* 2 MOVE instructions have special prefetch sequence: + * - MOVE ,-(An) = prefetch is before writes (Apdi) + * - MOVE memory,(xxx).L, the most stupid ever. 2 prefetches after write * - all others = prefetch is done after writes */ int prefetch_done = 0; @@ -1633,8 +1657,8 @@ static void gen_opcode (unsigned long int opcode) fill_prefetch_next (); } break; - case i_MVSR2: - genamode (curi->smode, "srcreg", sz_word, "src", 2, 0, 0); + case i_MVSR2: // MOVE FROM SR (like CLR, does dummy read first on 68000) + genamode (curi->smode, "srcreg", sz_word, "src", cpu_level == 0 ?
1 : 2, 0, 0); if (isreg (curi->smode)) addcycles (2); fill_prefetch_next (); @@ -1644,7 +1668,7 @@ static void gen_opcode (unsigned long int opcode) else genastore ("regs->sr", curi->smode, "srcreg", sz_word, "src"); break; - case i_MV2SR: + case i_MV2SR: // MOVE TO SR genamode (curi->smode, "srcreg", sz_word, "src", 1, 0, 0); if (curi->size == sz_byte) { addcycles (8); @@ -1692,14 +1716,12 @@ static void gen_opcode (unsigned long int opcode) genmovemel_ce (opcode); else genmovemel (opcode); - fill_prefetch_next (); break; case i_MVMLE: if (using_ce) genmovemle_ce (opcode); else genmovemle (opcode); - fill_prefetch_next (); break; case i_TRAP: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); @@ -1728,13 +1750,17 @@ static void gen_opcode (unsigned long int opcode) fill_prefetch_next (); break; case i_STOP: - /* real stop do not prefetch anything, later... */ - genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); - printf ("\tregs->sr = src;\n"); + if (using_prefetch) { + printf ("\tregs->sr = regs->irc;\n"); + } else { + genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, 0); + printf ("\tregs->sr = src;\n"); + } printf ("\tMakeFromSR (regs);\n"); - printf ("\tm68k_setstopped(regs, 1);\n"); + printf ("\tm68k_setstopped (regs);\n"); sync_m68k_pc (); - fill_prefetch_full (); + // STOP does not prefetch anything + did_prefetch = -1; break; case i_LPSTOP: /* 68060 */ printf ("\tuae_u16 sw = get_iword (regs, 2);\n"); @@ -1744,7 +1770,7 @@ static void gen_opcode (unsigned long int opcode) printf ("\tif (!(sr & 0x8000)) { Exception (8, regs, 0); goto %s; }\n", endlabelstr); printf ("\tregs->sr = sr;\n"); printf ("\tMakeFromSR (regs);\n"); - printf ("\tm68k_setstopped(regs, 1);\n"); + printf ("\tm68k_setstopped(regs);\n"); m68k_pc_offset += 4; sync_m68k_pc (); fill_prefetch_full (); @@ -1802,9 +1828,9 @@ static void gen_opcode (unsigned long int opcode) case i_LINK: genamode (Apdi, "7", sz_long, "old", 2, 0, GF_AA); genamode (curi->smode,
"srcreg", sz_long, "src", 1, 0, GF_AA); + genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0, 0); genastore ("src", Apdi, "7", sz_long, "old"); genastore ("m68k_areg (regs, 7)", curi->smode, "srcreg", sz_long, "src"); - genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0, 0); printf ("\tm68k_areg (regs, 7) += offs;\n"); fill_prefetch_next (); break; @@ -1844,7 +1870,7 @@ static void gen_opcode (unsigned long int opcode) m68k_pc_offset = 0; fill_prefetch_full (); break; - case i_JSR: + case i_JSR: // TODO: check stack write order genamode (curi->smode, "srcreg", curi->size, "src", 0, 0, GF_AA|GF_NOREFILL); start_brace (); printf ("\tuaecptr oldpc = m68k_getpc (regs) + %d;\n", m68k_pc_offset); @@ -1969,17 +1995,16 @@ static void gen_opcode (unsigned long int opcode) genamode (curi->smode, "srcreg", curi->size, "src", 0, 0, GF_AA); genamode (Apdi, "7", sz_long, "dst", 2, 0, GF_AA); if (curi->smode == Ad8r || curi->smode == PC8r) - addcycles (4); + addcycles (4); if (!(curi->smode == absw || curi->smode == absl)) - fill_prefetch_next (); + fill_prefetch_next (); genastore ("srca", Apdi, "7", sz_long, "dst"); if ((curi->smode == absw || curi->smode == absl)) - fill_prefetch_next (); + fill_prefetch_next (); break; case i_DBcc: genamode (curi->smode, "srcreg", curi->size, "src", 1, 0, GF_AA | GF_NOREFILL); genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0, GF_AA | GF_NOREFILL); - printf ("\tuaecptr oldpc = m68k_getpc (regs);\n"); addcycles (2); printf ("\tif (!cctrue(&regs->ccrflags, %d)) {\n", curi->cc); @@ -2019,7 +2044,7 @@ static void gen_opcode (unsigned long int opcode) if (using_ce) { printf ("\tint cycles = 0;\n"); if (isreg (curi->smode)) - printf ("\tif (val) cycles += 2 * CYCLE_UNIT / 2;\n"); + printf ("\tif (val) cycles += 2 * CYCLE_UNIT / 2;\n"); addcycles3 ("\t"); } genastore ("val", curi->smode, "srcreg", curi->size, "src"); @@ -2154,8 +2179,8 @@ static void gen_opcode (unsigned long int opcode) genamode (curi->smode, "srcreg",
curi->size, "src", 1, 0, 0); genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0, 0); sync_m68k_pc (); - addcycles (6); fill_prefetch_next (); + addcycles (6); printf ("\tif ((uae_s32)dst < 0) {\n"); printf ("\t\tSET_NFLG (&regs->ccrflags, 1);\n"); printf ("\t\tException (6, regs, oldpc);\n"); @@ -2316,8 +2341,7 @@ static void gen_opcode (unsigned long int opcode) printf ("\tcnt &= 63;\n"); printf ("\tCLEAR_CZNV (&regs->ccrflags);\n"); printf ("\tif (cnt >= %d) {\n", bit_size (curi->size)); - printf ("\t\tSET_CFLG (&regs->ccrflags, cnt == %d ? val & 1 : 0);\n", - bit_size (curi->size)); + printf ("\t\tSET_CFLG (&regs->ccrflags, cnt == %d ? val & 1 : 0);\n", bit_size (curi->size)); duplicate_carry (1); printf ("\t\tval = 0;\n"); if (source_is_imm1_8 (curi)) @@ -2858,16 +2882,16 @@ static void gen_opcode (unsigned long int opcode) if (!isreg (curi->smode)) addcycles (2); fill_prefetch_next (); - if (1 || cpu_level >= 2 || curi->smode == Dreg) { - printf ("\tsrc |= 0x80;\n"); + printf ("\tsrc |= 0x80;\n"); + if (cpu_level >= 2 || curi->smode == Dreg || !using_ce) { if (next_cpu_level < 2) next_cpu_level = 2 - 1; genastore ("src", curi->smode, "srcreg", curi->size, "src"); } else { - /* not exactly like this either..
*/ - printf ("\tif (src >= 0x200000 || (src >= 0xc00000 && src < 0xe00000)) {\n"); - printf ("\t src |= 0x80; \n"); + printf ("\tif (!is_cycle_ce ()) {\n"); genastore ("src", curi->smode, "srcreg", curi->size, "src"); + printf ("\t} else {\n"); + printf ("\t\tdo_cycles_ce (4 * CYCLE_UNIT / 2);\n"); printf ("\t}\n"); } break; @@ -2991,7 +3015,8 @@ static void gen_opcode (unsigned long int opcode) limit_braces = 0; finish_braces (); } - fill_prefetch_finish (); + if (did_prefetch >= 0) + fill_prefetch_finish (); sync_m68k_pc (); did_prefetch = 0; } diff --git a/include/blitter.h b/include/blitter.h index b8688578..8d642a26 100644 --- a/include/blitter.h +++ b/include/blitter.h @@ -22,6 +22,8 @@ extern enum blitter_states { extern struct bltinfo blt_info; +extern int blitter_nasty, blit_interrupt; + extern uae_u16 bltsize; extern uae_u16 bltcon0, bltcon1; extern uae_u32 bltapt, bltbpt, bltcpt, bltdpt; @@ -35,7 +37,8 @@ extern void blitter_handler (uae_u32); extern void build_blitfilltable (void); extern void do_blitter (int); extern void decide_blitter (int hpos); -extern void blitter_done_notify (void); +extern int blitter_need (int hpos); +extern void blitter_done_notify (int hpos); extern void blitter_slowdown (int, int, int, int); typedef void blitter_func(uaecptr, uaecptr, uaecptr, uaecptr, struct bltinfo *); diff --git a/include/custom.h b/include/custom.h index 1dd2a1cb..7367581f 100644 --- a/include/custom.h +++ b/include/custom.h @@ -119,13 +119,13 @@ extern frame_time_t syncbase; #define DMA_BLITPRI 0x0400 #define CYCLE_REFRESH 0x01 -#define CYCLE_MISC 0x02 -#define CYCLE_SPRITE 0x04 -#define CYCLE_BITPLANE 0x08 -#define CYCLE_COPPER 0x10 -#define CYCLE_BLITTER 0x20 -#define CYCLE_CPU 0x40 -#define CYCLE_NOCPU 0x80 +#define CYCLE_STROBE 0x02 +#define CYCLE_MISC 0x04 +#define CYCLE_SPRITE 0x08 +#define CYCLE_BITPLANE 0x10 +#define CYCLE_COPPER 0x20 +#define CYCLE_BLITTER 0x40 +#define CYCLE_CPU 0x80 extern unsigned long frametime, timeframes; extern int 
plfstrt, plfstop, plffirstline, plflastline; diff --git a/include/debug.h b/include/debug.h index 37ba89f5..e5cf79c0 100644 --- a/include/debug.h +++ b/include/debug.h @@ -15,21 +15,21 @@ extern int debugging; extern int exception_debugging; extern int debug_copper; +extern int debug_dma; extern int debug_sprite_mask; extern int debug_bpl_mask, debug_bpl_mask_one; extern int debugger_active; -extern void debug(void); -extern void debugger_change(int mode); -extern void activate_debugger(void); +extern void debug (void); +extern void debugger_change (int mode); +extern void activate_debugger (void); extern void deactivate_debugger (void); extern int notinrom (void); extern const TCHAR *debuginfo (int); extern void record_copper (uaecptr addr, int hpos, int vpos); -extern void record_copper_otherdma (uae_u16 bpl, uae_u16 dat, int hpos, int vpos); -extern void record_copper_reset(void); -extern int mmu_init(int,uaecptr,uaecptr); -extern void mmu_do_hit(void); +extern void record_copper_reset (void); +extern int mmu_init (int, uaecptr,uaecptr); +extern void mmu_do_hit (void); extern void dump_aga_custom (void); extern void memory_map_dump (void); extern void debug_help (void); @@ -71,6 +71,22 @@ void debug_lputpeek(uaecptr addr, uae_u32 v); enum debugtest_item { DEBUGTEST_BLITTER, DEBUGTEST_KEYBOARD, DEBUGTEST_FLOPPY, DEBUGTEST_MAX }; void debugtest (enum debugtest_item, const TCHAR *, ...); +struct dma_rec +{ + uae_u16 reg; + uae_u16 dat; + uae_u32 addr; + uae_u16 evt; +}; + +#define DMA_EVENT_BLITIRQ 1 +#define DMA_EVENT_BLITNASTY 2 +#define DMA_EVENT_BLITFINISHED 4 + +extern struct dma_rec *record_dma (uae_u16 reg, uae_u16 dat, uae_u32 addr, int hpos, int vpos); +extern void record_dma_reset (void); +extern void record_dma_event (int evt, int hpos, int vpos); + #else STATIC_INLINE void activate_debugger (void) { }; diff --git a/include/events.h b/include/events.h index fc62616d..f739d086 100644 --- a/include/events.h +++ b/include/events.h @@ -24,6 +24,7 @@ extern 
frame_time_t syncbase; extern void compute_vsynctime (void); extern void init_eventtab (void); extern void do_cycles_ce (long cycles); +extern int is_cycle_ce (void); extern unsigned long currcycle, nextevent, is_lastline; typedef void (*evfunc)(void); diff --git a/include/newcpu.h b/include/newcpu.h index e8268103..a9b9c9cf 100644 --- a/include/newcpu.h +++ b/include/newcpu.h @@ -267,14 +267,8 @@ STATIC_INLINE uae_u32 next_ilongi (struct regstruct *regs) return r; } -STATIC_INLINE void m68k_setstopped (struct regstruct *regs, int stop) -{ - regs->stopped = stop; - /* A traced STOP instruction drops through immediately without - actually stopping. */ - if (stop && (regs->spcflags & SPCFLAG_DOTRACE) == 0) - set_special (regs, SPCFLAG_STOP); -} +extern void m68k_setstopped (struct regstruct *regs); +extern void m68k_resumestopped (struct regstruct *regs); extern uae_u32 REGPARAM3 get_disp_ea_020 (struct regstruct *regs, uae_u32 base, uae_u32 dp) REGPARAM; extern uae_u32 REGPARAM3 get_disp_ea_020i (struct regstruct *regs, uae_u32 base, uae_u32 dp) REGPARAM; diff --git a/include/options.h b/include/options.h index 3ee00028..b5e7c087 100644 --- a/include/options.h +++ b/include/options.h @@ -349,7 +349,8 @@ struct uae_prefs { int win32_guikey; int win32_kbledmode; int win32_fscodepage; - TCHAR win32_commandpath[MAX_DPATH]; + TCHAR win32_commandpathstart[MAX_DPATH]; + TCHAR win32_commandpathend[MAX_DPATH]; int curses_reverse_video; @@ -418,7 +419,6 @@ extern void target_save_options (struct zfile*, struct uae_prefs *); extern void target_default_options (struct uae_prefs *, int type); extern void target_fixup_options (struct uae_prefs *); extern int target_cfgfile_load (struct uae_prefs *, TCHAR *filename, int type, int isdefault); -extern void target_quit (void); extern void cfgfile_save_options (struct zfile *f, struct uae_prefs *p, int type); extern int cfgfile_load (struct uae_prefs *p, const TCHAR *filename, int *type, int ignorelink, int userconfig); diff --git 
a/include/uae.h b/include/uae.h index 4f1a47ca..d1327991 100644 --- a/include/uae.h +++ b/include/uae.h @@ -23,6 +23,8 @@ extern void uae_restart (int, TCHAR*); extern void reset_all_systems (void); extern void target_reset (void); extern void target_addtorecent (const TCHAR*, int); +extern void target_run (void); +extern void target_quit (void); extern int quit_program; diff --git a/main.c b/main.c index ee52c582..6ee719ee 100644 --- a/main.c +++ b/main.c @@ -759,6 +759,7 @@ static int real_main2 (int argc, TCHAR **argv) fixup_prefs (&currprefs); changed_prefs = currprefs; + target_run (); /* force sound settings change */ currprefs.produce_sound = 0; diff --git a/newcpu.c b/newcpu.c index 7626123d..03cd3b2f 100644 --- a/newcpu.c +++ b/newcpu.c @@ -1691,7 +1691,7 @@ unsigned long REGPARAM2 op_illg (uae_u32 opcode, struct regstruct *regs) return 4; } else if (inrt) { /* User-mode STOP replacement */ - m68k_setstopped (regs, 1); + m68k_setstopped (regs); return 4; } } @@ -2039,6 +2039,8 @@ STATIC_INLINE int do_specialties (int cycles, struct regstruct *regs) } if ((regs->spcflags & (SPCFLAG_BRK | SPCFLAG_MODE_CHANGE))) { unset_special (regs, SPCFLAG_BRK | SPCFLAG_MODE_CHANGE); + // SPCFLAG_BRK breaks STOP condition, need to prefetch + m68k_resumestopped (regs); return 1; } @@ -3021,8 +3023,9 @@ uae_u8 *restore_cpu (uae_u8 *src) if (l & CPUMODE_HALT) { regs.stopped = 1; set_special (&regs, SPCFLAG_STOP); - } else + } else { regs.stopped = 0; + } if (model >= 68010) { regs.dfc = restore_u32 (); regs.sfc = restore_u32 (); @@ -3218,6 +3221,49 @@ void cpureset (void) } +void m68k_setstopped (struct regstruct *regs) +{ + regs->stopped = 1; + /* A traced STOP instruction drops through immediately without + actually stopping.
*/ + if ((regs->spcflags & SPCFLAG_DOTRACE) == 0) + set_special (regs, SPCFLAG_STOP); + else + m68k_resumestopped (regs); +} + +void m68k_resumestopped (struct regstruct *regs) +{ + if (!regs->stopped) + return; + regs->stopped = 0; + fill_prefetch_slow (regs); + unset_special (regs, SPCFLAG_STOP); +} + +/* + * Compute exact number of CPU cycles taken + * by DIVU and DIVS on a 68000 processor. + * + * Copyright (c) 2005 by Jorge Cwik, pasti@fxatari.com + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this software; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + + /* The routines below take dividend and divisor as parameters. @@ -3230,7 +3276,7 @@ void cpureset (void) Probably valid for 68008 after adding the extra prefetch cycle. - Best and worst cases are for register operand: + Best and worst cases for register operand: (Note the difference with the documented range.) @@ -3250,16 +3296,10 @@ void cpureset (void) Best case without signed overflow: 122 cycles. 
Best case with signed overflow: 120 cycles - + */ - -// -// DIVU -// Unsigned division -// - -STATIC_INLINE int getDivu68kCycles_2 (uae_u32 dividend, uae_u16 divisor) +int getDivu68kCycles (uae_u32 dividend, uae_u16 divisor) { int mcycles; uae_u32 hdivisor; @@ -3270,9 +3310,9 @@ STATIC_INLINE int getDivu68kCycles_2 (uae_u32 dividend, uae_u16 divisor) // Overflow if ((dividend >> 16) >= divisor) - return (mcycles = 5) * 2; + return (mcycles = 5 - 2) * 2; - mcycles = 38; + mcycles = 38 - 2; hdivisor = divisor << 16; for (i = 0; i < 15; i++) { @@ -3294,19 +3334,8 @@ STATIC_INLINE int getDivu68kCycles_2 (uae_u32 dividend, uae_u16 divisor) } return mcycles * 2; } -int getDivu68kCycles (uae_u32 dividend, uae_u16 divisor) -{ - int v = getDivu68kCycles_2 (dividend, divisor) - 4; -// write_log (L"U%d ", v); - return v; -} -// -// DIVS -// Signed division -// - -STATIC_INLINE int getDivs68kCycles_2 (uae_s32 dividend, uae_s16 divisor) +int getDivs68kCycles (uae_s32 dividend, uae_s16 divisor) { int mcycles; uae_u32 aquot; @@ -3315,7 +3344,7 @@ STATIC_INLINE int getDivs68kCycles_2 (uae_s32 dividend, uae_s16 divisor) if (divisor == 0) return 0; - mcycles = 6; + mcycles = 6 - 2; if (dividend < 0) mcycles++; @@ -3346,9 +3375,3 @@ STATIC_INLINE int getDivs68kCycles_2 (uae_s32 dividend, uae_s16 divisor) return mcycles * 2; } -int getDivs68kCycles (uae_s32 dividend, uae_s16 divisor) -{ - int v = getDivs68kCycles_2 (dividend, divisor) - 4; -// write_log (L"S%d ", v); - return v; -} diff --git a/od-win32/sounddep/sound.c b/od-win32/sounddep/sound.c index 74ff760d..107f8bce 100644 --- a/od-win32/sounddep/sound.c +++ b/od-win32/sounddep/sound.c @@ -797,7 +797,7 @@ static int open_audio_wasapi (struct sound_data *sd, int index, int exclusive) if (SUCCEEDED (hr) && hr != S_FALSE) break; write_log (L"WASAPI: IsFormatSupported(%d,%08X,%d) %08X\n", sd->channels, rn[rncnt], sd->freq, hr); - if (hr != AUDCLNT_E_UNSUPPORTED_FORMAT) + if (hr != AUDCLNT_E_UNSUPPORTED_FORMAT && hr != S_FALSE) goto 
error; rncnt++; if (rn[rncnt]) @@ -856,7 +856,7 @@ static int open_audio_wasapi (struct sound_data *sd, int index, int exclusive) hr = s->pAudioClient->lpVtbl->Initialize (s->pAudioClient, sharemode, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, - s->hnsRequestedDuration, s->wasapiexclusive ? s->hnsRequestedDuration : 0, &wavfmt.Format, NULL); + s->hnsRequestedDuration, s->wasapiexclusive ? s->hnsRequestedDuration : 0, pwfx ? pwfx : &wavfmt.Format, NULL); if (hr == AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED) { hr = s->pAudioClient->lpVtbl->GetBufferSize (s->pAudioClient, &s->bufferFrameCount); if (FAILED (hr)) { @@ -2154,9 +2154,11 @@ int enumerate_sound_devices (void) if (alcIsExtensionPresent (NULL, "ALC_ENUMERATE_ALL_EXT")) pDeviceNames = alcGetString (NULL, ALC_ALL_DEVICES_SPECIFIER); OpenALEnumerate (sound_devices, pDeviceNames, ppDefaultDevice, FALSE); +#if 0 ppDefaultDevice = alcGetString (NULL, ALC_CAPTURE_DEFAULT_DEVICE_SPECIFIER); pDeviceNames = alcGetString (NULL, ALC_CAPTURE_DEVICE_SPECIFIER); OpenALEnumerate (record_devices, pDeviceNames, ppDefaultDevice, TRUE); +#endif } } } __except(ExceptionFilter (GetExceptionInformation (), GetExceptionCode ())) { diff --git a/od-win32/win32.c b/od-win32/win32.c index cc138e41..f9ea889f 100644 --- a/od-win32/win32.c +++ b/od-win32/win32.c @@ -2097,8 +2097,50 @@ static get_aspi (int old) return UAESCSI_SPTI; } +static void shellexecute (TCHAR *command) +{ + SHELLEXECUTEINFO sei = { 0 }; + TCHAR *f = command; + TCHAR *sf, *s, *p; + + sf = s = xcalloc (_tcslen (f) + 1 + 1, sizeof (TCHAR)); + if (!s) + return; + _tcscpy (s, f); + for (;;) { + p = _tcschr (s, ';'); + if (!p) + break; + *p = 0; + } + while (s[0]) { + sei.cbSize = sizeof sei; + sei.fMask = SEE_MASK_FLAG_NO_UI | SEE_MASK_NOCLOSEPROCESS; + sei.lpFile = s; + sei.nShow = SW_HIDE; + write_log (L"ShellExecuteEx('%s')\n", s); + if (ShellExecuteEx (&sei)) { + HANDLE h = sei.hProcess; + if (h) { + WaitForSingleObject (h, INFINITE); + CloseHandle (h); + } + write_log 
(L"Succeeded\n"); + } else { + write_log (L"Failed. ERR=%d\n", GetLastError ()); + } + s += _tcslen (s) + 1; + } + xfree (sf); +} + +void target_run (void) +{ + shellexecute (currprefs.win32_commandpathstart); +} void target_quit (void) { + shellexecute (currprefs.win32_commandpathend); } void target_fixup_options (struct uae_prefs *p) @@ -2143,7 +2185,8 @@ void target_default_options (struct uae_prefs *p, int type) p->win32_rtgscaleaspectratio = -1; p->win32_rtgvblankrate = 0; p->win32_fscodepage = 0; - p->win32_commandpath[0] = 0; + p->win32_commandpathstart[0] = 0; + p->win32_commandpathend[0] = 0; } if (type == 1 || type == 0) { p->win32_uaescsimode = get_aspi (p->win32_uaescsimode); @@ -2211,7 +2254,8 @@ void target_save_options (struct zfile *f, struct uae_prefs *p) cfgfile_target_dwrite (f, L"kbledmode", L"%d", p->win32_kbledmode); cfgfile_target_dwrite_bool (f, L"powersavedisabled", p->win32_powersavedisabled); cfgfile_target_dwrite (f, L"filesystem_codepage", L"%d", p->win32_fscodepage); - cfgfile_target_dwrite_str (f, L"exec", p->win32_commandpath); + cfgfile_target_dwrite_str (f, L"exec_before", p->win32_commandpathstart); + cfgfile_target_dwrite_str (f, L"exec_after", p->win32_commandpathend); } @@ -2260,7 +2304,8 @@ int target_parse_option (struct uae_prefs *p, TCHAR *option, TCHAR *value) || cfgfile_yesno (option, value, L"notaskbarbutton", &p->win32_notaskbarbutton) || cfgfile_yesno (option, value, L"always_on_top", &p->win32_alwaysontop) || cfgfile_yesno (option, value, L"powersavedisabled", &p->win32_powersavedisabled) - || cfgfile_string (option, value, L"exec", p->win32_commandpath, sizeof p->win32_commandpath / sizeof (TCHAR)) + || cfgfile_string (option, value, L"exec_before", p->win32_commandpathstart, sizeof p->win32_commandpathstart / sizeof (TCHAR)) + || cfgfile_string (option, value, L"exec_after", p->win32_commandpathend, sizeof p->win32_commandpathend / sizeof (TCHAR)) || cfgfile_intval (option, value, L"specialkey", 
&p->win32_specialkey, 1) || cfgfile_intval (option, value, L"guikey", &p->win32_guikey, 1) || cfgfile_intval (option, value, L"kbledmode", &p->win32_kbledmode, 1) @@ -3707,57 +3752,237 @@ static int parseargs (const TCHAR *arg, const TCHAR *np, const TCHAR *np2) return 0; } +/*** +*static void parse_cmdline(cmdstart, argv, args, numargs, numchars) +* +*Purpose: +* Parses the command line and sets up the argv[] array. +* On entry, cmdstart should point to the command line, +* argv should point to memory for the argv array, args +* points to memory to place the text of the arguments. +* If these are NULL, then no storing (only counting) +* is done. On exit, *numargs has the number of +* arguments (plus one for a final NULL argument), +* and *numchars has the number of bytes used in the buffer +* pointed to by args. +* +*Entry: +* _TSCHAR *cmdstart - pointer to command line of the form +* <progname><nul><args><nul> +* _TSCHAR **argv - where to build argv array; NULL means don't +* build array +* _TSCHAR *args - where to place argument text; NULL means don't +* store text +* +*Exit: +* no return value +* int *numargs - returns number of argv entries created +* int *numchars - number of characters used in args buffer +* +*Exceptions: +* +*******************************************************************************/ + +#define NULCHAR _T('\0') +#define SPACECHAR _T(' ') +#define TABCHAR _T('\t') +#define DQUOTECHAR _T('\"') +#define SLASHCHAR _T('\\') + + +static void __cdecl wparse_cmdline ( + _TSCHAR *cmdstart, + _TSCHAR **argv, + _TSCHAR *args, + int *numargs, + int *numchars + ) +{ + _TSCHAR *p; + _TUCHAR c; + int inquote; /* 1 = inside quotes */ + int copychar; /* 1 = copy char to *args */ + unsigned numslash; /* num of backslashes seen */ + + *numchars = 0; + *numargs = 1; /* the program name at least */ + + /* first scan the program name, copy it, and count the bytes */ + p = cmdstart; + if (argv) + *argv++ = args; + +#ifdef WILDCARD + /* To handle later wild card expansion, we prefix
each entry by + it's first character before quote handling. This is done + so _[w]cwild() knows whether to expand an entry or not. */ + if (args) + *args++ = *p; + ++*numchars; + +#endif /* WILDCARD */ + + /* A quoted program name is handled here. The handling is much + simpler than for other arguments. Basically, whatever lies + between the leading double-quote and next one, or a terminal null + character is simply accepted. Fancier handling is not required + because the program name must be a legal NTFS/HPFS file name. + Note that the double-quote characters are not copied, nor do they + contribute to numchars. */ + inquote = FALSE; + do { + if (*p == DQUOTECHAR ) + { + inquote = !inquote; + c = (_TUCHAR) *p++; + continue; + } + ++*numchars; + if (args) + *args++ = *p; + + c = (_TUCHAR) *p++; +#ifdef _MBCS + if (_ismbblead(c)) { + ++*numchars; + if (args) + *args++ = *p; /* copy 2nd byte too */ + p++; /* skip over trail byte */ + } +#endif /* _MBCS */ + + } while ( (c != NULCHAR && (inquote || (c !=SPACECHAR && c != TABCHAR))) ); + + if ( c == NULCHAR ) { + p--; + } else { + if (args) + *(args-1) = NULCHAR; + } + + inquote = 0; + + /* loop on each argument */ + for(;;) { + + if ( *p ) { + while (*p == SPACECHAR || *p == TABCHAR) + ++p; + } + + if (*p == NULCHAR) + break; /* end of args */ + + /* scan an argument */ + if (argv) + *argv++ = args; /* store ptr to arg */ + ++*numargs; + +#ifdef WILDCARD + /* To handle later wild card expansion, we prefix each entry by + it's first character before quote handling. This is done + so _[w]cwild() knows whether to expand an entry or not. 
*/ + if (args) + *args++ = *p; + ++*numchars; + +#endif /* WILDCARD */ + + /* loop through scanning one argument */ + for (;;) { + copychar = 1; + /* Rules: 2N backslashes + " ==> N backslashes and begin/end quote + 2N+1 backslashes + " ==> N backslashes + literal " + N backslashes ==> N backslashes */ + numslash = 0; + while (*p == SLASHCHAR) { + /* count number of backslashes for use below */ + ++p; + ++numslash; + } + if (*p == DQUOTECHAR) { + /* if 2N backslashes before, start/end quote, otherwise + copy literally */ + if (numslash % 2 == 0) { + if (inquote && p[1] == DQUOTECHAR) { + p++; /* Double quote inside quoted string */ + } else { /* skip first quote char and copy second */ + copychar = 0; /* don't copy quote */ + inquote = !inquote; + } + } + numslash /= 2; /* divide numslash by two */ + } + + /* copy slashes */ + while (numslash--) { + if (args) + *args++ = SLASHCHAR; + ++*numchars; + } + + /* if at end of arg, break loop */ + if (*p == NULCHAR || (!inquote && (*p == SPACECHAR || *p == TABCHAR))) + break; + + /* copy character into argument */ +#ifdef _MBCS + if (copychar) { + if (args) { + if (_ismbblead(*p)) { + *args++ = *p++; + ++*numchars; + } + *args++ = *p; + } else { + if (_ismbblead(*p)) { + ++p; + ++*numchars; + } + } + ++*numchars; + } + ++p; +#else /* _MBCS */ + if (copychar) { + if (args) + *args++ = *p; + ++*numchars; + } + ++p; +#endif /* _MBCS */ + } + + /* null-terminate the argument */ + + if (args) + *args++ = NULCHAR; /* terminate string */ + ++*numchars; + } + + /* We put one last argument in -- a null ptr */ + if (argv) + *argv++ = NULL; + ++*numargs; +} + + + + static TCHAR **parseargstring (TCHAR *s) { - int cnt, i; - TCHAR **args; + TCHAR **p; + int numa, numc; if (_tcslen (s) == 0) return NULL; - args = xcalloc (sizeof (TCHAR*), MAX_ARGUMENTS + 1); - cnt = 0; - for (;;) { - TCHAR *p = s; - TCHAR *d, prev; - int skip = 0; - while (*p && _istspace (*p)) - p++; - if (*p == 0) - break; - if (*p == '\'' || *p == '"') { - TCHAR sc 
= *p; - p++; - s++; - while (*p && *p != sc) - p++; - skip = 1; - } else { - while (*p && !_istspace (*p)) - p++; - } - args[cnt] = d = xcalloc (p - s + 1, sizeof (TCHAR)); - memcpy (d, s, (p - s) * sizeof (TCHAR)); - prev = 0; - for (i = 0; d[i]; i++) { - TCHAR c = d[i]; - if (c == '\"' || c == '\'') { - memmove (&d[i], &d[i + 1], (_tcslen (&d[i + 1]) + 1) * sizeof (TCHAR)); - i--; - continue; - } - prev = c; - } - cnt++; - p += skip; - while (*p && _istspace (*p)) - p++; - if (*p == 0) - break; - if (cnt >= MAX_ARGUMENTS) - break; - s = p; - } - return args; + wparse_cmdline (s, NULL, NULL, &numa, &numc); + numa++; + p = xcalloc (numa * sizeof (TCHAR*) + numc * sizeof (TCHAR), 1); + wparse_cmdline (s, (wchar_t **)p, (wchar_t *)(((char *)p) + numa * sizeof(wchar_t *)), &numa, &numc); + if (numa > MAX_ARGUMENTS) + p[MAX_ARGUMENTS] = NULL; + return p; } static TCHAR **parseargstrings (TCHAR *s, TCHAR **xargv) diff --git a/od-win32/win32.h b/od-win32/win32.h index ed086bb8..f6a54f1a 100644 --- a/od-win32/win32.h +++ b/od-win32/win32.h @@ -17,8 +17,8 @@ #define WINUAEPUBLICBETA 1 -#define WINUAEBETA L"Beta 1" -#define WINUAEDATE MAKEBD(2009, 7, 16) +#define WINUAEBETA L"Beta 2" +#define WINUAEDATE MAKEBD(2009, 7, 25) #define WINUAEEXTRA L"" #define WINUAEREV L"" diff --git a/od-win32/winuaechangelog.txt b/od-win32/winuaechangelog.txt index 924bde22..ecf687dc 100644 --- a/od-win32/winuaechangelog.txt +++ b/od-win32/winuaechangelog.txt @@ -1,6 +1,55 @@ +Beta 2: -Beta 1: +Background information: + +After many days of logic analyzer work, blitter should finally be +100% cycle-exact, even in line mode. 
+ +DMA emulation cycle-exactness is greatly improved, most vector +routines that had slowdowns, flickering or graphics garbage should +work perfectly now (even Los Huivos/Virtual Dreams doubleglenz work, +this has shown only major graphics garbage previously) + +There are still some that won't still work or work even worse, +these could be caused by CPU emulation timing errors (even single +cycle difference in single instruction that is used regularly enough +can make the difference) + +"Standard test" programs still not working: Rampage/TEK, Hulkamania/TSP +(and left border garbage in one part of Absolute Inebriation/VD, reason +is known but I am not sure how to implement the fix) + +- "DMA cycle debugger", v <vpos> [<hpos>] lists selected scanline's DMA + activity, hpos, custom register, data, address. This made + compatibility testing much easier. (just compare this data to + logic analyzer data) Second row can contain extra characters: + N = blitter cycle given for CPU, b = blitter interrupt, B = + blitter finished. (not necessarily exactly same thing) + +- chipset bitplane/copper/blitter cycle exact mode DMA sequence + routines rewritten. Emulation may be much due to missing + optimizations. Later. +- blitter cycle diagrams rechecked with logic analyzer, errors fixed + +- CPU emulation fixes, lsl/asl/ror and friends had wrong cycle + counts. bclr/bset/bchg are 2 cycles shorter if bit number is less + than 16 (16-31 = add extra cycles) and more. Lots of testing to do.. + +- WASAPI sample format fallback didn't work correctly +- command line parser now really works exactly like Windows console +- added "win32.exec_before" and "win32.exec_after" configuration + entries, "before" command is run when emulation starts, "after" + when emulation shuts down or before another config is loaded. + Multiple commands can be separated with ';'. Waits until command + returns (=waits forever if program never exits..)
+- interlace artifact removal works again (b1) +- automatically handle split dms files. If extracted data is only + about half of standard DD disk and if file name is "*a.dms", + attempts to open and merge "*b.dms" with already unpacked data +- do not enumerate openal recording devices + +Beta 1: (too many changes, everything can break blahblah usual stuff) - command line filename will be detected as a disk image if file don't have known extensions but is small enough and first 3 bytes contain @@ -30,21 +79,21 @@ Beta 1: inside real PC partition) - added new RTG configuration panel, more space for future options, also old RTG setting panel was not really in correct place anymore -- command line parsing didn't handle quotes inside strings as Windows - does when running from command line +- command line parsing handled quotes inside strings differently than + Windows does when running from command line - added bitplane DMA fetches to copper debugger (there will be separate DMA sequence "disassembler" in future) - Agnus bitplane DMA behavior correctly emulated when number of planes - or resolution changes mid screen. Old code was totally wrong and too - complex.. Now all my test statefiles work 100% correctly: + or resolution changes mid screen. Old code (1.5x+) was totally wrong + and too complex.. 
Now all my test statefiles work 100% correctly: Disposable Hero, Bass-O-Matic/Crusaders, Innovation Part 2/Axxis Brian The Lion "dialog" screen is corrupted again but this seems to be AGA specific feature (SMD chips = no logic analyzer) - CD32 CD controller emulation improved, CDXL animations should run more smoothly now, previously emulation couldn't load more than few sectors - before cd driver detected error condition and retried + before CD32's cd driver detected error condition and retried - added seek delays to CD32 emulation (CD32 drive has really slow seeks) - CD32 Lotus Trilogy's Lotus 3 finally loads (stupid loader partially overwriting already loaded data if CD DMA "slot" sequencing is not @@ -57,7 +106,7 @@ Beta 1: converted to 2D sprite, now works correctly with D3D filters (future plan: convert all DirectDraw code to D3D 2D sprites) - OpenGL filter removed, totally obsolete now. Gone forever unless - someone updates it. (includes correct positioning and scaling) + someone updates it. (including correct positioning and scaling) - built-in WASAPI sound driver implemented. Replaces DirectSound if running on Vista or newer. 
WASAPI has two modes, shared and diff --git a/zfile.c b/zfile.c index f3bd2cc6..2f8627bc 100644 --- a/zfile.c +++ b/zfile.c @@ -542,7 +542,10 @@ static struct zfile *dms (struct zfile *z) TCHAR *orgname = zfile_getname (z); TCHAR *ext = _tcsrchr (orgname, '.'); TCHAR newname[MAX_DPATH]; + static int recursive; + if (recursive) + return NULL; if (ext) { _tcscpy (newname, orgname); _tcscpy (newname + _tcslen (newname) - _tcslen (ext), L".adf"); @@ -553,8 +556,27 @@ static struct zfile *dms (struct zfile *z) zo = zfile_fopen_empty (z, newname, 1760 * 512); if (!zo) return z; - ret = DMS_Process_File (z, zo, CMD_UNPACK, OPT_VERBOSE, 0, 0); + ret = DMS_Process_File (z, zo, CMD_UNPACK, OPT_VERBOSE, 0, 0, 0); if (ret == NO_PROBLEM || ret == DMS_FILE_END) { + int off = zfile_ftell (zo); + if (off >= 1760 * 512 / 3 && off <= 1760 * 512 * 3 / 4) { // possibly split dms? + if (_tcslen (orgname) > 5) { + TCHAR *s = orgname + _tcslen (orgname) - 5; + if (!_tcsicmp (s, L"a.dms")) { + TCHAR *fn2 = my_strdup (orgname); + struct zfile *z2; + fn2[_tcslen (fn2) - 5]++; + recursive++; + z2 = zfile_fopen (fn2, L"rb", z->zfdmask); + recursive--; + if (z2) { + ret = DMS_Process_File (z2, zo, CMD_UNPACK, OPT_VERBOSE, 0, 0, 1); + zfile_fclose (z2); + } + xfree (fn2); + } + } + } zfile_fclose (z); zfile_fseek (zo, 0, SEEK_SET); return zo; -- 2.47.3