From 40b39286a831eb3fe4dd6b327660cd1a82add7d1 Mon Sep 17 00:00:00 2001 From: Toni Wilen Date: Sat, 4 May 2019 20:34:46 +0300 Subject: [PATCH] UAE statefile loader. --- utilities/stateload/asm.S | 123 +++++ utilities/stateload/header.h | 55 ++ utilities/stateload/inflate.S | 778 ++++++++++++++++++++++++++ utilities/stateload/main.c | 970 +++++++++++++++++++++++++++++++++ utilities/stateload/makefile | 23 + utilities/stateload/readme.txt | 22 + 6 files changed, 1971 insertions(+) create mode 100644 utilities/stateload/asm.S create mode 100644 utilities/stateload/header.h create mode 100644 utilities/stateload/inflate.S create mode 100644 utilities/stateload/main.c create mode 100644 utilities/stateload/makefile create mode 100644 utilities/stateload/readme.txt diff --git a/utilities/stateload/asm.S b/utilities/stateload/asm.S new file mode 100644 index 00000000..3a0e7e75 --- /dev/null +++ b/utilities/stateload/asm.S @@ -0,0 +1,123 @@ + +CPU_CHUNK = 4 +CIAA_CHUNK = 8 +CIAB_CHUNK = 12 +CUSTOM_CHUNK = 16 +AGA_COLORS_CHUNK = 20 +FLOPPY_CHUNK = 24 +AUDIO_CHUNK = FLOPPY_CHUNK + 4*4 +SPRITE_CHUNK = AUDIO_CHUNK + 4*4 + + .text + .chip 68020 + .globl _runit + .globl _killsystem + .globl _callinflate + .globl _inflate + +_callinflate: + movem.l a4-a5,-(sp) + move.l 4+2*4(sp),a4 + move.l 8+2*4(sp),a5 + bsr _inflate + movem.l (sp)+,a4-a5 + rts + + | params: new stack 4, uaestate 8, func(uaestate) 12 +_killsystem: + move.l 8(sp),a0 | uaestate + move.l a6,a1 + move.l 4,a6 + move.l sp,d1 + move.l a5,a0 + lea .super(pc),a5 + jsr -0x1e(a6) | Supervisor +.super: + move.w #0x2700,sr + move.l a0,a5 + move.l a1,a6 + lea 0xdff000,a0 + + | CIA: stop timers, clear interrupts + bclr #0,0xbfde00 + bclr #0,0xbfdf00 + bclr #0,0xbfee01 + bclr #0,0xbfef01 + move.b #0x7f,0xbfdd00 + move.b #0x7f,0xbfed01 + tst.b 0xbfdd00 + tst.b 0xbfed01 + + move.w #0x7fff,0x96(a0) + move.w #0x7fff,0x9a(a0) + move.w #0x7fff,0x9c(a0) + move.w #0x7fff,0x9e(a0) + move.l d1,a0 + move.l 4(a0),sp | new temp super stack + 
move.l 8(a0),-(sp) | uaestate + move.l 12(a0),a0 | func + jsr (a0) | func(uaestate) + | never returns + +_runit: + move.l 4(sp),a4 | get pointer to struct uaestate + subq.l #8,sp + lea 0xdff000,a6 + + | wait for first line +.wait1: + move.l 4(a6),d0 + lsr.l #8,d0 + and.w #511,d0 + cmp.w #258,d0 + bne.s .wait1 +.wait2: + move.l 4(a6),d0 + lsr.l #8,d0 + and.w #511,d0 + bne.s .wait2 + + | restore possible side-effect causing + | custom bits as late as possible + move.l CIAA_CHUNK(a4),(sp) + clr.l 4(sp) + bsr _set_cia_final + move.l CIAB_CHUNK(a4),(sp) + addq.l #1,4(sp) + bsr _set_cia_final + move.l CUSTOM_CHUNK(a4),(sp) + bsr _set_custom_final + + | restore CPU state + move.l CPU_CHUNK(a4),a0 + move.l 4+4+60+4+2+2+4(a0),SP + move.l 4+4+60+4+2+2(a0),a1 | USP + move.l a1,USP + cmp.l #68020,(a0) + bcs .cpu68010 + lea 4+4+60+4+2+2+4+4+2+4+4+4+4(a0),a1 + move.l (a1)+,d0 + movec d0,CAAR + move.l (a1)+,d0 + movec d0,CACR + move.l (a1)+,d0 + movec d0,MSP +.cpu68010: + cmp.l #68010,(a0) + bcs .cpu68000 + lea 4+4+60+4+2+2+4+4+2+4(a0),a1 + move.l (a1)+,d0 + movec d0,DFC + move.l (a1)+,d0 + movec d0,SFC + move.l (a1)+,d0 + movec d0,VBR + move.w #0x0020,-(sp) +.cpu68000 : + move.l 4+4+60(a0),-(sp) | PC + move.w 4+4+60+4+2+2+4+4(a0),-(sp) | SR + movem.l 4+4(a0),d0-d7/a0-a6 + + move.w #0x1234,0xfc0000 + + rte | GO! GO! GO! 
diff --git a/utilities/stateload/header.h b/utilities/stateload/header.h new file mode 100644 index 00000000..82f47430 --- /dev/null +++ b/utilities/stateload/header.h @@ -0,0 +1,55 @@ + +#define TEMP_STACK_SIZE 8000 + +#define ALLOCATIONS 30 + +struct Allocation +{ + struct MemHeader *mh; + UBYTE *addr; + ULONG size; +}; + +struct MemoryBank +{ + UBYTE *addr; + ULONG flags; + UBYTE *targetaddr; + ULONG size; + ULONG targetsize; + ULONG offset; + UBYTE chunk[5]; +}; + +// CHIP, SLOW, FAST +#define MEMORY_REGIONS 3 +#define MB_CHIP 0 +#define MB_SLOW 1 +#define MB_FAST 2 + +struct uaestate +{ + ULONG flags; + UBYTE *cpu_chunk; + UBYTE *ciaa_chunk, *ciab_chunk; + UBYTE *custom_chunk; + UBYTE *aga_colors_chunk; + UBYTE *floppy_chunk[4]; + UBYTE *audio_chunk[4]; + UBYTE *sprite_chunk[8]; + + UBYTE *extra_ram; + ULONG extra_ram_size; + ULONG errors; + + struct MemHeader *mem_allocated[MEMORY_REGIONS]; + struct MemHeader *extra_mem_head; + UBYTE *extra_mem_pointer; + struct MemoryBank membanks[MEMORY_REGIONS]; + + int num_allocations; + struct Allocation allocations[ALLOCATIONS]; +}; + +void set_custom_final(UBYTE*); +void set_cia_final(UBYTE*, ULONG); diff --git a/utilities/stateload/inflate.S b/utilities/stateload/inflate.S new file mode 100644 index 00000000..bdd5a099 --- /dev/null +++ b/utilities/stateload/inflate.S @@ -0,0 +1,778 @@ + + .chip 68020 + .globl _inflate + + +/* + * inflate.S + * + * Decompression of DEFLATE streams, as produced by zip/gzip/pkzip and + * specified in RFC 1951 "DEFLATE Compressed Data Format Specification". + * + * Usage: Optionally configure the OPT_xxx options below at build time; + * at run time 'bsr inflate' with arguments: + * a4 = output buffer, a5 = input stream + * a6 = *end* of temporary storage area (only if OPT_STORAGE_OFFSTACK) + * All register values (including arguments) are preserved. + * + * Space requirements: 638-930 bytes code; 2044-2940 bytes stack. + * (NB1. 
Above ranges are [No Optimisations]-[All Optimisations]) + * (NB2. Stack space can be relocated to a separately-specified storage + * area, see OPT_STORAGE_OFFSTACK below) + * + * Timings: With all Optimisation Options enabled (see below) this routine + * will decompress on a basic 7MHz 68000 at ~25kB/s. An AmigaDOS track of + * data (5.5kB) is processed in ~220ms. This is only fractionally slower than + * the track can be fetched from disk, hence there is scope for a + * decompressing loader to keep CPU and disk both at near 100% utilisation. + * + * Written & released by Keir Fraser + * + * This is free and unencumbered software released into the public domain. + * See the file COPYING for more details, or visit <http://unlicense.org/>. + */ + +/* Optimisation Option #1: + * Avoid long Huffman-tree walks by indexing the first 8 bits of each codeword + * in a 256-entry lookup table. This shortens all walks by 8 steps and since + * the most common codes are less than 8 bits, most tree walks are avoided. + * Also pre-shifts selected symbols in the code->symbol table, ready to be used + * as indexes into further lookup tables. + * SPEEDUP: 41% (c.w. no Options); COST: 122 bytes code, 896 bytes stack */ +#ifndef OPT_TABLE_LOOKUP +#define OPT_TABLE_LOOKUP 1 +#endif + +/* Optimisation Option #2: + * Inline functions in the main decode loop to avoid all BSR/RTS pairs. + * SPEEDUP: 15% (on top of Option #1); COST: 164 bytes code */ +#ifndef OPT_INLINE_FUNCTIONS +#define OPT_INLINE_FUNCTIONS 1 +#endif + +/* Optimisation Option #3: + * Unroll the copy loop for <length,distance> tuples by one iteration + * (so two bytes are copied per iteration). + * SPEEDUP: ~1% (on top of Options #1 and #2); COST: 6 bytes code */ +#ifndef OPT_UNROLL_COPY_LOOP +#define OPT_UNROLL_COPY_LOOP 1 +#endif + +/* Storage Option: + * All but 12 bytes of this routine's space requirement can be allocated + * off stack, in a data area specified in register a6. 
+ * If this option is set then inflate must be called with a6 pointing at + * the *end* of the reserved storage area (+2032 or +2928 bytes, depending + * on whether OPT_TABLE_LOOKUP is enabled). + * SPEEDUP: none; COST: -2 bytes code (makes code slightly smaller) */ +#ifndef OPT_STORAGE_OFFSTACK +#define OPT_STORAGE_OFFSTACK 0 +#endif + +/* By default all lookup/conversion tables are generated on-the-fly on every + * call to inflate. In some cases this can be very inefficient. + * If this option is enabled then two new routines are generated: At start-of- + * day call 'inflate_gentables' with a6 pointing to the *end* of a 6000-byte + * block of memory. Then call 'inflate_fromtables' instead of 'inflate', with + * a6 still pointing to the end of the pre-generated memory block. + * SPEEDUP: variable; COST: 116 bytes code */ +#ifndef OPT_PREGENERATE_TABLES +#define OPT_PREGENERATE_TABLES 0 +#endif + +/* By default all registers are saved/restored across 'inflate' and + * 'inflate_fromtables'. This set can be reduced below. Note that if + * a4 is not saved then it will point at the end of the uncompressed output. + * If a5 is not saved then it will point at the end of the DEFLATE stream. */ +#ifndef SAVE_RESTORE_REGS +#define SAVE_RESTORE_REGS d0-d6/a0-a3 +#endif + +#if OPT_STORAGE_OFFSTACK +#define aS a6 +#else +#define aS sp +#endif + +/* Longest possible code. */ +#define MAX_CODE_LEN 16 + +/* (Maximum) alphabet sizes. */ +#define nr_codelen_symbols 19 +#define nr_litlen_symbols 288 +#define nr_distance_symbols 32 + +/* Alphabet-description stream for a static Huffman block (BTYPE=01b). 
*/ +static_huffman_prefix: + dc.b 0xff, 0x5b, 0x00, 0x6c, 0x03, 0x36, 0xdb + dc.b 0xb6, 0x6d, 0xdb, 0xb6, 0x6d, 0xdb, 0xb6 + dc.b 0xcd, 0xdb, 0xb6, 0x6d, 0xdb, 0xb6, 0x6d + dc.b 0xdb, 0xa8, 0x6d, 0xce, 0x8b, 0x6d, 0x3b + +#if OPT_TABLE_LOOKUP + +/* Number of bytes required for code-lookup table/tree: + * - 256 2-byte entries for the 8-bit lookup table + * - Worst-case only 8 symbols decode directly in the table and all the rest + * are in a tree hanging off one table entry. This tree requires + * (nr_symbols-8)-1 internal 4-byte nodes. */ +#define LOOKUP_BYTES(nr_syms) (256*2+((nr_syms)-9)*4) + + /* a0 = len[], a1 = nodes[], d0 = nr_symbols */ + /* d1 = symbol beyond which all symbols get <<2 */ + /* a2-a3 are scratched */ +build_code: + movem.l d0-d7,-(aS) + + /* Allocate space for bl_count[]/next_code[] array on stack. */ + moveq #(MAX_CODE_LEN+1)/2,d1 + moveq #0,d2 +1: move.l d2,-(aS) + dbf d1,1b + + /* Count occurrences of each code length into bl_count[] array. */ + subq.w #1,d0 + move.w d0,d1 + move.l a0,a2 /* a2 = &len[0] */ +1: move.b (a2)+,d2 /* d2 = len[i] */ +#if MC68020 + addq.w #1,(aS,d2.w*2) +#else + add.b d2,d2 + addq.w #1,(aS,d2.w) /* bl_count[len[i]]++ */ +#endif + dbf d1,1b + + /* Calculate next_code[] start values for each code length. 
*/ + move.l aS,a2 /* a2 = bl_count[] / next_code[] */ + moveq #MAX_CODE_LEN-1,d1 + moveq #0,d2 /* d2 = code */ + move.w d2,(aS) /* bl_count[0] = 0, ignore zero-length codes */ +1: add.w (a2),d2 + add.w d2,d2 /* code = (code + bl_count[i-1]) << 1 */ + move.w d2,(a2)+ /* next_code[i] = code */ + dbf d1,1b + + /* Create the Huffman-code lookup tree */ + move.w d0,d1 + moveq #127,d4 /* d4 = next_node = 127 */ + move.l a0,a2 /* a2 = &len[0] */ +1: moveq #0,d5 + move.b (a2)+,d5 /* d5 = len[i] / *len++ */ + jeq 4f + subq.w #1,d5 + move.w d5,d6 +#if MC68020 + move.w (aS,d6.w*2),d3 + addq.w #1,(aS,d6.w*2) +#else + add.w d6,d6 + move.w (aS,d6.w),d3 /* d3 = code = next_code[len[i]]++ */ + addq.w #1,(aS,d6.w) + move.w d5,d6 +#endif + + moveq #0,d2 +9: lsr.w #1,d3 + roxl.w #1,d2 + dbf d6,9b /* d5 = codelen-1; d2 = reversed code */ + move.b d2,d3 + add.w d3,d3 /* d3 = table offset */ + move.w d0,d6 + sub.w d1,d6 /* d6 = symbol */ + cmp.w (((MAX_CODE_LEN+1)/2)+1)*4+6(aS),d6 /* symbol > saved d1.w? */ + jls 9f + lsl.w #2,d6 /* symbol <<= 2 if so */ +9: cmp.b #9-1,d5 + jcc codelen_gt_8 + +codelen_le_8: /* codelen <= 8: leaf in table entry(s) */ + lsl.w #3,d6 + or.b d5,d6 /* d6 = (symbol<<3) | (codelen-1) */ + moveq #0,d2 + addq.b #2,d5 + bset d5,d2 /* d2 = 1<<(codelen+1) [table step] */ + move.w d2,d7 + neg.w d7 + and.w #511,d7 + or.w d7,d3 /* d3 = last table offset */ +9: move.w d6,(a1,d3.w) + sub.w d2,d3 + jcc 9b + jra 4f + +codelen_gt_8: /* codelen > 8: requires a tree walk */ + lsr.w #8,d2 + subq.b #8,d5 /* Skip the first 8 bits of code */ + lea (a1,d3.w),a3 /* pnode = table entry */ + +2: /* Walk through *pnode. 
*/ + move.w (a3),d7 /* d3 = *pnode */ + jne 3f + /* Link missing: Create a new internal node */ + addq.w #1,d4 + move.w d4,d7 + bset #15,d7 + move.w d7,(a3) /* *pnode = ++next_node | INTERNAL */ +3: /* Take left or right branch depending on next code bit */ + lsr.b #1,d2 + addx.w d7,d7 +#if MC68020 + lea (a1,d7.w*2),a3 +#else + add.w d7,d7 + lea (a1,d7.w),a3 /* pnode = next_bit ? &node->r : &node->l */ +#endif +3: dbf d5,2b + + /* Insert the current symbol as a new leaf node */ + move.w d6,(a3) /* *pnode = sym */ +4: dbf d1,1b + + lea (((MAX_CODE_LEN+1)/2)+1)*4(aS),aS + movem.l (aS)+,d0-d7 + rts + + /* d5-d6/a5 = stream, a0 = tree */ + /* d0.w = result, d1.l = scratch */ +.macro STREAM_NEXT_SYMBOL + moveq #0,d0 /* 4 */ + moveq #7,d1 /* 4 */ + cmp.b d1,d6 /* 4 */ + jhi 99f /* 10 */ + /* Less than 8 bits cached; grab another byte from the stream */ + move.b (a5)+,d0 /* [8] */ + lsl.w d6,d0 /* [~14] */ + or.w d0,d5 /* [4] */ /* s->cur |= *p++ << s->nr */ + addq.b #8,d6 /* [4] */ /* s->nr += 8 */ + moveq #0,d0 /* [4] */ +99: /* Use next input byte as index into code lookup table */ + move.b d5,d0 /* 4 */ +#if MC68020 + move.w (a0,d0.w*2),d0 +#else + add.w d0,d0 /* 4 */ + move.w (a0,d0.w),d0 /* 14 */ +#endif + jpl 99f /* 10 (taken) */ + /* Code is longer than 8 bits: do the remainder via a tree walk */ + lsr.w #8,d5 + subq.b #8,d6 /* consume 8 bits from the stream */ +98: /* stream_next_bits(1), inlined & optimised */ + subq.b #1,d6 /* 4 cy */ + jcc 97f /* 10 cy (taken) */ + move.b (a5)+,d5 /* [8 cy] */ + moveq #7,d6 /* [4 cy] */ +97: lsr.w #1,d5 /* 8 cy */ + addx.w d0,d0 /* 4 cy */ +#if MC68020 + move.w (a0,d0.w*2),d0 +#else + add.w d0,d0 /* 4 cy */ + move.w (a0,d0.w),d0 /* 14 cy */ +#endif + jmi 98b /* 10 cy (taken); loop on INTERNAL flag */ + jra 98f /* TOTAL LOOP CYCLES ~= 54 */ +99: /* Symbol found directly: consume bits and return symbol */ + and.b d0,d1 /* 4 */ + addq.b #1,d1 /* 4 */ + lsr.w d1,d5 /* ~16 */ /* consume bits from the stream */ + sub.b d1,d6 /* 4 */ 
+ lsr.w #3,d0 /* 12 */ /* d0 = symbol */ +98: /* ~94 CYCLES TOTAL [+ 34] */ +.endm + +#else /* !OPT_TABLE_LOOKUP */ + +/* Number of bytes required for code-lookup tree: + * - Every binary tree with N leaves has N-1 internal nodes. + * - Internal nodes require 4 bytes each. Leaves are free. */ +#define LOOKUP_BYTES(nr_syms) (((nr_syms)-1)*4) + + /* a0 = len[], a1 = nodes[], d0 = nr_symbols */ + /* a2-a3 are scratched */ +build_code: + movem.l d0-d5,-(aS) + + /* Allocate space for bl_count[]/next_code[] array on stack. */ + moveq #(MAX_CODE_LEN+1)/2,d1 + moveq #0,d2 +1: move.l d2,-(aS) + dbf d1,1b + + /* Count occurrences of each code length into bl_count[] array. */ + subq.w #1,d0 + move.w d0,d1 + move.l a0,a2 /* a2 = &len[0] */ +1: move.b (a2)+,d2 /* d2 = len[i] */ + add.b d2,d2 + addq.w #1,(aS,d2.w) /* bl_count[len[i]]++ */ + dbf d1,1b + + /* Calculate next_code[] start values for each code length. */ + move.l aS,a2 /* a2 = bl_count[] / next_code[] */ + moveq #MAX_CODE_LEN-1,d1 + moveq #0,d2 /* d2 = code */ + move.w d2,(aS) /* bl_count[0] = 0, ignore zero-length codes */ +1: add.w (a2),d2 + add.w d2,d2 /* code = (code + bl_count[i-1]) << 1 */ + move.w d2,(a2)+ /* next_code[i] = code */ + dbf d1,1b + + /* Create the Huffman-code lookup tree */ + move.w d0,d1 + moveq #0,d4 /* d4 = next_node */ + move.l a0,a2 /* a2 = &len[0] */ +1: moveq #0,d5 + move.b (a2)+,d5 /* d5 = len[i] / *len++ */ + jeq 4f + subq.w #1,d5 + add.w d5,d5 + move.w (aS,d5.w),d3 /* d3 = code = next_code[len[i]]++ */ + addq.w #1,(aS,d5.w) + lsr.w #1,d5 + /* Walk down the tree, creating nodes as necessary */ + moveq #0,d2 /* d2 = 0 (root node) */ + jra 3f + +2: /* Walk through *pnode. 
*/ + move.w (a3),d2 /* d2 = *pnode */ + jne 3f + /* Link missing: Create a new internal node */ + addq.w #1,d4 + move.w d4,d2 + bset #15,d2 + move.w d2,(a3) /* *pnode = ++next_node | INTERNAL */ +3: /* Take left or right branch depending on next code bit */ + lsl.w #2,d2 + btst d5,d3 + jeq 3f + addq.w #2,d2 +3: lea (a1,d2.w),a3 /* pnode = next_bit ? &node->r : &node->l */ + dbf d5,2b + + /* Insert the current symbol as a new leaf node */ + move.w d0,d2 + sub.w d1,d2 + move.w d2,(a3) /* *pnode = sym */ +4: dbf d1,1b + + lea (((MAX_CODE_LEN+1)/2)+1)*4(aS),aS + movem.l (aS)+,d0-d5 + rts + + /* d5-d6/a5 = stream, a0 = tree */ + /* d0.w = result */ +.macro STREAM_NEXT_SYMBOL + moveq #0,d0 +99: /* stream_next_bits(1), inlined & optimised */ + subq.b #1,d6 /* 4 cy */ + jcc 98f /* 10 cy (taken) */ + move.b (a5)+,d5 /* [8 cy] */ + moveq #7,d6 /* [4 cy] */ +98: lsr.w #1,d5 /* 8 cy */ + addx.w d0,d0 /* 4 cy */ + add.w d0,d0 /* 4 cy */ + move.w (a0,d0.w),d0 /* 14 cy */ + jmi 99b /* 10 cy (taken); loop on INTERNAL flag set */ + /* TOTAL LOOP CYCLES ~= 54 */ +.endm + +#endif + + /* d1.b = nr, d5-d6/a5 = stream [fetched_bits/nr_fetched_bits/inp] */ + /* d0.w = result */ +.macro STREAM_NEXT_BITS +99: moveq #0,d0 + cmp.b d1,d6 + jcc 99f /* while (s->nr < nr) */ + move.b (a5)+,d0 + lsl.l d6,d0 + or.l d0,d5 /* s->cur |= *p++ << s->nr */ + addq.b #8,d6 /* s->nr += 8 */ + jra 99b +99: bset d1,d0 + subq.w #1,d0 /* d0 = (1<<nr)-1 */ + and.w d5,d0 /* d0 = s->cur & ((1<<nr)-1) */ + lsr.l d1,d5 /* s->cur >>= nr */ + sub.b d1,d6 /* s->nr -= nr */ +.endm + +#if OPT_INLINE_FUNCTIONS +#define INLINE_stream_next_bits STREAM_NEXT_BITS +#define INLINE_stream_next_symbol STREAM_NEXT_SYMBOL +#else +#define INLINE_stream_next_bits jbsr stream_next_bits +#define INLINE_stream_next_symbol jbsr stream_next_symbol +#endif + +stream_next_bits: + STREAM_NEXT_BITS + rts + + /* d5-d6/a5 = stream, a4 = output */ + /* d0-d1 are scratched */ +uncompressed_block: +#if OPT_TABLE_LOOKUP + /* Push whole bytes back into input stream. 
*/ + lsr.w #3,d6 + sub.w d6,a5 +#else + /* No need to push bytes back into input stream because stream_next_ + * {bits,symbol} will never leave more than 7 bits cached. */ +#endif + /* Snap input stream up to byte boundary. */ + moveq #0,d5 + moveq #0,d6 + /* Read block header and copy LEN bytes. */ + moveq #16,d1 + jbsr stream_next_bits /* LEN */ + addq.w #2,a5 /* skip NLEN */ + subq.w #1,d0 /* d0.w = len-1 (for dbf) */ +1: move.b (a5)+,(a4)+ + dbf d0,1b + rts + +#define o_hdist /*0*/ +#define o_hlit 2 +#define o_lens (o_hlit+2) +#define o_codelen_tree (o_lens+nr_litlen_symbols+nr_distance_symbols) +#if OPT_TABLE_LOOKUP +/* Lit/len and codelen lookup structures share space. */ +#define o_litlen_tree o_codelen_tree +#else +#define o_litlen_tree (o_codelen_tree+LOOKUP_BYTES(nr_codelen_symbols)) +#endif +#define o_dist_tree (o_litlen_tree+LOOKUP_BYTES(nr_litlen_symbols)) +#define o_stream (o_dist_tree+LOOKUP_BYTES(nr_distance_symbols)) +#define o_frame (o_stream+3*4) +#if OPT_STORAGE_OFFSTACK +#define o_mode (o_frame) +#else +/* Allow for BSR return address from decoder */ +#define o_mode (o_frame+4) +#endif +#define o_dist_extra (o_mode+4) +#define o_length_extra (o_dist_extra+30*4) + + /* d5-d6/a5 = stream, a4 = output */ + /* d0-d4,a0-a3 are scratched */ +static_huffman: + movem.l d5-d6/a5,-(aS) + moveq #0,d5 + moveq #0,d6 + lea static_huffman_prefix(pc),a5 + move.w #o_stream/4-2,d0 + jra 1f + + /* d5-d6/a5 = stream, a4 = output */ + /* d0-d4,a0-a3 are scratched */ +dynamic_huffman: + /* Allocate stack space for len[] and node[] arrays */ + move.w #o_frame/4-2,d0 +1: moveq #0,d1 +1: move.l d1,-(aS) + dbf d0,1b + /* HLIT = stream_next_bits(5) + 257 */ + moveq #5,d1 + jbsr stream_next_bits + add.w #257,d0 + move.w d0,-(aS) + /* HDIST = stream_next_bits(5) + 1 */ + moveq #5,d1 + jbsr stream_next_bits + addq.w #1,d0 + move.w d0,-(aS) + /* HCLEN = stream_next_bits(4) + 4 */ + moveq #4,d1 + jbsr stream_next_bits + addq.w #4-1,d0 /* -1 for dbf */ + /* Initialise len[] 
array with code-length symbol code lengths */ + lea codelen_order(pc),a1 + lea o_lens(aS),a0 /* a0 = len[] */ + moveq #0,d2 + move.w d0,d3 +1: moveq #3,d1 + jbsr stream_next_bits + move.b (a1)+,d2 + move.b d0,(a0,d2.w) /* len[codelen_order[i++]] = next_bits(3) */ + dbf d3,1b + /* Build the codelen_tree */ + lea o_codelen_tree(aS),a1 + moveq #nr_codelen_symbols,d0 +#if OPT_TABLE_LOOKUP + moveq #127,d1 /* don't left-shift any symbols */ +#endif + jbsr build_code /* build_code(codelen_tree) */ + /* Read the literal/length & distance code lengths */ + move.w o_hlit(aS),d2 + add.w o_hdist(aS),d2 + subq.w #1,d2 /* d2 = hlit+hdist-1 */ + move.l a0,a2 /* a2 = len[] */ + move.l a1,a0 /* a0 = a1 = codelen_tree */ +1: INLINE_stream_next_symbol + cmp.b #16,d0 + jcs c_lit + jeq c_16 + cmp.b #17,d0 + jeq c_17 +c_18: /* 18: Repeat zero N times */ + moveq #7,d1 + jbsr stream_next_bits + addq.w #11-3,d0 + jra 2f +c_17: /* 17: repeat zero N times */ + moveq #3,d1 + jbsr stream_next_bits +2: moveq #0,d1 + jra 3f +c_16: /* 16: repeat previous N times */ + moveq #2,d1 + jbsr stream_next_bits + move.b -1(a2),d1 +3: addq.w #3-1,d0 + sub.w d0,d2 +4: move.b d1,(a2)+ + dbf d0,4b + jra 5f +c_lit: /* 0-16: Literal symbol */ + move.b d0,(a2)+ +5: dbf d2,1b + /* Build the lit/len and distance trees */ +#if OPT_TABLE_LOOKUP + /* Clear the codelen tree (shared space with lit/len tree). + * NB. a0 = a1 = codelen_tree = litlen_tree */ + moveq #0,d0 + move.w #LOOKUP_BYTES(nr_codelen_symbols)/4-1,d1 +1: move.l d0,(a0)+ + dbf d1,1b + /* litlen_tree (= codelen_tree) is already in a1, and now zeroed. 
*/ +#else + lea o_litlen_tree(aS),a1 +#endif + lea o_lens(aS),a0 + move.w o_hlit(aS),d0 +#if OPT_TABLE_LOOKUP + move.w #256,d1 + move.w d1,d4 /* left-shift symbols >127 (i.e., lengths) */ +#endif + jbsr build_code /* build_code(litlen_tree) */ + add.w d0,a0 + lea o_dist_tree(aS),a1 + move.w o_hdist(aS),d0 +#if OPT_TABLE_LOOKUP + moveq #0,d1 /* left-shift all symbols (i.e., distances) */ +#endif + jbsr build_code /* build_code(dist_tree) */ + /* Reinstate the main stream if we used the static prefix */ + tst.l o_stream+8(aS) + jeq decode_loop + movem.l o_stream(aS),d5-d6/a5 + /* Now decode the compressed data stream up to EOB */ +decode_loop: + lea o_litlen_tree(aS),a0 + /* START OF HOT LOOP */ +2: INLINE_stream_next_symbol /* litlen_sym */ +#if OPT_TABLE_LOOKUP + cmp.w d4,d0 /* 4 cy (d4.w = 256) */ +#else + cmp.w #256,d0 /* 8 cy */ +#endif + jcc 2f /* 8 cy */ + /* 0-255: Byte literal */ + move.b d0,(a4)+ /* 8 cy */ + jra 2b /* 10 cy */ + /* END OF HOT LOOP -- 30 + ~108 + [34] = ~160 CYCLES */ +9: /* 256: End-of-block: we're done */ + lea o_frame(aS),aS + rts +2: jeq 9b + /* 257+: pair */ +#if !OPT_TABLE_LOOKUP /* Already shifted in case of OPT_TABLE_LOOKUP */ + lsl.w #2,d0 +#endif + lea o_length_extra-257*4(aS),a2 + add.w d0,a2 + move.w (a2)+,d1 + INLINE_stream_next_bits + add.w (a2),d0 + move.w d0,d3 /* d3 = cplen */ + lea o_dist_tree(aS),a0 + INLINE_stream_next_symbol /* dist_sym */ +#if !OPT_TABLE_LOOKUP /* Already shifted in case of OPT_TABLE_LOOKUP */ + lsl.w #2,d0 +#endif + lea o_dist_extra(aS),a2 + add.w d0,a2 + move.w (a2)+,d1 + INLINE_stream_next_bits + add.w (a2),d0 /* d0 = cpdst */ + move.l a4,a0 + sub.w d0,a0 /* a0 = outp - cpdst */ +#if OPT_UNROLL_COPY_LOOP + lsr.w #1,d3 + jcs 4f + subq.w #1,d3 +3: move.b (a0)+,(a4)+ +4: move.b (a0)+,(a4)+ +#else + subq.w #1,d3 +3: move.b (a0)+,(a4)+ +#endif + dbf d3,3b + jra decode_loop + +#if !OPT_INLINE_FUNCTIONS +stream_next_symbol: + STREAM_NEXT_SYMBOL + rts +#endif + + /* Build a base/extra-bits table on the 
stack. + * d0 = #pairs-1, d2 = max_value, d4 = log_2(extrabits_repeat) */ +build_base_extrabits: +#if !OPT_STORAGE_OFFSTACK + move.l (sp)+,a0 +#endif +1: move.w d0,d3 + lsr.w d4,d3 + subq.w #1,d3 + jcc 2f + moveq #0,d3 +2: moveq #0,d1 + bset d3,d1 /* d1 = 1 << extrabits */ + sub.w d1,d2 /* d2 = base */ + move.w d2,-(aS) + move.w d3,-(aS) + dbf d0,1b +#if !OPT_STORAGE_OFFSTACK + jmp (a0) +#else + rts +#endif + +dispatch: /* Decoder dispatch table. */ + dc.b uncompressed_block - uncompressed_block + dc.b static_huffman - uncompressed_block + dc.b dynamic_huffman - uncompressed_block + +codelen_order: /* Order of code lengths for the code length alphabet. */ + dc.b 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 + + /* a4 = output, a5 = input, all regs preserved + * a6 = *end* of storage area (only if OPT_STORAGE_OFFSTACK) */ +_inflate: + movem.l SAVE_RESTORE_REGS,-(aS) + + /* Build the base/extra-bits table */ + move.l #258,d2 + move.l d2,-(aS) + addq.w #1,d2 + moveq #27,d0 + moveq #2,d4 + jbsr build_base_extrabits + + /* Build the base/extra-bits table */ + move.w #32769,d2 + moveq #29,d0 + moveq #1,d4 + jbsr build_base_extrabits + + /* Initialise the stream */ + moveq #0,d5 /* d5 = stream: fetched data */ + moveq #0,d6 /* d6 = stream: nr fetched bits */ + +1: /* Process a block: Grab the BTYPE|BFINAL 3-bit code */ + moveq #3,d1 + jbsr stream_next_bits + move.l d0,-(aS) + /* Dispatch to the correct decoder for this block */ + lsr.b #1,d0 + move.b dispatch(pc,d0.w),d0 + lea uncompressed_block(pc),a0 + jsr (a0,d0.w) + /* Keep going until we see BFINAL=1 */ + move.l (aS)+,d0 + lsr.b #1,d0 + jcc 1b + + /* Pop the base/extra-bits lookup tables */ + lea (30+29)*4(aS),aS + + movem.l (aS)+,SAVE_RESTORE_REGS + rts + +#if OPT_PREGENERATE_TABLES +pregen_static_huffman: + lea -o_frame(aS),aS /* frame pre-generated; skip over it */ + move.w #256,d4 + jra decode_loop +pregen_dynamic_huffman: + move.l (aS),d0 + lea -3000(aS),aS /* move to dynamic-huffman frame */ 
+ move.l d0,(aS) /* copy o_mode into it */ + jbsr dynamic_huffman + lea 3000(aS),aS + rts + + /* Pre-generate conversion tables for Inflate. */ + /* a6 = Pointer to end of 6000-byte block of memory to contain + * pre-generated tables. All registers preserved. */ +inflate_gentables: + movem.l a5-a6,-(sp) + lea pregen_dummy_block(pc),a5 + jbsr inflate /* static block */ + lea -3000(aS),aS + lea pregen_dummy_block(pc),a5 + jbsr inflate /* dynamic block */ + movem.l (sp)+,a5-a6 + rts + + /* Inflate, using pre-generated tables. */ + /* a4 = output, a5 = input, all regs preserved + * a6 = *end* of 6000-byte pre-generated storage area */ +inflate_fromtables: + movem.l SAVE_RESTORE_REGS,-(aS) + + /* Skip the pre-generated base/extra-bits lookup tables */ + lea -(30+29)*4(aS),aS + + /* Initialise the stream */ + moveq #0,d5 /* d5 = stream: fetched data */ + moveq #0,d6 /* d6 = stream: nr fetched bits */ + +1: /* Process a block: Grab the BTYPE|BFINAL 3-bit code */ + moveq #3,d1 + jbsr stream_next_bits + move.l d0,-(aS) + /* Dispatch to the correct decoder for this block */ + and.b #0xfe,d0 + move.w pregen_dispatch(pc,d0.w),d0 + lea uncompressed_block(pc),a0 + jsr (a0,d0.w) + /* Keep going until we see BFINAL=1 */ + move.l (aS)+,d0 + lsr.b #1,d0 + jcc 1b + + /* Pop the base/extra-bits lookup tables */ + lea (30+29)*4(aS),aS + + movem.l (aS)+,SAVE_RESTORE_REGS + rts + +pregen_dispatch: + dc.w uncompressed_block - uncompressed_block + dc.w pregen_static_huffman - uncompressed_block + dc.w pregen_dynamic_huffman - uncompressed_block +pregen_dummy_block: /* A single static block containing EOB symbol only */ + dc.b 0x03,0x00 +#endif /* OPT_PREGENERATE_TABLES */ + +#undef o_hdist +#undef o_hlit +#undef o_lens +#undef o_codelen_tree +#undef o_litlen_tree +#undef o_dist_tree +#undef o_frame diff --git a/utilities/stateload/main.c b/utilities/stateload/main.c new file mode 100644 index 00000000..8a364eda --- /dev/null +++ b/utilities/stateload/main.c @@ -0,0 +1,970 @@ + +/* Real 
hardware UAE state file loader */ +/* Copyright 2019 Toni Wilen */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "header.h" + +extern struct GfxBase *GfxBase; +extern struct DosLibrary *DosBase; + +static const char *const chunknames[] = +{ + "ASF ", + "CPU ", "CHIP", "AGAC", + "CIAA", "CIAB", "ROM ", + "DSK0", "DSK1", "DSK2", "DSK3", + "AUD0", "AUD1", "AUD2", "AUD3", + "END ", + NULL +}; +static const char *const memchunknames[] = +{ + "CRAM", "BRAM", "FRAM", + NULL +}; + + +/* Read a big-endian 32-bit value from chunk[offset..offset+3]. + * Each byte is widened to ULONG before shifting so the result does not + * depend on the width or signedness of int. */ +static ULONG getlong(UBYTE *chunk, int offset) +{ + chunk += offset; + return ((ULONG)chunk[0] << 24) | ((ULONG)chunk[1] << 16) | ((ULONG)chunk[2] << 8) | (ULONG)chunk[3]; +} +/* Read a big-endian 16-bit value from chunk[offset..offset+1], + * zero-extended to ULONG. */ +static ULONG getword(UBYTE *chunk, int offset) +{ + chunk += offset; + return ((ULONG)chunk[0] << 8) | (ULONG)chunk[1]; +} + +static void set_agacolor(UBYTE *p) +{ + volatile struct Custom *c = (volatile struct Custom*)0xdff000; + + int aga = (c->vposr & 0x0f00) == 0x0300; + if (!aga) + return; + + for (int i = 0; i < 8; i++) { + for (int k = 0; k < 2; k++) { + c->bplcon3 = (i << 13) | (k ? 
(1 << 9) : 0); + for (int j = 0; j < 32; j++) { + // bank i holds palette entries i*32 .. i*32+31 + ULONG c32 = getlong(p, (i * 32 + j) * 4); + // k == 0 (LOCT clear): write the high nibble of each component, + // k == 1 (LOCT set): write the low nibble of each component + if (!k) + c32 >>= 4; + UWORD col = ((c32 & 0x00000f) << 0) | ((c32 & 0x000f00) >> 4) | ((c32 & 0x0f0000) >> 8); + c->color[j] = col; + } + } + } +} + +static void wait_lines(WORD lines) +{ + volatile struct Custom *c = (volatile struct Custom*)0xdff000; + + UWORD line = c->vhposr & 0xff00; + while (lines-- > 0) { + for (;;) { + UWORD line2 = c->vhposr & 0xff00; + if (line == line2) + continue; + line = line2; + break; + } + } +} + +static void step_floppy(void) +{ + volatile struct CIA *ciab = (volatile struct CIA*)0xbfd000; + ciab->ciaprb &= ~CIAF_DSKSTEP; + // delay + ciab->ciaprb &= ~CIAF_DSKSTEP; + ciab->ciaprb |= CIAF_DSKSTEP; + wait_lines(300); +} + +static void set_floppy(UBYTE *p, ULONG num) +{ + ULONG id = getlong(p, 0); + UBYTE state = p[4]; + UBYTE track = p[5]; + + // drive disabled? + if (state & 2) + return; + // invalid track? + if (track >= 80) + return; + + volatile struct CIA *ciaa = (volatile struct CIA*)0xbfe001; + volatile struct CIA *ciab = (volatile struct CIA*)0xbfd000; + + ciab->ciaprb = 0xff; + + // motor on? 
+ if (state & 1) { + ciab->ciaprb &= ~CIAF_DSKMOTOR; + } + // select drive + ciab->ciaprb &= ~(CIAF_DSKSEL0 << num); + + wait_lines(100); + // step until the track 0 sensor reports: up to 80 polls + int seekcnt = 80; + while (seekcnt-- > 0) { + if (!(ciaa->ciapra & CIAF_DSKTRACK0)) + break; + step_floppy(); + } + wait_lines(100); + // seekcnt is -1 only when the loop ran out without seeing track 0; + // a break on the last poll leaves it at 0 and is still a success + if (seekcnt < 0) { + // no track0 after 80 steps: drive missing or not responding + ciab->ciaprb |= CIAF_DSKMOTOR; + ciab->ciaprb |= CIAF_DSKSEL0 << num; + return; + } + + ciab->ciaprb &= ~CIAF_DSKDIREC; + wait_lines(100); + for (UBYTE i = 0; i < track; i++) { + step_floppy(); + } + + ciab->ciaprb |= CIAF_DSKSEL0 << num; +} + +static void set_audio(UBYTE *p, ULONG num) +{ + volatile UWORD *c = (volatile UWORD*)(0xdff0a0 + 16 * num); + c[8 / 2] = p[1]; // AUDxVOL + c[4 / 2] = getword(p, 1 + 1 + 1 + 1 + 2); // AUDxLEN + c[6 / 2] = getword(p, 1 + 1 + 1 + 1 + 2 + 2 + 2); // AUDxPER + c[0 / 2] = getword(p, 1 + 1 + 1 + 1 + 2 + 2 + 2 + 2); // AUDxLCH + c[2 / 2] = getword(p, 1 + 1 + 1 + 1 + 2 + 2 + 2 + 2 + 2); // AUDxLCL +} + +static void set_sprite(UBYTE *p, ULONG num) +{ + volatile UWORD *cpt = (volatile UWORD*)(0xdff120 + 4 * num); + volatile UWORD *c = (volatile UWORD*)(0xdff140 + 8 * num); + + cpt[0 / 2] = getword(p, 0); // SPRxPTH + cpt[2 / 2] = getword(p, 2); // SPRxPTL + c[0 / 2] = getword(p, 2 + 2); // SPRxPOS + c[2 / 2] = getword(p, 2 + 2 + 2); // SPRxCTL +} + +static void set_custom(UBYTE *p) +{ + volatile UWORD *c = (volatile UWORD*)0xdff000; + p += 4; + for (WORD i = 0; i < 0x1fe; i += 2, c++) { + + // sprites + if (i >= 0x120 && i < 0x180) + continue; + + // audio + if (i >= 0xa0 && i < 0xe0) + continue; + + // skip blitter start, DMACON and INTENA + if (i == 0x58 || i == 0x5e || i == 0x96 || i == 0x9a) { + p += 2; + continue; + } + + // skip programmed sync registers except BEAMCON0 + if (i >= 0x1c0 && i != 0x1fc && i != 0x1dc) { + p += 2; + continue; + } + + UWORD v = getword(p, 0); + p += 2; + + // BEAMCON0: PAL/NTSC only + if (i == 0x1dc) + v &= 0x20; + // ADKCON + if (i == 0x9e) + v 
|= 0x8000; + + *c = v; + } +} + +void set_custom_final(UBYTE *p) +{ + volatile struct Custom *c = (volatile struct Custom*)0xdff000; + c->intena = 0x7fff; + c->intreq = 0x7fff; + c->dmacon = 0x7fff; + c->dmacon = getword(p, 4 + 0x96) | 0x8000; + c->intena = getword(p, 4 + 0x9a) | 0x8000; + c->intreq = getword(p, 4 + 0x9c) | 0x8000; +} + +static void set_cia(UBYTE *p, ULONG num) +{ + volatile struct CIA *cia = (volatile struct CIA*)(num ? 0xbfd000 : 0xbfe001); + volatile struct Custom *c = (volatile struct Custom*)0xdff000; + + cia->ciacra &= ~(CIACRAF_START | CIACRAF_RUNMODE); + cia->ciacrb &= ~(CIACRBF_START | CIACRBF_RUNMODE); + UBYTE dummy = cia->ciaicr; + cia->ciaicr = 0x7f; + c->intreq = 0x7fff; + + UBYTE flags = p[16 + 1 + 2 * 2 + 3 + 3]; + + cia->ciapra = p[0]; + cia->ciaprb = p[1]; + cia->ciaddra = p[2]; + cia->ciaddrb = p[3]; + + // load timers + cia->ciatalo = p[4]; + cia->ciatahi = p[5]; + cia->ciatblo = p[6]; + cia->ciatbhi = p[7]; + cia->ciacra |= CIACRAF_LOAD; + cia->ciacrb |= CIACRBF_LOAD; + // load timer latches + cia->ciatalo = p[16 + 1]; + cia->ciatahi = p[16 + 2]; + cia->ciatblo = p[16 + 3]; + cia->ciatbhi = p[16 + 4]; + + // load alarm + UBYTE *alarm = &p[16 + 1 + 2 * 2 + 3]; + cia->ciacrb |= CIACRBF_ALARM; + if (flags & 2) { + // leave latched + cia->ciatodlow = alarm[0]; + cia->ciatodmid = alarm[1]; + cia->ciatodhi = alarm[2]; + } else { + cia->ciatodhi = alarm[2]; + cia->ciatodmid = alarm[1]; + cia->ciatodlow = alarm[0]; + } + cia->ciacrb &= ~CIACRBF_ALARM; + + // load tod + UBYTE *tod = &p[8]; + if (flags & 1) { + // leave latched + cia->ciatodlow = tod[0]; + cia->ciatodmid = tod[1]; + cia->ciatodhi = tod[2]; + } else { + cia->ciatodhi = tod[2]; + cia->ciatodmid = tod[1]; + cia->ciatodlow = tod[0]; + } +} + +void set_cia_final(UBYTE *p, ULONG num) +{ + volatile struct CIA *cia = (volatile struct CIA*)(num ? 
0xbfd000 : 0xbfe001); + UBYTE dummy = cia->ciaicr; + cia->ciacra = p[14] & ~CIACRAF_LOAD; + cia->ciacrb = p[15] & ~CIACRBF_LOAD; + cia->ciaicr = p[13] | CIAICRF_SETCLR; +} + +static void free_allocations(struct uaestate *st) +{ + for (int i = st->num_allocations - 1; i >= 0; i--) { + struct Allocation *a = &st->allocations[i]; + if (a->mh) { + Deallocate(a->mh, a->addr, a->size); + } else { + FreeMem(a->addr, a->size); + } + } +} + +static UBYTE *extra_allocate(ULONG size, struct uaestate *st) +{ + UBYTE *b; + + for (;;) { + b = AllocAbs(size, st->extra_mem_pointer); + if (b) { + struct Allocation *a = &st->allocations[st->num_allocations++]; + a->addr = b; + a->size = size; + st->extra_mem_pointer += (size + 7) & ~7; + return b; + } + st->extra_mem_pointer += 8; + if (st->extra_mem_pointer + size >= st->extra_ram + st->extra_ram_size) + return NULL; + } +} + +// allocate from extra mem +static UBYTE *tempmem_allocate(ULONG size, struct uaestate *st) +{ + UBYTE *b = NULL; + if (st->extra_mem_head) { + b = Allocate(st->extra_mem_head, size); + if (b) { + struct Allocation *a = &st->allocations[st->num_allocations++]; + a->mh = st->extra_mem_head; + a->addr = b; + a->size = size; + } + } + if (!b) { + b = extra_allocate(size, st); + } + return b; +} + +// allocate from statefile reserved bank index +static UBYTE *tempmem_allocate_reserved(ULONG size, WORD index, struct uaestate *st) +{ + struct MemoryBank *mb = &st->membanks[index]; + if (!mb->targetsize) + return NULL; + UBYTE *addr = mb->targetaddr; + for (;;) { + addr += 65536; + if (addr - mb->targetaddr + size >= mb->targetsize) + return NULL; + UBYTE *b = AllocAbs(size, addr); + if (b) { + struct Allocation *a = &st->allocations[st->num_allocations++]; + a->addr = b; + a->size = size; + return b; + } + } +} + +static void load_memory(FILE *f, WORD index, struct uaestate *st) +{ + struct MemoryBank *mb = &st->membanks[index]; + ULONG oldoffset = ftell(f); + ULONG chunksize = mb->size + 12; + fseek(f, mb->offset, 
SEEK_SET); + printf("Memory '%s', size %luk, offset %lu. Target %08lx.\n", mb->chunk, chunksize >> 10, mb->offset, mb->targetaddr); + // if Chip RAM and space in another statefile block? Put it there because chip ram is decompressed first. + if (index == MB_CHIP) { + mb->addr = tempmem_allocate_reserved(chunksize, MB_SLOW, st); + if (!mb->addr) + mb->addr = tempmem_allocate_reserved(chunksize, MB_FAST, st); + } else if (index == MB_SLOW) { + mb->addr = tempmem_allocate_reserved(chunksize, MB_FAST, st); + } + if (!mb->addr) + mb->addr = tempmem_allocate(chunksize, st); + if (mb->addr) { + printf(" - Address %08lx - %08lx.\n", mb->addr, mb->addr + chunksize - 1); + int v = fread(mb->addr, 1, chunksize, f); + if (v != chunksize) { + printf("ERROR: Read error (%lu != %lu).\n", v, chunksize); + st->errors++; + } + } else { + printf("ERROR: Out of memory.\n"); + st->errors++; + } + fseek(f, oldoffset, SEEK_SET); +} + +static int read_chunk_head(FILE *f, UBYTE *cnamep, ULONG *sizep, ULONG *flagsp) +{ + ULONG size = 0, flags = 0; + UBYTE cname[5]; + + *flagsp = 0; + *sizep = 0; + cnamep[0] = 0; + if (fread(cname, 1, 4, f) != 4) { + return 0; + } + cname[4] = 0; + strcpy(cnamep, cname); + + if (fread(&size, 1, 4, f) != 4) { + cnamep[0] = 0; + return 0; + } + + if (fread(&flags, 1, 4, f) == 0) { + return 1; + } + + if (size < 8) + return 1; + + if (size < 12) { + size = 0; + flags = 0; + } else { + size -= 12; + } + *sizep = size; + *flagsp = flags; + return 1; +} + +static UBYTE *load_chunk(FILE *f, UBYTE *cname, ULONG size, struct uaestate *st) +{ + UBYTE *b = NULL; + int acate = 0; + + //printf("Allocating %lu bytes for '%s'.\n", size, cname); + + b = tempmem_allocate(size, st); + + //printf("Reading chunk '%s', %lu bytes to address %08x\.n", cname, size, b); + + if (!b) { + printf("ERROR: Not enough memory (%ul bytes required).\n", size); + return NULL; + } + + if (fread(b, 1, size, f) != size) { + printf("ERROR: Read error.\n"); + return NULL; + } + + fseek(f, 4 - (size 
& 3), SEEK_CUR); + + return b; +} + +static UBYTE *read_chunk(FILE *f, UBYTE *cname, ULONG *sizep, ULONG *flagsp, struct uaestate *st) +{ + ULONG size, orgsize, flags; + + if (!read_chunk_head(f, cname, &size, &flags)) + return NULL; + orgsize = size; + *flagsp = flags; + + if (size == 0) + return NULL; + + ULONG maxsize = 0x7fffffff; + int found = 0; + for (int i = 0; chunknames[i]; i++) { + if (!strcmp(cname, chunknames[i])) { + found = 1; + printf("Reading chunk '%s', %lu bytes, flags %08x.\n", cname, size, flags); + break; + } + } + if (!found) { + // read only header if memory chunk + for (int i = 0; memchunknames[i]; i++) { + if (!strcmp(cname, memchunknames[i])) { + found = 1; + maxsize = 16; + printf("Checking memory chunk '%s', %lu bytes, flags %08x.\n", cname, size, flags); + break; + } + } + } + + if (!found) { + //printf("Skipped chunk '%s', %ld bytes, flags %08x\n", cname, size, flags); + fseek(f, size, SEEK_CUR); + if (size) + fseek(f, 4 - (size & 3), SEEK_CUR); + return NULL; + } + + *sizep = size; + if (size > maxsize) + size = maxsize; + UBYTE *chunk = malloc(size); + if (!chunk) { + printf("ERROR: Not enough memory.\n"); + return NULL; + } + if (fread(chunk, 1, size, f) != size) { + printf("ERROR: Read error.\n"); + free(chunk); + return NULL; + } + if (orgsize > size) { + fseek(f, orgsize - size, SEEK_CUR); + } + fseek(f, 4 - (orgsize & 3), SEEK_CUR); + return chunk; +} + +static void find_extra_ram(struct uaestate *st) +{ + Forbid(); + struct MemHeader *mh = (struct MemHeader*)SysBase->MemList.lh_Head; + while (mh->mh_Node.ln_Succ) { + ULONG mstart = ((ULONG)mh->mh_Lower) & 0xffff0000; + ULONG msize = ((((ULONG)mh->mh_Upper) + 0xffff) & 0xffff0000) - mstart; + int i; + for (i = 0; i < MEMORY_REGIONS; i++) { + if (st->mem_allocated[i] == mh) + break; + } + if (i == MEMORY_REGIONS) { + if (msize > st->extra_ram_size) { + st->extra_ram = (UBYTE*)mstart; + st->extra_ram_size = msize; + st->extra_mem_head = mh; + } + } + mh = (struct 
MemHeader*)mh->mh_Node.ln_Succ; + } + Permit(); +} + +static ULONG check_ram(UBYTE *cname, UBYTE *chunk, WORD index, ULONG addr, ULONG offset, ULONG chunksize, ULONG flags, struct uaestate *st) +{ + ULONG size; + if (flags & 1) // compressed + size = getlong(chunk, 0); + else + size = chunksize; + printf("Statefile RAM: Address %08x, size %luk.\n", addr, size >> 10); + int found = 0; + ULONG mstart, msize; + Forbid(); + struct MemHeader *mh = (struct MemHeader*)SysBase->MemList.lh_Head; + while (mh->mh_Node.ln_Succ) { + mstart = ((ULONG)mh->mh_Lower) & 0xffff0000; + msize = ((((ULONG)mh->mh_Upper) + 0xffff) & 0xffff0000) - mstart; + if (mstart == addr) { + if (msize >= size) + found = 1; + else + found = -1; + break; + } + mh = (struct MemHeader*)mh->mh_Node.ln_Succ; + } + Permit(); + if (!found) { + printf("ERROR: Not found in this system.\n"); + st->errors++; + return 0; + } + st->mem_allocated[index] = mh; + struct MemoryBank *mb = &st->membanks[index]; + mb->size = chunksize; + mb->offset = offset; + mb->targetaddr = (UBYTE*)addr; + mb->targetsize = msize; + mb->flags = flags; + strcpy(mb->chunk, cname); + printf("- Detected memory at %08x, total size %luk.\n", mstart, msize >> 10); + if (found > 0) { + printf("- Is usable (%luk required, %luk unused, offset %lu).\n", size >> 10, (msize - size) >> 10, offset); + ULONG extrasize = msize - size; + if (extrasize >= 524288) { + if ((mstart >= 0x00200000 && st->extra_ram < (UBYTE*)0x00200000) || extrasize > st->extra_ram_size) { + st->extra_ram = (UBYTE*)(mstart + size); + st->extra_ram_size = extrasize; + } + } + return 1; + } + printf("ERROR: Not enough memory available (%luk required).\n", size >> 10); + st->errors++; + return 0; +} + +static void floppy_info(int num, UBYTE *p) +{ + UBYTE state = p[4]; + UBYTE track = p[5]; + if (state & 2) // disabled + return; + printf("DF%d: Track %d, '%s'.\n", num, track, &p[4 + 1 + 1 + 1 + 1 + 4 + 4]); +} + +static void check_rom(UBYTE *p, struct uaestate *st) +{ + UWORD ver 
= getword(p, 4 + 4 + 4); + UWORD rev = getword(p, 4 + 4 + 4 + 2); + + UWORD *rom = (UWORD*)0xf80000; + UWORD rver = rom[12 / 2]; + UWORD rrev = rom[14 / 2]; + + ULONG start = getlong(p, 0); + ULONG len = getlong(p, 4); + if (start == 0xf80000 && len == 262144) + start = 0xfc0000; + ULONG crc32 = getlong(p, 4 + 4 + 4 + 4); + + UBYTE *path = &p[4 + 4 + 4 + 4 + 4]; + while (*path++); + + printf("ROM %08lx-%08lx %d.%d (CRC=%08x).\n", start, start + len - 1, ver, rev, crc32); + printf("- '%s'\n", path); + if (ver != rver || rev != rrev) { + printf("WARNING: KS ROM version mismatch.\n"); + } +} + +static int parse_pass_2(FILE *f, struct uaestate *st) +{ + for (int i = 0; i < MEMORY_REGIONS; i++) { + struct MemoryBank *mb = &st->membanks[i]; + if (mb->size) { + load_memory(f, i, st); + } + } + + for (;;) { + ULONG size, flags; + UBYTE cname [5]; + + if (!read_chunk_head(f, cname, &size, &flags)) { + return -1; + } + + if (!strcmp(cname, "END ")) + break; + + if (!strcmp(cname, "CPU ")) { + st->cpu_chunk = load_chunk(f, cname, size, st); + } else if (!strcmp(cname, "CHIP")) { + st->custom_chunk = load_chunk(f, cname, size, st); + } else if (!strcmp(cname, "AGAC")) { + st->aga_colors_chunk = load_chunk(f, cname, size, st); + } else if (!strcmp(cname, "CIAA")) { + st->ciaa_chunk = load_chunk(f, cname, size, st); + } else if (!strcmp(cname, "CIAB")) { + st->ciab_chunk = load_chunk(f, cname, size, st); + } else if (!strcmp(cname, "DSK0")) { + st->floppy_chunk[0] = load_chunk(f, cname, size, st); + floppy_info(0, st->floppy_chunk[0]); + } else if (!strcmp(cname, "DSK1")) { + st->floppy_chunk[1] = load_chunk(f, cname, size, st); + floppy_info(1, st->floppy_chunk[1]); + } else if (!strcmp(cname, "DSK2")) { + st->floppy_chunk[2] = load_chunk(f, cname, size, st); + floppy_info(2, st->floppy_chunk[2]); + } else if (!strcmp(cname, "DSK3")) { + st->floppy_chunk[3] = load_chunk(f, cname, size, st); + floppy_info(3, st->floppy_chunk[3]); + } else if (!strcmp(cname, "AUD0")) { + 
st->audio_chunk[0] = load_chunk(f, cname, size, st); + } else if (!strcmp(cname, "AUD1")) { + st->audio_chunk[1] = load_chunk(f, cname, size, st); + } else if (!strcmp(cname, "AUD2")) { + st->audio_chunk[2] = load_chunk(f, cname, size, st); + } else if (!strcmp(cname, "AUD3")) { + st->audio_chunk[3] = load_chunk(f, cname, size, st); + } else { + fseek(f, size, SEEK_CUR); + fseek(f, 4 - (size & 3), SEEK_CUR); + } + } + + return st->errors; +} + +static int parse_pass_1(FILE *f, struct uaestate *st) +{ + int first = 1; + UBYTE *b = NULL; + + for (;;) { + ULONG offset = ftell(f); + ULONG size, flags; + UBYTE cname[5]; + b = read_chunk(f, cname, &size, &flags, st); + if (!strcmp(cname, "END ")) + break; + if (!b) { + if (!cname[0]) + return -1; + continue; + } + + if (first) { + if (strcmp(cname, "ASF ")) { + printf("ERROR: Not UAE statefile.\n"); + return -1; + } + first = 0; + continue; + } + + if (!strcmp(cname, "CPU ")) { + ULONG smodel = 68000; + for (int i = 0; i < 4; i++) { + if (SysBase->AttnFlags & (1 << i)) + smodel += 10; + } + if (SysBase->AttnFlags & 0x80) + smodel = 68060; + ULONG model = getlong(b, 0); + if (smodel != model) { + printf("- WARNING: %lu CPU statefile.\n", model); + } + if (model > 68020) { + printf("- ERROR: Only 68000/68010/68020 statefiles are supported.\n"); + st->errors++; + } + } else if (!strcmp(cname, "CHIP")) { + UWORD vposr = getword(b, 4 + 4); // VPOSR + volatile struct Custom *c = (volatile struct Custom*)0xdff000; + UWORD svposr = c->vposr; + int aga = (vposr & 0x0f00) == 0x0300; + int ecs = (vposr & 0x2000) == 0x2000; + int ntsc = (vposr & 0x1000) == 0x1000; + int saga = (svposr & 0x0f00) == 0x0300; + int secs = (svposr & 0x2000) == 0x2000; + int sntsc = (svposr & 0x1000) == 0x1000; + printf("Chipset: %s %s (%04X).\n", aga ? "AGA" : (ecs ? "ECS" : "OCS"), ntsc ? 
"NTSC" : "PAL", vposr); + if (aga && !saga) { + printf("- WARNING: AGA statefile.\n"); + } + if (saga && !aga) { + printf("- WARNING: OCS/ECS statefile.\n"); + } + if (!sntsc && !ecs && ntsc) { + printf("- WARNING: NTSC statefile.\n"); + } + if (sntsc && !ecs && !ntsc) { + printf("- WARNING: PAL statefile.\n"); + } + } else if (!strcmp(cname, "CRAM")) { + check_ram(cname, b, MB_CHIP, 0x000000, offset, size, flags, st); + } else if (!strcmp(cname, "BRAM")) { + check_ram(cname, b, MB_SLOW, 0xc00000, offset, size, flags, st); + } else if (!strcmp(cname, "FRAM")) { + check_ram(cname, b, MB_FAST, 0x200000, offset, size, flags, st); + } else if (!strcmp(cname, "ROM ")) { + check_rom(b, st); + } + + free(b); + b = NULL; + } + + if (!st->errors) { + find_extra_ram(st); + if (!st->extra_ram) { + printf("ERROR: At least 512k unused RAM required.\n"); + st->errors++; + } else { + printf("%luk extra RAM at %08x.\n", st->extra_ram_size >> 10, st->extra_ram); + st->extra_mem_pointer = st->extra_ram; + st->errors = 0; + } + } else { + printf("ERROR: Incompatible hardware configuration.\n"); + st->errors++; + } + + free(b); + + return st->errors; +} + +extern void runit(void*); +extern void callinflate(UBYTE*, UBYTE*); + +static void handlerambank(struct MemoryBank *mb, struct uaestate *st) +{ + UBYTE *sa = mb->addr + 16; /* skip chunk header + RAM size */ + if (mb->flags & 1) { + // +2 = skip zlib header + callinflate(mb->targetaddr, sa + 2); + } else { + ULONG *s = (ULONG*)sa; + ULONG *d = (ULONG*)mb->targetaddr; + for (int i = 0; i < mb->size / 4; i++) { + *d++ = *s++; + } + } +} + +// Interrupts are off, supervisor state +static void processstate(struct uaestate *st) +{ + volatile struct Custom *c = (volatile struct Custom*)0xdff000; + + for (int i = 0; i < MEMORY_REGIONS; i++) { + if (i == MB_CHIP) + c->color[0] = 0x800; + if (i == MB_SLOW) + c->color[0] = 0x080; + if (i == MB_FAST) + c->color[0] = 0x008; + struct MemoryBank *mb = &st->membanks[i]; + if (mb->addr) { + 
handlerambank(mb, st); + } + } + c->color[0] = 0x880; + + // must be before set_cia + for (int i = 0; i < 4; i++) { + set_floppy(st->floppy_chunk[i], i); + } + + c->color[0] = 0x808; + + set_agacolor(st->aga_colors_chunk); + set_custom(st->custom_chunk); + for (int i = 0; i < 4; i++) { + set_audio(st->audio_chunk[i], i); + } + for (int i = 0; i < 8; i++) { + set_sprite(st->sprite_chunk[i], i); + } + set_cia(st->ciaa_chunk, 0); + set_cia(st->ciab_chunk, 1); + + c->color[0] = 0x888; + + runit(st); +} + +static void take_over(struct uaestate *st) +{ + // Copy stack, variables and code to safe location + + UBYTE *tempsp = tempmem_allocate(TEMP_STACK_SIZE, st); + if (!tempsp) { + printf("Out of memory for temp stack (%lu bytes).\n", TEMP_STACK_SIZE); + return; + } + + struct uaestate *tempst = (struct uaestate*)tempmem_allocate(sizeof(struct uaestate), st); + if (!tempst) { + printf("Out of memory for temp state variables (%lu bytes).\n", sizeof(struct uaestate)); + return; + } + memcpy(tempst, st, sizeof(struct uaestate)); + + struct Process *me = (struct Process*)FindTask(0); + struct CommandLineInterface *cli = (struct CommandLineInterface*)((((ULONG)me->pr_CLI) << 2)); + if (!cli) { + printf("CLI == NULL?\n"); + return; + } + ULONG *module = (ULONG*)(cli->cli_Module << 2); + ULONG hunksize = module[-1] << 2; + UBYTE *newcode = tempmem_allocate(hunksize, st); + if (!newcode) { + printf("Out of memory for temp code (%lu bytes).\n", hunksize); + return; + } + memcpy(newcode, module, hunksize); + + // ugly relocation hack but jumps to other module (asm.S) are always absolute.. + // TODO: process the executable after linking + UWORD *cp = (UWORD*)newcode; + for (int i = 0; i < hunksize / 2; i++) { + // JSR/JMP xxxxxxxx.L? 
+ if (*cp == 0x4eb9 || *cp == 0x4ef9) { + ULONG *ap = (ULONG*)(cp + 1); + ULONG *app = (ULONG*)(*ap); + void *addr = (void*)app; + if (addr == runit || addr == callinflate) { + *ap = (ULONG)addr - (ULONG)module + (ULONG)newcode; + //printf("Relocated %08x: %08x -> %08x\n", cp, addr, *ap); + } + } + cp++; + } + + printf("Code=%08lx Stack=%08lx Data=%08lx. Press RETURN!\n", newcode, tempsp, tempst); + Delay(100); // So that key release gets processed by AmigaOS + +#if 0 + if (SysBase->LibNode.lib_Version >= 37) { + CacheClearU(); + } +#endif + + UBYTE b; + fread(&b, 1, 1, stdin); + + if (GfxBase->LibNode.lib_Version >= 37) { + LoadView(NULL); + WaitTOF(); + WaitTOF(); + } + + // No turning back! + extern void *killsystem(UBYTE*, struct uaestate*, ULONG); + killsystem(tempsp + TEMP_STACK_SIZE, tempst, (ULONG)processstate - (ULONG)module + (ULONG)newcode); +} + +int main(int argc, char *argv[]) +{ + FILE *f; + UBYTE *b; + ULONG size; + UBYTE cname[5]; + struct uaestate *st; + + if (argc < 2) { + printf("Statefile parameter missing.\n"); + return 0; + } + + f = fopen(argv[1], "rb"); + if (!f) { + printf("Couldn't open '%s'\n", argv[1]); + return 0; + } + + st = calloc(sizeof(struct uaestate), 1); + if (!st) { + printf("Out of memory.\n"); + return 0; + } + + if (!parse_pass_1(f, st)) { + fseek(f, 0, SEEK_SET); + if (!parse_pass_2(f, st)) { + take_over(st); + } else { + printf("Pass #2 failed (%ld errors).\n", st->errors); + } + } else { + printf("Pass #1 failed (%ld errors).\n", st->errors); + } + + free(st); + + fclose(f); + + free_allocations(st); + + return 0; +} diff --git a/utilities/stateload/makefile b/utilities/stateload/makefile new file mode 100644 index 00000000..8162c615 --- /dev/null +++ b/utilities/stateload/makefile @@ -0,0 +1,23 @@ + +NOWDATE := "\"$(shell date "+%-d.%-m.%Y")\"" +NOWTIME := "\"$(shell date "+%T")\"" + +CC=/opt/amiga/bin/m68k-amigaos-gcc +AS=/opt/amiga/bin/m68k-amigaos-as + +CFLAGS = -mcrt=nix13 -Os -m68000 -fomit-frame-pointer 
-msmall-code +LINK_CFLAGS = -mcrt=nix13 -s + +OBJS = main.o asm.o inflate.o + +all: $(OBJS) + $(CC) $(LINK_CFLAGS) -o uaestateload $^ + +main.o: main.c + $(CC) $(CFLAGS) -I. -c -o $@ main.c + +asm.o: asm.S + $(AS) -o $@ asm.S + +inflate.o: inflate.S + $(CC) $(CFLAGS) -I. -c -o $@ inflate.S diff --git a/utilities/stateload/readme.txt b/utilities/stateload/readme.txt new file mode 100644 index 00000000..a2dd7610 --- /dev/null +++ b/utilities/stateload/readme.txt @@ -0,0 +1,22 @@ + +uaestateload: load UAE state files on real hardware. + +Currently common 68000 A500 statefiles are supported (512k chip only, 512k+512k, etc.). + +Information: + +CPU should match the statefile config, but a mismatch only causes a warning. A mismatched CPU most likely won't work. +RAM config must match and at least one RAM address space must be 512k larger. +Both compressed and uncompressed statefiles are supported. +HD compatible (the statefile is completely loaded before system takeover). +KS ROM does not need to match if the loaded program has already completely taken over the system. +All statefiles, even ancient ones, should be supported; confirmed with a statefile created by UAE 0.8.22. +Floppy state restore is not tested, but at least motor state and track number are restored. +Statefile restore can fail, for example, if the statefile was saved while the blitter was active or the program was executing self-modifying code. + +RAM config examples: + +512k chip ram statefile: hardware must have 1M chip or 512k chip+512k "slow" ram or 512k chip+512k real fast. +512k+512k statefile: hardware must have 1M+512k or 512k+1M or 512k+512k+512k real fast. + +Note that uncompressed statefiles require at least 1M contiguous extra RAM because all statefile RAM address spaces need to fit in RAM before system takeover. -- 2.47.3