diff --git a/boost/lzx.asm b/boost/lzx.asm index ac27f37..9ba67ff 100644 --- a/boost/lzx.asm +++ b/boost/lzx.asm @@ -77,18 +77,18 @@ CodeStart: .RAM_DMA LDX #$8000 - STX $4300 + STX $4310 LDX $00 STX $2181 LDA $02 STA $2183 LDX #$0000 - STX $4302 + STX $4312 LDA #$41 - STA $4304 + STA $4314 LDX $8D - STX $4305 - LDA #$01 + STX $4315 + LDA #$02 STA $420B PLB diff --git a/boost/overworld.asm b/boost/overworld.asm index 17cb8b8..e7528f2 100644 --- a/boost/overworld.asm +++ b/boost/overworld.asm @@ -2,7 +2,7 @@ namespace overworld !SwitchBlocks = $418800 ; seems to use 480 bytes. - ; It's used when [!] blocks gets spreaded + ; It's used when [!] blocks gets spread ; on screen after beating a switch palace. ; It's same address as wiggler's ram ; which obviously is unused on OW. @@ -19,23 +19,36 @@ org $048267 JSL animations NOP #2 +macro old_code() +CODE_0494B2: 8D 02 42 STA.W $4202 ; Multiplicand A +CODE_0494B5: A5 0C LDA $0C +CODE_0494B7: F0 21 BEQ CODE_0494DA +CODE_0494B9: 8D 03 42 STA.W $4203 ; Multplier B +CODE_0494BC: EA NOP +CODE_0494BD: EA NOP +CODE_0494BE: EA NOP +CODE_0494BF: EA NOP +CODE_0494C0: C2 20 REP #$20 ; Accum (16 bit) +CODE_0494C2: AD 16 42 LDA.W $4216 ; Product/Remainder Result (Low Byte) +CODE_0494C5: 8D 04 42 STA.W $4204 ; Dividend (Low Byte) +CODE_0494C8: E2 20 SEP #$20 ; Accum (8 bit) +CODE_0494CA: A5 0A LDA $0A +CODE_0494CC: 8D 06 42 STA.W $4206 ; Divisor B +CODE_0494CF: EA NOP +CODE_0494D0: EA NOP +CODE_0494D1: EA NOP +CODE_0494D2: EA NOP +CODE_0494D3: EA NOP +CODE_0494D4: EA NOP +CODE_0494D5: C2 20 REP #$20 ; Accum (16 bit) +CODE_0494D7: AD 14 42 LDA.W $4214 ; Quotient of Divide Result (Low Byte) +CODE_0494DA: C2 20 REP #$20 ; Accum (16 bit) +endmacro + org $0494B2 - STZ $2250 - STA $2251 - STZ $2252 - LDA $0C - BEQ + - STA $2253 - STZ $2254 - LDY #$0001 - REP #$20 - LDA $2306 - STY $2250 - STA $2251 - JML mulfix -mulfix_back: - LDA $2306 -+ REP #$20 + JML mulfixv2 + NOP #38 +mulfixv2_back: warnpc $0494DC org $048576 @@ -89,7 +102,95 @@ org 
$04F3C9 org $04F3CD STA.l !SwitchBlocks+$0000,x +macro old_code2() +CODE_0482ED: 9C 04 42 STZ.W $4204 ; Dividend (Low Byte) +CODE_0482F0: B4 04 LDY $04,X +CODE_0482F2: 8C 05 42 STY.W $4205 ; Dividend (High-Byte) +CODE_0482F5: 8D 06 42 STA.W $4206 ; Divisor B +CODE_0482F8: EA NOP ; \ +CODE_0482F9: EA NOP ; | +CODE_0482FA: EA NOP ; | Wait until division is done +CODE_0482FB: EA NOP ; | +CODE_0482FC: EA NOP ; | +CODE_0482FD: EA NOP ; / +CODE_0482FE: C2 20 REP #$20 ; Accum (16 bit) +CODE_048300: AD 14 42 LDA.W $4214 ; Quotient of Divide Result (Low Byte) +CODE_048303: 4A LSR +CODE_048304: 4A LSR +CODE_048305: E2 20 SEP #$20 ; Accum (8 bit) +endmacro + org $0482ED + JML mulfix2v2 + NOP #22 +mulfix2v2_back: +warnpc $048307 +print pc + +pullpc + +mulfixv2: + PHA ; \ Run SNES code if anything. + TSC ; | + XBA ; | + CMP #$37 ; | + BNE .snes_code ; | + LDA #$00 + XBA + PLA ; / + + + STZ $2250 + STA $2251 + STZ $2252 + LDA $0C + BEQ + + STA $2253 + STZ $2254 + LDY #$0001 + REP #$20 + LDA $2306 + STY $2250 + STA $2251 + SEP #$20 + LDA $0A + STA $2253 + STZ $2254 + NOP + REP #$20 + LDA $2306 ++ REP #$20 + JML mulfixv2_back +.snes_code + LDA #$00 + XBA + PLA + + STA.W $4202 ; Multiplicand A + LDA $0C + BEQ CODE_0494DA + STA.W $4203 ; Multplier B + NOP #4 + REP #$20 ; Accum (16 bit) + LDA.W $4216 ; Product/Remainder Result (Low Byte) + STA.W $4204 ; Dividend (Low Byte) + SEP #$20 ; Accum (8 bit) + LDA $0A + STA.W $4206 ; Divisor B + NOP #6 + REP #$20 ; Accum (16 bit) + LDA.W $4214 ; Quotient of Divide Result (Low Byte) +CODE_0494DA: + REP #$20 ; Accum (16 bit) + JML mulfixv2_back + +mulfix2v2: + PHA ; \ Run SNES code if anything. 
+ TSC ; | + XBA ; | + CMP #$37 ; | + ;BNE .snes_code ; | + PLA ; / LDY #$01 STY $2250 TAY @@ -100,14 +201,6 @@ org $0482ED AND #$7F00 STA $2251 STY $2253 - JML mapview - NOP -mapview_back: -warnpc $048307 - -pullpc - -mapview: LDY #$00 STY $2254 NOP @@ -115,7 +208,20 @@ mapview: LDA $2306 LSR SEP #$20 - JML .back + JML mulfix2v2_back + +.snes_code + PLA + STZ.W $4204 ; Dividend (Low Byte) + LDY $04,X + STY.W $4205 ; Dividend (High-Byte) + STA.W $4206 ; Divisor B + NOP #6 + REP #$20 ; Accum (16 bit) + LDA.W $4214 ; Quotient of Divide Result (Low Byte) + LSR #2 + SEP #$20 ; Accum (8 bit) + JML mulfix2v2_back continue_fix: LDA.b #.snes @@ -170,16 +276,7 @@ events: PEA.w $857D-1 LDA $73D9 JML $0086FA - -mulfix: - SEP #$20 - LDA $0A - STA $2253 - STZ $2254 - NOP - REP #$20 - JML .back - + animations: BIT $318E BMI .zsnes diff --git a/changes.txt b/changes.txt index 7efc333..24ccc73 100644 --- a/changes.txt +++ b/changes.txt @@ -1,3 +1,11 @@ +SA-1 Pack v1.25: + - Fixed SNES NMI and IRQ vectors override not getting written correctly ($220C, $220E). These registers are SA-1 only write and they were being written on SNES CPU side, making real hardware crash. None of the major emulators currently emulate this register properly. + - Added a code on RESET routine to put the SA-1 CPU on sleep state before booting and setting up initial variables. The same procedure is done on most commercial games so I decided to include it on SA-1 Pack just to be safe in real hardware. + - Added a code for clearing DMA state ($2230) on SA-1 RESET routine. + - Made all DMAs invoked by the patch use channel 1 instead of 0. This should make the DMA remap patch work with SA-1 ROMs without requering to modify any of the SA-1 Pack code. + - Made DSX (Dynamic Sprites) patch code optional, requested by anonimzwx. + - Made some minor grammar fixes. + SA-1 Pack v1.24: - Fixed Character Conversion DMA randomly freezing the game when SA-1 is unable to receive IRQs. 
diff --git a/quick guide.txt b/quick guide.txt index 357a958..630551d 100644 --- a/quick guide.txt +++ b/quick guide.txt @@ -3,7 +3,7 @@ WARNING: Enable word wrap! This is a quick guide to you get working fast with the SA-1 Pack. It's pretty simple really, but the readme may be a little confusing at start, so I created this file specially for who is starting to work with SA-1 Pack. Requirements: - - Asar 1.31 or newer; + - Asar 1.31 or newer; and - Lunar Magic 2.20 or newer. Guide: @@ -23,6 +23,9 @@ Compatible Patches: https://dl.dropbox.com/u/16203903/A/SA1/cpatches.html Compatible Blocks: https://dl.dropbox.com/u/16203903/A/SA1/cblocks.html Compatible Sprites: https://dl.dropbox.com/u/16203903/A/SA1/csprites.html +Most of the Sprites and Blocks however can be converted automatically with a tool called "SA-1 Convert". Please take a look in the Tools Section on SMW Central if you're interested. +Also any resource on SMW Central with "sa-1" tag also should work with SA-1 ROMs, including Patches. + Also some tools on page can have a function to automatically convert any incompatible resource to SA-1 format, so you can use them if you don't have alternative. You don't need to worry about music (.txt/.bnk/.brr) files, they will work with SA-1 normally. The only thing you have to worry is if the tool to insert them (Addmusic) is compatible. diff --git a/readme.txt b/readme.txt index d6dea34..1506541 100644 --- a/readme.txt +++ b/readme.txt @@ -4,7 +4,7 @@ \___ \ / /\ \______| | | ___/ _` |/ __| |/ / ____) / ____ \ | | | | | (_| | (__| < |_____/_/ \_\ |_| |_| \__,_|\___|_|\_\ - by Vitor Vilela Version 1.24 + by Vitor Vilela Version 1.25 The SA-1 Pack consist of a couple of patches that enable SA-1 and prepare your SMW ROM to use the SA-1 CPU in the @@ -532,7 +532,101 @@ RTL ; Return. Using said method you can get rid of almost all SA-1 limitations, but remember that the SNES's speed is 2 MHz, so if you call it too many times, you may waste some time. 
+ +Additionally, there's a special mode called Parallel/Background Mode. +It runs a certain code periodically while the SA-1 CPU is idle. + +To enable it, put the code pointer to $3186-$3188 and set $318B to #$01. +Unlike other modes, you have to treat this one differently: + + 1. You must reserve a RAM area to use it, since other code can potentially +use it at any time. In other words, you can't use the standard RAM addresses +or your RAM writes will end up corrupted when another code gets executed by +the chip or even by the SNES CPU. For these reasons, I reserved 32 bytes at +$31E0-$31FF just for Parallel Mode, so you can put your scratch values without +the risk of them getting corrupted suddenly. + + 2. Direct Page is set to $0100, since you usually will not access the standard +direct page area ($3000-$30FF) and with that you can easily +access Parallel Mode reserved RAM as well as other SA-1 Pack internal RAM +addresses. Of course after running your code, you should restore it back +to $0100 if you changed it. Oh and if you're wondering, in the **SA-1 CPU**, +$0100 is the same thing as $3100. Don't get confused. + + 3. When accessing registers (or any other non-thread-safe address), you must +disable IRQ (by using the SEI opcode), to stop SA-1 from listening to the SNES CPU. +With that, you can access the multiplication registers or execute a DMA +without the risk of it conflicting with another thread. Don't forget +to use CLI to re-enable IRQ or otherwise the game will freeze. + + 4. It is preferred that your code works as a service, which runs code on demand. +This mode is useful for code that does, for example, graphics manipulation so it +won't affect in-game performance because it ONLY uses SA-1 idle cycles and when +the game code is running its code gets paused. + + 5. If the status flag ($318B) is set to #$FF, the service MUST stop current +operations and free itself so that another parallel service can start executing.
Because +obviously only one parallel mode code can be run at once (I may change that in the +future but I don't think it will ever be needed). + +Example code (invoking parallel mode): + + LDA $318B ; \ If there's no Parallel Mode running already, + BEQ + ; / skip. + + LDA #$FF ; \ Tell previous Parallel Mode code to exit. + STA $318B ; / This is important or the game may crash or stop working. + +- LDA $318B ; \ Wait until the previous service gets free. + BNE - ; / + ++ + LDA.b #MyCode ; \ Place Parallel Mode Service Pointer + STA $3186 ; | + LDA.b #MyCode>>8 ; | + STA $3187 ; | + LDA.b #MyCode>>16 ; | + STA $3188 ; / + + LDA #$01 ; \ Start Parallel Mode Service + STA $318B ; / + +Example code (actual parallel mode): + PHB ; \ Set up banks. + PHK ; | + PLB ; / + +.main_loop + LDA $8B ; \ If the parallel mode state + CMP #$FF ; | is set to #$FF (end), shutdown + BEQ .end ; / the service. + + LDA $EF ; \ Check if there's any graphics + CMP $EE ; | rotation request. + BEQ .main_loop ; | + STA $EF ; / + + JSR .rotate ; Rotate GFX (not included here) + BRA .main_loop ; Go back to main loop. +.end + PLB ; Restore bank + RTL ; Return. + +Personally this mode is extremely useful for rotating graphics, because +it takes SA-1's unused cycles and it does not cause slowdown. If there's +not enough time to rotate a GFX, instead of making the game get unstable +and slow down, it will just reduce the rotation frame rate, which most +users will not actually notice. It can also be useful if you, for some +reason, want to, for example, decompress a GFX in the background without +temporarily freezing the level, or even if you want to run a music engine +here. Use it freely! Remember that it's multi-threaded and your code +must be thread-safe with normal SA-1 operations and with the SNES CPU.
+And when SNES CPU code is running together with Parallel Mode, the code +performance may reduce a bit to around 8 MHz, but still a very good +performance to explore while SA-1 CPU is not doing anything. And when +SNES is idle (i.e. finished processing a game frame), the code is +executed normally at 10.74 MHz. There are a bunch of other useful features too, such as bit stream and fast DMA which are only available while on SA-1 side. diff --git a/sa1.asm b/sa1.asm index c9b4103..95bb1c0 100644 --- a/sa1.asm +++ b/sa1.asm @@ -6,6 +6,9 @@ !ZSNES = 1 ; Put 0 if you don't want to SA-1 Pack automatically deal with ZSNES limitations. ; (in other words, put 0 if you don't want ZSNES 1.51 or older support) + +!DSX = 1 ; Put 0 if you want to turn off legacy (Dynamic Sprites) patch support. + ; (as anoni's Dynamic Z should obsolete it soon.) sa1rom ; \ Don't touch! !c = autoclean ; | @@ -96,17 +99,19 @@ org $83C8 ; Macros ; ;===============================================; +; legacy DSX patch DMA base macro. ; A is 16-bit, X is 8-bit. macro transferslot(slot, bytes, shift) LDA.W #$7C00+(*256)+ ; \ VRAM address + line*slot STA.W $2116 ; / LDA.W #(!DSX_BUFFER&65535)+(*512)+(*2) ;\ Set Buffer location - STA.W $4302 ; / + STA.W $4312 ; / LDA.W # ; \ Set bytes to transfer - STA.W $4305 ; / + STA.W $4315 ; / STY.W $420B ; Run DMA. endmacro +; Character Conversion DMA base macro. ; A is 16-bit, Y is 8-bit. ; X is remain CDMA slots. macro ccdmaslot(slot) @@ -117,10 +122,10 @@ macro ccdmaslot(slot) ; LDA.W !CCDMA_TABLE+(*8)+3 ; \ Set source of bitmap in BW-RAM. STA $2232 ; | (Both SA-1 and CPU Register.) - STA $4302 ; | + STA $4312 ; | LDY.W !CCDMA_TABLE+(*8)+5 ; | STY $2234 ; | - STY $4304 ; / + STY $4314 ; / ; LDA.W #!CC_BUFFER ; \ Set I-RAM buffer. STA $2235 ; / (This is used as buffer in conversion, like the echo buffer.) 
@@ -136,11 +141,11 @@ macro ccdmaslot(slot) SEI ; / ; + LDA.W !CCDMA_TABLE+(*8)+6 ; \ Store size of conversion+transfer - STA $4305 ; / + STA $4315 ; / LDA.W !CCDMA_TABLE+(*8)+1 ; \ Store VRAM address. STA $2116 ; / ; - LDY #$01 ; \ Run SA-1 AND CPU DMA + LDY #$02 ; \ Run SA-1 AND CPU DMA STY $420B ; / ; DEX ; \ If there are no more remaining, @@ -156,13 +161,13 @@ macro ccdmaslot(slot) LDA.W !CCDMA_TABLE+(*8)+1 ; \ Set VRAM address STA $2116 ; / LDA.W !CCDMA_TABLE+(*8)+3 ; \ Set source address - STA $4302 ; | + STA $4312 ; | LDY.W !CCDMA_TABLE+(*8)+5 ; | STY $2234 ; / LDA.W !CCDMA_TABLE+(*8)+6 ; \ Store size of conversion+transfer - STA $4305 ; / + STA $4315 ; / ; - LDY #$01 ; \ Transfer. + LDY #$02 ; \ Transfer. STY $420B ; / ; LDY #$81 ; \ Enable again Character Conversion DMA. @@ -266,6 +271,9 @@ SNES_Reset: ; Super NES Reset LDA #$1FFF ; | TCS ; / ; + LDA #$0020 ; \ Set SA-1 to sleep/reset state. + STA $2200 ; / + ; LDA #Reset ; \ Set up SA-1 Vectors STA $2203 ; | LDA #NMI ; | @@ -273,11 +281,6 @@ SNES_Reset: ; Super NES Reset LDA #IRQ ; | STA $2207 ; / ; - LDA #$816A ; \ Set IRQ and NMI Vectors - STA $220C ; | (Dynamic vectors) - LDA #$8374 ; | - STA $220E ; / - ; SEP #$20 ; A = 8-bit ; JSL ResetBanks+$03 ; Reset SA-1 Banks @@ -505,14 +508,17 @@ CCDMA_END: ; STZ !CCDMA_SLOTS ; Clear CCDMA Slots ; Dynamic_Sprites: ; -------------------------------------- +if !DSX ; LDA $6100 ; \ Don't run Dynamic Sprites system CMP #$07 ; | if the game mode isn't #$07 nor #$14 BEQ + ; | CMP #$14 ; | BEQ + ; / STZ !SLOTSUSED ; \ Reset slots used and return +endif ; | JML $008172 ; / +if !DSX ; + LDA !SLOTSUSED ; Load Dynamic Sprites Slots BNE + ; Don't return to NMI if there are slots to transfer. JML $008172 ; Otherwise, return to NMI. 
@@ -532,10 +538,10 @@ Dynamic_Sprites: ; -------------------------------------- LDY #$80 ; \ Set up DMA STY $2115 ; | LDA #$1801 ; | - STA $4300 ; / + STA $4310 ; / LDY.B #!DSX_BUFFER/65536 ; \ Set Transfer Bank - STY $4304 ; / - LDY #$01 ; This value is written to $420B + STY $4314 ; / + LDY #$02 ; This value is written to $420B ; %transferslot(0, $0080, $C0) ; \ Transfer Slot 1, line 1. %transferslot(1, $0080, $C0) ; | Transfer Slot 1, line 2. @@ -552,10 +558,10 @@ Dynamic_Sprites: ; -------------------------------------- LDY #$80 ; \ Set up DMA STY $2115 ; | LDA #$1801 ; | - STA $4300 ; / + STA $4310 ; / LDY.B #!DSX_BUFFER/65536 ; \ Set Transfer Bank - STY $4304 ; / - LDY #$01 ; This value is written to $420B + STY $4314 ; / + LDY #$02 ; This value is written to $420B ; %transferslot(0, $0100, $80) ; \ Transfer Slot 1 & 2, line 1. %transferslot(1, $0100, $80) ; | Transfer Slot 1 & 2, line 2. @@ -572,10 +578,10 @@ Dynamic_Sprites: ; -------------------------------------- LDY #$80 ; \ Set up DMA STY $2115 ; | LDA #$1801 ; | - STA $4300 ; / + STA $4310 ; / LDY.B #!DSX_BUFFER/65536 ; \ Set Transfer Bank - STY $4304 ; / - LDY #$01 ; This value is written to $420B + STY $4314 ; / + LDY #$02 ; This value is written to $420B ; %transferslot(0, $0180, $40) ; \ Transfer Slot 1, 2 & 3, line 1. %transferslot(1, $0180, $40) ; | Transfer Slot 1, 2 & 3, line 2. @@ -594,20 +600,21 @@ Dynamic_Sprites: ; -------------------------------------- LDY #$80 ; \ Set up DMA STY $2115 ; | LDA #$1801 ; | - STA $4300 ; / + STA $4310 ; / LDA.W #!DSX_BUFFER&65535 ; \ Set DMA source - STA $4302 ; | + STA $4312 ; | LDY.B #!DSX_BUFFER/65536 ; | - STY $4304 ; / + STY $4314 ; / LDA #$0800 ; \ Set Length of transfer. - STA $4305 ; / - LDY #$01 ; \ Run DMA + STA $4315 ; / + LDY #$02 ; \ Run DMA STY $420B ; / ; DEY ; \ Zero slots used. STY !SLOTSUSED ; / ; JML $008172 ; Return to NMI. 
+endif ; SA1_Reset: ; SEI ; \ Disable IRQ and Emulation Mode @@ -625,6 +632,8 @@ SA1_Reset: ; ; SEP #$30 ; A/X/Y 8-bit ; + STZ $2230 ; Reset SA-1 DMA settings. + ; LDA #$80 ; \ Enable I-RAM and BW-RAM write STA $2227 ; | STZ $2225 ; | @@ -639,6 +648,13 @@ SA1_Reset: ; ; This will set up a 4bpp Virtual RAM at $60:0000-$63:FFFF ; Settings this to #$01 will make a 2bpp Virtual RAM at $60:0000-$67:FFFF. ; + REP #$20 + LDA #$816A ; \ Set SNES IRQ and NMI Vectors + STA $220C ; | (Dynamic vectors) + LDA #$8374 ; | + STA $220E ; / + SEP #$20 + LDA #$50 ; \ Enable dynamic NMI/IRQ vector. STA $2209 ; / ;