Skip to content

Commit

Permalink
n64: improve and extend cache coherency checks
Browse files Browse the repository at this point in the history
To make sure all possible errors are intercepted, the checks are now
performed by the RDRAM module, whenever an RDRAM read/write behind
a cacheline happens.

CPU writes to the cache now track dirtiness at the byte level rather
than at the whole-cacheline level, so that hardware accessing memory does
not trigger a false positive for false-shared variables. An initial
round of testing has shown that the check would trigger far too often
otherwise.

Error reporting is also much improved to provide more context to
analyze the issue, including tracking the PC at which the hardware DMA
was triggered.
  • Loading branch information
rasky committed Dec 1, 2023
1 parent 9a8e7f8 commit e3b645f
Show file tree
Hide file tree
Showing 23 changed files with 169 additions and 86 deletions.
7 changes: 4 additions & 3 deletions ares/n64/ai/ai.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ auto AI::sample(f64& left, f64& right) -> void {

if(io.dmaLength[0] && io.dmaEnable) {
io.dmaAddress[0].bit(13,23) += io.dmaAddressCarry;
auto data = rdram.ram.read<Word>(io.dmaAddress[0]);
auto data = rdram.ram.read<Word>(io.dmaAddress[0], "AI");
auto l = s16(data >> 16);
auto r = s16(data >> 0);
left = l / 32768.0;
Expand All @@ -50,8 +50,9 @@ auto AI::sample(f64& left, f64& right) -> void {
}
if(!io.dmaLength[0]) {
if(--io.dmaCount) {
io.dmaAddress[0] = io.dmaAddress[1];
io.dmaLength [0] = io.dmaLength [1];
io.dmaAddress[0] = io.dmaAddress[1];
io.dmaLength [0] = io.dmaLength [1];
io.dmaOriginPc[0] = io.dmaOriginPc[1];
mi.raise(MI::IRQ::AI);
}
}
Expand Down
1 change: 1 addition & 0 deletions ares/n64/ai/ai.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ struct AI : Thread, Memory::RCP<AI> {
n1 dmaAddressCarry;
n18 dmaLength[2];
n2 dmaCount;
u64 dmaOriginPc[2];
n14 dacRate;
n4 bitRate;
} io;
Expand Down
1 change: 1 addition & 0 deletions ares/n64/ai/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ auto AI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
if(io.dmaCount < 2) {
if(io.dmaCount == 0) mi.raise(MI::IRQ::AI);
io.dmaLength[io.dmaCount] = length;
io.dmaOriginPc[io.dmaCount] = cpu.ipu.pc;
io.dmaCount++;
}
}
Expand Down
3 changes: 2 additions & 1 deletion ares/n64/cartridge/flash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ auto Cartridge::Flash::writeWord(u32 address, u64 data) -> void {
}
if(mode == Mode::Write) {
for(u32 index = 0; index < 128; index += 2) {
u16 half = rdram.ram.read<Half>(source + index);
// FIXME: this is obviously wrong, the flash can't access RDRAM
u16 half = rdram.ram.read<Half>(source + index, "Flash");
Memory::Writable::write<Half>(offset + index, half);
}
}
Expand Down
3 changes: 2 additions & 1 deletion ares/n64/cpu/cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,10 @@ struct CPU : Thread {
template<u32 Size> auto write(u32 address, u64 data) -> void;

bool valid;
bool dirty;
u16 dirty;
u32 tag;
u16 index;
u64 fillpc;
union {
u8 bytes[16];
u16 halfs[8];
Expand Down
11 changes: 6 additions & 5 deletions ares/n64/cpu/dcache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ auto CPU::DataCache::Line::hit(u32 address) const -> bool {

auto CPU::DataCache::Line::fill(u32 address) -> void {
cpu.step(40 * 2);
valid = 1;
dirty = 0;
tag = address & ~0x0000'0fff;
valid = 1;
dirty = 0;
tag = address & ~0x0000'0fff;
fillpc = cpu.ipu.pc;
cpu.busReadBurst<DCache>(tag | index, words);
}

Expand Down Expand Up @@ -41,7 +42,7 @@ auto CPU::DataCache::Line::write(u32 address, u64 data) -> void {
words[address >> 2 & 2 | 0] = data >> 32;
words[address >> 2 & 2 | 1] = data >> 0;
}
dirty = 1;
dirty |= ((1 << Size) - 1) << (address & 0xF);
}

template<u32 Size>
Expand All @@ -60,7 +61,7 @@ auto CPU::DataCache::readDebug(u32 vaddr, u32 address) -> u8 {
auto& line = this->line(vaddr);
if(!line.hit(address)) {
Thread dummyThread{};
return bus.read<Byte>(address, dummyThread);
return bus.read<Byte>(address, dummyThread, "Ares Debugger");
}
return line.read<Byte>(address);
}
Expand Down
10 changes: 5 additions & 5 deletions ares/n64/cpu/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ auto CPU::devirtualizeDebug(u64 vaddr) -> u64 {

template<u32 Size>
inline auto CPU::busWrite(u32 address, u64 data) -> void {
bus.write<Size>(address, data, *this);
bus.write<Size>(address, data, *this, "CPU");
}

template<u32 Size>
Expand All @@ -156,7 +156,7 @@ inline auto CPU::busWriteBurst(u32 address, u32 *data) -> void {

template<u32 Size>
inline auto CPU::busRead(u32 address) -> u64 {
return bus.read<Size>(address, *this);
return bus.read<Size>(address, *this, "CPU");
}

template<u32 Size>
Expand Down Expand Up @@ -239,17 +239,17 @@ auto CPU::readDebug(u64 vaddr) -> u8 {
case Context::Segment::Mapped:
if(auto match = tlb.load(vaddr, true)) {
if(match.cache) return dcache.readDebug(vaddr, match.address & context.physMask);
return bus.read<Byte>(match.address & context.physMask, dummyThread);
return bus.read<Byte>(match.address & context.physMask, dummyThread, "Ares Debugger");
}
return 0;
case Context::Segment::Cached:
return dcache.readDebug(vaddr, vaddr & 0x1fff'ffff);
case Context::Segment::Cached32:
return dcache.readDebug(vaddr, vaddr & 0xffff'ffff);
case Context::Segment::Direct:
return bus.read<Byte>(vaddr & 0x1fff'ffff, dummyThread);
return bus.read<Byte>(vaddr & 0x1fff'ffff, dummyThread, "Ares Debugger");
case Context::Segment::Direct32:
return bus.read<Byte>(vaddr & 0xffff'ffff, dummyThread);
return bus.read<Byte>(vaddr & 0xffff'ffff, dummyThread, "Ares Debugger");
}

unreachable;
Expand Down
4 changes: 2 additions & 2 deletions ares/n64/cpu/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ auto CPU::Recompiler::emit(u32 vaddr, u32 address, bool singleInstruction) -> Bl
Thread thread;
bool hasBranched = 0;
while(true) {
u32 instruction = bus.read<Word>(address, thread);
u32 instruction = bus.read<Word>(address, thread, "Ares Recompiler");
if(callInstructionPrologue) {
mov32(reg(1), imm(instruction));
call(&CPU::instructionPrologue);
Expand All @@ -51,7 +51,7 @@ auto CPU::Recompiler::emit(u32 vaddr, u32 address, bool singleInstruction) -> Bl
call(&CPU::instructionEpilogue);
vaddr += 4;
address += 4;
if(hasBranched || (address & 0xfc) == 0 || singleInstruction) break; //block boundary
if(hasBranched || (vaddr & 0xfc) == 0 || singleInstruction) break; //block boundary
hasBranched = branched;
testJumpEpilog();
}
Expand Down
41 changes: 17 additions & 24 deletions ares/n64/memory/bus.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
template<u32 Size>
inline auto Bus::read(u32 address, Thread& thread) -> u64 {
inline auto Bus::read(u32 address, Thread& thread, const char *peripheral) -> u64 {
static constexpr u64 unmapped = 0;
static_assert(Size == Byte || Size == Half || Size == Word || Size == Dual);

if(address <= 0x007f'ffff) return rdram.ram.read<Size>(address);
if(address <= 0x03ef'ffff) return unmapped;
if(address <= 0x03ef'ffff) return rdram.ram.read<Size>(address, peripheral);
if(address <= 0x03ff'ffff) return rdram.read<Size>(address, thread);
if(address <= 0x0407'ffff) return rsp.read<Size>(address, thread);
if(address <= 0x040f'ffff) return rsp.status.read<Size>(address, thread);
Expand All @@ -27,16 +26,18 @@ template<u32 Size>
inline auto Bus::readBurst(u32 address, u32 *data, Thread& thread) -> void {
static_assert(Size == DCache || Size == ICache);

if(address <= 0x03ef'ffff) return rdram.ram.readBurst<Size>(address, data, "CPU");
if(address <= 0x03ff'ffff) {
data[0] = read<Word>(address | 0x0, thread);
data[1] = read<Word>(address | 0x4, thread);
data[2] = read<Word>(address | 0x8, thread);
data[3] = read<Word>(address | 0xc, thread);
// FIXME: not hardware validated, no idea of the behavior
data[0] = rdram.readWord(address | 0x0, thread);
data[1] = 0;
data[2] = 0;
data[3] = 0;
if constexpr(Size == ICache) {
data[4] = read<Word>(address | 0x10, thread);
data[5] = read<Word>(address | 0x14, thread);
data[6] = read<Word>(address | 0x18, thread);
data[7] = read<Word>(address | 0x1c, thread);
data[4] = 0;
data[5] = 0;
data[6] = 0;
data[7] = 0;
}
return;
}
Expand All @@ -46,15 +47,14 @@ inline auto Bus::readBurst(u32 address, u32 *data, Thread& thread) -> void {
}

template<u32 Size>
inline auto Bus::write(u32 address, u64 data, Thread& thread) -> void {
inline auto Bus::write(u32 address, u64 data, Thread& thread, const char *peripheral) -> void {
static_assert(Size == Byte || Size == Half || Size == Word || Size == Dual);
if constexpr(Accuracy::CPU::Recompiler) {
cpu.recompiler.invalidate(address + 0); if constexpr(Size == Dual)
cpu.recompiler.invalidate(address + 4);
}

if(address <= 0x007f'ffff) return rdram.ram.write<Size>(address, data);
if(address <= 0x03ef'ffff) return;
if(address <= 0x03ef'ffff) return rdram.ram.write<Size>(address, data, peripheral);
if(address <= 0x03ff'ffff) return rdram.write<Size>(address, data, thread);
if(address <= 0x0407'ffff) return rsp.write<Size>(address, data, thread);
if(address <= 0x040f'ffff) return rsp.status.write<Size>(address, data, thread);
Expand All @@ -77,17 +77,10 @@ template<u32 Size>
inline auto Bus::writeBurst(u32 address, u32 *data, Thread& thread) -> void {
static_assert(Size == DCache || Size == ICache);

if(address <= 0x03ef'ffff) return rdram.ram.writeBurst<Size>(address, data, "CPU");
if(address <= 0x03ff'ffff) {
write<Word>(address | 0x0, data[0], thread);
write<Word>(address | 0x4, data[1], thread);
write<Word>(address | 0x8, data[2], thread);
write<Word>(address | 0xc, data[3], thread);
if constexpr(Size == ICache) {
write<Word>(address | 0x10, data[4], thread);
write<Word>(address | 0x14, data[5], thread);
write<Word>(address | 0x18, data[6], thread);
write<Word>(address | 0x1c, data[7], thread);
}
// FIXME: not hardware validated, but a good guess
rdram.writeWord(address | 0x0, data[0], thread);
return;
}

Expand Down
4 changes: 2 additions & 2 deletions ares/n64/memory/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ namespace Memory {

struct Bus {
//bus.hpp
template<u32 Size> auto read(u32 address, Thread& thread) -> u64;
template<u32 Size> auto write(u32 address, u64 data, Thread& thread) -> void;
template<u32 Size> auto read(u32 address, Thread& thread, const char *peripheral) -> u64;
template<u32 Size> auto write(u32 address, u64 data, Thread& thread, const char *peripheral) -> void;

template<u32 Size> auto readBurst(u32 address, u32* data, Thread& thread) -> void;
template<u32 Size> auto writeBurst(u32 address, u32* data, Thread& thread) -> void;
Expand Down
2 changes: 1 addition & 1 deletion ares/n64/n64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ namespace ares::Nintendo64 {
#include <n64/pif/pif.hpp>
#include <n64/ri/ri.hpp>
#include <n64/si/si.hpp>
#include <n64/rdram/rdram.hpp>
#include <n64/cpu/cpu.hpp>
#include <n64/rdram/rdram.hpp>
#include <n64/rsp/rsp.hpp>
#include <n64/rdp/rdp.hpp>
#include <n64/memory/bus.hpp>
Expand Down
22 changes: 3 additions & 19 deletions ares/n64/pi/dma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,7 @@ auto PI::dmaRead() -> void {

u32 lastCacheline = 0xffff'ffff;
for(u32 address = 0; address < io.readLength; address += 2) {
if (system.homebrewMode && ((io.dramAddress + address) & ~15) != lastCacheline) {
lastCacheline = address & ~15;
auto& line = cpu.dcache.line(io.dramAddress + address);
if (line.hit(io.dramAddress) && line.dirty) {
debug(unusual, "PI DMA reading from cached memory ", hex((io.dramAddress + address) | 0x80000000), " (missing cache writeback?)");
}
}
u16 data = rdram.ram.read<Half>(io.dramAddress + address);
u16 data = rdram.ram.read<Half>(io.dramAddress + address, "PI DMA");
busWrite<Half>(io.pbusAddress + address, data);
}
}
Expand Down Expand Up @@ -49,17 +42,8 @@ auto PI::dmaWrite() -> void {
cpu.recompiler.invalidateRange(io.dramAddress, cur_len);
}

u32 lastCacheline = 0xffff'ffff;
for (u32 i = 0; i < cur_len; i++) {
if (system.homebrewMode && (io.dramAddress & ~15) != lastCacheline) {
lastCacheline = io.dramAddress & ~15;
auto& line = cpu.dcache.line(io.dramAddress);
if (line.hit(io.dramAddress)) {
debug(unusual, "PI DMA writing to cached memory ", hex(io.dramAddress | 0x80000000), " (missing cache invalidation?)");
}
}
rdram.ram.write<Byte>(io.dramAddress++, mem[i]);
}
for (u32 i = 0; i < cur_len; i++)
rdram.ram.write<Byte>(io.dramAddress++, mem[i], "PI DMA");
io.dramAddress = (io.dramAddress + 7) & ~7;

first_block = false;
Expand Down
2 changes: 2 additions & 0 deletions ares/n64/pi/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
//PI_READ_LENGTH
io.readLength = n24(data);
io.dmaBusy = 1;
io.originPc = cpu.ipu.pc;
queue.insert(Queue::PI_DMA_Read, dmaDuration(true));
dmaRead();
}
Expand All @@ -106,6 +107,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
//PI_WRITE_LENGTH
io.writeLength = n24(data);
io.dmaBusy = 1;
io.originPc = cpu.ipu.pc;
queue.insert(Queue::PI_DMA_Write, dmaDuration(false));
dmaWrite();
}
Expand Down
1 change: 1 addition & 0 deletions ares/n64/pi/pi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct PI : Memory::RCP<PI> {
n32 readLength;
n32 writeLength;
n32 busLatch;
u64 originPc;
} io;

struct BSD {
Expand Down
4 changes: 2 additions & 2 deletions ares/n64/pif/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ auto PIF::dmaRead(u32 address, u32 ramAddress) -> void {
intA(Read, Size64);
for(u32 offset = 0; offset < 64; offset += 4) {
u32 data = readInt(address + offset);
rdram.ram.write<Word>(ramAddress + offset, data);
rdram.ram.write<Word>(ramAddress + offset, data, "SI DMA");
}
}

auto PIF::dmaWrite(u32 address, u32 ramAddress) -> void {
for(u32 offset = 0; offset < 64; offset += 4) {
u32 data = rdram.ram.read<Word>(ramAddress + offset);
u32 data = rdram.ram.read<Word>(ramAddress + offset, "SI DMA");
writeInt(address + offset, data);
}
intA(Write, Size64);
Expand Down
53 changes: 50 additions & 3 deletions ares/n64/rdram/debugger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ auto RDRAM::Debugger::load(Node::Object parent) -> void {
}

memory.ram->setRead([&](u32 address) -> u8 {
return rdram.ram.read<Byte>(address);
return rdram.ram.read<Byte>(address, "Ares Debugger");
});
memory.ram->setWrite([&](u32 address, u8 data) -> void {
return rdram.ram.write<Byte>(address, data);
return rdram.ram.write<Byte>(address, data, "Ares Debugger");
});

memory.dcache = parent->append<Node::Debugger::Memory>("DCache");
Expand All @@ -25,7 +25,7 @@ auto RDRAM::Debugger::load(Node::Object parent) -> void {
if(line.hit(address)) {
line.write<Byte>(address, data);
} else {
rdram.ram.write<Byte>(address, data);
rdram.ram.write<Byte>(address, data, "Ares Debugger");
}
});

Expand Down Expand Up @@ -59,3 +59,50 @@ auto RDRAM::Debugger::io(bool mode, u32 chipID, u32 address, u32 data) -> void {
tracer.io->notify(message);
}
}

//Builds the extra context line appended to a cache-coherency warning, chosen
//by the label of the peripheral that performed the RDRAM access.
//
//peripheral: label supplied with the RDRAM access ("CPU", "RSP DMA", "PI DMA", "AI", ...)
//returns:    a single tab-indented, newline-terminated line reporting the relevant PC,
//            or an empty string when no origin PC is known for that label
auto RDRAM::Debugger::cacheErrorContext(string peripheral) -> string {
  if(peripheral == "CPU") {
    return { "\tCurrent CPU PC: ", hex(cpu.ipu.pc, 16L), "\n" };
  }
  if(peripheral == "RSP DMA") {
    //originCpu selects whether the recorded origin PC is reported as a CPU PC or an RSP PC
    if(rsp.dma.current.originCpu) {
      return { "\tRSP DMA started at CPU PC: ", hex(rsp.dma.current.originPc, 16L), "\n" };
    } else {
      return { "\tRSP DMA started at RSP PC: ", hex(rsp.dma.current.originPc, 3L), "\n" };
    }
  }
  if(peripheral == "PI DMA") {
    return { "\tPI DMA started at CPU PC: ", hex(pi.io.originPc, 16L), "\n" };
  }
  //the AI sample fetch labels its RDRAM reads "AI"; matching only "AI DMA" meant this
  //context line was never produced. "AI DMA" stays accepted for any caller using that label.
  if(peripheral == "AI" || peripheral == "AI DMA") {
    return { "\tAI DMA started at CPU PC: ", hex(ai.io.dmaOriginPc[0], 16L), "\n" };
  }
  //no origin tracking for other labels (e.g. "SI DMA", "Flash", debugger accesses)
  return "";
}

//Cache-coherency check for an RDRAM read; does nothing unless homebrew mode is on.
//Emits an "unusual" debug message when the accessed bytes are flagged dirty in a
//CPU data cache line that hits this address (i.e. RDRAM holds stale data).
//
//address:    RDRAM address being read
//size:       width of the access; assumed to be in bytes (one dirty bit per byte) - TODO confirm
//peripheral: label of the accessor, used as the message prefix and for extra context
//
//NOTE(review): only the first access after switching to a different cacheline is
//examined, so a later access to other (dirty) bytes of the same line is not checked -
//confirm this is the intended trade-off against message spam.
auto RDRAM::Debugger::readWord(u32 address, int size, const char *peripheral) -> void {
  if(!system.homebrewMode) return;

  const u32 cacheline = address & ~15;
  if(cacheline == lastReadCacheline) return;
  lastReadCacheline = cacheline;

  auto& line = cpu.dcache.line(address);
  if(!line.hit(address)) return;

  //bits covering the accessed bytes, positioned at the access offset within the 16-byte line
  u16 touched = ((1 << size) - 1) << (address & 0xF);
  if(!(line.dirty & touched)) return;

  string msg = { peripheral, " reading from RDRAM address ", hex(address), " which is modified in the cache (missing cache writeback?)\n"};
  msg.append(string{ "\tCacheline was loaded at CPU PC: ", hex(line.fillpc, 16L), "\n" });
  msg.append(cacheErrorContext(peripheral));
  debug(unusual, msg);
}

//Cache-coherency check for an RDRAM write; does nothing unless homebrew mode is on.
//Emits an "unusual" debug message when the written address hits a CPU data cache
//line, whether or not that line is dirty (the cached copy no longer matches RDRAM).
//
//address:    RDRAM address being written
//size:       width of the access (not used by this check)
//value:      data being written (not used by this check)
//peripheral: label of the accessor, used as the message prefix and for extra context
//
//Only the first access after switching to a different cacheline is examined.
auto RDRAM::Debugger::writeWord(u32 address, int size, u64 value, const char *peripheral) -> void {
  if(!system.homebrewMode) return;

  const u32 cacheline = address & ~15;
  if(cacheline == lastWrittenCacheline) return;
  lastWrittenCacheline = cacheline;

  auto& line = cpu.dcache.line(address);
  if(!line.hit(address)) return;

  string msg = { peripheral, " writing to RDRAM address ", hex(address), " which is cached (missing cache invalidation?)\n"};
  msg.append(string{ "\tCacheline was loaded at CPU PC: ", hex(line.fillpc, 16L), "\n" });
  msg.append(cacheErrorContext(peripheral));
  debug(unusual, msg);
}
Loading

0 comments on commit e3b645f

Please sign in to comment.