Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

R5900: Improve the EE cache performance #12108

Merged
merged 3 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 116 additions & 63 deletions pcsx2/COP0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,116 +230,161 @@ void MapTLB(const tlbs& t, int i)
u32 saddr, eaddr;

COP0_LOG("MAP TLB %d: 0x%08X-> [0x%08X 0x%08X] S=%d G=%d ASID=%d Mask=0x%03X EntryLo0 PFN=%x EntryLo0 Cache=%x EntryLo1 PFN=%x EntryLo1 Cache=%x VPN2=%x",
i, t.VPN2, t.PFN0, t.PFN1, t.S >> 31, t.G, t.ASID,
t.Mask, t.EntryLo0 >> 6, (t.EntryLo0 & 0x38) >> 3, t.EntryLo1 >> 6, (t.EntryLo1 & 0x38) >> 3, t.VPN2);
i, t.VPN2(), t.PFN0(), t.PFN1(), t.isSPR() >> 31, t.isGlobal(), t.EntryHi.ASID,
t.Mask(), t.EntryLo0.PFN, t.EntryLo0.C, t.EntryLo1.PFN, t.EntryLo1.C, t.VPN2());

// According to the manual
// 'It [SPR] must be mapped into a contiguous 16 KB of virtual address space that is
// aligned on a 16KB boundary.Results are not guaranteed if this restriction is not followed.'
// Assume that the game isn't doing anything less-than-ideal with the scratchpad mapping and map it directly to eeMem->Scratch.
if (t.S)
if (t.isSPR())
{
if (t.VPN2 != 0x70000000)
Console.Warning("COP0: Mapping Scratchpad to non-default address 0x%08X", t.VPN2);
if (t.VPN2() != 0x70000000)
Console.Warning("COP0: Mapping Scratchpad to non-default address 0x%08X", t.VPN2());

vtlb_VMapBuffer(t.VPN2, eeMem->Scratch, Ps2MemSize::Scratch);
vtlb_VMapBuffer(t.VPN2(), eeMem->Scratch, Ps2MemSize::Scratch);
}
else
{
if (t.EntryLo0 & 0x2)
if (t.EntryLo0.V)
{
mask = ((~t.Mask) << 1) & 0xfffff;
saddr = t.VPN2 >> 12;
eaddr = saddr + t.Mask + 1;
mask = ((~t.Mask()) << 1) & 0xfffff;
saddr = t.VPN2() >> 12;
eaddr = saddr + t.Mask() + 1;

for (addr = saddr; addr < eaddr; addr++)
{
if ((addr & mask) == ((t.VPN2 >> 12) & mask))
if ((addr & mask) == ((t.VPN2() >> 12) & mask))
{ //match
memSetPageAddr(addr << 12, t.PFN0 + ((addr - saddr) << 12));
memSetPageAddr(addr << 12, t.PFN0() + ((addr - saddr) << 12));
Cpu->Clear(addr << 12, 0x400);
}
}
}

if (t.EntryLo1 & 0x2)
if (t.EntryLo1.V)
{
mask = ((~t.Mask) << 1) & 0xfffff;
saddr = (t.VPN2 >> 12) + t.Mask + 1;
eaddr = saddr + t.Mask + 1;
mask = ((~t.Mask()) << 1) & 0xfffff;
saddr = (t.VPN2() >> 12) + t.Mask() + 1;
eaddr = saddr + t.Mask() + 1;

for (addr = saddr; addr < eaddr; addr++)
{
if ((addr & mask) == ((t.VPN2 >> 12) & mask))
if ((addr & mask) == ((t.VPN2() >> 12) & mask))
{ //match
memSetPageAddr(addr << 12, t.PFN1 + ((addr - saddr) << 12));
memSetPageAddr(addr << 12, t.PFN1() + ((addr - saddr) << 12));
Cpu->Clear(addr << 12, 0x400);
}
}
}
}
}

__inline u32 ConvertPageMask(const u32 PageMask)
{
const u32 mask = std::popcount(PageMask >> 13);

pxAssertMsg(!((mask & 1) || mask > 12), "Invalid page mask for this TLB entry. EE cache doesn't know what to do here.");

return (1 << (12 + mask)) - 1;
}

void UnmapTLB(const tlbs& t, int i)
{
//Console.WriteLn("Clear TLB %d: %08x-> [%08x %08x] S=%d G=%d ASID=%d Mask= %03X", i,t.VPN2,t.PFN0,t.PFN1,t.S,t.G,t.ASID,t.Mask);
u32 mask, addr;
u32 saddr, eaddr;

if (t.S)
if (t.isSPR())
{
vtlb_VMapUnmap(t.VPN2, 0x4000);
vtlb_VMapUnmap(t.VPN2(), 0x4000);
return;
}

if (t.EntryLo0 & 0x2)
if (t.EntryLo0.V)
{
mask = ((~t.Mask) << 1) & 0xfffff;
saddr = t.VPN2 >> 12;
eaddr = saddr + t.Mask + 1;
mask = ((~t.Mask()) << 1) & 0xfffff;
saddr = t.VPN2() >> 12;
eaddr = saddr + t.Mask() + 1;
// Console.WriteLn("Clear TLB: %08x ~ %08x",saddr,eaddr-1);
for (addr = saddr; addr < eaddr; addr++)
{
if ((addr & mask) == ((t.VPN2 >> 12) & mask))
if ((addr & mask) == ((t.VPN2() >> 12) & mask))
{ //match
memClearPageAddr(addr << 12);
Cpu->Clear(addr << 12, 0x400);
}
}
}

if (t.EntryLo1 & 0x2)
if (t.EntryLo1.V)
{
mask = ((~t.Mask) << 1) & 0xfffff;
saddr = (t.VPN2 >> 12) + t.Mask + 1;
eaddr = saddr + t.Mask + 1;
mask = ((~t.Mask()) << 1) & 0xfffff;
saddr = (t.VPN2() >> 12) + t.Mask() + 1;
eaddr = saddr + t.Mask() + 1;
// Console.WriteLn("Clear TLB: %08x ~ %08x",saddr,eaddr-1);
for (addr = saddr; addr < eaddr; addr++)
{
if ((addr & mask) == ((t.VPN2 >> 12) & mask))
if ((addr & mask) == ((t.VPN2() >> 12) & mask))
{ //match
memClearPageAddr(addr << 12);
Cpu->Clear(addr << 12, 0x400);
}
}
}

for (size_t i = 0; i < cachedTlbs.count; i++)
{
if (cachedTlbs.PFN0s[i] == t.PFN0() && cachedTlbs.PFN1s[i] == t.PFN1() && cachedTlbs.PageMasks[i] == ConvertPageMask(t.PageMask.UL))
{
for (size_t j = i; j < cachedTlbs.count - 1; j++)
{
cachedTlbs.CacheEnabled0[j] = cachedTlbs.CacheEnabled0[j + 1];
cachedTlbs.CacheEnabled1[j] = cachedTlbs.CacheEnabled1[j + 1];
cachedTlbs.PFN0s[j] = cachedTlbs.PFN0s[j + 1];
cachedTlbs.PFN1s[j] = cachedTlbs.PFN1s[j + 1];
cachedTlbs.PageMasks[j] = cachedTlbs.PageMasks[j + 1];
}
cachedTlbs.count--;
break;
}
}
}

void WriteTLB(int i)
{
tlb[i].PageMask = cpuRegs.CP0.n.PageMask;
tlb[i].EntryHi = cpuRegs.CP0.n.EntryHi;
tlb[i].EntryLo0 = cpuRegs.CP0.n.EntryLo0;
tlb[i].EntryLo1 = cpuRegs.CP0.n.EntryLo1;

tlb[i].Mask = (cpuRegs.CP0.n.PageMask >> 13) & 0xfff;
tlb[i].nMask = (~tlb[i].Mask) & 0xfff;
tlb[i].VPN2 = ((cpuRegs.CP0.n.EntryHi >> 13) & (~tlb[i].Mask)) << 13;
tlb[i].ASID = cpuRegs.CP0.n.EntryHi & 0xfff;
tlb[i].G = cpuRegs.CP0.n.EntryLo0 & cpuRegs.CP0.n.EntryLo1 & 0x1;
tlb[i].PFN0 = (((cpuRegs.CP0.n.EntryLo0 >> 6) & 0xFFFFF) & (~tlb[i].Mask)) << 12;
tlb[i].PFN1 = (((cpuRegs.CP0.n.EntryLo1 >> 6) & 0xFFFFF) & (~tlb[i].Mask)) << 12;
tlb[i].S = cpuRegs.CP0.n.EntryLo0 & 0x80000000;
tlb[i].PageMask.UL = cpuRegs.CP0.n.PageMask;
tlb[i].EntryHi.UL = cpuRegs.CP0.n.EntryHi;
tlb[i].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0;
tlb[i].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1;

// Setting the cache mode to reserved values is vaguely defined in the manual.
// I found that SPR is set to cached regardless.
// Non-SPR entries default to uncached on reserved cache modes.
if (tlb[i].isSPR())
{
tlb[i].EntryLo0.C = 3;
tlb[i].EntryLo1.C = 3;
}
else
{
if (!tlb[i].EntryLo0.isValidCacheMode())
tlb[i].EntryLo0.C = 2;
if (!tlb[i].EntryLo1.isValidCacheMode())
tlb[i].EntryLo1.C = 2;
}

if (!tlb[i].isSPR() && ((tlb[i].EntryLo0.V && tlb[i].EntryLo0.isCached()) || (tlb[i].EntryLo1.V && tlb[i].EntryLo1.isCached())))
{
const size_t idx = cachedTlbs.count;
cachedTlbs.CacheEnabled0[idx] = tlb[i].EntryLo0.isCached() ? ~0 : 0;
cachedTlbs.CacheEnabled1[idx] = tlb[i].EntryLo1.isCached() ? ~0 : 0;
cachedTlbs.PFN1s[idx] = tlb[i].PFN1();
cachedTlbs.PFN0s[idx] = tlb[i].PFN0();
cachedTlbs.PageMasks[idx] = ConvertPageMask(tlb[i].PageMask.UL);

cachedTlbs.count++;
}

MapTLB(tlb[i], i);
}
Expand All @@ -355,49 +400,57 @@ namespace COP0 {
cpuRegs.CP0.n.Index, cpuRegs.CP0.n.PageMask, cpuRegs.CP0.n.EntryHi,
cpuRegs.CP0.n.EntryLo0, cpuRegs.CP0.n.EntryLo1);

int i = cpuRegs.CP0.n.Index & 0x3f;
const u8 i = cpuRegs.CP0.n.Index & 0x3f;

if (i > 47)
{
Console.Warning("TLBR with index > 47! (%d)", i);
return;
}

cpuRegs.CP0.n.PageMask = tlb[i].PageMask;
cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi & ~(tlb[i].PageMask | 0x1f00);
cpuRegs.CP0.n.EntryLo0 = (tlb[i].EntryLo0 & ~1) | ((tlb[i].EntryHi >> 12) & 1);
cpuRegs.CP0.n.EntryLo1 = (tlb[i].EntryLo1 & ~1) | ((tlb[i].EntryHi >> 12) & 1);
cpuRegs.CP0.n.PageMask = tlb[i].PageMask.Mask << 13;
cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi.UL & ~((tlb[i].PageMask.Mask << 13) | 0x1f00);
cpuRegs.CP0.n.EntryLo0 = tlb[i].EntryLo0.UL & ~(0xFC000000) & ~1;
cpuRegs.CP0.n.EntryLo1 = tlb[i].EntryLo1.UL & ~(0x7C000000) & ~1;
// "If both the Global bit of EntryLo0 and EntryLo1 are set to 1, the processor ignores the ASID during TLB lookup."
// This is reflected during TLBR, where G is only set if both EntryLo0 and EntryLo1 are global.
cpuRegs.CP0.n.EntryLo0 |= (tlb[i].EntryLo0.UL & 1) & (tlb[i].EntryLo1.UL & 1);
cpuRegs.CP0.n.EntryLo1 |= (tlb[i].EntryLo0.UL & 1) & (tlb[i].EntryLo1.UL & 1);
}

void TLBWI()
{
int j = cpuRegs.CP0.n.Index & 0x3f;
const u8 j = cpuRegs.CP0.n.Index & 0x3f;

//if (j > 48) return;
if (j > 47)
{
Console.Warning("TLBWI with index > 47! (%d)", j);
return;
}

COP0_LOG("COP0_TLBWI %d:%x,%x,%x,%x",
cpuRegs.CP0.n.Index, cpuRegs.CP0.n.PageMask, cpuRegs.CP0.n.EntryHi,
cpuRegs.CP0.n.EntryLo0, cpuRegs.CP0.n.EntryLo1);

UnmapTLB(tlb[j], j);
tlb[j].PageMask = cpuRegs.CP0.n.PageMask;
tlb[j].EntryHi = cpuRegs.CP0.n.EntryHi;
tlb[j].EntryLo0 = cpuRegs.CP0.n.EntryLo0;
tlb[j].EntryLo1 = cpuRegs.CP0.n.EntryLo1;
WriteTLB(j);
}

void TLBWR()
{
int j = cpuRegs.CP0.n.Random & 0x3f;
const u8 j = cpuRegs.CP0.n.Random & 0x3f;

//if (j > 48) return;
if (j > 47)
{
Console.Warning("TLBWR with random > 47! (%d)", j);
return;
}

DevCon.Warning("COP0_TLBWR %d:%x,%x,%x,%x\n",
cpuRegs.CP0.n.Random, cpuRegs.CP0.n.PageMask, cpuRegs.CP0.n.EntryHi,
cpuRegs.CP0.n.EntryLo0, cpuRegs.CP0.n.EntryLo1);

//if (j > 48) return;

UnmapTLB(tlb[j], j);
tlb[j].PageMask = cpuRegs.CP0.n.PageMask;
tlb[j].EntryHi = cpuRegs.CP0.n.EntryHi;
tlb[j].EntryLo0 = cpuRegs.CP0.n.EntryLo0;
tlb[j].EntryLo1 = cpuRegs.CP0.n.EntryLo1;
WriteTLB(j);
}

Expand All @@ -422,7 +475,7 @@ namespace COP0 {
cpuRegs.CP0.n.Index = 0xFFFFFFFF;
for (i = 0; i < 48; i++)
{
if (tlb[i].VPN2 == ((~tlb[i].Mask) & (EntryHi32.s.VPN2)) && ((tlb[i].G & 1) || ((tlb[i].ASID & 0xff) == EntryHi32.s.ASID)))
if (tlb[i].VPN2() == ((~tlb[i].Mask()) & (EntryHi32.s.VPN2)) && ((tlb[i].isGlobal()) || ((tlb[i].EntryHi.ASID & 0xff) == EntryHi32.s.ASID)))
{
cpuRegs.CP0.n.Index = i;
break;
Expand Down
3 changes: 3 additions & 0 deletions pcsx2/R5900.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ u32 EEoCycle;

alignas(16) cpuRegistersPack _cpuRegistersPack;
alignas(16) tlbs tlb[48];
cachedTlbs_t cachedTlbs;

R5900cpu *Cpu = NULL;

static constexpr uint eeWaitCycles = 3072;
Expand All @@ -59,6 +61,7 @@ void cpuReset()
std::memset(&cpuRegs, 0, sizeof(cpuRegs));
std::memset(&fpuRegs, 0, sizeof(fpuRegs));
std::memset(&tlb, 0, sizeof(tlb));
cachedTlbs.count = 0;

cpuRegs.pc = 0xbfc00000; //set pc reg to stack
cpuRegs.CP0.n.Config = 0x440;
Expand Down
Loading
Loading