Skip to content

Commit

Permalink
EE Cache: SIMD the EE cache lookup routine
Browse files Browse the repository at this point in the history
  • Loading branch information
F0bes committed Dec 26, 2024
1 parent 24ed18c commit fa9abbe
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 35 deletions.
34 changes: 32 additions & 2 deletions pcsx2/COP0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,15 @@ void MapTLB(const tlbs& t, int i)
}
}

__inline int ConvertPageMask(const u32 PageMask)
{
const u32 mask = std::popcount(PageMask >> 13);

pxAssertMsg(!((mask & 1) || mask > 12), "Invalid page mask for this TLB entry. EE cache doesn't know what to do here.");

return (1 << (12 + mask)) - 1;
}

void UnmapTLB(const tlbs& t, int i)
{
//Console.WriteLn("Clear TLB %d: %08x-> [%08x %08x] S=%d G=%d ASID=%d Mask= %03X", i,t.VPN2,t.PFN0,t.PFN1,t.S,t.G,t.ASID,t.Mask);
Expand Down Expand Up @@ -324,7 +333,19 @@ void UnmapTLB(const tlbs& t, int i)
}
}

cachedTlbs.erase(std::remove(cachedTlbs.begin(), cachedTlbs.end(), &t), cachedTlbs.end());
for (size_t i = 0; i < cachedTlbs.count; i++)
{
if (cachedTlbs.PFN0s[i] == t.PFN0() && cachedTlbs.PFN1s[i] == t.PFN1() && cachedTlbs.PageMasks[i] == ConvertPageMask(t.PageMask.UL))
{
cachedTlbs.PFN0s.erase(cachedTlbs.PFN0s.begin() + i);
cachedTlbs.PFN1s.erase(cachedTlbs.PFN1s.begin() + i);
cachedTlbs.PageMasks.erase(cachedTlbs.PageMasks.begin() + i);
cachedTlbs.CacheEnabled0.erase(cachedTlbs.CacheEnabled0.begin() + i);
cachedTlbs.CacheEnabled1.erase(cachedTlbs.CacheEnabled1.begin() + i);
cachedTlbs.count--;
break;
}
}
}

void WriteTLB(int i)
Expand All @@ -335,7 +356,16 @@ void WriteTLB(int i)
tlb[i].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1;

if (!tlb[i].isSPR() && ((tlb[i].EntryLo0.V && tlb[i].EntryLo0.isCached()) || (tlb[i].EntryLo1.V && tlb[i].EntryLo1.isCached())))
cachedTlbs.push_back(&tlb[i]);
{
const size_t idx = cachedTlbs.count;
cachedTlbs.CacheEnabled0[idx] = tlb[i].EntryLo0.isCached() ? ~0 : 0;
cachedTlbs.CacheEnabled1[idx] = tlb[i].EntryLo1.isCached() ? ~0 : 0;
cachedTlbs.PFN1s[idx] = tlb[i].PFN1();
cachedTlbs.PFN0s[idx] = tlb[i].PFN0();
cachedTlbs.PageMasks[idx] = ConvertPageMask(tlb[i].PageMask.UL);

cachedTlbs.count++;
}

MapTLB(tlb[i], i);
}
Expand Down
5 changes: 2 additions & 3 deletions pcsx2/R5900.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ u32 EEoCycle;

alignas(16) cpuRegistersPack _cpuRegistersPack;
alignas(16) tlbs tlb[48];
std::vector<tlbs*> cachedTlbs;
cachedTlbs_t cachedTlbs;

R5900cpu *Cpu = NULL;

Expand All @@ -61,8 +61,7 @@ void cpuReset()
std::memset(&cpuRegs, 0, sizeof(cpuRegs));
std::memset(&fpuRegs, 0, sizeof(fpuRegs));
std::memset(&tlb, 0, sizeof(tlb));
cachedTlbs.clear();
cachedTlbs.reserve(48);
cachedTlbs.reset();

cpuRegs.pc = 0xbfc00000; //set pc reg to stack
cpuRegs.CP0.n.Config = 0x440;
Expand Down
36 changes: 35 additions & 1 deletion pcsx2/R5900.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,14 @@ struct tlbs
constexpr u32 Mask() const { return PageMask.Mask; }
constexpr bool isGlobal() const { return EntryLo0.G && EntryLo1.G; }
constexpr bool isSPR() const { return EntryLo0.S; }

constexpr bool operator==(const tlbs& other) const
{
return PageMask.UL == other.PageMask.UL &&
EntryHi.UL == other.EntryHi.UL &&
EntryLo0.UL == other.EntryLo0.UL &&
EntryLo1.UL == other.EntryLo1.UL;
}
};

#ifndef _PC_
Expand Down Expand Up @@ -254,7 +262,33 @@ struct cpuRegistersPack

alignas(16) extern cpuRegistersPack _cpuRegistersPack;
alignas(16) extern tlbs tlb[48];
extern std::vector<tlbs*> cachedTlbs;

struct cachedTlbs_t
{
u32 count;
std::vector<u32> PageMasks;
std::vector<u32> PFN1s;
std::vector<u32> CacheEnabled1;
std::vector<u32> PFN0s;
std::vector<u32> CacheEnabled0;

inline void reset()
{
count = 0;
PageMasks.clear();
PageMasks.resize(48);
PFN1s.clear();
PFN1s.resize(48);
PFN0s.clear();
PFN0s.resize(48);
CacheEnabled1.clear();
CacheEnabled1.resize(48);
CacheEnabled0.clear();
CacheEnabled0.resize(48);
}
};

extern cachedTlbs_t cachedTlbs;

static cpuRegisters& cpuRegs = _cpuRegistersPack.cpuRegs;
static fpuRegisters& fpuRegs = _cpuRegistersPack.fpuRegs;
Expand Down
83 changes: 54 additions & 29 deletions pcsx2/vtlb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "fmt/core.h"

#include <bit>
#include <immintrin.h>
#include <map>
#include <unordered_set>
#include <unordered_map>
Expand Down Expand Up @@ -109,47 +110,71 @@ vtlb_private::VTLBVirtual::VTLBVirtual(VTLBPhysical phys, u32 paddr, u32 vaddr)
}
}

__inline int ConvertPageMask(u32 PageMask)
__noinline int CheckCache(u32 addr)
{
const u32 mask = std::popcount(PageMask >> 13);
// Check if the cache is enabled
if (((cpuRegs.CP0.n.Config >> 16) & 0x1) == 0)
{
return false;
}

pxAssertMsg(!((mask & 1) || mask > 12), "Invalid page mask for this TLB entry. EE cache doesn't know what to do here.");
const size_t size = cachedTlbs.count;
const int stride = 4;

return (1 << (12 + mask)) - 1;
}
__m128i addr_vec = _mm_set1_epi32(addr);

__inline int CheckCache(u32 addr)
{
u32 mask;
size_t i = 0;

if (((cpuRegs.CP0.n.Config >> 16) & 0x1) == 0)
for (; i + stride <= size; i += stride)
{
//DevCon.Warning("Data Cache Disabled! %x", cpuRegs.CP0.n.Config);
return false; //
}
__m128i pfn1_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.PFN1s[i]));
__m128i pfn0_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.PFN0s[i]));
__m128i mask_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.PageMasks[i]));

for (int i = 0; i < cachedTlbs.size(); i++)
{
const auto& entry = cachedTlbs[i];
if (entry->EntryLo1.isCached())
__m128i cached1_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.CacheEnabled1[i]));
__m128i cached0_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&cachedTlbs.CacheEnabled0[i]));

__m128i pfn1_end_vec = _mm_add_epi32(pfn1_vec, mask_vec);
__m128i pfn0_end_vec = _mm_add_epi32(pfn0_vec, mask_vec);

__m128i gteLowerBound0 = _mm_or_si128(
_mm_cmpgt_epi32(addr_vec, pfn0_vec),
_mm_cmpeq_epi32(addr_vec, pfn0_vec));
__m128i gteUpperBound0 = _mm_or_si128(
_mm_cmpgt_epi32(pfn0_end_vec, addr_vec),
_mm_cmpeq_epi32(pfn0_end_vec, addr_vec));

__m128i gteUpperBound1 = _mm_or_si128(
_mm_cmpgt_epi32(pfn1_end_vec, addr_vec),
_mm_cmpeq_epi32(pfn1_end_vec, addr_vec));
__m128i gteLowerBound1 = _mm_or_si128(
_mm_cmpgt_epi32(addr_vec, pfn1_vec),
_mm_cmpeq_epi32(addr_vec, pfn1_vec));

__m128i cmp0 = _mm_and_si128(gteLowerBound0, gteUpperBound0);
__m128i cmp1 = _mm_and_si128(gteLowerBound1, gteUpperBound1);

cmp1 = _mm_and_si128(cmp1, cached1_vec);
cmp0 = _mm_and_si128(cmp0, cached0_vec);

__m128i cmp = _mm_or_si128(cmp1, cmp0);

if (!_mm_testz_si128(cmp, cmp))
{
mask = ConvertPageMask(entry->PageMask.UL);
if ((addr >= entry->PFN1()) && (addr <= entry->PFN1() + mask))
{
//DevCon.Warning("Yay! Cache check cache addr=%x, mask=%x, addr+mask=%x, VPN2=%x PFN0=%x", addr, mask, (addr & mask), tlb[i].VPN2, tlb[i].PFN0);
return true;
}
return true;
}
if (entry->EntryLo0.isCached())
}

for (; i < size; i++)
{
const u32 mask = cachedTlbs.PageMasks[i];
if ((cachedTlbs.CacheEnabled1[i] && addr >= cachedTlbs.PFN1s[i] && addr <= cachedTlbs.PFN1s[i] + mask) ||
(cachedTlbs.CacheEnabled0[i] && addr >= cachedTlbs.PFN0s[i] && addr <= cachedTlbs.PFN0s[i] + mask))
{
mask = ConvertPageMask(entry->PageMask.UL);
if ((addr >= entry->PFN0()) && (addr <= entry->PFN0() + mask))
{
//DevCon.Warning("Yay! Cache check cache addr=%x, mask=%x, addr+mask=%x, VPN2=%x PFN0=%x", addr, mask, (addr & mask), tlb[i].VPN2, tlb[i].PFN0);
return true;
}
return true;
}
}

return false;
}
// --------------------------------------------------------------------------------------
Expand Down

0 comments on commit fa9abbe

Please sign in to comment.