diff --git a/pcsx2/COP0.cpp b/pcsx2/COP0.cpp index 60cb09371afab..fbd37682da3ff 100644 --- a/pcsx2/COP0.cpp +++ b/pcsx2/COP0.cpp @@ -230,49 +230,49 @@ void MapTLB(const tlbs& t, int i) u32 saddr, eaddr; COP0_LOG("MAP TLB %d: 0x%08X-> [0x%08X 0x%08X] S=%d G=%d ASID=%d Mask=0x%03X EntryLo0 PFN=%x EntryLo0 Cache=%x EntryLo1 PFN=%x EntryLo1 Cache=%x VPN2=%x", - i, t.VPN2, t.PFN0, t.PFN1, t.S >> 31, t.G, t.ASID, - t.Mask, t.EntryLo0 >> 6, (t.EntryLo0 & 0x38) >> 3, t.EntryLo1 >> 6, (t.EntryLo1 & 0x38) >> 3, t.VPN2); + i, t.VPN2(), t.PFN0(), t.PFN1(), t.isSPR() >> 31, t.isGlobal(), t.EntryHi.ASID, + t.Mask(), t.EntryLo0.PFN, t.EntryLo0.C, t.EntryLo1.PFN, t.EntryLo1.C, t.VPN2()); // According to the manual // 'It [SPR] must be mapped into a contiguous 16 KB of virtual address space that is // aligned on a 16KB boundary.Results are not guaranteed if this restriction is not followed.' // Assume that the game isn't doing anything less-than-ideal with the scratchpad mapping and map it directly to eeMem->Scratch. - if (t.S) + if (t.isSPR()) { - if (t.VPN2 != 0x70000000) - Console.Warning("COP0: Mapping Scratchpad to non-default address 0x%08X", t.VPN2); + if (t.VPN2() != 0x70000000) + Console.Warning("COP0: Mapping Scratchpad to non-default address 0x%08X", t.VPN2()); - vtlb_VMapBuffer(t.VPN2, eeMem->Scratch, Ps2MemSize::Scratch); + vtlb_VMapBuffer(t.VPN2(), eeMem->Scratch, Ps2MemSize::Scratch); } else { - if (t.EntryLo0 & 0x2) + if (t.EntryLo0.V) { - mask = ((~t.Mask) << 1) & 0xfffff; - saddr = t.VPN2 >> 12; - eaddr = saddr + t.Mask + 1; + mask = ((~t.Mask()) << 1) & 0xfffff; + saddr = t.VPN2() >> 12; + eaddr = saddr + t.Mask() + 1; for (addr = saddr; addr < eaddr; addr++) { - if ((addr & mask) == ((t.VPN2 >> 12) & mask)) + if ((addr & mask) == ((t.VPN2() >> 12) & mask)) { //match - memSetPageAddr(addr << 12, t.PFN0 + ((addr - saddr) << 12)); + memSetPageAddr(addr << 12, t.PFN0() + ((addr - saddr) << 12)); Cpu->Clear(addr << 12, 0x400); } } } - if (t.EntryLo1 & 0x2) + if (t.EntryLo1.V) { - mask = ((~t.Mask) << 1) & 0xfffff; - saddr = (t.VPN2 >> 12) + t.Mask + 1; - eaddr = saddr + t.Mask + 1; + mask = ((~t.Mask()) << 1) & 0xfffff; + saddr = (t.VPN2() >> 12) + t.Mask() + 1; + eaddr = saddr + t.Mask() + 1; for (addr = saddr; addr < eaddr; addr++) { - if ((addr & mask) == ((t.VPN2 >> 12) & mask)) + if ((addr & mask) == ((t.VPN2() >> 12) & mask)) { //match - memSetPageAddr(addr << 12, t.PFN1 + ((addr - saddr) << 12)); + memSetPageAddr(addr << 12, t.PFN1() + ((addr - saddr) << 12)); Cpu->Clear(addr << 12, 0x400); } } @@ -280,27 +280,36 @@ void MapTLB(const tlbs& t, int i) } } +__inline u32 ConvertPageMask(const u32 PageMask) +{ + const u32 mask = std::popcount(PageMask >> 13); + + pxAssertMsg(!((mask & 1) || mask > 12), "Invalid page mask for this TLB entry. EE cache doesn't know what to do here."); + + return (1 << (12 + mask)) - 1; +} + void UnmapTLB(const tlbs& t, int i) { //Console.WriteLn("Clear TLB %d: %08x-> [%08x %08x] S=%d G=%d ASID=%d Mask= %03X", i,t.VPN2,t.PFN0,t.PFN1,t.S,t.G,t.ASID,t.Mask); u32 mask, addr; u32 saddr, eaddr; - if (t.S) + if (t.isSPR()) { - vtlb_VMapUnmap(t.VPN2, 0x4000); + vtlb_VMapUnmap(t.VPN2(), 0x4000); return; } - if (t.EntryLo0 & 0x2) + if (t.EntryLo0.V) { - mask = ((~t.Mask) << 1) & 0xfffff; - saddr = t.VPN2 >> 12; - eaddr = saddr + t.Mask + 1; + mask = ((~t.Mask()) << 1) & 0xfffff; + saddr = t.VPN2() >> 12; + eaddr = saddr + t.Mask() + 1; // Console.WriteLn("Clear TLB: %08x ~ %08x",saddr,eaddr-1); for (addr = saddr; addr < eaddr; addr++) { - if ((addr & mask) == ((t.VPN2 >> 12) & mask)) + if ((addr & mask) == ((t.VPN2() >> 12) & mask)) { //match memClearPageAddr(addr << 12); Cpu->Clear(addr << 12, 0x400); @@ -308,38 +317,58 @@ void UnmapTLB(const tlbs& t, int i) } } - if (t.EntryLo1 & 0x2) + if (t.EntryLo1.V) { - mask = ((~t.Mask) << 1) & 0xfffff; - saddr = (t.VPN2 >> 12) + t.Mask + 1; - eaddr = saddr + t.Mask + 1; + mask = ((~t.Mask()) << 1) & 0xfffff; + saddr = (t.VPN2() >> 12) + t.Mask() + 1; + eaddr = saddr + t.Mask() + 1; // Console.WriteLn("Clear TLB: %08x ~ %08x",saddr,eaddr-1); for (addr = saddr; addr < eaddr; addr++) { - if ((addr & mask) == ((t.VPN2 >> 12) & mask)) + if ((addr & mask) == ((t.VPN2() >> 12) & mask)) { //match memClearPageAddr(addr << 12); Cpu->Clear(addr << 12, 0x400); } } } + + for (size_t i = 0; i < cachedTlbs.count; i++) + { + if (cachedTlbs.PFN0s[i] == t.PFN0() && cachedTlbs.PFN1s[i] == t.PFN1() && cachedTlbs.PageMasks[i] == ConvertPageMask(t.PageMask.UL)) + { + for (size_t j = i; j < cachedTlbs.count - 1; j++) + { + cachedTlbs.CacheEnabled0[j] = cachedTlbs.CacheEnabled0[j + 1]; + cachedTlbs.CacheEnabled1[j] = cachedTlbs.CacheEnabled1[j + 1]; + cachedTlbs.PFN0s[j] = cachedTlbs.PFN0s[j + 1]; + cachedTlbs.PFN1s[j] = cachedTlbs.PFN1s[j + 1]; + cachedTlbs.PageMasks[j] = cachedTlbs.PageMasks[j + 1]; + } + cachedTlbs.count--; + break; + } + } } void WriteTLB(int i) { - tlb[i].PageMask = cpuRegs.CP0.n.PageMask; - tlb[i].EntryHi = cpuRegs.CP0.n.EntryHi; - tlb[i].EntryLo0 = cpuRegs.CP0.n.EntryLo0; - tlb[i].EntryLo1 = cpuRegs.CP0.n.EntryLo1; - - tlb[i].Mask = (cpuRegs.CP0.n.PageMask >> 13) & 0xfff; - tlb[i].nMask = (~tlb[i].Mask) & 0xfff; - tlb[i].VPN2 = ((cpuRegs.CP0.n.EntryHi >> 13) & (~tlb[i].Mask)) << 13; - tlb[i].ASID = cpuRegs.CP0.n.EntryHi & 0xfff; - tlb[i].G = cpuRegs.CP0.n.EntryLo0 & cpuRegs.CP0.n.EntryLo1 & 0x1; - tlb[i].PFN0 = (((cpuRegs.CP0.n.EntryLo0 >> 6) & 0xFFFFF) & (~tlb[i].Mask)) << 12; - tlb[i].PFN1 = (((cpuRegs.CP0.n.EntryLo1 >> 6) & 0xFFFFF) & (~tlb[i].Mask)) << 12; - tlb[i].S = cpuRegs.CP0.n.EntryLo0 & 0x80000000; + tlb[i].PageMask.UL = cpuRegs.CP0.n.PageMask; + tlb[i].EntryHi.UL = cpuRegs.CP0.n.EntryHi; + tlb[i].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0; + tlb[i].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1; + + if (!tlb[i].isSPR() && ((tlb[i].EntryLo0.V && tlb[i].EntryLo0.isCached()) || (tlb[i].EntryLo1.V && tlb[i].EntryLo1.isCached()))) + { + const size_t idx = cachedTlbs.count; + cachedTlbs.CacheEnabled0[idx] = tlb[i].EntryLo0.isCached() ? ~0 : 0; + cachedTlbs.CacheEnabled1[idx] = tlb[i].EntryLo1.isCached() ? ~0 : 0; + cachedTlbs.PFN1s[idx] = tlb[i].PFN1(); + cachedTlbs.PFN0s[idx] = tlb[i].PFN0(); + cachedTlbs.PageMasks[idx] = ConvertPageMask(tlb[i].PageMask.UL); + + cachedTlbs.count++; + } MapTLB(tlb[i], i); } @@ -357,10 +386,16 @@ namespace COP0 { int i = cpuRegs.CP0.n.Index & 0x3f; - cpuRegs.CP0.n.PageMask = tlb[i].PageMask; - cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi & ~(tlb[i].PageMask | 0x1f00); - cpuRegs.CP0.n.EntryLo0 = (tlb[i].EntryLo0 & ~1) | ((tlb[i].EntryHi >> 12) & 1); - cpuRegs.CP0.n.EntryLo1 = (tlb[i].EntryLo1 & ~1) | ((tlb[i].EntryHi >> 12) & 1); + cpuRegs.CP0.n.PageMask = tlb[i].PageMask.UL; + cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi.UL & ~(tlb[i].PageMask.UL | 0x1f00); + /* + * TEST THIS?? + cpuRegs.CP0.n.EntryLo0 = (tlb[i].EntryLo0 & ~1) | ((tlb[i].EntryHi.UL >> 12) & 1); + cpuRegs.CP0.n.EntryLo1 = (tlb[i].EntryLo1 & ~1) | ((tlb[i].EntryHi.UL >> 12) & 1); + */ + cpuRegs.CP0.n.EntryLo0 = tlb[i].EntryLo0.UL; + cpuRegs.CP0.n.EntryLo1 = tlb[i].EntryLo1.UL; + } void TLBWI() @@ -374,10 +409,10 @@ namespace COP0 { cpuRegs.CP0.n.EntryLo0, cpuRegs.CP0.n.EntryLo1); UnmapTLB(tlb[j], j); - tlb[j].PageMask = cpuRegs.CP0.n.PageMask; - tlb[j].EntryHi = cpuRegs.CP0.n.EntryHi; - tlb[j].EntryLo0 = cpuRegs.CP0.n.EntryLo0; - tlb[j].EntryLo1 = cpuRegs.CP0.n.EntryLo1; + tlb[j].PageMask.UL = cpuRegs.CP0.n.PageMask; + tlb[j].EntryHi.UL = cpuRegs.CP0.n.EntryHi; + tlb[j].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0; + tlb[j].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1; WriteTLB(j); } @@ -394,10 +429,10 @@ namespace COP0 { //if (j > 48) return; UnmapTLB(tlb[j], j); - tlb[j].PageMask = cpuRegs.CP0.n.PageMask; - tlb[j].EntryHi = cpuRegs.CP0.n.EntryHi; - tlb[j].EntryLo0 = cpuRegs.CP0.n.EntryLo0; - tlb[j].EntryLo1 = cpuRegs.CP0.n.EntryLo1; + tlb[j].PageMask.UL = cpuRegs.CP0.n.PageMask; + tlb[j].EntryHi.UL = cpuRegs.CP0.n.EntryHi; + tlb[j].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0; + tlb[j].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1; WriteTLB(j); } @@ -422,7 +457,7 @@ namespace COP0 { cpuRegs.CP0.n.Index = 0xFFFFFFFF; for (i = 0; i < 48; i++) { - if (tlb[i].VPN2 == ((~tlb[i].Mask) & (EntryHi32.s.VPN2)) && ((tlb[i].G & 1) || ((tlb[i].ASID & 0xff) == EntryHi32.s.ASID))) + if (tlb[i].VPN2() == ((~tlb[i].Mask()) & (EntryHi32.s.VPN2)) && ((tlb[i].isGlobal()) || ((tlb[i].EntryHi.ASID & 0xff) == EntryHi32.s.ASID))) { cpuRegs.CP0.n.Index = i; break; diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index da3e80f055e76..3c7381273f439 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -36,6 +36,8 @@ u32 EEoCycle; alignas(16) cpuRegistersPack _cpuRegistersPack; alignas(16) tlbs tlb[48]; +cachedTlbs_t cachedTlbs; + R5900cpu *Cpu = NULL; static constexpr uint eeWaitCycles = 3072; @@ -59,6 +61,7 @@ void cpuReset() std::memset(&cpuRegs, 0, sizeof(cpuRegs)); std::memset(&fpuRegs, 0, sizeof(fpuRegs)); std::memset(&tlb, 0, sizeof(tlb)); + cachedTlbs.count = 0; cpuRegs.pc = 0xbfc00000; //set pc reg to stack cpuRegs.CP0.n.Config = 0x440; diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index 164f3171aee5b..ad0310dda6b4e 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -5,6 +5,8 @@ #include "common/Pcsx2Defs.h" +#include + // -------------------------------------------------------------------------------------- // EE Bios function name tables. // -------------------------------------------------------------------------------------- @@ -160,17 +162,69 @@ struct fpuRegisters { u32 ACCflag; // an internal accumulator overflow flag }; +union PageMask_t +{ + struct + { + u32 : 13; + u32 Mask : 12; + u32 : 7; + }; + u32 UL; + + constexpr u32 nMask() const { return ~Mask & 0xfff; }; +}; + +union EntryHi_t +{ + struct + { + u32 ASID:8; + u32 : 5; + u32 VPN2:19; + }; + u32 UL; +}; + +union EntryLo_t +{ + struct + { + u32 G:1; + u32 V:1; + u32 D:1; + u32 C:3; + u32 PFN:20; + u32 : 5; + u32 S : 1; // Only used in EntryLo0 + }; + u32 UL; + + constexpr bool isCached() const { return C == 0x3; } +}; + struct tlbs { - u32 PageMask,EntryHi; - u32 EntryLo0,EntryLo1; - u32 Mask, nMask; - u32 G; - u32 ASID; - u32 VPN2; - u32 PFN0; - u32 PFN1; - u32 S; + PageMask_t PageMask; + EntryHi_t EntryHi; + EntryLo_t EntryLo0; + EntryLo_t EntryLo1; + + // (((cpuRegs.CP0.n.EntryLo0 >> 6) & 0xFFFFF) & (~tlb[i].Mask())) << 12; + constexpr u32 PFN0() const { return (EntryLo0.PFN & ~Mask()) << 12; } + constexpr u32 PFN1() const { return (EntryLo1.PFN & ~Mask()) << 12; } + constexpr u32 VPN2() const {return ((EntryHi.VPN2) & (~Mask())) << 13; } + constexpr u32 Mask() const { return PageMask.Mask; } + constexpr bool isGlobal() const { return EntryLo0.G && EntryLo1.G; } + constexpr bool isSPR() const { return EntryLo0.S; } + + constexpr bool operator==(const tlbs& other) const + { + return PageMask.UL == other.PageMask.UL && + EntryHi.UL == other.EntryHi.UL && + EntryLo0.UL == other.EntryLo0.UL && + EntryLo1.UL == other.EntryLo1.UL; + } }; #ifndef _PC_ @@ -211,6 +265,19 @@ struct cpuRegistersPack alignas(16) extern cpuRegistersPack _cpuRegistersPack; alignas(16) extern tlbs tlb[48]; +struct cachedTlbs_t +{ + u32 count; + + alignas(16) std::array PageMasks; + alignas(16) std::array PFN1s; + alignas(16) std::array CacheEnabled1; + alignas(16) std::array PFN0s; + alignas(16) std::array CacheEnabled0; +}; + +extern cachedTlbs_t cachedTlbs; + static cpuRegisters& cpuRegs = _cpuRegistersPack.cpuRegs; static fpuRegisters& fpuRegs = _cpuRegistersPack.fpuRegs; diff --git a/pcsx2/vtlb.cpp b/pcsx2/vtlb.cpp index 6934639d4cc0a..63be8b69369d6 100644 --- a/pcsx2/vtlb.cpp +++ b/pcsx2/vtlb.cpp @@ -30,6 +30,7 @@ #include "fmt/core.h" #include +#include #include #include #include @@ -109,46 +110,77 @@ vtlb_private::VTLBVirtual::VTLBVirtual(VTLBPhysical phys, u32 paddr, u32 vaddr) } } -__inline int ConvertPageMask(u32 PageMask) -{ - const u32 mask = std::popcount(PageMask >> 13); - - pxAssertMsg(!((mask & 1) || mask > 12), "Invalid page mask for this TLB entry. EE cache doesn't know what to do here."); - - return (1 << (12 + mask)) - 1; -} - __inline int CheckCache(u32 addr) { - u32 mask; - + // Check if the cache is enabled if (((cpuRegs.CP0.n.Config >> 16) & 0x1) == 0) { - //DevCon.Warning("Data Cache Disabled! %x", cpuRegs.CP0.n.Config); - return false; // + return false; } - for (int i = 1; i < 48; i++) + const size_t size = cachedTlbs.count; + const int stride = 4; + + __m128i addr_vec = _mm_set1_epi32(addr); + + size_t i = 0; + + for (; i + stride <= size; i += stride) { - if (((tlb[i].EntryLo1 & 0x38) >> 3) == 0x3) + const __m128i pfn1_vec = _mm_loadu_si128(reinterpret_cast(&cachedTlbs.PFN1s[i])); + const __m128i pfn0_vec = _mm_loadu_si128(reinterpret_cast(&cachedTlbs.PFN0s[i])); + const __m128i mask_vec = _mm_loadu_si128(reinterpret_cast(&cachedTlbs.PageMasks[i])); + + const __m128i cached1_vec = _mm_loadu_si128(reinterpret_cast(&cachedTlbs.CacheEnabled1[i])); + const __m128i cached0_vec = _mm_loadu_si128(reinterpret_cast(&cachedTlbs.CacheEnabled0[i])); + + const __m128i pfn1_end_vec = _mm_add_epi32(pfn1_vec, mask_vec); + const __m128i pfn0_end_vec = _mm_add_epi32(pfn0_vec, mask_vec); + + // pfn0 <= addr + const __m128i gteLowerBound0 = _mm_or_si128( + _mm_cmpgt_epi32(addr_vec, pfn0_vec), + _mm_cmpeq_epi32(addr_vec, pfn0_vec)); + // pfn0 + mask >= addr + const __m128i gteUpperBound0 = _mm_or_si128( + _mm_cmpgt_epi32(pfn0_end_vec, addr_vec), + _mm_cmpeq_epi32(pfn0_end_vec, addr_vec)); + + // pfn1 <= addr + const __m128i gteUpperBound1 = _mm_or_si128( + _mm_cmpgt_epi32(pfn1_end_vec, addr_vec), + _mm_cmpeq_epi32(pfn1_end_vec, addr_vec)); + // pfn1 + mask >= addr + const __m128i gteLowerBound1 = _mm_or_si128( + _mm_cmpgt_epi32(addr_vec, pfn1_vec), + _mm_cmpeq_epi32(addr_vec, pfn1_vec)); + + // pfn0 <= addr <= pfn0 + mask + __m128i cmp0 = _mm_and_si128(gteLowerBound0, gteUpperBound0); + // pfn1 <= addr <= pfn1 + mask + __m128i cmp1 = _mm_and_si128(gteLowerBound1, gteUpperBound1); + + cmp1 = _mm_and_si128(cmp1, cached1_vec); + cmp0 = _mm_and_si128(cmp0, cached0_vec); + + const __m128i cmp = _mm_or_si128(cmp1, cmp0); + + if (!_mm_testz_si128(cmp, cmp)) { - mask = ConvertPageMask(tlb[i].PageMask); - if ((addr >= tlb[i].PFN1) && (addr <= tlb[i].PFN1 + mask)) - { - //DevCon.Warning("Yay! Cache check cache addr=%x, mask=%x, addr+mask=%x, VPN2=%x PFN0=%x", addr, mask, (addr & mask), tlb[i].VPN2, tlb[i].PFN0); - return true; - } + return true; } - if (((tlb[i].EntryLo0 & 0x38) >> 3) == 0x3) + } + + for (; i < size; i++) + { + const u32 mask = cachedTlbs.PageMasks[i]; + if ((cachedTlbs.CacheEnabled1[i] && addr >= cachedTlbs.PFN1s[i] && addr <= cachedTlbs.PFN1s[i] + mask) || + (cachedTlbs.CacheEnabled0[i] && addr >= cachedTlbs.PFN0s[i] && addr <= cachedTlbs.PFN0s[i] + mask)) { - mask = ConvertPageMask(tlb[i].PageMask); - if ((addr >= tlb[i].PFN0) && (addr <= tlb[i].PFN0 + mask)) - { - //DevCon.Warning("Yay! Cache check cache addr=%x, mask=%x, addr+mask=%x, VPN2=%x PFN0=%x", addr, mask, (addr & mask), tlb[i].VPN2, tlb[i].PFN0); - return true; - } + return true; } } + return false; } // --------------------------------------------------------------------------------------