diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index e163156db6591..4ecc6b50b9d1c 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -450,11 +450,9 @@ set(pcsx2GSSourcesUnshared GS/Renderers/Common/GSVertexTraceFMM.cpp GS/Renderers/HW/GSRendererHWMultiISA.cpp GS/Renderers/SW/GSDrawScanline.cpp - GS/Renderers/SW/GSDrawScanlineCodeGenerator.cpp GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.cpp GS/Renderers/SW/GSRasterizer.cpp GS/Renderers/SW/GSRendererSW.cpp - GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp ) @@ -532,14 +530,12 @@ set(pcsx2GSHeaders GS/Renderers/HW/GSTextureCache.h GS/Renderers/HW/GSTextureReplacements.h GS/Renderers/HW/GSVertexHW.h - GS/Renderers/SW/GSDrawScanlineCodeGenerator.h GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.h GS/Renderers/SW/GSDrawScanline.h GS/Renderers/SW/GSNewCodeGenerator.h GS/Renderers/SW/GSRasterizer.h GS/Renderers/SW/GSRendererSW.h GS/Renderers/SW/GSScanlineEnvironment.h - GS/Renderers/SW/GSSetupPrimCodeGenerator.h GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h GS/Renderers/SW/GSTextureCacheSW.h GS/Renderers/SW/GSVertexSW.h diff --git a/pcsx2/GS/Renderers/Common/GSFunctionMap.h b/pcsx2/GS/Renderers/Common/GSFunctionMap.h index da07d72784a54..ea3d1e190b814 100644 --- a/pcsx2/GS/Renderers/Common/GSFunctionMap.h +++ b/pcsx2/GS/Renderers/Common/GSFunctionMap.h @@ -180,9 +180,12 @@ class GSCodeGeneratorFunctionMap : public GSFunctionMap } else { + HostSys::BeginCodeWrite(); + u8* code_ptr = GSCodeReserve::ReserveMemory(MAX_SIZE); CG cg(key, code_ptr, MAX_SIZE); - pxAssert(cg.getSize() < MAX_SIZE); + cg.Generate(); + pxAssert(cg.GetSize() < MAX_SIZE); #if 0 fprintf(stderr, "%s Location:%p Size:%zu Key:%llx\n", m_name.c_str(), code_ptr, cg.getSize(), (u64)key); @@ -190,9 +193,13 @@ class GSCodeGeneratorFunctionMap : public GSFunctionMap sel.Print(); #endif - GSCodeReserve::CommitMemory(cg.getSize()); + const u32 size = static_cast(cg.GetSize()); + GSCodeReserve::CommitMemory(size); + + HostSys::EndCodeWrite(); + HostSys::FlushInstructionCache(code_ptr, static_cast(size)); - ret = (VALUE)cg.getCode(); + ret = (VALUE)cg.GetCode(); m_cgmap[key] = ret; } diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp index efe8f3bc59965..c5b13a4ae631f 100644 --- a/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp +++ b/pcsx2/GS/Renderers/SW/GSDrawScanline.cpp @@ -8,6 +8,8 @@ #include "common/Console.h" +#include + // Comment to disable all dynamic code generation. #define ENABLE_JIT_RASTERIZER @@ -36,6 +38,64 @@ GSDrawScanline::~GSDrawScanline() DevCon.WriteLn("SW JIT generated %zu bytes of code", used); } +bool GSDrawScanline::ShouldUseCDrawScanline(u64 key) +{ + static std::map s_use_c_draw_scanline; + static std::mutex s_use_c_draw_scanline_mutex; + + static const char* const fname = getenv("USE_C_DRAW_SCANLINE"); + if (!fname) + return false; + + std::lock_guard l(s_use_c_draw_scanline_mutex); + + if (s_use_c_draw_scanline.empty()) + { + std::ifstream file(fname); + if (file) + { + for (std::string str; std::getline(file, str);) + { + u64 key; + char yn; + if (sscanf(str.c_str(), "%" PRIx64 " %c", &key, &yn) == 2) + { + if (yn != 'Y' && yn != 'N' && yn != 'y' && yn != 'n') + Console.Warning("Failed to parse %s: Not y/n", str.c_str()); + s_use_c_draw_scanline[key] = (yn == 'Y' || yn == 'y') ? true : false; + } + else + { + Console.Warning("Failed to process line %s", str.c_str()); + } + } + } + } + + auto idx = s_use_c_draw_scanline.find(key); + if (idx == s_use_c_draw_scanline.end()) + { + s_use_c_draw_scanline[key] = false; + // Rewrite file + FILE* file = fopen(fname, "w"); + if (file) + { + for (const auto& pair : s_use_c_draw_scanline) + { + fprintf(file, "%016" PRIX64 " %c %s\n", pair.first, pair.second ? 'Y' : 'N', GSScanlineSelector(pair.first).to_string().c_str()); + } + fclose(file); + } + else + { + Console.Warning("Failed to write C draw scanline usage config: %s", strerror(errno)); + } + return false; + } + + return idx->second; +} + void GSDrawScanline::BeginDraw(const GSRasterizerData& data, GSScanlineLocalData& local) { const GSScanlineGlobalData& global = data.global; diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanline.h b/pcsx2/GS/Renderers/SW/GSDrawScanline.h index 8fd866ef40901..c9331a633b5c0 100644 --- a/pcsx2/GS/Renderers/SW/GSDrawScanline.h +++ b/pcsx2/GS/Renderers/SW/GSDrawScanline.h @@ -4,8 +4,8 @@ #pragma once #include "GS/GSState.h" -#include "GS/Renderers/SW/GSSetupPrimCodeGenerator.h" -#include "GS/Renderers/SW/GSDrawScanlineCodeGenerator.h" +#include "GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h" +#include "GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.h" struct GSScanlineLocalData; @@ -13,9 +13,6 @@ MULTI_ISA_UNSHARED_START class GSRasterizerData; -class GSSetupPrimCodeGenerator; -class GSDrawScanlineCodeGenerator; - class GSDrawScanline : public GSVirtualAlignedClass<32> { friend GSSetupPrimCodeGenerator; @@ -25,6 +22,9 @@ class GSDrawScanline : public GSVirtualAlignedClass<32> GSDrawScanline(); ~GSDrawScanline() override; + /// Debug override for disabling scanline JIT on a key basis. + static bool ShouldUseCDrawScanline(u64 key); + /// Function pointer types which we call back into. using SetupPrimPtr = void(*)(const GSVertexSW* vertex, const u16* index, const GSVertexSW& dscan, GSScanlineLocalData& local); using DrawScanlinePtr = void(*)(int pixels, int left, int top, const GSVertexSW& scan, GSScanlineLocalData& local); diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.cpp b/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.cpp index 1bf788d164f8a..61769ae2ee3f7 100644 --- a/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.cpp +++ b/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.cpp @@ -1,8 +1,9 @@ -// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team // SPDX-License-Identifier: LGPL-3.0+ #include "GSDrawScanlineCodeGenerator.all.h" #include "GS/Renderers/Common/GSFunctionMap.h" +#include "GS/Renderers/SW/GSDrawScanline.h" #include "GSVertexSW.h" #include "common/Perf.h" @@ -67,8 +68,8 @@ using namespace Xbyak; #define _rip_local_d_p(x) _rip_local_d(x) #endif -GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, u64 key) - : _parent(base, cpu) +GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(u64 key, void* code, size_t maxsize) + : GSNewCodeGenerator(code, maxsize) #ifdef _WIN32 , a0(rcx), a1(rdx) , a2(r8) , a3(r9) @@ -98,7 +99,7 @@ GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* // MARK: - Helpers -void GSDrawScanlineCodeGenerator2::broadcastf128(const XYm& reg, const Address& mem) +void GSDrawScanlineCodeGenerator::broadcastf128(const XYm& reg, const Address& mem) { #if USING_YMM vbroadcastf128(reg, mem); @@ -107,7 +108,7 @@ void GSDrawScanlineCodeGenerator2::broadcastf128(const XYm& reg, const Address& #endif } -void GSDrawScanlineCodeGenerator2::broadcasti128(const XYm& reg, const Address& mem) +void GSDrawScanlineCodeGenerator::broadcasti128(const XYm& reg, const Address& mem) { #if USING_YMM vbroadcasti128(reg, mem); @@ -116,7 +117,7 @@ void GSDrawScanlineCodeGenerator2::broadcasti128(const XYm& reg, const Address& #endif } -void GSDrawScanlineCodeGenerator2::broadcastssLocal(const XYm& reg, const Address& mem) +void GSDrawScanlineCodeGenerator::broadcastssLocal(const XYm& reg, const Address& mem) { #if USING_YMM vbroadcastss(reg, mem); @@ -125,7 +126,7 @@ void GSDrawScanlineCodeGenerator2::broadcastssLocal(const XYm& reg, const Addres #endif } -void GSDrawScanlineCodeGenerator2::pbroadcastqLocal(const XYm& reg, const Address& mem) +void GSDrawScanlineCodeGenerator::pbroadcastqLocal(const XYm& reg, const Address& mem) { #if USING_YMM vpbroadcastq(reg, mem); @@ -134,7 +135,7 @@ void GSDrawScanlineCodeGenerator2::pbroadcastqLocal(const XYm& reg, const Addres #endif } -void GSDrawScanlineCodeGenerator2::pbroadcastdLocal(const XYm& reg, const Address& mem) +void GSDrawScanlineCodeGenerator::pbroadcastdLocal(const XYm& reg, const Address& mem) { #if USING_YMM vpbroadcastd(reg, mem); @@ -143,7 +144,7 @@ void GSDrawScanlineCodeGenerator2::pbroadcastdLocal(const XYm& reg, const Addres #endif } -void GSDrawScanlineCodeGenerator2::pbroadcastwLocal(const XYm& reg, const Address& mem) +void GSDrawScanlineCodeGenerator::pbroadcastwLocal(const XYm& reg, const Address& mem) { #if USING_YMM vpbroadcastw(reg, mem); @@ -152,7 +153,7 @@ void GSDrawScanlineCodeGenerator2::pbroadcastwLocal(const XYm& reg, const Addres #endif } -void GSDrawScanlineCodeGenerator2::broadcastsd(const XYm& reg, const Address& mem) +void GSDrawScanlineCodeGenerator::broadcastsd(const XYm& reg, const Address& mem) { #if USING_YMM vbroadcastsd(reg, mem); @@ -161,7 +162,7 @@ void GSDrawScanlineCodeGenerator2::broadcastsd(const XYm& reg, const Address& me #endif } -void GSDrawScanlineCodeGenerator2::broadcastGPRToVec(const XYm& vec, const Xbyak::Reg32& gpr) +void GSDrawScanlineCodeGenerator::broadcastGPRToVec(const XYm& vec, const Xbyak::Reg32& gpr) { movd(Xmm(vec.getIdx()), gpr); #if USING_YMM @@ -171,7 +172,7 @@ void GSDrawScanlineCodeGenerator2::broadcastGPRToVec(const XYm& vec, const Xbyak #endif } -void GSDrawScanlineCodeGenerator2::modulate16(const XYm& a, const Operand& f, u8 shift) +void GSDrawScanlineCodeGenerator::modulate16(const XYm& a, const Operand& f, u8 shift) { if (shift == 0) { @@ -184,14 +185,14 @@ void GSDrawScanlineCodeGenerator2::modulate16(const XYm& a, const Operand& f, u8 } } -void GSDrawScanlineCodeGenerator2::lerp16(const XYm& a, const XYm& b, const XYm& f, u8 shift) +void GSDrawScanlineCodeGenerator::lerp16(const XYm& a, const XYm& b, const XYm& f, u8 shift) { psubw(a, b); modulate16(a, f, shift); paddw(a, b); } -void GSDrawScanlineCodeGenerator2::lerp16_4(const XYm& a, const XYm& b, const XYm& f) +void GSDrawScanlineCodeGenerator::lerp16_4(const XYm& a, const XYm& b, const XYm& f) { psubw(a, b); pmullw(a, f); @@ -199,12 +200,12 @@ void GSDrawScanlineCodeGenerator2::lerp16_4(const XYm& a, const XYm& b, const XY paddw(a, b); } -void GSDrawScanlineCodeGenerator2::mix16(const XYm& a, const XYm& b, const XYm& temp) +void GSDrawScanlineCodeGenerator::mix16(const XYm& a, const XYm& b, const XYm& temp) { pblendw(a, b, 0xaa); } -void GSDrawScanlineCodeGenerator2::clamp16(const XYm& a, const XYm& temp) +void GSDrawScanlineCodeGenerator::clamp16(const XYm& a, const XYm& temp) { if (isXmm) { @@ -219,7 +220,7 @@ void GSDrawScanlineCodeGenerator2::clamp16(const XYm& a, const XYm& temp) } } -void GSDrawScanlineCodeGenerator2::alltrue(const XYm& test) +void GSDrawScanlineCodeGenerator::alltrue(const XYm& test) { u32 mask = test.isYMM() ? 0xffffffff : 0xffff; pmovmskb(eax, test); @@ -227,7 +228,7 @@ void GSDrawScanlineCodeGenerator2::alltrue(const XYm& test) je("step", Xbyak::CodeGenerator::T_NEAR); } -void GSDrawScanlineCodeGenerator2::blend(const XYm& a, const XYm& b, const XYm& mask) +void GSDrawScanlineCodeGenerator::blend(const XYm& a, const XYm& b, const XYm& mask) { pand(b, mask); pandn(mask, a); @@ -242,19 +243,19 @@ void GSDrawScanlineCodeGenerator2::blend(const XYm& a, const XYm& b, const XYm& } } -void GSDrawScanlineCodeGenerator2::blendr(const XYm& b, const XYm& a, const XYm& mask) +void GSDrawScanlineCodeGenerator::blendr(const XYm& b, const XYm& a, const XYm& mask) { pand(b, mask); pandn(mask, a); por(b, mask); } -void GSDrawScanlineCodeGenerator2::blend8(const XYm& a, const XYm& b) +void GSDrawScanlineCodeGenerator::blend8(const XYm& a, const XYm& b) { pblendvb(a, b /*, xym0 */); } -void GSDrawScanlineCodeGenerator2::blend8r(const XYm& b, const XYm& a) +void GSDrawScanlineCodeGenerator::blend8r(const XYm& b, const XYm& a) { if (hasAVX) { @@ -267,7 +268,7 @@ void GSDrawScanlineCodeGenerator2::blend8r(const XYm& b, const XYm& a) } } -void GSDrawScanlineCodeGenerator2::split16_2x8(const XYm& l, const XYm& h, const XYm& src) +void GSDrawScanlineCodeGenerator::split16_2x8(const XYm& l, const XYm& h, const XYm& src) { // l = src & 0xFF; (1 left shift + 1 right shift) // h = (src >> 8) & 0xFF; (1 right shift) @@ -314,8 +315,18 @@ void GSDrawScanlineCodeGenerator2::split16_2x8(const XYm& l, const XYm& h, const // MARK: - Main Implementation -void GSDrawScanlineCodeGenerator2::Generate() +void GSDrawScanlineCodeGenerator::Generate() { + if (m_sel.breakpoint) + db(0xCC); + + if (GSDrawScanline::ShouldUseCDrawScanline(m_sel.key)) + { + jmp(reinterpret_cast(static_cast( + &GSDrawScanline::CDrawScanline))); + return; + } + const bool need_tex = m_sel.fb && m_sel.tfx != TFX_NONE; const bool need_clut = need_tex && m_sel.tlu; @@ -591,7 +602,7 @@ L("exit"); } /// Inputs: a0=pixels, a1=left, a2[x64]=top, a3[x64]=v -void GSDrawScanlineCodeGenerator2::Init() +void GSDrawScanlineCodeGenerator::Init() { if (!m_sel.notest) { @@ -872,7 +883,7 @@ void GSDrawScanlineCodeGenerator2::Init() /// Outputs[x86]: xym0=z xym2=s, xym3=t, xym4=q, xym5=rb, xym6=ga, xym7=test /// Destroys[x86]: all /// Destroys[x64]: xym0, xym1, xym2, xym3 -void GSDrawScanlineCodeGenerator2::Step() +void GSDrawScanlineCodeGenerator::Step() { // steps -= 4; @@ -1019,7 +1030,7 @@ void GSDrawScanlineCodeGenerator2::Step() /// Inputs: xym0[x86]=z, xym7[x64]=z0, t1=fza_base, t0=fza_offset, _test /// Outputs: t2=za /// Destroys: rax, xym0, temp1, temp2 -void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2) +void GSDrawScanlineCodeGenerator::TestZ(const XYm& temp1, const XYm& temp2) { if (!m_sel.zb) { @@ -1151,7 +1162,7 @@ void GSDrawScanlineCodeGenerator2::TestZ(const XYm& temp1, const XYm& temp2) /// Input[x86]: xym4=q, xym2=s, xym3=t /// Output: _rb, _ga /// Destroys everything except xym7[x86] -void GSDrawScanlineCodeGenerator2::SampleTexture() +void GSDrawScanlineCodeGenerator::SampleTexture() { if (!m_sel.fb || m_sel.tfx == TFX_NONE) { @@ -1256,7 +1267,7 @@ void GSDrawScanlineCodeGenerator2::SampleTexture() /// Input[x64]: xym2=uv0, xym3=uv1 (ltf), xym4=uf, xym7=vf (!needsMoreRegs) /// Output: _rb, _ga /// Destroys all registers except outputs, xmm4 and xmm7 -void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset) +void GSDrawScanlineCodeGenerator::SampleTexture_TexelReadHelper(int mip_offset) { const bool needsMoreRegs = isYmm; @@ -1421,7 +1432,7 @@ void GSDrawScanlineCodeGenerator2::SampleTexture_TexelReadHelper(int mip_offset) } } -void GSDrawScanlineCodeGenerator2::Wrap(const XYm& uv) +void GSDrawScanlineCodeGenerator::Wrap(const XYm& uv) { // Registers free from SampleTexture const XYm& mask = xym0; @@ -1480,7 +1491,7 @@ void GSDrawScanlineCodeGenerator2::Wrap(const XYm& uv) /// Destroys[x86]: xym0, xym1, xym2, xym3, xym4[!sse41] /// Destroys[x64]: xym0, xym1, xym5, xym6, xym7[!sse41] -void GSDrawScanlineCodeGenerator2::Wrap(const XYm& uv0, const XYm& uv1) +void GSDrawScanlineCodeGenerator::Wrap(const XYm& uv0, const XYm& uv1) { // Registers free from SampleTexture const XYm& mask = xym0; @@ -1552,7 +1563,7 @@ void GSDrawScanlineCodeGenerator2::Wrap(const XYm& uv0, const XYm& uv1) /// Input[x86]: xym4=q, xym2=s, xym3=t /// Output: _rb, _ga /// Destroys everything except xym7[x86] -void GSDrawScanlineCodeGenerator2::SampleTextureLOD() +void GSDrawScanlineCodeGenerator::SampleTextureLOD() { if (!m_sel.fb || m_sel.tfx == TFX_NONE) { @@ -1946,7 +1957,7 @@ void GSDrawScanlineCodeGenerator2::SampleTextureLOD() } } -void GSDrawScanlineCodeGenerator2::WrapLOD(const XYm& uv) +void GSDrawScanlineCodeGenerator::WrapLOD(const XYm& uv) { // Registers free from SampleTexture const XYm& mask = xym0; @@ -2001,7 +2012,7 @@ void GSDrawScanlineCodeGenerator2::WrapLOD(const XYm& uv) } } -void GSDrawScanlineCodeGenerator2::WrapLOD(const XYm& uv0, const XYm& uv1) +void GSDrawScanlineCodeGenerator::WrapLOD(const XYm& uv0, const XYm& uv1) { // Registers free from SampleTexture const XYm& mask = xym0; @@ -2067,7 +2078,7 @@ void GSDrawScanlineCodeGenerator2::WrapLOD(const XYm& uv0, const XYm& uv1) /// Input: _ga /// Output: xym2[x86]=gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc) /// Destroys: xym0, xym1, xym3[x86], xym4[x86] -void GSDrawScanlineCodeGenerator2::AlphaTFX() +void GSDrawScanlineCodeGenerator::AlphaTFX() { if (!m_sel.fb) { @@ -2207,7 +2218,7 @@ void GSDrawScanlineCodeGenerator2::AlphaTFX() } /// Output: _fm, _zm -void GSDrawScanlineCodeGenerator2::ReadMask() +void GSDrawScanlineCodeGenerator::ReadMask() { if (m_sel.fwrite) { @@ -2222,7 +2233,7 @@ void GSDrawScanlineCodeGenerator2::ReadMask() /// Input: _ga, _fm, _zm /// Destroys: xym0, xym1 -void GSDrawScanlineCodeGenerator2::TestAlpha() +void GSDrawScanlineCodeGenerator::TestAlpha() { switch (m_sel.atst) { @@ -2295,7 +2306,7 @@ void GSDrawScanlineCodeGenerator2::TestAlpha() /// Input: xym2[x86]=gaf, _rb, _ga /// Destroys: xym0, xym1, xym2 -void GSDrawScanlineCodeGenerator2::ColorTFX() +void GSDrawScanlineCodeGenerator::ColorTFX() { if (!m_sel.fwrite) { @@ -2372,7 +2383,7 @@ void GSDrawScanlineCodeGenerator2::ColorTFX() /// Input: _rb, _ga /// Destroys: xym0, xym1, xym2[x86] -void GSDrawScanlineCodeGenerator2::Fog() +void GSDrawScanlineCodeGenerator::Fog() { if (!m_sel.fwrite || !m_sel.fge) { @@ -2397,7 +2408,7 @@ void GSDrawScanlineCodeGenerator2::Fog() } /// Outputs: _fd, rbx=fa -void GSDrawScanlineCodeGenerator2::ReadFrame() +void GSDrawScanlineCodeGenerator::ReadFrame() { if (!m_sel.fb) { @@ -2418,7 +2429,7 @@ void GSDrawScanlineCodeGenerator2::ReadFrame() /// Input: _fd, _test /// Destroys: xym0, xym1 -void GSDrawScanlineCodeGenerator2::TestDestAlpha() +void GSDrawScanlineCodeGenerator::TestDestAlpha() { if (!m_sel.date || (m_sel.fpsm != 0 && m_sel.fpsm != 2)) { @@ -2465,7 +2476,7 @@ void GSDrawScanlineCodeGenerator2::TestDestAlpha() /// Input: _fm, _zm, _test /// Output: edx=fzm /// Destroys: xym0, xym1 -void GSDrawScanlineCodeGenerator2::WriteMask() +void GSDrawScanlineCodeGenerator::WriteMask() { if (m_sel.notest) { @@ -2513,7 +2524,7 @@ void GSDrawScanlineCodeGenerator2::WriteMask() /// Inputs: t2=za, edx=fzm, _zm /// Destroys: xym0, xym1, xym7 -void GSDrawScanlineCodeGenerator2::WriteZBuf() +void GSDrawScanlineCodeGenerator::WriteZBuf() { if (!m_sel.zwrite) { @@ -2552,7 +2563,7 @@ void GSDrawScanlineCodeGenerator2::WriteZBuf() /// Input: _fd, _rb, _ga /// Destroys: xym0, xym1, xym4, xym7[x86], xym15[x64] -void GSDrawScanlineCodeGenerator2::AlphaBlend() +void GSDrawScanlineCodeGenerator::AlphaBlend() { if (!m_sel.fwrite) { @@ -2820,7 +2831,7 @@ void GSDrawScanlineCodeGenerator2::AlphaBlend() /// Input: rbx=fa, rdx=fzm, _fd, _fm /// Destroys: rax, xym0, xym1, xym5, xym6, xym7[x86], xmm15[x64] -void GSDrawScanlineCodeGenerator2::WriteFrame() +void GSDrawScanlineCodeGenerator::WriteFrame() { if (!m_sel.fwrite) { @@ -2920,7 +2931,7 @@ void GSDrawScanlineCodeGenerator2::WriteFrame() } /// Destroys: tmp[isYmm] -void GSDrawScanlineCodeGenerator2::ReadPixel(const XYm& dst, const XYm& tmp, const AddressReg& addr) +void GSDrawScanlineCodeGenerator::ReadPixel(const XYm& dst, const XYm& tmp, const AddressReg& addr) { RegExp base = _m_local__gd__vm + addr * 2; #if USING_XMM @@ -2938,9 +2949,9 @@ void GSDrawScanlineCodeGenerator2::ReadPixel(const XYm& dst, const XYm& tmp, con } #if USING_XMM -void GSDrawScanlineCodeGenerator2::WritePixel(const XYm& src_, const AddressReg& addr, const Reg8& mask, bool fast, int psm, int fz) +void GSDrawScanlineCodeGenerator::WritePixel(const XYm& src_, const AddressReg& addr, const Reg8& mask, bool fast, int psm, int fz) #else -void GSDrawScanlineCodeGenerator2::WritePixel(const XYm& src_, const AddressReg& addr, const Reg32& mask, bool fast, int psm, int fz) +void GSDrawScanlineCodeGenerator::WritePixel(const XYm& src_, const AddressReg& addr, const Reg32& mask, bool fast, int psm, int fz) #endif { #if USING_XMM @@ -3065,7 +3076,7 @@ void GSDrawScanlineCodeGenerator2::WritePixel(const XYm& src_, const AddressReg& } } -void GSDrawScanlineCodeGenerator2::WritePixel(const Xmm& src, const AddressReg& addr, u8 i, u8 j, int psm) +void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const AddressReg& addr, u8 i, u8 j, int psm) { constexpr int s_offsets[8] = {0, 2, 8, 10, 16, 18, 24, 26}; @@ -3104,7 +3115,7 @@ void GSDrawScanlineCodeGenerator2::WritePixel(const Xmm& src, const AddressReg& /// rdx = m_local.clut (x86 && m_sel.tlu) /// Destroys: rax, src, tmp1, tmp2 /// Destroys rbx (!use_lod) -void GSDrawScanlineCodeGenerator2::ReadTexel1(const XYm& dst, const XYm& src, const XYm& tmp1, const XYm& tmp2, int mip_offset) +void GSDrawScanlineCodeGenerator::ReadTexel1(const XYm& dst, const XYm& src, const XYm& tmp1, const XYm& tmp2, int mip_offset) { const XYm no(-1); // Hopefully this will assert if we accidentally use it ReadTexelImpl(dst, tmp1, src, no, no, no, tmp2, no, 1, mip_offset); @@ -3120,7 +3131,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexel1(const XYm& dst, const XYm& src, co /// rdx = m_local.clut (x86 && m_sel.tlu) /// Destroys: rax /// Destroys rbx (!use_lod) -void GSDrawScanlineCodeGenerator2::ReadTexel4( +void GSDrawScanlineCodeGenerator::ReadTexel4( const XYm& d0, const XYm& d1, const XYm& d2s0, const XYm& d3s1, const XYm& s2, const XYm& s3, @@ -3130,7 +3141,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexel4( ReadTexelImpl(d0, d1, d2s0, d3s1, s2, s3, tmp1, tmp2, 4, mip_offset); } -void GSDrawScanlineCodeGenerator2::ReadTexelImpl( +void GSDrawScanlineCodeGenerator::ReadTexelImpl( const XYm& d0, const XYm& d1, const XYm& d2s0, const XYm& d3s1, const XYm& s2, const XYm& s3, @@ -3145,7 +3156,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImpl( #endif } -void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offset) +void GSDrawScanlineCodeGenerator::ReadTexelImplLoadTexLOD(int lod, int mip_offset) { AddressReg texIn = _m_local__gd__tex; Address lod_addr = m_sel.lcm ? _rip_global_offset(lod.i.U32[0], sizeof(u32) * lod) : _rip_local_offset(temp.lod.i.U32[0], sizeof(u32) * lod); @@ -3153,7 +3164,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImplLoadTexLOD(int lod, int mip_offs mov(rbx, ptr[texIn + rbx * wordsize + mip_offset]); } -void GSDrawScanlineCodeGenerator2::ReadTexelImplYmm( +void GSDrawScanlineCodeGenerator::ReadTexelImplYmm( const Ymm& d0, const Ymm& d1, const Ymm& d2s0, const Ymm& d3s1, const Ymm& s2, const Ymm& s3, @@ -3231,7 +3242,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImplYmm( } } -void GSDrawScanlineCodeGenerator2::ReadTexelImplSSE4( +void GSDrawScanlineCodeGenerator::ReadTexelImplSSE4( const Xmm& d0, const Xmm& d1, const Xmm& d2s0, const Xmm& d3s1, const Xmm& s2, const Xmm& s3, @@ -3275,7 +3286,7 @@ void GSDrawScanlineCodeGenerator2::ReadTexelImplSSE4( } } -void GSDrawScanlineCodeGenerator2::ReadTexelImpl(const Xmm& dst, const Xmm& addr, u8 i, bool texInRBX, bool preserveDst) +void GSDrawScanlineCodeGenerator::ReadTexelImpl(const Xmm& dst, const Xmm& addr, u8 i, bool texInRBX, bool preserveDst) { pxAssert(i < 4); diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.h b/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.h index 3180865260263..46ab6e36d2be1 100644 --- a/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.h +++ b/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.h @@ -21,9 +21,8 @@ MULTI_ISA_UNSHARED_START -class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator +class GSDrawScanlineCodeGenerator : public GSNewCodeGenerator { - using _parent = GSNewCodeGenerator; using XYm = DRAW_SCANLINE_VECTOR_REGISTER; constexpr static bool isXmm = std::is_same::value; @@ -72,7 +71,7 @@ class GSDrawScanlineCodeGenerator2 : public GSNewCodeGenerator const XYm _z, _f, _s, _t, _q, _f_rb, _f_ga; public: - GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, u64 key); + GSDrawScanlineCodeGenerator(u64 key, void* code, size_t maxsize); void Generate(); private: diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.cpp b/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.cpp deleted file mode 100644 index f1384d90347bb..0000000000000 --- a/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team -// SPDX-License-Identifier: LGPL-3.0+ - -#include "GSDrawScanlineCodeGenerator.h" -#include "GSDrawScanlineCodeGenerator.all.h" -#include "GSDrawScanline.h" - -#include "common/Console.h" - -#include -#include -#include - -MULTI_ISA_UNSHARED_IMPL; - -static bool shouldUseCDrawScanline(u64 key) -{ - static std::map s_use_c_draw_scanline; - static std::mutex s_use_c_draw_scanline_mutex; - - static const char* const fname = getenv("USE_C_DRAW_SCANLINE"); - if (!fname) - return false; - - std::lock_guard l(s_use_c_draw_scanline_mutex); - - if (s_use_c_draw_scanline.empty()) - { - std::ifstream file(fname); - if (file) - { - for (std::string str; std::getline(file, str);) - { - u64 key; - char yn; - if (sscanf(str.c_str(), "%" PRIx64 " %c", &key, &yn) == 2) - { - if (yn != 'Y' && yn != 'N' && yn != 'y' && yn != 'n') - Console.Warning("Failed to parse %s: Not y/n", str.c_str()); - s_use_c_draw_scanline[key] = (yn == 'Y' || yn == 'y') ? true : false; - } - else - { - Console.Warning("Failed to process line %s", str.c_str()); - } - } - } - } - - auto idx = s_use_c_draw_scanline.find(key); - if (idx == s_use_c_draw_scanline.end()) - { - s_use_c_draw_scanline[key] = false; - // Rewrite file - FILE* file = fopen(fname, "w"); - if (file) - { - for (const auto& pair : s_use_c_draw_scanline) - { - fprintf(file, "%016" PRIX64 " %c %s\n", pair.first, pair.second ? 'Y' : 'N', GSScanlineSelector(pair.first).to_string().c_str()); - } - fclose(file); - } - else - { - Console.Warning("Failed to write C draw scanline usage config: %s", strerror(errno)); - } - return false; - } - - return idx->second; -} - -GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(u64 key, void* code, size_t maxsize) - : Xbyak::CodeGenerator(maxsize, code) -{ - m_sel.key = key; - - if (m_sel.breakpoint) - db(0xCC); - - if (shouldUseCDrawScanline(key)) - { - jmp(reinterpret_cast(static_cast( - &GSDrawScanline::CDrawScanline))); - return; - } - - GSDrawScanlineCodeGenerator2(this, g_cpu, m_sel.key).Generate(); -} diff --git a/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.h b/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.h deleted file mode 100644 index 8a969183e7d6a..0000000000000 --- a/pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.h +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team -// SPDX-License-Identifier: LGPL-3.0+ - -#pragma once - -#include "GS/Renderers/SW/GSScanlineEnvironment.h" -#include "GS/Renderers/SW/GSNewCodeGenerator.h" -#include "GS/GSUtil.h" -#include "GS/MultiISA.h" - -MULTI_ISA_UNSHARED_START - -class GSDrawScanlineCodeGenerator : public Xbyak::CodeGenerator -{ - GSDrawScanlineCodeGenerator(const GSDrawScanlineCodeGenerator&) = delete; - void operator=(const GSDrawScanlineCodeGenerator&) = delete; - - GSScanlineSelector m_sel; - -public: - GSDrawScanlineCodeGenerator(u64 key, void* code, size_t maxsize); -}; - -MULTI_ISA_UNSHARED_END diff --git a/pcsx2/GS/Renderers/SW/GSNewCodeGenerator.h b/pcsx2/GS/Renderers/SW/GSNewCodeGenerator.h index 66a1f03374584..1ff7563871d26 100644 --- a/pcsx2/GS/Renderers/SW/GSNewCodeGenerator.h +++ b/pcsx2/GS/Renderers/SW/GSNewCodeGenerator.h @@ -3,6 +3,9 @@ #pragma once +#include "GS/MultiISA.h" +#include "common/Assertions.h" + // Xbyak pulls in windows.h, and breaks everything. #ifdef _WIN32 #include "common/RedtapeWindows.h" @@ -13,8 +16,6 @@ #include "xbyak/xbyak.h" #include "xbyak/xbyak_util.h" -#include "GS/MultiISA.h" -#include "common/Assertions.h" /// Code generator that automatically selects between SSE and AVX, x86 and x64 so you don't have to /// Should make combined SSE and AVX codegen much easier @@ -41,7 +42,7 @@ class GSNewCodeGenerator } public: - Xbyak::CodeGenerator& actual; + Xbyak::CodeGenerator actual; using AddressReg = Xbyak::Reg64; using RipType = Xbyak::RegRip; @@ -58,14 +59,17 @@ class GSNewCodeGenerator const RipType rip{}; const Xbyak::AddressFrame ptr{0}, byte{8}, word{16}, dword{32}, qword{64}, xword{128}, yword{256}, zword{512}; - GSNewCodeGenerator(Xbyak::CodeGenerator* actual, const ProcessorFeatures& cpu) - : actual(*actual) - , hasAVX(cpu.vectorISA >= ProcessorFeatures::VectorISA::AVX) - , hasAVX2(cpu.vectorISA >= ProcessorFeatures::VectorISA::AVX2) - , hasFMA(cpu.hasFMA) + GSNewCodeGenerator(void* code, size_t maxsize) + : actual(maxsize, code) + , hasAVX(g_cpu.vectorISA >= ProcessorFeatures::VectorISA::AVX) + , hasAVX2(g_cpu.vectorISA >= ProcessorFeatures::VectorISA::AVX2) + , hasFMA(g_cpu.hasFMA) { } + size_t GetSize() const { return actual.getSize(); } + const u8* GetCode() const { return actual.getCode(); } + // ------------ Forwarding instructions ------------ // Note: Only instructions used by codegen were added here, so if you're modifying codegen, you may need to add instructions here diff --git a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp index bdd4742df6a05..0a982f803e596 100644 --- a/pcsx2/GS/Renderers/SW/GSRendererSW.cpp +++ b/pcsx2/GS/Renderers/SW/GSRendererSW.cpp @@ -4,6 +4,7 @@ #include "GS/Renderers/SW/GSRendererSW.h" #include "GS/GSGL.h" #include "GS/GSPng.h" +#include "GS/GSUtil.h" #include "common/StringUtil.h" diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp index 4124b503bb15b..aa85045562ce2 100644 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp +++ b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team // SPDX-License-Identifier: LGPL-3.0+ #include "GSSetupPrimCodeGenerator.all.h" @@ -37,8 +37,8 @@ using namespace Xbyak; #define _rip_local_d_p(x) _rip_local_d(x) #endif -GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, u64 key) - : _parent(base, cpu) +GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(u64 key, void* code, size_t maxsize) + : GSNewCodeGenerator(code, maxsize) , many_regs(false) // On x86 arg registers are very temporary but on x64 they aren't, so on x86 some registers overlap #ifdef _WIN32 @@ -61,7 +61,7 @@ GSSetupPrimCodeGenerator2::GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0; } -void GSSetupPrimCodeGenerator2::broadcastf128(const XYm& reg, const Address& mem) +void GSSetupPrimCodeGenerator::broadcastf128(const XYm& reg, const Address& mem) { #if SETUP_PRIM_USING_YMM vbroadcastf128(reg, mem); @@ -70,7 +70,7 @@ void GSSetupPrimCodeGenerator2::broadcastf128(const XYm& reg, const Address& mem #endif } -void GSSetupPrimCodeGenerator2::broadcastss(const XYm& reg, const Address& mem) +void GSSetupPrimCodeGenerator::broadcastss(const XYm& reg, const Address& mem) { if (hasAVX) { @@ -83,7 +83,7 @@ void GSSetupPrimCodeGenerator2::broadcastss(const XYm& reg, const Address& mem) } } -void GSSetupPrimCodeGenerator2::Generate() +void GSSetupPrimCodeGenerator::Generate() { bool needs_shift = ((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS) || m_en.t || (m_en.c && m_sel.iip); many_regs = isYmm && !m_sel.notest && needs_shift; @@ -140,7 +140,7 @@ void GSSetupPrimCodeGenerator2::Generate() Perf::any.RegisterKey(actual.getCode(), actual.getSize(), "GSSetupPrim_", m_sel.key); } -void GSSetupPrimCodeGenerator2::Depth_XMM() +void GSSetupPrimCodeGenerator::Depth_XMM() { if (!m_en.z && !m_en.f) { @@ -227,7 +227,7 @@ void GSSetupPrimCodeGenerator2::Depth_XMM() } } -void GSSetupPrimCodeGenerator2::Depth_YMM() +void GSSetupPrimCodeGenerator::Depth_YMM() { if (!m_en.z && !m_en.f) { @@ -313,7 +313,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM() } } -void GSSetupPrimCodeGenerator2::Texture() +void GSSetupPrimCodeGenerator::Texture() { if (!m_en.t) { @@ -385,7 +385,7 @@ void GSSetupPrimCodeGenerator2::Texture() } } -void GSSetupPrimCodeGenerator2::Color() +void GSSetupPrimCodeGenerator::Color() { if (!m_en.c) { diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h index 0c72d1c2f0119..b97a52e899792 100644 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h +++ b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.all.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team // SPDX-License-Identifier: LGPL-3.0+ #pragma once @@ -19,9 +19,8 @@ MULTI_ISA_UNSHARED_START -class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator +class GSSetupPrimCodeGenerator : public GSNewCodeGenerator { - using _parent = GSNewCodeGenerator; using XYm = SETUP_PRIM_VECTOR_REGISTER; using Xmm = Xbyak::Xmm; @@ -42,7 +41,7 @@ class GSSetupPrimCodeGenerator2 : public GSNewCodeGenerator const AddressReg _64_vertex, _index, _dscan, _m_local, t1; public: - GSSetupPrimCodeGenerator2(Xbyak::CodeGenerator* base, const ProcessorFeatures& cpu, u64 key); + GSSetupPrimCodeGenerator(u64 key, void* code, size_t maxsize); void Generate(); private: diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp deleted file mode 100644 index d25739f5c4783..0000000000000 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team -// SPDX-License-Identifier: LGPL-3.0+ - -#include "GSSetupPrimCodeGenerator.h" -#include "GSSetupPrimCodeGenerator.all.h" - -MULTI_ISA_UNSHARED_IMPL; - -GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(u64 key, void* code, size_t maxsize) - : Xbyak::CodeGenerator(maxsize, code) -{ - m_sel.key = key; - - m_en.z = m_sel.zb ? 1 : 0; - m_en.f = m_sel.fb && m_sel.fge ? 1 : 0; - m_en.t = m_sel.fb && m_sel.tfx != TFX_NONE ? 1 : 0; - m_en.c = m_sel.fb && !(m_sel.tfx == TFX_DECAL && m_sel.tcc) ? 1 : 0; - - GSSetupPrimCodeGenerator2(this, g_cpu, key).Generate(); -} diff --git a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h b/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h deleted file mode 100644 index 49b0bdc1dd46d..0000000000000 --- a/pcsx2/GS/Renderers/SW/GSSetupPrimCodeGenerator.h +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team -// SPDX-License-Identifier: LGPL-3.0+ - -#pragma once - -#include "GS/Renderers/SW/GSScanlineEnvironment.h" -#include "GS/Renderers/SW/GSNewCodeGenerator.h" -#include "GS/GSUtil.h" -#include "GS/MultiISA.h" - -MULTI_ISA_UNSHARED_START - -class GSSetupPrimCodeGenerator : public Xbyak::CodeGenerator -{ - void operator=(const GSSetupPrimCodeGenerator&); - - GSScanlineSelector m_sel; - - struct - { - u32 z : 1, f : 1, t : 1, c : 1; - } m_en; - -public: - GSSetupPrimCodeGenerator(u64 key, void* code, size_t maxsize); -}; - -MULTI_ISA_UNSHARED_END diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 60279c4414ff4..584fb19b6578e 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -284,7 +284,6 @@ - @@ -301,7 +300,6 @@ - @@ -620,7 +618,6 @@ - @@ -637,7 +634,6 @@ - diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 11181437fff29..a380ff0dfdbb0 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -1046,18 +1046,12 @@ System\Ps2\GS\Renderers\Software - - System\Ps2\GS\Renderers\Software - System\Ps2\GS\Renderers\Software System\Ps2\GS\Renderers\Software - - System\Ps2\GS\Renderers\Software - System\Ps2\GS\Renderers\Software @@ -1952,9 +1946,6 @@ System\Ps2\GS\Renderers\Software - - System\Ps2\GS\Renderers\Software - System\Ps2\GS\Renderers\Software @@ -1967,9 +1958,6 @@ System\Ps2\GS\Renderers\Software - - System\Ps2\GS\Renderers\Software - System\Ps2\GS\Renderers\Software