From 4b88256df25612e371bf31ea5feb00c0f2133441 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Tue, 30 Jan 2024 15:03:16 +0000 Subject: [PATCH] GS/HW: Allow palette lookups from depth and deswizzle manual deswizzles --- bin/resources/shaders/dx11/tfx.fx | 4 +++ bin/resources/shaders/opengl/tfx_fs.glsl | 4 +-- bin/resources/shaders/vulkan/tfx.glsl | 4 +++ pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 32 +++++++++++++++++++++--- pcsx2/GS/Renderers/HW/GSRendererHW.h | 3 +++ pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 19 +++++++++++++- pcsx2/GS/Renderers/Metal/tfx.metal | 4 ++- pcsx2/ShaderCacheVersion.h | 2 +- 8 files changed, 64 insertions(+), 8 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 80915b1ef78de..d4580c3cf12a9 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -491,6 +491,10 @@ float4 sample_depth(float2 st, float2 pos) { t.a = t.a >= 128.0f ? 255.0f * TA.y : ((PS_AEM == 0) || any(bool3(t.rgb))) ? 255.0f * TA.x : 0.0f; } + else if (PS_PAL_FMT != 0 && !PS_TALES_OF_ABYSS_HLE && !PS_URBAN_CHAOS_HLE) + { + t = trunc(sample_4p(uint4(t.aaaa))[0] * 255.0f + 0.05f); + } return t; } diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 33728f964e6b9..d49c1cf628f0c 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -440,15 +440,15 @@ vec4 sample_depth(vec2 st) #endif - // warning t ranges from 0 to 255 #if (PS_AEM_FMT == FMT_24) t.a = ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f; #elif (PS_AEM_FMT == FMT_16) t.a = t.a >= 128.0f ? 255.0f * TA.y : ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 
255.0f * TA.x : 0.0f; +#elif PS_PAL_FMT != 0 && !PS_TALES_OF_ABYSS_HLE && !PS_URBAN_CHAOS_HLE + t = trunc(sample_4p(uvec4(t.aaaa))[0] * 255.0f + 0.05f); #endif - return t; } diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 73c80a03f6b52..1850ddc19111a 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -689,6 +689,10 @@ vec4 sample_depth(vec2 st, ivec2 pos) { t.a = t.a >= 128.0f ? 255.0f * TA.y : ((PS_AEM == 0) || any(bvec3(t.rgb))) ? 255.0f * TA.x : 0.0f; } + #elif PS_PAL_FMT != 0 && !PS_TALES_OF_ABYSS_HLE && !PS_URBAN_CHAOS_HLE + { + t = trunc(sample_4p(uvec4(t.aaaa))[0] * 255.0f + 0.05f); + } #endif return t; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 9dd3eb5acee29..33f6ac9f38369 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1248,8 +1248,8 @@ bool GSRendererHW::IsUsingAsInBlend() bool GSRendererHW::IsTBPFrameOrZ(u32 tbp) { - const bool is_frame = (m_cached_ctx.FRAME.Block() == tbp); - const bool is_z = (m_cached_ctx.ZBUF.Block() == tbp); + const bool is_frame = (m_cached_ctx.FRAME.Block() == tbp) && (GSUtil::GetChannelMask(m_cached_ctx.FRAME.PSM) & GSUtil::GetChannelMask(m_cached_ctx.TEX0.PSM)); + const bool is_z = (m_cached_ctx.ZBUF.Block() == tbp) && (GSUtil::GetChannelMask(m_cached_ctx.ZBUF.PSM) & GSUtil::GetChannelMask(m_cached_ctx.TEX0.PSM)); if (!is_frame && !is_z) return false; @@ -1273,6 +1273,27 @@ bool GSRendererHW::IsTBPFrameOrZ(u32 tbp) return (is_frame && !no_rt) || (is_z && !no_ds); } +void GSRendererHW::HandleManualDeswizzle() +{ + // Check if it's doing manual deswizzling first (draws are 32x16), if they are, check if the Z is flat, if not, we're gonna have to get creative and swap around the quadrants, but that's a TODO. 
+ GSVertex* v = &m_vertex.buff[0]; + + for (u32 i = 0; i < m_vertex.tail; i += 2) + { + if ((abs((v[i + 1].U) - (v[i].U)) >> 4) != 32 || (abs((v[i + 1].V) - (v[i].V)) >> 4) != 16) + return; + } + + if (m_vt.m_eq.z) + { + GSVector4i tex_rect = GSVector4i(m_vt.m_min.t.x, m_vt.m_min.t.y, m_vt.m_max.t.x, m_vt.m_max.t.y); + ReplaceVerticesWithSprite(m_r, tex_rect, GSVector2i(1 << m_cached_ctx.TEX0.TW, 1 << m_cached_ctx.TEX0.TH), m_context->scissor.in); + } + else + { + DevCon.Warning("Swizzled depth palette draw with non-flat Z draw %d", s_n); + } +} void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) { @@ -2411,7 +2432,7 @@ void GSRendererHW::Draw() // create that target, because the clear isn't black, it'll hang around and never get invalidated. const bool is_square = (t_size.y == t_size.x) && m_r.w >= 1023 && PrimitiveCoversWithoutGaps(); const bool is_clear = is_possible_mem_clear && is_square; - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt && GSLocalMemory::m_psm[src->m_from_target_TEX0.PSM].depth == GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].depth) && + const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || IsPossibleChannelShuffle()); rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, target_scale, GSTextureCache::RenderTarget, true, @@ -4495,6 +4516,11 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt, const GSVector4 half_pixel = RealignTargetTextureCoordinate(tex); m_conf.cb_vs.texture_offset = GSVector2(half_pixel.x, half_pixel.y); + + if (m_vt.m_primclass == GS_SPRITE_CLASS && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal > 0 && (tex->m_from_target_TEX0.PSM & 0x30) == 0x30 && m_index.tail >= 4) + { + 
HandleManualDeswizzle(); + } } else if (tex->m_palette) { diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 7c0175df62f2b..16f5c0fdcf534 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -221,6 +221,9 @@ class GSRendererHW : public GSRenderer /// Returns true if the specified texture address matches the frame or Z buffer. bool IsTBPFrameOrZ(u32 tbp); + /// If the draws appear to be a manual deswizzle (32x16 sprites) with flat Z, replaces the vertices with a sprite; otherwise leaves them untouched. + void HandleManualDeswizzle(); + /// Offsets the current draw, used for RT-in-RT. Offsets are relative to the *current* FBP, not the new FBP. void OffsetDraw(s32 fbp_offset, s32 zbp_offset, s32 xoffset, s32 yoffset); diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 7421eef4c72cf..ae2ca42e152d8 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -1626,8 +1626,17 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const depth_TEX0.U32[1] = TEX0.U32[1]; src = LookupDepthSource(false, depth_TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha); - if(src != nullptr) + if (src != nullptr) + { + + if (TEX0.PSM == PSMT8H) + { + // Attach palette for GPU texture conversion + AttachPaletteToSource(src, psm_s.pal, true); + } + return src; + } } else { @@ -1640,7 +1649,15 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const src = LookupDepthSource(false, TEX0, TEXA, CLAMP, req_rect, possible_shuffle, linear, frame_fbp, req_color, req_alpha, true); if (src != nullptr) + { + if (TEX0.PSM == PSMT8H) + { + // Attach palette for GPU texture conversion + AttachPaletteToSource(src, psm_s.pal, true); + } + return src; + } } } } diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 294c49b048c41..d0758d44ce557 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal 
+++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -604,7 +604,9 @@ struct PSMain t.a = (!PS_AEM || any(t.rgb != 0)) ? 255.f * cb.ta.x : 0.f; else if (PS_AEM_FMT == FMT_16) t.a = t.a >= 128.f ? 255.f * cb.ta.y : (!PS_AEM || any(t.rgb != 0)) ? 255.f * cb.ta.x : 0.f; - + else if (PS_PAL_FMT != 0 && !PS_TALES_OF_ABYSS_HLE && !PS_URBAN_CHAOS_HLE) + t = trunc(sample_4p(uint4(t.aaaa))[0] * 255.0f + 0.05f); + return t; } diff --git a/pcsx2/ShaderCacheVersion.h b/pcsx2/ShaderCacheVersion.h index f267c91147b29..b3c320e991e6a 100644 --- a/pcsx2/ShaderCacheVersion.h +++ b/pcsx2/ShaderCacheVersion.h @@ -3,4 +3,4 @@ /// Version number for GS and other shaders. Increment whenever any of the contents of the /// shaders change, to invalidate the cache. -static constexpr u32 SHADER_CACHE_VERSION = 38; +static constexpr u32 SHADER_CACHE_VERSION = 39;