From 76db6f6fbb17bd76342689217a05d8852f3f8f3a Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 1 Jan 2025 01:01:47 +0000 Subject: [PATCH] GS/HW: Further fixes for RT in RT changes in behaviour --- bin/resources/shaders/dx11/tfx.fx | 7 +- bin/resources/shaders/opengl/tfx_fs.glsl | 7 +- bin/resources/shaders/vulkan/tfx.glsl | 7 +- pcsx2/GS/GSState.cpp | 10 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 320 +++++++++++++++++++---- pcsx2/GS/Renderers/HW/GSRendererHW.h | 2 +- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 254 ++++++++++++------ pcsx2/GS/Renderers/HW/GSTextureCache.h | 10 +- pcsx2/GS/Renderers/Metal/tfx.metal | 7 +- 9 files changed, 478 insertions(+), 146 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index b425368d9fb41..5d6b76d7d4489 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1123,11 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index d6834c29d4837..66bdfa340a79a 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1086,11 +1086,8 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 34a89a9a68438..1ecf891e181fb 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1350,11 +1350,8 @@ void main() // Write RB part. Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index 977d5f416905a..a095abaff0478 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -3095,6 +3095,16 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) if (!(GSUtil::GetChannelMask(m_context->TEX0.PSM) & GSUtil::GetChannelMask(m_context->FRAME.PSM, m_context->FRAME.FBMSK | ~(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)))) return false; + // Try to detect shuffles, because these will not autoflush, they by design clash. + if (GSLocalMemory::m_psm[m_context->FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_context->TEX0.PSM].bpp == 16) + { + // Pretty confident here... + GSVertex* buffer = &m_vertex.buff[0]; + const bool const_spacing = (buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == (m_v.U - m_v.XYZ.X); + + if (const_spacing) + return false; + } const u32 frame_mask = GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk; const bool frame_hit = m_context->FRAME.Block() == m_context->TEX0.TBP0 && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask); // There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd. diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 10fbebd6ed53a..f341430407131 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = !shuffle_across && (((second_vert.XYZ.X + 9) - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -471,7 +471,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, GSVector4::storeh(&v[1].ST.S, st); } } - m_r = fpr; + m_r = r; m_vertex.head = m_vertex.tail = m_vertex.next = 2; m_index.tail = 2; return; @@ -968,11 +968,11 @@ GSVector2i GSRendererHW::GetValidSize(const GSTextureCache::Source* tex) return GSVector2i(width, height); } -GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex) +GSVector2i GSRendererHW::GetTargetSize(const GSTextureCache::Source* tex, const bool can_expand) { const GSVector2i valid_size = GetValidSize(tex); - return g_texture_cache->GetTargetSize(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, valid_size.x, valid_size.y); + return g_texture_cache->GetTargetSize(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, valid_size.x, valid_size.y, can_expand); } bool GSRendererHW::IsPossibleChannelShuffle() const @@ -1029,7 +1029,8 @@ bool GSRendererHW::IsPageCopy() const if (!PRIM->TME) return false; - const GSDrawingContext& next_ctx = m_env.CTXT[m_backed_up_ctx]; + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; if (next_ctx.TEX0.TBP0 != (m_cached_ctx.TEX0.TBP0 + 0x20)) return false; @@ -2450,7 +2451,7 @@ void GSRendererHW::Draw() GIFRegTEX0 TEX0 = {}; GSTextureCache::Source* src = nullptr; TextureMinMaxResult tmm; - + bool possible_shuffle = false; // Disable texture mapping if the blend is black and using alpha from vertex. if (m_process_texture) { @@ -2567,7 +2568,7 @@ void GSRendererHW::Draw() GIFRegTEX0 FRAME_TEX0; bool shuffle_target = false; - if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16) { if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) { @@ -2590,16 +2591,22 @@ void GSRendererHW::Draw() const GSVertex* v = &m_vertex.buff[0]; const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; - const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const int second_u = PRIM->FST ? ((v[1].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.5f); - const bool shuffle_coords = (first_x ^ first_u) & 8; - const int draw_width = std::abs(v[1].XYZ.X - v[0].XYZ.X) >> 4; + const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); + const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); + // offset coordinates swap around RG/BA. (Ace Combat) + const u32 minv = m_cached_ctx.CLAMP.MINV; + const u32 minu = m_cached_ctx.CLAMP.MINU; + const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); + const bool shuffle_coords = ((first_x ^ first_u) & 8) || rgba_shuffle; + // Round up half of second coord, it can sometimes be slightly under. + const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; const int read_width = std::abs(second_u - first_u); - shuffle_target = shuffle_coords && draw_width == 8 && draw_width == read_width; + shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; } } - const bool possible_shuffle = !no_rt && (((shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))) || IsPossibleChannelShuffle()); + + possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle()); const bool need_aem_color = GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].trbpp <= 24 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].pal == 0 && ((PRIM->ABE && m_context->ALPHA.C == 0) || IsDiscardingDstAlpha()) && m_draw_env->TEXA.AEM; const u32 color_mask = (m_vt.m_max.c > GSVector4i::zero()).mask(); const bool texture_function_color = m_cached_ctx.TEX0.TFX == TFX_DECAL || (color_mask & 0xFFF) || (m_cached_ctx.TEX0.TFX > TFX_DECAL && (color_mask & 0xF000)); @@ -2625,6 +2632,7 @@ void GSRendererHW::Draw() return; } + possible_shuffle &= src && (src->m_from_target != nullptr); // We don't know the alpha range of direct sources when we first tried to optimize the alpha test. // Moving the texture lookup before the ATST optimization complicates things a lot, so instead, // recompute it, and everything derived from it again if it changes. @@ -2663,8 +2671,16 @@ void GSRendererHW::Draw() } } + + + // If we're Z Testing for greater (or Z is above zero), it's not going to pass any new pixels anyway. + // Urban Reign trolls by scissoring a draw to a target at 0x0-0x117F to 378x449 which ends up the size being rounded up to 640x480 + // causing the buffer to expand to around 0x1400, which makes a later framebuffer at 0x1180 to fail to be created correctly. + // We can cheese this by checking if the Z is masked and the resultant colour is going to be black anyway. + const bool can_expand = !(m_cached_ctx.ZBUF.ZMSK && (PRIM->ABE && (m_context->ALPHA.A == 1 && m_draw_env->COLCLAMP.CLAMP == 1))); + // Estimate size based on the scissor rectangle and height cache. - GSVector2i t_size = GetTargetSize(src); + GSVector2i t_size = GetTargetSize(src, can_expand); const GSVector4i t_size_rect = GSVector4i::loadh(t_size); // Ensure draw rect is clamped to framebuffer size. Necessary for updating valid area. @@ -2745,7 +2761,7 @@ void GSRendererHW::Draw() ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; - if (!ds) + if (!ds && m_cached_ctx.FRAME.FBP != m_cached_ctx.ZBUF.ZBP) { ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, true, 0, false, force_preload, preserve_depth, m_r, src); @@ -2801,21 +2817,34 @@ void GSRendererHW::Draw() if (!no_rt) { - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + possible_shuffle |= draw_sprite_tex && m_primitive_covers_without_gaps != NoGapsType::FullCover && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || IsPossibleChannelShuffle()); // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBW = (possible_shuffle && IsPossibleChannelShuffle() && src && src->m_from_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; - // Don't clamp on shuffle, the height cache may troll us with the REAL height. if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); + // Do the lookup with the real format on a shuffle, if possible. + if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) + { + // Creating a new target on a shuffle, possible temp buffer, but let's try to get the real format. + const int get_next_ctx = (m_state_flush_reason == CONTEXTCHANGE) ? m_env.PRIM.CTXT : m_backed_up_ctx; + const GSDrawingContext& next_ctx = m_env.CTXT[get_next_ctx]; + + if (next_ctx.FRAME.Block() == FRAME_TEX0.TBP0 && next_ctx.FRAME.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.FRAME.PSM; + else if (next_ctx.TEX0.TBP0 == FRAME_TEX0.TBP0 && next_ctx.TEX0.PSM != FRAME_TEX0.PSM) + FRAME_TEX0.PSM = next_ctx.TEX0.PSM; + else + FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. + } + // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead // (very close to 1024x1024, but apparently the GS rounds down..). So, catch that here, we don't want to // create that target, because the clear isn't black, it'll hang around and never get invalidated. @@ -2828,7 +2857,7 @@ void GSRendererHW::Draw() rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), - GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, no_ds ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); + GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. if (!rt) @@ -2865,13 +2894,68 @@ void GSRendererHW::Draw() } else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { - GSVertex* v = &m_vertex.buff[0]; - int vertical_offset = ((std::abs(static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. - const int horizontal_offset = (std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; // Used to reduce the offset made later in channel shuffles m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); + if (vertical_offset < 0) + { + rt->m_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); + GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale; + // Make sure to use the original format for the offset. + int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + + new_scaled_size.y += new_offset * rt->m_scale; + GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true); + //if (!tex) + // return nullptr; + //m_target_memory_usage += tex->GetMemUsage(); + GSVector4i dRect = GSVector4i(0, new_offset * rt->m_scale, new_scaled_size.x, new_scaled_size.y); + g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false); + + + if (src && src->m_from_target && src->m_from_target == rt) + { + src->m_texture = rt->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + //m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(rt->m_texture); + } + + rt->m_valid.y += new_offset; + rt->m_valid.w += new_offset; + rt->m_drawn_since_read.y += new_offset; + rt->m_drawn_since_read.w += new_offset; + rt->m_texture = tex; + rt->m_unscaled_size = new_scaled_size / rt->m_scale; + + t_size.y += std::abs(vertical_offset); + vertical_offset = 0; + } + + // Z isn't offset but RT is, so we need a temp Z to align it, hopefully nothing will ever write to the Z too, right?? + if (ds && vertical_offset && (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) != (m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) + { + + int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + int z_offset = vertical_offset; + GL_CACHE("RT in RT Z copy on draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); + GSVector4i dRect = GSVector4i(0, z_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_offset + m_r.w + 1, z_offset + ds->m_unscaled_size.y) * ds->m_scale); + int new_height = std::max(static_cast(ds->m_unscaled_size.y * ds->m_scale), dRect.w); + GSTexture* tex = g_gs_device->CreateDepthStencil(ds->m_unscaled_size.x * ds->m_scale, new_height, GSTexture::Format::DepthStencil, true); + g_gs_device->StretchRect(ds->m_texture, GSVector4(0.0f, z_vertical_offset / static_cast(ds->m_unscaled_size.y), 1.0f, std::min(z_vertical_offset + m_r.w + 1, ds->m_unscaled_size.y) / static_cast(ds->m_unscaled_size.y)), tex, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + + g_texture_cache->SetTemporaryZ(tex); + } + + GSVertex* v = &m_vertex.buff[0]; + for (u32 i = 0; i < m_vertex.tail; i++) { v[i].XYZ.Y += vertical_offset << 4; @@ -2898,7 +2982,7 @@ void GSRendererHW::Draw() // Don't resize if the BPP don't match. if (frame_psm.bpp == GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp) { - if (t_size.y <= 0) + if (m_r.w > rt->m_unscaled_size.y) { u32 new_height = m_r.w; @@ -2906,8 +2990,11 @@ void GSRendererHW::Draw() new_height /= 2; //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); rt->ResizeTexture(rt->m_unscaled_size.x, new_height); - rt->UpdateValidity(m_r, true); - rt->UpdateDrawn(m_r, true); + + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); + + rt->UpdateValidity(m_r, !frame_masked); + rt->UpdateDrawn(m_r, !frame_masked); } } } @@ -2936,6 +3023,75 @@ void GSRendererHW::Draw() m_last_channel_shuffle_end_block = 0xFFFF; } + // Only run if DS was new and matched the framebuffer. + if (!no_ds && !ds) + { + ZBUF_TEX0.U64 = 0; + ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; + + ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, + m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block(), false, + src, -1); + + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + + // This should never happen, but just to be safe.. + if (!ds) + { + ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, + true, 0, false, force_preload, preserve_depth, m_r, src); + if (!ds) [[unlikely]] + { + GL_INS("ERROR: Failed to create ZBUF target, skipping."); + CleanupDraw(true); + return; + } + } + else + { + // If it failed to check depth test earlier, we can now check the top bits from the alpha to get a bit more accurate picture. + if (((zm && m_cached_ctx.TEST.ZTST > ZTST_ALWAYS) || (m_vt.m_eq.z && m_cached_ctx.TEST.ZTST == ZTST_GEQUAL)) && GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].trbpp == 32) + { + if (ds->m_alpha_max != 0) + { + const u32 max_z = (static_cast(ds->m_alpha_max + 1) << 24) - 1; + + switch (m_cached_ctx.TEST.ZTST) + { + case ZTST_GEQUAL: + // Every Z value will pass + if (max_z <= m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + case ZTST_GREATER: + // Every Z value will pass + if (max_z < m_vt.m_min.p.z) + { + m_cached_ctx.TEST.ZTST = ZTST_ALWAYS; + if (zm) + { + ds = nullptr; + no_ds = true; + } + } + break; + default: + break; + } + } + } + } + } + if (m_process_texture) { GIFRegCLAMP MIP_CLAMP = m_cached_ctx.CLAMP; @@ -2949,7 +3105,8 @@ void GSRendererHW::Draw() const int first_u = PRIM->FST ? ((v[0].U + 8) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q)) + 0.5f); const bool shuffle_coords = (first_x ^ first_u) & 8; const u32 draw_end = GSLocalMemory::GetEndBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r) + 1; - const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= m_cached_ctx.FRAME.Block() && + const u32 draw_start = GSLocalMemory::GetStartBlockAddress(m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r); + const bool draw_uses_target = src->m_from_target && ((src->m_from_target_TEX0.TBP0 <= draw_start && src->m_from_target->UnwrappedEndBlock() > m_cached_ctx.FRAME.Block()) || (m_cached_ctx.FRAME.Block() < src->m_from_target_TEX0.TBP0 && draw_end > src->m_from_target_TEX0.TBP0)); @@ -3190,8 +3347,8 @@ void GSRendererHW::Draw() // The FBW should also be okay, since it's coming from the source. if (rt) { - const bool update_fbw = rt->m_last_draw == s_n && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); - rt->m_TEX0.TBW = update_fbw ? FRAME_TEX0.TBW : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); + const bool update_fbw = !m_in_target_draw && (m_channel_shuffle && src->m_target) && (!PRIM->ABE || IsOpaque() || m_context->ALPHA.IsBlack()); + rt->m_TEX0.TBW = update_fbw ? ((src && src->m_from_target && src->m_32_bits_fmt) ? src->m_from_target->m_TEX0.TBW : FRAME_TEX0.TBW) : std::max(rt->m_TEX0.TBW, FRAME_TEX0.TBW); rt->m_TEX0.PSM = FRAME_TEX0.PSM; } if (ds) @@ -3200,6 +3357,11 @@ void GSRendererHW::Draw() ds->m_TEX0.PSM = ZBUF_TEX0.PSM; } } + // Probably grabbed an old 16bit target (Band Hero) + /*else if (m_texture_shuffle && GSLocalMemory::m_psm[rt->m_TEX0.PSM].bpp == 16) + { + rt->m_TEX0.PSM = PSMCT32; + }*/ // Figure out which channels we're writing. if (rt) @@ -3217,7 +3379,7 @@ void GSRendererHW::Draw() GSVector2i new_size = t_size; // We need to adjust the size if it's a texture shuffle as we could end up making the RT twice the size. - if (src && m_texture_shuffle && m_split_texture_shuffle_pages == 0) + if (src && m_texture_shuffle && !m_copy_16bit_to_target_shuffle && m_split_texture_shuffle_pages == 0) { if ((new_size.x > src->m_valid_rect.z && m_vt.m_max.p.x == new_size.x) || (new_size.y > src->m_valid_rect.w && m_vt.m_max.p.y == new_size.y)) { @@ -3228,9 +3390,18 @@ void GSRendererHW::Draw() } } + if (m_in_target_draw && src && m_channel_shuffle && src->m_from_target && src->m_from_target == rt && m_cached_ctx.TEX0.TBP0 == src->m_from_target->m_TEX0.TBP0) + { + new_size.y = std::max(new_size.y, static_cast((((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) >> 5) / rt->m_TEX0.TBW) * frame_psm.pgs.y) * 2); + GSVector4i new_valid = rt->m_valid; + new_valid.w = new_size.y; + rt->UpdateValidity(new_valid, true); + } + // We still need to make sure the dimensions of the targets match. - const int new_w = std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0)); - const int new_h = std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0)); + // Limit new size to 2048, the GS can't address more than this so may avoid some bugs/crashes. + const int new_w = std::min(2048, std::max(new_size.x, std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0))); + const int new_h = std::min(2048, std::max(new_size.y, std::max(rt ? rt->m_unscaled_size.y : 0, ds ? ds->m_unscaled_size.y : 0))); if (rt) { const u32 old_end_block = rt->m_end_block; @@ -3242,6 +3413,25 @@ void GSRendererHW::Draw() if (rt->GetUnscaledWidth() != new_w || rt->GetUnscaledHeight() != new_h) GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); + // May not be needed/could cause problems with garbage loaded from GS memory + if (preserve_rt_color) + { + RGBAMask mask; + mask._u32 = 0xF; + + if (new_w > rt->m_unscaled_size.x) + { + GSVector4i width_dirty_rect = GSVector4i(rt->m_unscaled_size.x, 0, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, width_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + + if (new_h > rt->m_unscaled_size.y) + { + GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h); + g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); + } + } + rt->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) @@ -3261,9 +3451,11 @@ void GSRendererHW::Draw() } const GSVector4i update_rect = m_r.rintersect(GSVector4i::loadh(GSVector2i(new_w, new_h))); + // if frame is masked or afailing always to never write frame, wanna make sure we don't touch it. This might happen if DATE or Alpha Test is being used to write to Z. + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); - rt->UpdateDrawn(update_rect, can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); + rt->UpdateDrawn(update_rect, !frame_masked && (can_update_size || (m_r.w <= (resolution.y * 2) && !m_texture_shuffle))); // Probably changing to double buffering, so invalidate any old target that was next to it. // This resolves an issue where the PCRTC will find the old target in FMV's causing flashing. // Grandia Xtreme, Onimusha Warlord. @@ -3293,7 +3485,7 @@ void GSRendererHW::Draw() const bool new_rect = ds->m_valid.rempty(); const bool new_height = new_h > ds->GetUnscaledHeight(); const int old_height = ds->m_texture->GetHeight(); - + const GSVector4i old_rect = ds->GetUnscaledRect(); pxAssert(ds->GetScale() == target_scale); if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); @@ -3306,8 +3498,12 @@ void GSRendererHW::Draw() } // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(m_r, can_update_size || m_r.w <= (resolution.y * 2)); - ds->UpdateDrawn(m_r, can_update_size || m_r.w <= (resolution.y * 2)); + // Dark cloud writes to 424 when the buffer is only 416 high, but masks the Z. + // Updating the valid causes the Z to overlap the framebuffer, which is obviously incorrect. + const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + + ds->UpdateValidity(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); + ds->UpdateDrawn(m_r, !z_masked && (can_update_size || m_r.w <= (resolution.y * 2))); if (!new_rect && new_height && old_end_block != ds->m_end_block) { @@ -3406,7 +3602,9 @@ void GSRendererHW::Draw() { s = GetDrawDumpPath("%05d_f%lld_rz0_%05x_%s.bmp", s_n, frame, m_cached_ctx.ZBUF.Block(), psm_str(m_cached_ctx.ZBUF.PSM)); - if (ds->m_texture) + if (g_texture_cache->GetTemporaryZ()) + g_texture_cache->GetTemporaryZ()->Save(s); + else if (ds->m_texture) ds->m_texture->Save(s); } } @@ -3495,9 +3693,10 @@ void GSRendererHW::Draw() if ((fm & fm_mask) != fm_mask && rt) { + const bool frame_masked = ((m_cached_ctx.FRAME.FBMSK & frame_psm.fmsk) == frame_psm.fmsk) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST == ATST_NEVER && !(m_cached_ctx.TEST.AFAIL & AFAIL_FB_ONLY)); //rt->m_valid = rt->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - rt->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + rt->UpdateValidity(real_rect, !frame_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.fb, real_rect, false); @@ -3508,15 +3707,31 @@ void GSRendererHW::Draw() if (zm != 0xffffffff && ds) { + const bool z_masked = m_cached_ctx.ZBUF.ZMSK; + //ds->m_valid = ds->m_valid.runion(r); // Limit to 2x the vertical height of the resolution (for double buffering) - ds->UpdateValidity(real_rect, can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle)); + ds->UpdateValidity(real_rect, !z_masked && (can_update_size || (real_rect.w <= (resolution.y * 2) && !m_texture_shuffle))); g_texture_cache->InvalidateVideoMem(context->offset.zb, real_rect, false); // Remove overwritten RTs at the ZBP. g_texture_cache->InvalidateVideoMemType( GSTextureCache::RenderTarget, m_cached_ctx.ZBUF.Block(), m_cached_ctx.ZBUF.PSM, zm); + + + if (g_texture_cache->GetTemporaryZ()) + { + if (m_cached_ctx.DepthWrite()) + { + int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; + int z_vertical_offset = ((static_cast(m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0) / 32) / std::max(rt->m_TEX0.TBW, 1U)) * GSLocalMemory::m_psm[m_cached_ctx.ZBUF.PSM].pgs.y; + int z_offset = vertical_offset; + GL_CACHE("RT in RT Z copy back draw %d z_vert_offset %d z_offset %d", s_n, z_vertical_offset, z_offset); + GSVector4i dRect = GSVector4i(0, z_vertical_offset * ds->m_scale, ds->m_unscaled_size.x * ds->m_scale, std::min(z_vertical_offset + m_r.w + 1 - vertical_offset, ds->m_unscaled_size.y) * ds->m_scale); + g_gs_device->StretchRect(g_texture_cache->GetTemporaryZ(), GSVector4(0.0f, z_offset / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight()), 1.0f, std::min(real_rect.w + 1, ds->m_unscaled_size.y + z_offset) / static_cast(g_texture_cache->GetTemporaryZ()->GetHeight())), ds->m_texture, GSVector4(dRect), ShaderConvert::DEPTH_COPY, false); + } + } } // @@ -4008,7 +4223,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3) + else if (m_index.tail <= 64 && !IsPageCopy() && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. So @@ -4165,7 +4380,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool // Performance GPU note: it could be wise to reduce the size to // the rendered size of the framebuffer - if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && NextDrawMatchesShuffle())) + if (GSConfig.UserHacks_TextureInsideRt == GSTextureInRtMode::Disabled || (!m_in_target_draw && IsPageCopy())) { GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + 0); @@ -5589,6 +5804,13 @@ bool GSRendererHW::CanUseTexIsFB(const GSTextureCache::Target* rt, const GSTextu return false; } + // the texture is offset, and the frame isn't also offset, we can't do this. + if (tex->GetRegion().HasX() || tex->GetRegion().HasY()) + { + if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) + return false; + } + // If we're a shuffle, tex-is-fb is always fine. if (m_texture_shuffle || m_channel_shuffle) { @@ -5738,6 +5960,7 @@ void GSRendererHW::CleanupDraw(bool invalidate_temp_src) if (invalidate_temp_src) g_texture_cache->InvalidateTemporarySource(); + g_texture_cache->InvalidateTemporaryZ(); // Restore Scissor. m_context->UpdateScissor(); @@ -5777,7 +6000,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.cb_vs.texture_offset = {}; m_conf.ps.scanmsk = env.SCANMSK.MSK; m_conf.rt = rt ? rt->m_texture : nullptr; - m_conf.ds = ds ? ds->m_texture : nullptr; + m_conf.ds = ds ? (g_texture_cache->GetTemporaryZ() ? g_texture_cache->GetTemporaryZ() : ds->m_texture) : nullptr; // Z setup has to come before channel shuffle EmulateZbuffer(ds); @@ -6148,7 +6371,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta const bool full_cover = rt->m_valid.rintersect(m_r).eq(rt->m_valid) && m_primitive_covers_without_gaps == NoGapsType::FullCover && !(DATE || !always_passing_alpha || !IsDepthAlwaysPassing()); // Restrict this to only when we're overwriting the whole target. - new_scale_rt_alpha = full_cover; + new_scale_rt_alpha = full_cover || rt->m_last_draw >= s_n; } } @@ -7233,7 +7456,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.FRAME.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.FRAME.PSM, m_r), - rt_end_bp, m_cached_ctx.FRAME.PSM); + rt_end_bp, m_cached_ctx.FRAME.PSM, m_cached_ctx.FRAME.FBW); GSUploadQueue clear_queue; clear_queue.draw = s_n; @@ -7256,7 +7479,7 @@ bool GSRendererHW::TryGSMemClear(bool no_rt, bool preserve_rt, bool invalidate_r g_texture_cache->InvalidateContainedTargets( GSLocalMemory::GetStartBlockAddress( m_cached_ctx.ZBUF.Block(), m_cached_ctx.FRAME.FBW, m_cached_ctx.ZBUF.PSM, m_r), - ds_end_bp, m_cached_ctx.ZBUF.PSM); + ds_end_bp, m_cached_ctx.ZBUF.PSM, m_cached_ctx.FRAME.FBW); } } @@ -7598,9 +7821,10 @@ ClearType GSRendererHW::IsConstantDirectWriteMemClear() && !(m_draw_env->SCANMSK.MSK & 2) && !m_cached_ctx.TEST.ATE // no alpha test && !m_cached_ctx.TEST.DATE // no destination alpha test && (!m_cached_ctx.TEST.ZTE || m_cached_ctx.TEST.ZTST == ZTST_ALWAYS) // no depth test - && (m_vt.m_eq.rgba == 0xFFFF || m_vertex.next == 2)) // constant color write + && (m_vt.m_eq.rgba == 0xFFFF || m_vertex.next == 2) // constant color write + && !(PRIM->ABE && m_context->ALPHA.IsCdInBlend())) // Not keeping dest color { - if (PRIM->ABE && (!m_context->ALPHA.IsOpaque() || m_cached_ctx.FRAME.FBMSK)) + if (PRIM->ABE && !m_context->ALPHA.IsOpaque() || m_cached_ctx.FRAME.FBMSK) return ClearWithDraw; return NormalClear; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index 5db0ed8e77ffc..34c03cad7543c 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -216,7 +216,7 @@ class GSRendererHW : public GSRenderer void MergeSprite(GSTextureCache::Source* tex); float GetTextureScaleFactor() override; GSVector2i GetValidSize(const GSTextureCache::Source* tex = nullptr); - GSVector2i GetTargetSize(const GSTextureCache::Source* tex = nullptr); + GSVector2i GetTargetSize(const GSTextureCache::Source* tex = nullptr, const bool can_expand = true); void Reset(bool hardware_reset) override; void UpdateSettings(const Pcsx2Config::GSOptions& old_config) override; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 61ba346c5895a..1e452cdb58e21 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -149,7 +149,8 @@ void GSTextureCache::AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm if (rect.rempty()) return; - + if (rect.w > 2048) + DevCon.Warning("BAd"); std::vector::iterator it = target->m_dirty.end(); while (it != target->m_dirty.begin()) { @@ -274,6 +275,15 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw const int inc_horizontal_offset = (page_offset % src_pgw) * src_page_size.x; in_rect = (in_rect + GSVector4i(0, inc_vertical_offset).xyxy()).max_i32(GSVector4i(0)); in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); + + // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. + if (in_rect.x >= (dst_pgw * dst_page_size.x)) + { + in_rect.z -= dst_pgw * dst_page_size.x; + in_rect.x -= dst_pgw * dst_page_size.x; + in_rect.y += dst_page_size.y; + in_rect.w += dst_page_size.y; + } page_offset = 0; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; } @@ -1448,8 +1458,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Also check BP >= TBP, create source isn't equpped to expand it backwards and all data comes from the target. (GH3) else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && - (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (possible_shuffle && GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. - t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) && CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)) + (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && t->m_TEX0.TBW >= (bw * 2))) && // Channel shuffles or non indexed lookups. + t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) @@ -1481,7 +1491,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const } if (bp > t->m_TEX0.TBP0) { - GSVector4i new_rect = possible_shuffle ? block_boundary_rect : rect; + GSVector4i new_rect = (GSLocalMemory::m_psm[color_psm].bpp != GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp) ? block_boundary_rect : rect; if (linear) { new_rect.z -= 1; @@ -1586,15 +1596,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Omitting that check here seemed less risky than blowing CS targets out... const GSVector2i& page_size = GSLocalMemory::m_psm[src_psm].pgs; const GSOffset offset(GSLocalMemory::m_psm[src_psm].info, bp, bw, psm); + const u32 offset_bp = offset.bn(region.GetMinX(), region.GetMinY()); if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && - offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0) + (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 || + (offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw)) { GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), (region.GetMinY() / page_size.y) * TEX0.TBW + (region.GetMinX() / page_size.x)); - x_offset = -region.GetMinX(); - y_offset = -region.GetMinY(); + + x_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) * page_size.x) - region.GetMinX(); + y_offset = ((((offset_bp - t->m_TEX0.TBP0) >> 5) / bw) * page_size.y) - region.GetMinY(); dst = t; tex_merge_rt = false; found_t = true; @@ -1827,7 +1840,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe // TODO: Move all frame stuff to its own routine too. if (!is_frame) { - for (auto i = list.begin(); i != list.end(); ++i) + for (auto i = list.begin(); i != list.end();) { Target* t = *i; @@ -1838,6 +1851,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) { DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); + i++; continue; } @@ -1896,21 +1910,26 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe InvalidateSourcesFromTarget(t); i = list.erase(i); delete t; + + continue; } } // Probably pointing to half way through the target - else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + else if (!min_rect.rempty()&& GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { - if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z + /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) + { continue; + }*/ const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); - const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); - if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) - { + const bool is_aligned_ok = widthpage_offset == 0 || (t->m_TEX0.TBW == TEX0.TBW && ((min_rect.z >> 6) + widthpage_offset) <= TEX0.TBW) || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); + if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Overlaps(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { /*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ // If it's too old, it's probably not a real target to jump in to anymore. - if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && - !(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 || + /*if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && + !(widthpage_offset == 0 || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); @@ -1918,6 +1937,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe i = list.erase(i); delete t; } + else*/ + if (!is_shuffle && !GSUtil::HasSameSwizzleBits(t->m_TEX0.PSM, TEX0.PSM)) + { + GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + InvalidateSourcesFromTarget(t); + i = list.erase(i); + delete t; + + continue; + } else { //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); @@ -1931,6 +1960,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } } + + i++; } } else @@ -2085,7 +2116,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_alpha_min = 0; dst->m_alpha_max = 0; } - else if (!is_shuffle && std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) + else if (std::abs(static_cast(GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp - GSLocalMemory::m_psm[TEX0.PSM].bpp)) == 16) { dst->Update(false); @@ -2094,7 +2125,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe new_scaled_size = ScaleRenderTargetSize(dst->m_unscaled_size, scale); dRect = (GSVector4(GSVector4i::loadh(dst->m_unscaled_size)) * GSVector4(scale)).ceil(); - + if (scale_down) { if ((new_size.y * 2) < 1024) @@ -2116,34 +2147,38 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid.y /= 2; dst->m_valid.w /= 2; } - GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, - dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, - scale); - //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); - GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : - g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); - if (!tex) - return nullptr; - m_target_memory_usage += tex->GetMemUsage(); + if (!is_shuffle) + { + GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, + dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, + scale); + //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : + g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); + if (!tex) + return nullptr; + m_target_memory_usage += tex->GetMemUsage(); - g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); + g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); - - if (src && src->m_from_target && src->m_from_target == dst) - { - src->m_texture = dst->m_texture; - src->m_target_direct = false; - src->m_shared_texture = false; - } - else - { - m_target_memory_usage -= dst->m_texture->GetMemUsage(); - g_gs_device->Recycle(dst->m_texture); + + if (src && src->m_from_target && src->m_from_target == dst) + { + src->m_texture = dst->m_texture; + src->m_target_direct = false; + src->m_shared_texture = false; + } + else + { + m_target_memory_usage -= dst->m_texture->GetMemUsage(); + g_gs_device->Recycle(dst->m_texture); + } + + dst->m_texture = tex; + dst->m_unscaled_size = new_size; } - + // New format or doing a shuffle to a 32bit target that used to be 16bit dst->m_TEX0.PSM = TEX0.PSM; - dst->m_texture = tex; - dst->m_unscaled_size = new_size; } @@ -2347,7 +2382,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_valid_rgb = dst_match->m_valid_rgb; dst->m_was_dst_matched = true; dst_match->m_was_dst_matched = true; - dst_match->m_valid_rgb = false; + dst_match->m_valid_rgb = preserve_rgb; if (GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[dst_match->m_TEX0.PSM].bpp > 16) dst->m_TEX0.TBW = dst_match->m_TEX0.TBW; // Be careful of shuffles of the depth as C16, but using a buffer width of 16 (Mercenaries). @@ -2572,7 +2607,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons if (valid_draw_size && supported_fmt) { - const GSVector4i newrect = GSVector4i::loadh(size); + const GSVector4i newrect = GSVector4i::loadh(valid_size); const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect); RGBAMask rgba; @@ -2711,12 +2746,14 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons auto j = i; Target* t = *j; - if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW && t->m_TEX0.PSM == dst->m_TEX0.PSM /*&& t->m_TEX0.TBW >= 4*/) + if (dst != t && t->m_TEX0.PSM == dst->m_TEX0.PSM /*&& t->m_TEX0.TBW >= 4*/) if (t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid)) { + const u32 buffer_width = std::max(1U, dst->m_TEX0.TBW); + // If the two targets are misaligned, it's likely a relocation, so we can just kill the old target. // Kill targets that are overlapping new targets, but ignore the copy if the old target is dirty because we favour GS memory. - if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % dst->m_TEX0.TBW) != 0) && !t->m_dirty.empty()) + if (((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) != 0) && !t->m_dirty.empty()) { InvalidateSourcesFromTarget(t); i = list.erase(j); @@ -2739,16 +2776,16 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons { const int rt_pages = ((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5; const int overlapping_pages = std::min(rt_pages, static_cast((dst->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 5); - const int overlapping_pages_height = (overlapping_pages / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; + const int overlapping_pages_height = (overlapping_pages / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y; - if (overlapping_pages_height == 0 || (overlapping_pages % dst->m_TEX0.TBW)) + if (overlapping_pages_height == 0 || (overlapping_pages % buffer_width)) { // No overlap top copy or the widths don't match. i++; continue; } - const int dst_offset = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); + const int dst_offset = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); if ((dst_offset + t->m_valid.w) > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, (dst_offset + t->m_valid.w), true)) { @@ -2757,22 +2794,26 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons continue; } - const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; - const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); + const int dst_offset_width = (((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) % buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.x; + const int dst_offset_height = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / buffer_width) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); const int dst_offset_scaled_width = dst_offset_width * dst->m_scale; const int dst_offset_scaled_height = dst_offset_height * dst->m_scale; const GSVector4i dst_rect_scale = GSVector4i(t->m_valid.x, dst_offset_height, t->m_valid.z, dst_offset_height + overlapping_pages_height); if (((!hw_clear && (preserve_target || preload)) || dst_rect_scale.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) { - const int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; - const int copy_height = overlapping_pages_height * t->m_scale; + int copy_width = ((t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth()) - dst_offset_scaled_width; + int copy_height = overlapping_pages_height * t->m_scale; GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled_height); // Clear the dirty first t->Update(); dst->Update(); - + if (copy_width < 0) + { + copy_width = 0; + DevCon.Warning("Copy has no width"); + } if ((dst_offset + copy_height) > dst->m_unscaled_size.y) DevCon.Warning("Way too tall draw %d", GSState::s_n); @@ -2787,6 +2828,12 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons } else { + // Clamp size if it goes out of bounds. + if ((copy_width + dst_offset_scaled_width) > dst->m_unscaled_size.x || (copy_height + dst_offset_scaled_height) > dst->m_unscaled_size.y) + { + copy_width = std::min(copy_width, static_cast((dst->m_unscaled_size.x * dst->m_scale) - dst_offset_scaled_width)); + copy_height = std::min(copy_height, static_cast((dst->m_unscaled_size.y * dst->m_scale) - dst_offset_scaled_height)); + } // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. g_gs_device->CopyRect(t->m_texture, dst->m_texture, GSVector4i(0, 0, copy_width, copy_height), dst_offset_scaled_width, dst_offset_scaled_height); } @@ -2808,6 +2855,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons InvalidateSourcesFromTarget(t); i = list.erase(j); delete t; + continue; } } i++; @@ -3160,7 +3208,7 @@ bool GSTextureCache::PrepareDownloadTexture(u32 width, u32 height, GSTexture::Fo return true; } -void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm) +void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm, u32 write_bw) { const bool preserve_alpha = (GSLocalMemory::m_psm[write_psm].trbpp == 24); for (int type = 0; type < 2; type++) @@ -3169,22 +3217,24 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr for (auto i = list.begin(); i != list.end();) { Target* const t = *i; - if (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp)) + if ((start_bp > t->UnwrappedEndBlock() || end_bp < t->m_TEX0.TBP0) || (start_bp != t->m_TEX0.TBP0 && (t->m_TEX0.TBP0 < start_bp || t->UnwrappedEndBlock() > end_bp) && t->m_dirty.empty())) { ++i; continue; } + const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; // Not covering the whole target, and a different format, so just dirty it. - if (start_bp == t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM) + /*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) { const GSLocalMemory::psm_t& target_psm = GSLocalMemory::m_psm[write_psm]; - u32 total_pages = (end_bp - t->m_TEX0.TBP0) >> 5; - GSVector4i dirty_area = GSVector4i(0, 0, t->m_valid.z, (total_pages / t->m_TEX0.TBW) * target_psm.pgs.y); - InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, write_psm), dirty_area, true); + const u32 page_offset = ((start_bp - t->m_TEX0.TBP0) >> 5); + const u32 vertical_offset = (page_offset / t->m_TEX0.TBW) * target_psm.pgs.y; + GSVector4i dirty_area = GSVector4i(page_offset % t->m_TEX0.TBW, vertical_offset, t->m_valid.z, vertical_offset + ((total_pages / t->m_TEX0.TBW) * target_psm.pgs.y)); + InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM), dirty_area, true); ++i; continue; - } + }*/ InvalidateSourcesFromTarget(t); @@ -3867,6 +3917,19 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u if (alpha_only && (!dst || GSLocalMemory::m_psm[dst->m_TEX0.PSM].bpp != 32)) return false; + // This is probably copying to a new buffer but using the original one as an offset, so better to use a new texture, if we don't find one. + if (dst && DBP == SBP && dy > dst->m_unscaled_size.y) + { + u32 new_DBP = DBP + (((dy / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * DBW) << 5); + + dst = nullptr; + + DBP = new_DBP; + dy = 0; + + dst = GetExactTarget(DBP, DBW, dpsm_s.depth ? DepthStencil : RenderTarget, DBP); + } + // Beware of the case where a game might create a larger texture by moving a bunch of chunks around. // We use dx/dy == 0 and the TBW check as a safeguard to make sure these go through to local memory. // We can also recreate the target if it's previously been created in the height cache with a valid size. @@ -3940,7 +4003,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Make sure the copy doesn't go out of bounds (it shouldn't). if ((scaled_dx + scaled_w) > dst->m_texture->GetWidth() || (scaled_dy + scaled_h) > dst->m_texture->GetHeight()) return false; - DevCon.Warning("HW Move 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", SBP, SBW, + GL_CACHE("HW Move after draw %d 0x%x[BW:%u PSM:%s] to 0x%x[BW:%u PSM:%s] <%d,%d->%d,%d> -> <%d,%d->%d,%d>", GSState::s_n, SBP, SBW, psm_str(SPSM), DBP, DBW, psm_str(DPSM), sx, sy, sx + w, sy + h, dx, dy, dx + w, dy + h); const bool cover_whole_target = dst->m_type == RenderTarget && GSVector4i(dx, dy, dx + w, dy + h).rintersect(dst->m_valid).eq(dst->m_valid); @@ -4066,6 +4129,7 @@ bool GSTextureCache::Move(u32 SBP, u32 SBW, u32 SPSM, int sx, int sy, u32 DBP, u // Invalidate any sources that overlap with the target (since they're now stale). InvalidateVideoMem(g_gs_renderer->m_mem.GetOffset(DBP, DBW, DPSM), GSVector4i(dx, dy, dx + w, dy + h), false); + return true; } @@ -4252,7 +4316,7 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type, { Target* t = *it; - if (t->m_TEX0.TBP0 == BP && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) + if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % t->m_TEX0.TBW) == 0)) && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) { rts.MoveFront(it.Index()); return t; @@ -4313,7 +4377,7 @@ GSTextureCache::Target* GSTextureCache::FindOverlappingTarget(u32 BP, u32 BW, u3 return FindOverlappingTarget(BP, end_bp); } -GSVector2i GSTextureCache::GetTargetSize(u32 bp, u32 fbw, u32 psm, s32 min_width, s32 min_height) +GSVector2i GSTextureCache::GetTargetSize(u32 bp, u32 fbw, u32 psm, s32 min_width, s32 min_height, bool can_expand) { TargetHeightElem search = {}; search.bp = bp; @@ -4327,14 +4391,17 @@ GSVector2i GSTextureCache::GetTargetSize(u32 bp, u32 fbw, u32 psm, s32 min_width TargetHeightElem& elem = const_cast(*it); if (elem.bits == search.bits) { - if (elem.width < min_width || elem.height < min_height) + if (can_expand) { - DbgCon.WriteLn("Expand size at %x %u %u from %ux%u to %ux%u", bp, fbw, psm, elem.width, elem.height, - min_width, min_height); - } + if (elem.width < min_width || elem.height < min_height) + { + DbgCon.WriteLn("Expand size at %x %u %u from %ux%u to %ux%u", bp, fbw, psm, elem.width, elem.height, + min_width, min_height); + } - elem.width = std::max(elem.width, min_width); - elem.height = std::max(elem.height, min_height); + elem.width = std::max(elem.width, min_width); + elem.height = std::max(elem.height, min_height); + } m_target_heights.MoveFront(it.Index()); elem.age = 0; @@ -4965,6 +5032,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset, std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); + + src->m_region.SetX((x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x, tw); + src->m_region.SetY((y_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * GSLocalMemory::m_psm[TEX0.PSM].pgs.y, th); } else { @@ -5116,8 +5186,10 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR { // We *should* be able to use the TBW here as an indicator of size... except Destroy All Humans 2 sets // TBW to 10, and samples from 64 through 703... which means it'd be grabbing the next row at the end. - const int tex_width = std::max(64 * TEX0.TBW, region.GetMaxX()); - const int tex_height = region.HasY() ? region.GetHeight() : (1 << TEX0.TH); + // Round the size up to the next block + const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM]; + const int tex_width = (std::max(64 * TEX0.TBW, region.GetMaxX()) + (psm_s.bs.x - 1)) & ~(psm_s.bs.x - 1); + const int tex_height = ((region.HasY() ? region.GetHeight() : (1 << TEX0.TH)) + (psm_s.bs.y - 1)) & ~(psm_s.bs.y - 1); const int scaled_width = static_cast(static_cast(tex_width) * scale); const int scaled_height = static_cast(static_cast(tex_height) * scale); @@ -6579,9 +6651,11 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } - + // Else No valid size, so need to resize down. // GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); @@ -6589,13 +6663,18 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize) { + if (m_TEX0.TBP0 == 0x1a00 && rect.w == 448 && can_resize) + DevCon.Warning("Here"); + if (m_valid.eq(GSVector4i::zero())) { m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } else if (can_resize) { @@ -6603,7 +6682,9 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); - m_end_block += offset; + + if (offset) + m_end_block = m_end_block + ((std::max(m_TEX0.TBW, 1U) << 5) - offset); } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } @@ -6974,6 +7055,29 @@ void GSTextureCache::InvalidateTemporarySource() m_temporary_source = nullptr; } +void GSTextureCache::SetTemporaryZ(GSTexture* temp_z) +{ + m_temporary_z = temp_z; +} + +GSTexture* GSTextureCache::GetTemporaryZ() +{ + if (!m_temporary_z) + return nullptr; + + return m_temporary_z; +} + + +void GSTextureCache::InvalidateTemporaryZ() +{ + if (!m_temporary_z) + return; + + g_gs_device->Recycle(m_temporary_z); + m_temporary_z = nullptr; +} + void GSTextureCache::InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax) { // When we insert we update memory usage. Old texture gets removed below. diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index b08b5fcb1922b..af65ab165139f 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -427,6 +427,7 @@ class GSTextureCache std::unordered_map m_surface_offset_cache; Source* m_temporary_source = nullptr; // invalidated after the draw + GSTexture* m_temporary_z = nullptr; // invalidated after the draw std::unique_ptr m_color_download_texture; std::unique_ptr m_uint16_download_texture; @@ -504,11 +505,11 @@ class GSTextureCache Target* FindOverlappingTarget(u32 BP, u32 end_bp) const; Target* FindOverlappingTarget(u32 BP, u32 BW, u32 PSM, GSVector4i rc) const; - GSVector2i GetTargetSize(u32 bp, u32 fbw, u32 psm, s32 min_width, s32 min_height); + GSVector2i GetTargetSize(u32 bp, u32 fbw, u32 psm, s32 min_width, s32 min_height, bool can_expand = true); bool HasTargetInHeightCache(u32 bp, u32 fbw, u32 psm, u32 max_age = std::numeric_limits::max(), bool move_front = true); bool Has32BitTarget(u32 bp); - void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32); + void InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 write_psm = PSMCT32, u32 write_bw = 1); void InvalidateVideoMemType(int type, u32 bp, u32 write_psm = PSMCT32, u32 write_fbmsk = 0, bool dirty_only = false); void InvalidateVideoMemSubTarget(GSTextureCache::Target* rt); void InvalidateVideoMem(const GSOffset& off, const GSVector4i& r, bool target = true); @@ -551,6 +552,11 @@ class GSTextureCache /// Invalidates a temporary source, a partial copy only created from the current RT/DS for the current draw. void InvalidateTemporarySource(); + void SetTemporaryZ(GSTexture* temp_z); + GSTexture* GetTemporaryZ(); + + /// Invalidates a temporary Z, a partial copy only created from the current DS for the current draw when Z is not offset but RT is + void InvalidateTemporaryZ(); /// Injects a texture into the hash cache, by using GSTexture::Swap(), transitively applying to all sources. Ownership of tex is transferred. void InjectHashCacheTexture(const HashCacheKey& key, GSTexture* tex, const std::pair& alpha_minmax); diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index a13c6cdb30c0c..f6e4fc04be382 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1168,11 +1168,8 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.rb = C.br; - float g_temp = C.g; - - C.g = C.a; - C.a = g_temp; + C.b = C.r; + C.a = C.g; } else if(PS_PROCESS_BA & SHUFFLE_READ) {