Skip to content

Commit

Permalink
GS/HW: Prefer blend second pass over barriers for AMD.
Browse files Browse the repository at this point in the history
Allow using the blend second pass on VK/GL with AMD GPUs; barriers are slower there, so prefer the blend second pass whenever possible.
  • Loading branch information
lightningterror committed Apr 29, 2024
1 parent 85d51c6 commit 1217c11
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 15 deletions.
29 changes: 15 additions & 14 deletions pcsx2/GS/Renderers/Common/GSDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -734,20 +734,21 @@ class GSDevice : public GSAlignedClass<32>
// clang-format off
struct FeatureSupport
{
bool broken_point_sampler : 1; ///< Issue with AMD cards, see tfx shader for details
bool vs_expand : 1; ///< Supports expanding points/lines/sprites in the vertex shader
bool primitive_id : 1; ///< Supports primitive ID for use with prim tracking destination alpha algorithm
bool texture_barrier : 1; ///< Supports sampling rt and hopefully texture barrier
bool provoking_vertex_last: 1; ///< Supports using the last vertex in a primitive as the value for flat shading.
bool point_expand : 1; ///< Supports point expansion in hardware.
bool line_expand : 1; ///< Supports line expansion in hardware.
bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts.
bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3.
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
bool framebuffer_fetch : 1; ///< Can sample from the framebuffer without texture barriers.
bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE.
bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening.
bool test_and_sample_depth: 1; ///< Supports concurrently binding the depth-stencil buffer for sampling and depth testing.
bool broken_point_sampler : 1; ///< Issue with AMD cards, see tfx shader for details
bool vs_expand : 1; ///< Supports expanding points/lines/sprites in the vertex shader
bool primitive_id : 1; ///< Supports primitive ID for use with prim tracking destination alpha algorithm
bool texture_barrier : 1; ///< Supports sampling rt and hopefully texture barrier
bool provoking_vertex_last : 1; ///< Supports using the last vertex in a primitive as the value for flat shading.
bool point_expand : 1; ///< Supports point expansion in hardware.
bool line_expand : 1; ///< Supports line expansion in hardware.
bool prefer_new_textures : 1; ///< Allocate textures up to the pool size before reusing them, to avoid render pass restarts.
bool dxt_textures : 1; ///< Supports DXTn texture compression, i.e. S3TC and BC1-3.
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
bool framebuffer_fetch : 1; ///< Can sample from the framebuffer without texture barriers.
bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE.
bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening.
bool test_and_sample_depth : 1; ///< Supports concurrently binding the depth-stencil buffer for sampling and depth testing.
bool prefer_blend_second_pass: 1; ///< Barriers are slow on AMD, we can prefer blend 2 pass instead.
FeatureSupport()
{
memset(this, 0, sizeof(*this));
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/DX11/GSDevice11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ GSDevice11::GSDevice11()
m_features.stencil_buffer = true;
m_features.cas_sharpening = true;
m_features.test_and_sample_depth = false;
m_features.prefer_blend_second_pass = false;
}

// Destructor is explicitly defaulted: no custom teardown logic is written here.
GSDevice11::~GSDevice11() = default;
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/DX12/GSDevice12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,7 @@ bool GSDevice12::CheckFeatures()
m_features.stencil_buffer = true;
m_features.cas_sharpening = true;
m_features.test_and_sample_depth = false;
m_features.prefer_blend_second_pass = false;
m_features.vs_expand = !GSConfig.DisableVertexShaderExpand;

m_features.dxt_textures = SupportsTextureFormat(DXGI_FORMAT_BC1_UNORM) &&
Expand Down
23 changes: 22 additions & 1 deletion pcsx2/GS/Renderers/HW/GSRendererHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4048,7 +4048,7 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
// HW blend can be done in multiple passes when there's no overlap.
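// Blend second pass is used when texture barriers aren't supported, or when the device prefers it over barriers (features.prefer_blend_second_pass).
// Speed wise, in general: texture barriers > blend second pass > texture copies; devices that set prefer_blend_second_pass treat the second pass as faster than barriers.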
const bool blend_second_pass_support = !features.texture_barrier && no_prim_overlap && is_basic_blend;
const bool blend_second_pass_support = (!features.texture_barrier || features.prefer_blend_second_pass) && no_prim_overlap && is_basic_blend;
const bool bmix1_second_pass = blend_second_pass_support && blend_mix1 && (alpha_c0_high_max_one || alpha_c2_high_one) && m_conf.ps.blend_d == 2;
// We don't want to enable blend mix if we are doing a second pass, it's useless.
blend_mix &= !bmix1_second_pass;
Expand All @@ -4066,6 +4066,23 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
bool sw_blending = false;
if (features.texture_barrier)
{
// Prefer blend second pass over sw blend when primitives don't overlap on AMD.
// Barriers are slower on AMD so we can use blend second pass if possible.
const bool ad_second_pass_no_rta_scale = alpha_c1_high_no_rta_correct && (blend_flag & (BLEND_HW3 | BLEND_HW5 | BLEND_HW6 | BLEND_HW7 | BLEND_HW9));
const bool blend_hw4_second_pass = (alpha_c0_high_max_one || alpha_c2_high_one) && (blend_flag & BLEND_HW4);
const bool blend_hw8_second_pass = (blend_flag & BLEND_HW8);
bool prefer_blend_second_pass = features.prefer_blend_second_pass && blend_second_pass_support // Check if blend second pass is supported.
// Don't enable if we want to do sw blend on draw.
&& !prefer_sw_blend
// Limited to clamp 1, it will sw blend anyway on wrap.
&& COLCLAMP.CLAMP
// No dithering headaches, just sw blend it.
&& m_conf.ps.dither == 0
// PABE will require sw blend.
&& !(m_draw_env->PABE.PABE && GetAlphaMinMax().min < 128)
// Type of blends/formulas.
&& (bmix1_second_pass || ad_second_pass_no_rta_scale || blend_hw4_second_pass || blend_hw8_second_pass);

const bool blend_requires_barrier = (blend_flag & BLEND_A_MAX) // Impossible blending
// Sw blend, either full barrier or one barrier with no overlap.
|| prefer_sw_blend
Expand Down Expand Up @@ -4095,11 +4112,15 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, bool& DAT
sw_blending |= m_vt.m_primclass == GS_SPRITE_CLASS && m_drawlist.size() < 100;
[[fallthrough]];
case AccBlendLevel::Basic:
// Disable prefer_blend_second_pass if higher blending requires sw blend.
prefer_blend_second_pass &= sw_blending;
// Prefer sw blend if possible.
color_dest_blend &= !prefer_sw_blend;
accumulation_blend &= !prefer_sw_blend;
// Enable sw blending for barriers.
sw_blending |= blend_requires_barrier;
// Disable any sw blend if we prefer blend second pass.
sw_blending &= !prefer_blend_second_pass;
// Enable sw blending for free blending.
sw_blending |= free_blend;
// Do not run BLEND MIX if sw blending is already present, it's less accurate.
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,7 @@ static void OnMainThread(Fn&& fn)
m_features.stencil_buffer = true;
m_features.cas_sharpening = true;
m_features.test_and_sample_depth = true;
m_features.prefer_blend_second_pass = false;

// Init metal stuff
m_fn_constants = MRCTransfer([MTLFunctionConstantValues new]);
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ bool GSDeviceOGL::CheckFeatures(bool& buggy_pbo)
m_features.prefer_new_textures = false;
m_features.stencil_buffer = true;
m_features.test_and_sample_depth = m_features.texture_barrier;
m_features.prefer_blend_second_pass = vendor_id_amd;

// NVIDIA GPUs prior to Kepler appear to have broken vertex shader buffer loading.
// Use bindless textures (introduced in Kepler) to differentiate.
Expand Down
3 changes: 3 additions & 0 deletions pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2667,6 +2667,9 @@ bool GSDeviceVK::CheckFeatures()
// Buggy drivers with broken barriers probably have no chance using GENERAL layout for depth either...
m_features.test_and_sample_depth = m_features.texture_barrier;

// 2 pass is faster than barriers on amd.
m_features.prefer_blend_second_pass = isAMD;

// Use D32F depth instead of D32S8 when we have framebuffer fetch.
m_features.stencil_buffer &= !m_features.framebuffer_fetch;

Expand Down

0 comments on commit 1217c11

Please sign in to comment.