diff --git a/ref/vk/shaders/bounce.comp b/ref/vk/shaders/bounce.comp index 445c339cf3..d87b4faa5a 100644 --- a/ref/vk/shaders/bounce.comp +++ b/ref/vk/shaders/bounce.comp @@ -32,6 +32,7 @@ layout(set = 0, binding = 13, rgba8) uniform readonly image2D base_color_a; layout(set = 0, binding = 20, rgba16f) uniform writeonly image2D out_indirect_diffuse; layout(set = 0, binding = 21, rgba16f) uniform writeonly image2D out_indirect_specular; +layout(set = 0, binding = 22, rgba16f) uniform writeonly image2D out_first_bounce_direction; // for spherical harmonics denoising layout(set = 0, binding = 30, std430) readonly buffer ModelHeaders { ModelHeader a[]; } model_headers; layout(set = 0, binding = 31, std430) readonly buffer Kusochki { Kusok a[]; } kusochki; @@ -94,6 +95,8 @@ bool getHit(vec3 origin, vec3 direction, inout RayPayloadPrimary payload) { payload.hit_t.w = L; payload.emissive.rgb = sampleSkybox(direction); return false; + } else { + payload.emissive.rgb = vec3(0.); // emissive polygons already calculated in directional polygon lighting pass (remove fireflyes here) } primaryRayHit(rq, payload); @@ -219,6 +222,9 @@ void computeBounces(MaterialEx mat, vec3 pos, vec3 direction, inout vec3 diffuse else specular += contribution; + if (i == 0) + imageStore(out_first_bounce_direction, pix, vec4(normalize(bounce_direction), 0.f)); // for spherical harmonics denoising + if (!did_hit) break; @@ -243,6 +249,12 @@ void main() { return; } const vec2 uv = (gl_GlobalInvocationID.xy + .5) / res * 2. 
- 1.; + + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_DISABLE_GI) != 0) { + imageStore(out_indirect_diffuse, pix, vec4(0.)); + imageStore(out_indirect_specular, pix, vec4(0.)); + return; + } #ifdef BRDF_COMPARE g_mat_gltf2 = pix.x > ubo.ubo.res.x / INDIRECT_SCALE / 2.; @@ -265,11 +277,17 @@ void main() { MaterialEx mat; mat.prop.base_color = base_a.rgb; - mat.prop.metalness = material_data.g; - mat.prop.roughness = material_data.r; mat.geometry_normal = geometry_normal; mat.shading_normal = shading_normal; + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_ONLY_DIFFUSE_GI) != 0) { + mat.prop.metalness = 0.0; + mat.prop.roughness = 1.0; + } else { + mat.prop.metalness = material_data.g; + mat.prop.roughness = material_data.r; + } + computeBounces(mat, pos_t.xyz, direction, diffuse, specular); } @@ -288,6 +306,11 @@ void main() { DEBUG_VALIDATE_RANGE_VEC3("bounce.specular", specular, 0., 1e6); #endif + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_ONLY_DIFFUSE_GI) != 0) { + diffuse += specular; + specular = vec3(0.); + } + imageStore(out_indirect_diffuse, pix, vec4(diffuse, 0.f)); imageStore(out_indirect_specular, pix, vec4(specular, 0.f)); } diff --git a/ref/vk/shaders/denoiser.comp b/ref/vk/shaders/denoiser.comp index c3ec77813b..0de52cdbe0 100644 --- a/ref/vk/shaders/denoiser.comp +++ b/ref/vk/shaders/denoiser.comp @@ -30,22 +30,23 @@ layout(set = 0, binding = 11) uniform UBO { UniformBuffer ubo; } ubo; layout(set = 0, binding = 12, rgba16f) uniform readonly image2D indirect_diffuse; layout(set = 0, binding = 13, rgba16f) uniform readonly image2D indirect_diffuse_atrous1; layout(set = 0, binding = 14, rgba16f) uniform readonly image2D indirect_specular; +layout(set = 0, binding = 15, rgba16f) uniform readonly image2D indirect_diffuse_denoised_by_sh; -layout(set = 0, binding = 15, rgba16f) uniform image2D out_temporal_diffuse; -layout(set = 0, binding = 16, rgba16f) uniform image2D prev_temporal_diffuse; +layout(set = 0, binding = 16, rgba16f) uniform image2D 
out_temporal_diffuse; +layout(set = 0, binding = 17, rgba16f) uniform image2D prev_temporal_diffuse; -layout(set = 0, binding = 17, rgba16f) uniform image2D out_temporal_specular; -layout(set = 0, binding = 18, rgba16f) uniform image2D prev_temporal_specular; +layout(set = 0, binding = 18, rgba16f) uniform image2D out_temporal_specular; +layout(set = 0, binding = 19, rgba16f) uniform image2D prev_temporal_specular; //#define DEBUG_NOISE #ifdef DEBUG_NOISE -layout(set = 0, binding = 19) uniform sampler3D blue_noise_texture; +layout(set = 0, binding = 20) uniform sampler3D blue_noise_texture; #include "bluenoise.glsl" #endif -layout(set = 0, binding = 20, rgba16f) uniform readonly image2D legacy_blend; +layout(set = 0, binding = 21, rgba16f) uniform readonly image2D legacy_blend; -//layout(set = 0, binding = 21) uniform sampler2D textures[MAX_TEXTURES]; +//layout(set = 0, binding = 22) uniform sampler2D textures[MAX_TEXTURES]; #include "atrous.glsl" @@ -98,7 +99,11 @@ Components dontBlurSamples(const ivec2 res, const ivec2 pix) { const ivec2 p_indirect = pix / INDIRECT_SCALE; c.direct_diffuse += imageLoad(light_point_diffuse, p).rgb; c.direct_diffuse += imageLoad(light_poly_diffuse, p).rgb; + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_DENOISE_GI_BY_SH) == 0) { c.indirect_diffuse += imageLoad(indirect_diffuse, p_indirect).rgb; + } else { + c.indirect_diffuse += imageLoad(indirect_diffuse_denoised_by_sh, p).rgb; + } c.direct_specular += imageLoad(light_poly_specular, p).rgb; c.direct_specular += imageLoad(light_point_specular, p).rgb; c.indirect_specular += imageLoad(indirect_specular, p_indirect).rgb; @@ -142,7 +147,11 @@ Components boxBlurSamples(ivec2 res, ivec2 pix) { res /= 2; pix /= 2; + + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_DENOISE_GI_BY_SH) == 0) { BOX_BLUR(c.indirect_diffuse, indirect_diffuse, res, pix, INDIRECT_DIFFUSE_KERNEL); + } + BOX_BLUR(c.indirect_specular, indirect_specular, res, pix, INDIRECT_SPECULAR_KERNEL); return c; @@ -309,7 +318,8 @@ 
Components blurSamples(const ivec2 res, const ivec2 pix) { c.direct_diffuse += imageLoad(light_poly_diffuse, p).rgb * direct_diffuse_scale; } - if (all(lessThan(abs(ivec2(x, y)), ivec2(INDIRECT_DIFFUSE_KERNEL))) && do_indirect) + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_DENOISE_GI_BY_SH) == 0 && + all(lessThan(abs(ivec2(x, y)), ivec2(INDIRECT_DIFFUSE_KERNEL))) && do_indirect) { // TODO indirect operates at different scale, do a separate pass const float indirect_diffuse_scale = scale @@ -452,6 +462,11 @@ void main() { vec3 diffuse = c.direct_diffuse + c.indirect_diffuse; vec3 specular = c.direct_specular + c.indirect_specular; + + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_DENOISE_GI_BY_SH) != 0) { + diffuse += imageLoad(indirect_diffuse_denoised_by_sh, pix).rgb; // NOTE(review): dontBlurSamples() also adds this image to c.indirect_diffuse when the flag is set - confirm it is not counted twice on that path + } + { //#define DISABLE_TEMPORAL_DENOISER #ifndef DISABLE_TEMPORAL_DENOISER diff --git a/ref/vk/shaders/denoiser_config.glsl b/ref/vk/shaders/denoiser_config.glsl new file mode 100644 index 0000000000..d3c39b1e38 --- /dev/null +++ b/ref/vk/shaders/denoiser_config.glsl @@ -0,0 +1,11 @@ + +// not a plane, it's a sphere, but it works +#define NEAR_PLANE_OFFSET 5. + +// we downsample the gi map and store bounce positions in neighboring texels +// downsample image dimensions by 2 = store 4 bounces +// downsample image dimensions by 3 = store 9 bounces +#define GI_DOWNSAMPLE 2 + +// max bounces for testing bounce visibility +#define GI_BOUNCES_MAX 1 diff --git a/ref/vk/shaders/denoiser_utils.glsl b/ref/vk/shaders/denoiser_utils.glsl new file mode 100644 index 0000000000..7299b92dce --- /dev/null +++ b/ref/vk/shaders/denoiser_utils.glsl @@ -0,0 +1,197 @@ +#ifndef lk_dnsr_utils_LK_12231312 +#define lk_dnsr_utils_LK_12231312 1 + +#define TEXEL_FLAG_TRANSPARENT 1 +#define TEXEL_FLAG_REFRACTION 2 + +// clamp light exposure without losing color: scale so the largest channel equals clamp_value +vec3 clamp_color(vec3 color, float clamp_value) { + float max_color = max(max(color.r, color.g), color.b); + return max_color > clamp_value ? 
(color / max_color) * clamp_value : color; +} + +// 3rd component is the transparent texel status, 0 or 1 (parity of pix.x + pix.y) +ivec3 PixToCheckerboard(ivec2 pix, ivec2 res) { + int is_transparent_texel = (pix.x + pix.y) % 2; + ivec2 out_pix = ivec2(pix.x / 2 + is_transparent_texel * (res.x / 2), pix.y); + return ivec3(out_pix, is_transparent_texel); +} + +// 3rd component is the transparent texel status, 0 or 1, forced to the status requested by the caller +ivec3 PixToCheckerboard(ivec2 pix, ivec2 res, int is_transparent_texel) { + ivec2 out_pix = ivec2(pix.x / 2 + is_transparent_texel * (res.x / 2), pix.y); + return ivec3(out_pix, is_transparent_texel); +} + +// use the forced conversion only when texel_flags equals the transparent or the refraction flag, otherwise the parity-based one +ivec3 PixToCheckerboard(ivec2 pix, ivec2 res, int is_transparent_texel, int texel_flags) { + if (texel_flags == TEXEL_FLAG_TRANSPARENT || texel_flags == TEXEL_FLAG_REFRACTION) { + return PixToCheckerboard(pix, res, is_transparent_texel); + } + return PixToCheckerboard(pix, res); +} + + +// 3rd component is the transparent texel status, 0 or 1 (which horizontal half of the image pix.x falls in) +ivec3 CheckerboardToPix(ivec2 pix, ivec2 res) { + int half_res = res.x / 2; + int is_transparent_texel = pix.x / half_res; + int out_pix_x = (pix.x % half_res) * 2; + int row_index = pix.y % 2; + int checker_addition = is_transparent_texel + row_index - row_index*is_transparent_texel*2; + ivec2 out_pix = ivec2(out_pix_x + checker_addition, pix.y); + return ivec3(out_pix, is_transparent_texel); +} + + +vec3 OriginWorldPosition(mat4 inv_view) { + return (inv_view * vec4(0, 0, 0, 1)).xyz; +} + +vec3 ScreenToWorldDirection(vec2 uv, mat4 inv_view, mat4 inv_proj) { + vec4 target = inv_proj * vec4(uv.x, uv.y, 1, 1); + vec3 direction = (inv_view * vec4(normalize(target.xyz), 0)).xyz; + return normalize(direction); +} + +vec3 WorldPositionFromDirection(vec3 origin, vec3 direction, float depth) { + return origin + normalize(direction) * depth; +} + +vec3 FarPlaneDirectedVector(vec2 uv, vec3 forward, mat4 inv_view, mat4 inv_proj) { + vec3 dir 
= ScreenToWorldDirection(uv, inv_view, inv_proj); + float plane_length = dot(forward, dir); + return dir / max(0.001, plane_length); +} + +vec2 WorldPositionToUV(vec3 position, mat4 proj, mat4 view) { + vec4 clip_space = proj * vec4((view * vec4(position, 1.)).xyz, 1.); + return clip_space.xy / clip_space.w; +} + +vec3 WorldPositionToUV2(vec3 position, mat4 inv_proj, mat4 inv_view) { + const vec3 out_of_bounds = vec3(0.,0.,-1.); + const float near_plane_treshold = 1.; + vec3 origin = OriginWorldPosition(inv_view); + vec3 forwardDirection = normalize(ScreenToWorldDirection(vec2(0.), inv_view, inv_proj)); + float depth = dot(forwardDirection, position - origin); + if (depth < near_plane_treshold) return out_of_bounds; + vec3 positionNearPlane = (position - origin) / depth; + vec3 rightForwardDirection = ScreenToWorldDirection(vec2(1., 0.), inv_view, inv_proj); + vec3 upForwardDirection = ScreenToWorldDirection(vec2(0., 1.), inv_view, inv_proj); + rightForwardDirection /= dot(forwardDirection, rightForwardDirection); + upForwardDirection /= dot(forwardDirection, upForwardDirection); + vec3 rightDirection = rightForwardDirection - forwardDirection; + vec3 upDirection = upForwardDirection - forwardDirection; + float x = dot(normalize(rightDirection), positionNearPlane - forwardDirection) / length(rightDirection); + float y = dot(normalize(upDirection), positionNearPlane - forwardDirection) / length(upDirection); + if (x < -1. || y < -1. || x > 1. || y > 1.) return out_of_bounds; + return vec3(x, y, 1.); +} + +float normpdf2(in float x2, in float sigma) { return 0.39894*exp(-0.5*x2/(sigma*sigma))/sigma; } +float normpdf(in float x, in float sigma) { return normpdf2(x*x, sigma); } + +ivec2 UVToPix(vec2 uv, ivec2 res) { + vec2 screen_uv = uv * 0.5 + vec2(0.5); + return ivec2(screen_uv.x * float(res.x), screen_uv.y * float(res.y)); +} + +vec2 PixToUV(ivec2 pix, ivec2 res) { + return (vec2(pix) /*+ vec2(0.5)*/) / vec2(res) * 2. 
- vec2(1.); +} + +vec3 PBRMix(vec3 base_color_a, vec3 diffuse, vec3 specular, float metalness) { + vec3 metal_colour = specular * base_color_a; + vec3 dielectric_colour = mix(diffuse * base_color_a, specular, 0.04); // like in Unreal + return mix(dielectric_colour, metal_colour, metalness); +} + +vec3 PBRMixFresnel(vec3 base_color_a, vec3 diffuse, vec3 specular, float metalness, float fresnel) { + vec3 metal_colour = specular * base_color_a; + float diffuse_specular_factor = mix(0.2, 0.04, fresnel); + vec3 dielectric_colour = mix(diffuse * base_color_a, specular, diffuse_specular_factor); + return mix(dielectric_colour, metal_colour, metalness); +} + +int per_frame_offset = 0; + +int quarterPart(ivec2 pix_in) { + ivec2 pix = pix_in % 2; + return (pix.x + 2 * pix.y + per_frame_offset) % 4; +} + +int ninefoldPart(ivec2 pix_in) { + ivec2 pix = pix_in % 3; + return (pix.x + 3 * pix.y + per_frame_offset) % 9; +} + +int texel_transparent_type(float transparent_alpha) { + return abs(transparent_alpha) < 0.05 ? 0 : transparent_alpha > 0. ? 2 : 3; +} + +int checker_texel(ivec2 pix) { + return (pix.x + pix.y) % 2; +} + +ivec2 closest_checker_texel(ivec2 pix, int source_checker_texel) { + return checker_texel(pix) == source_checker_texel ? pix : pix + ivec2(1, 0); +} + + +#ifndef M_PI +#define M_PI 3.14159265358979323846 // fallback value of pi, only used when no earlier include defined M_PI +#endif + +// Schlick's approximation to Fresnel term +// f90 should be 1.0, except for the trick used by Schuler (see 'shadowedF90' function) +vec3 evalFresnelSchlickM(vec3 f0, float f90, float NdotS) { + return f0 + (f90 - f0) * pow(1.0f - NdotS, 5.0f); +} + +float luminanceM(vec3 rgb) { + return dot(rgb, vec3(0.2126f, 0.7152f, 0.0722f)); +} + +vec3 randomizedOnHemisphere(vec3 randomVec, vec3 normal) { + float directionality = dot(normal, randomVec); + if (directionality > 0.) 
return normalize(randomVec); + return -normalize(randomVec); +} + +vec3 sampleSphere(vec2 uv) +{ + float y = 2.0 * uv.x - 1; + float theta = 2.0 * M_PI * uv.y; + float r = sqrt(1.0 - y * y); + return vec3(cos(theta) * r, y, sin(theta) * r); +} + +// Microfacet bounce from this example https://www.shadertoy.com/view/Md3yWl + +vec3 SphereRand( vec2 rand ) +{ + rand += vec2(.5); + float sina = rand.x*2. - 1.; + float b = 6.283*rand.y; + float cosa = sqrt(1.-sina*sina); + return vec3(cosa*cos(b),sina,cosa*sin(b)); +} + +vec3 PowRand( vec3 rand, vec3 axis, float fpow ) +{ + //vec3 r = normalize(rand - vec3(0.5)); + vec3 r = sampleSphere(rand.xz); + //vec3 r = SphereRand(rand.xy); + float d = dot(r,axis); + r -= d*axis; + r = normalize(r); + float h = d*.5+.5; + r *= sqrt( 1. - pow( h, 2./(fpow+1.) ) ); + r += axis*sqrt(1.-dot(r,r)); + return r; +} + +#define FIX_NAN(COLOR) (any(isnan(COLOR)) ? vec4(0.) : COLOR) + +#endif // #ifndef lk_dnsr_utils_LK_12231312 diff --git a/ref/vk/shaders/diffuse_gi_sh_atrous.glsl b/ref/vk/shaders/diffuse_gi_sh_atrous.glsl new file mode 100644 index 0000000000..820312c998 --- /dev/null +++ b/ref/vk/shaders/diffuse_gi_sh_atrous.glsl @@ -0,0 +1,105 @@ + +#ifndef KERNEL_X +#define KERNEL_X 1 +#endif + +#ifndef KERNEL_Y +#define KERNEL_Y 1 +#endif + + +#ifndef OFFSET +#define OFFSET ivec2(1, 1) // default per-tap step in texels; the including pass shader normally defines OFFSET first +#endif + +#ifndef DEPTH_THRESHOLD +#define DEPTH_THRESHOLD 0.1 +#endif + +#define GI_BLUR_NORMALS_THRESHOLD_LOW 0.5 +#define GI_BLUR_NORMALS_THRESHOLD_MAX 0.9 + +#include "noise.glsl" +#include "brdf.h" +#include "utils.glsl" +#include "denoiser_config.glsl" +#include "denoiser_utils.glsl" + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0, rgba16f) uniform image2D OUTPUT_GI_1; +layout(set = 0, binding = 1, rgba16f) uniform image2D OUTPUT_GI_2; + +layout(set = 0, binding = 2, rgba16f) uniform readonly image2D INPUT_GI_1; +layout(set = 0, binding = 3, rgba16f) uniform readonly image2D INPUT_GI_2; +layout(set 
= 0, binding = 4, rgba8) uniform readonly image2D material_rmxx; +layout(set = 0, binding = 5, rgba32f) uniform readonly image2D position_t; +layout(set = 0, binding = 6, rgba16f) uniform readonly image2D normals_gs; + +#define GLSL +#include "ray_interop.h" +#undef GLSL + +layout(set = 0, binding = 7) uniform UBO { UniformBuffer ubo; } ubo; + +void main() { + ivec2 res = ivec2(imageSize(INPUT_GI_1)); + ivec2 pix = ivec2(gl_GlobalInvocationID); + + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_DENOISE_GI_BY_SH) == 0) { + return; + } + + const vec4 gi_sh2_src = FIX_NAN(imageLoad(INPUT_GI_2, pix)); + const float depth = FIX_NAN(imageLoad(position_t, pix)).w; + const float metalness_factor = /*FIX_NAN(imageLoad(material_rmxx, pix)).y > .5 ? 1. : 0.*/ 1.; + const vec3 normal = normalDecode(FIX_NAN(imageLoad(normals_gs, pix)).xy); + + vec4 gi_sh1 = vec4(0.); + vec2 gi_sh2 = vec2(0.); + + float weight_sum = 0.; + for (int x = -KERNEL_X; x <= KERNEL_X; ++x) { + for (int y = -KERNEL_Y; y <= KERNEL_Y; ++y) { + const ivec2 p = (pix + ivec2(x, y) * OFFSET); + if (any(greaterThanEqual(p, res)) || any(lessThan(p, ivec2(0)))) { + continue; + } + + // metal surfaces have gi after 2 bounce, diffuse after 1, don't mix them +// const float current_metalness = FIX_NAN(imageLoad(material_rmxx, p)).y; +// if (abs(metalness_factor - current_metalness) > .5) +// continue; + + const vec4 current_gi_sh1 = FIX_NAN(imageLoad(INPUT_GI_1, p)); + const vec4 current_gi_sh2 = FIX_NAN(imageLoad(INPUT_GI_2, p)); + const vec3 current_normal = normalDecode(FIX_NAN(imageLoad(normals_gs, p)).xy); + + const float depth_current = FIX_NAN(imageLoad(position_t, p)).w; + const float depth_offset = abs(depth - depth_current) / max(0.001, depth); + const float gi_depth_factor = 1. 
- smoothstep(0., DEPTH_THRESHOLD, depth_offset); + const float normals_factor = smoothstep(GI_BLUR_NORMALS_THRESHOLD_LOW, GI_BLUR_NORMALS_THRESHOLD_MAX, dot(normal, current_normal)); + + float weight = gi_depth_factor * normals_factor; // square blur for more efficient light spreading + +// #ifdef SPREAD_UPSCALED +// weight *= (GI_DOWNSAMPLE * GI_DOWNSAMPLE); +// #endif + +// const float sigma = KERNEL_X / 2.; +// const float weight = normpdf(x, sigma) * normpdf(y, sigma) * gi_depth_factor * normals_factor; + + gi_sh1 += current_gi_sh1 * weight; + gi_sh2 += current_gi_sh2.xy * weight; + weight_sum += weight; + } + } + + if (weight_sum > 0.) { + gi_sh1 /= weight_sum; + gi_sh2 /= weight_sum; + } + + imageStore(OUTPUT_GI_1, pix, gi_sh1); + imageStore(OUTPUT_GI_2, pix, vec4(gi_sh2, gi_sh2_src.zw)); +} diff --git a/ref/vk/shaders/diffuse_gi_sh_denoise_init.comp b/ref/vk/shaders/diffuse_gi_sh_denoise_init.comp new file mode 100644 index 0000000000..6960a9bcc8 --- /dev/null +++ b/ref/vk/shaders/diffuse_gi_sh_denoise_init.comp @@ -0,0 +1,67 @@ +#version 460 + +#include "noise.glsl" +#include "utils.glsl" +#include "brdf.h" +#include "denoiser_config.glsl" +#include "denoiser_utils.glsl" +#include "spherical_harmonics.glsl" +#include "color_spaces.glsl" + +#define GI_LIMIT_LUMINANCE 1.0 // aggressive and dumb removing fireflyes + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0, rgba16f) uniform image2D out_sh1_ping; +layout(set = 0, binding = 1, rgba16f) uniform image2D out_sh2_ping; + +layout(set = 0, binding = 2, rgba8) uniform readonly image2D base_color_a; +layout(set = 0, binding = 3, rgba32f) uniform readonly image2D position_t; +layout(set = 0, binding = 4, rgba16f) uniform readonly image2D indirect_diffuse; +layout(set = 0, binding = 5, rgba16f) uniform readonly image2D first_bounce_direction; + +void main() { + ivec2 res = ivec2(imageSize(base_color_a)); + ivec2 pix = ivec2(gl_GlobalInvocationID); + + if 
(any(greaterThanEqual(pix, res))) { + return; + } + + if (any(lessThan(pix, res / GI_DOWNSAMPLE))) { // NOTE(review): any() passes when only one coordinate is inside the downsampled extent - confirm whether all() was intended + vec3 global_illumination = FIX_NAN(imageLoad(indirect_diffuse, pix)).rgb; + + // limit irradiance to reduce fireflies + float gi_lum = luminance(global_illumination); + if (gi_lum > 0.) { + float limit = smoothstep(0., GI_LIMIT_LUMINANCE, gi_lum); + global_illumination = mix(global_illumination, global_illumination / gi_lum, limit); + } + + vec4 gi_sh1 = vec4(0.); + vec2 gi_sh2 = vec2(0.); + + // store indirect light in spherical harmonics + if (any(greaterThan(global_illumination.rgb, vec3(0.)))) { + const vec3 indirect_color = global_illumination.rgb * STORAGE_SCALE_LF; + const vec3 direction = FIX_NAN(imageLoad(first_bounce_direction, pix)).xyz; + + if (length(direction) > 0.) { + SH low_freq = irradiance_to_SH(indirect_color, normalize(direction)); + gi_sh1 = low_freq.shY; + gi_sh2 = low_freq.CoCg; + } + } + + for(int x = 0; x < GI_DOWNSAMPLE; x++) { + for(int y = 0; y < GI_DOWNSAMPLE; y++) { + const ivec2 pix_upscaled = pix * GI_DOWNSAMPLE + ivec2(x,y); + if (any(greaterThanEqual(pix_upscaled, res))) + continue; + + imageStore(out_sh1_ping, pix_upscaled, gi_sh1); + imageStore(out_sh2_ping, pix_upscaled, vec4(gi_sh2, 0., 0.)); + } + } + } +} diff --git a/ref/vk/shaders/diffuse_gi_sh_denoise_pass_1.comp b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_1.comp new file mode 100644 index 0000000000..586f3c52f4 --- /dev/null +++ b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_1.comp @@ -0,0 +1,12 @@ +#version 460 +#extension GL_GOOGLE_include_directive : require + +#define OFFSET ivec2(1, 1) + +#define INPUT_GI_1 sh1_ping +#define INPUT_GI_2 sh2_ping + +#define OUTPUT_GI_1 out_sh1_pong +#define OUTPUT_GI_2 out_sh2_pong + +#include "diffuse_gi_sh_atrous.glsl" diff --git a/ref/vk/shaders/diffuse_gi_sh_denoise_pass_2.comp b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_2.comp new file mode 100644 index 0000000000..291ae25a6d --- /dev/null +++ 
b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_2.comp @@ -0,0 +1,12 @@ +#version 460 +#extension GL_GOOGLE_include_directive : require + +#define OFFSET ivec2(2, 2) + +#define INPUT_GI_1 sh1_pong +#define INPUT_GI_2 sh2_pong + +#define OUTPUT_GI_1 out_sh1_ping +#define OUTPUT_GI_2 out_sh2_ping + +#include "diffuse_gi_sh_atrous.glsl" diff --git a/ref/vk/shaders/diffuse_gi_sh_denoise_pass_3.comp b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_3.comp new file mode 100644 index 0000000000..87060f14d2 --- /dev/null +++ b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_3.comp @@ -0,0 +1,12 @@ +#version 460 +#extension GL_GOOGLE_include_directive : require + +#define OFFSET ivec2(4, 4) + +#define INPUT_GI_1 sh1_ping +#define INPUT_GI_2 sh2_ping + +#define OUTPUT_GI_1 out_sh1_pong +#define OUTPUT_GI_2 out_sh2_pong + +#include "diffuse_gi_sh_atrous.glsl" diff --git a/ref/vk/shaders/diffuse_gi_sh_denoise_pass_4.comp b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_4.comp new file mode 100644 index 0000000000..50a9a604b2 --- /dev/null +++ b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_4.comp @@ -0,0 +1,12 @@ +#version 460 +#extension GL_GOOGLE_include_directive : require + +#define OFFSET ivec2(8, 8) + +#define INPUT_GI_1 sh1_pong +#define INPUT_GI_2 sh2_pong + +#define OUTPUT_GI_1 out_sh1_ping +#define OUTPUT_GI_2 out_sh2_ping + +#include "diffuse_gi_sh_atrous.glsl" diff --git a/ref/vk/shaders/diffuse_gi_sh_denoise_pass_5.comp b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_5.comp new file mode 100644 index 0000000000..0ebfa93e3b --- /dev/null +++ b/ref/vk/shaders/diffuse_gi_sh_denoise_pass_5.comp @@ -0,0 +1,12 @@ +#version 460 +#extension GL_GOOGLE_include_directive : require + +#define OFFSET ivec2(32, 32) + +#define INPUT_GI_1 sh1_ping +#define INPUT_GI_2 sh2_ping + +#define OUTPUT_GI_1 out_sh1_pong +#define OUTPUT_GI_2 out_sh2_pong + +#include "diffuse_gi_sh_atrous.glsl" diff --git a/ref/vk/shaders/diffuse_gi_sh_denoise_save.comp b/ref/vk/shaders/diffuse_gi_sh_denoise_save.comp new file mode 
100644 index 0000000000..0019ca5df1 --- /dev/null +++ b/ref/vk/shaders/diffuse_gi_sh_denoise_save.comp @@ -0,0 +1,66 @@ +#version 460 + +#include "brdf.h" +#include "noise.glsl" +#include "utils.glsl" +#include "color_spaces.glsl" +#include "denoiser_config.glsl" +#include "denoiser_utils.glsl" +#include "spherical_harmonics.glsl" + + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0, rgba16f) uniform writeonly image2D out_indirect_diffuse_denoised_by_sh; +layout(set = 0, binding = 1, rgba16f) uniform writeonly image2D out_indirect_diffuse; +layout(set = 0, binding = 2, rgba16f) uniform writeonly image2D out_indirect_diffuse_atrous1; + +layout(set = 0, binding = 3, rgba8) uniform readonly image2D base_color_a; +layout(set = 0, binding = 4, rgba16f) uniform readonly image2D normals_gs; +layout(set = 0, binding = 5, rgba16f) uniform readonly image2D sh1_pong; +layout(set = 0, binding = 6, rgba16f) uniform readonly image2D sh2_pong; + +#define GLSL +#include "ray_interop.h" +#undef GLSL + +layout(set = 0, binding = 7) uniform UBO { UniformBuffer ubo; } ubo; + +void readNormals(ivec2 uv, out vec3 geometry_normal, out vec3 shading_normal) { + const vec4 n = FIX_NAN(imageLoad(normals_gs, uv)); + geometry_normal = normalDecode(n.xy); + shading_normal = normalDecode(n.zw); +} + + +void main() { + ivec2 res = ivec2(imageSize(base_color_a)); + ivec2 pix = ivec2(gl_GlobalInvocationID); + + if (any(greaterThanEqual(pix, res))) { + return; + } + + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_DENOISE_GI_BY_SH) == 0) { + imageStore(out_indirect_diffuse_denoised_by_sh, pix, vec4(0.)); + return; + } + + vec3 geometry_normal, shading_normal; + readNormals(pix, geometry_normal, shading_normal); + + // albedo + const vec4 base_color_src = FIX_NAN(imageLoad(base_color_a, pix)); + const vec3 base_color = SRGBtoLINEAR(base_color_src.rgb); + + // global illumination re-light + SH low_freq; + low_freq.shY = FIX_NAN(imageLoad(sh1_pong, pix)); + 
low_freq.CoCg = FIX_NAN(imageLoad(sh2_pong, pix)).xy; + + const vec3 diffuse_gi = project_SH_irradiance(low_freq, shading_normal) / STORAGE_SCALE_LF; + + imageStore(out_indirect_diffuse_denoised_by_sh, pix, vec4(diffuse_gi, 0.)); + imageStore(out_indirect_diffuse, pix, vec4(0.)); + imageStore(out_indirect_diffuse_atrous1, pix, vec4(0.)); +} diff --git a/ref/vk/shaders/indirect_diffuse_atrous1.comp b/ref/vk/shaders/indirect_diffuse_atrous1.comp index 84da9424be..45b683217b 100644 --- a/ref/vk/shaders/indirect_diffuse_atrous1.comp +++ b/ref/vk/shaders/indirect_diffuse_atrous1.comp @@ -25,6 +25,12 @@ void main() { const ivec2 res = ubo.ubo.res; const ivec2 pix = ivec2(gl_GlobalInvocationID); + // skip this pass if we used other denoising pipeline + if ((ubo.ubo.renderer_flags & RENDERER_FLAG_DENOISE_GI_BY_SH) != 0) { + imageStore(out_indirect_diffuse_atrous1, pix, vec4(0.)); + return; + } + const ivec2 res_scaled = res / INDIRECT_SCALE; if (any(greaterThanEqual(pix, res_scaled))) { return; diff --git a/ref/vk/shaders/ray_interop.h b/ref/vk/shaders/ray_interop.h index 65414b0ea3..df819ac3f4 100644 --- a/ref/vk/shaders/ray_interop.h +++ b/ref/vk/shaders/ray_interop.h @@ -130,7 +130,7 @@ struct Kusok { }; struct PointLight { - vec4 origin_r2; // vec4(center.xyz, radius²) + vec4 origin_r2; // vec4(center.xyz, radius²) vec4 color_stopdot; vec4 dir_stopdot2; @@ -193,6 +193,11 @@ struct LightCluster { #define DEBUG_FLAG_WHITE_FURNACE (1<<0) +#define RENDERER_FLAG_ONLY_DIFFUSE_GI (1<<0) +#define RENDERER_FLAG_SEPARATED_REFLECTION (1<<1) +#define RENDERER_FLAG_DENOISE_GI_BY_SH (1<<2) +#define RENDERER_FLAG_DISABLE_GI (1<<3) + struct UniformBuffer { mat4 inv_proj, inv_view; mat4 prev_inv_proj, prev_inv_view; @@ -204,6 +209,8 @@ struct UniformBuffer { uint debug_display_only; uint debug_flags; + + uint renderer_flags; }; #undef PAD diff --git a/ref/vk/shaders/rt.json b/ref/vk/shaders/rt.json index 2197957cef..db7f38b001 100644 --- a/ref/vk/shaders/rt.json +++ 
b/ref/vk/shaders/rt.json @@ -41,6 +41,27 @@ "indiff_at1": { "comp": "indirect_diffuse_atrous1" }, + "indiff_sh_init": { + "comp": "diffuse_gi_sh_denoise_init" + }, + "indiff_sh_pass1": { + "comp": "diffuse_gi_sh_denoise_pass_1" + }, + "indiff_sh_pass2": { + "comp": "diffuse_gi_sh_denoise_pass_2" + }, + "indiff_sh_pass3": { + "comp": "diffuse_gi_sh_denoise_pass_3" + }, + "indiff_sh_pass4": { + "comp": "diffuse_gi_sh_denoise_pass_4" + }, + "indiff_sh_pass5": { + "comp": "diffuse_gi_sh_denoise_pass_5" + }, + "indiff_sh_save": { + "comp": "diffuse_gi_sh_denoise_save" + }, "denoiser": { "comp": "denoiser" }, diff --git a/ref/vk/shaders/spherical_harmonics.glsl b/ref/vk/shaders/spherical_harmonics.glsl new file mode 100644 index 0000000000..fe0a5dbabd --- /dev/null +++ b/ref/vk/shaders/spherical_harmonics.glsl @@ -0,0 +1,97 @@ +// Copypasted from Quake 2 RTX +// https://github.com/NVIDIA/Q2RTX +// +// Original licence code: +// +// Copyright (C) 2018 Christoph Schied +// Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+// + +#define STORAGE_SCALE_LF 1024.0 + +struct SH +{ + vec4 shY; + vec2 CoCg; +}; + +vec3 project_SH_irradiance(SH sh, vec3 N) +{ + float d = dot(sh.shY.xyz, N); + float Y = 2.0 * (1.023326 * d + 0.886226 * sh.shY.w); + Y = max(Y, 0.0); + + sh.CoCg *= Y * 0.282095 / (sh.shY.w + 1e-6); + + float T = Y - sh.CoCg.y * 0.5; + float G = sh.CoCg.y + T; + float B = T - sh.CoCg.x * 0.5; + float R = B + sh.CoCg.x; + + return max(vec3(R, G, B), vec3(0.0)); +} + +SH irradiance_to_SH(vec3 color, vec3 dir) +{ + SH result; + + float Co = color.r - color.b; + float t = color.b + Co * 0.5; + float Cg = color.g - t; + float Y = max(t + Cg * 0.5, 0.0); + + result.CoCg = vec2(Co, Cg); + + float L00 = 0.282095; + float L1_1 = 0.488603 * dir.y; + float L10 = 0.488603 * dir.z; + float L11 = 0.488603 * dir.x; + + result.shY = vec4 (L11, L1_1, L10, L00) * Y; + + return result; +} + +vec3 SH_to_irradiance(SH sh) +{ + float Y = sh.shY.w / 0.282095; + + float T = Y - sh.CoCg.y * 0.5; + float G = sh.CoCg.y + T; + float B = T - sh.CoCg.x * 0.5; + float R = B + sh.CoCg.x; + + return max(vec3(R, G, B), vec3(0.0)); +} + +void accumulate_SH(inout SH accum, SH b, float scale) +{ + accum.shY += b.shY * scale; + accum.CoCg += b.CoCg * scale; +} + +SH mix_SH(SH a, SH b, float s) +{ + SH result; + result.shY = mix(a.shY, b.shY, vec4(s)); + result.CoCg = mix(a.CoCg, b.CoCg, vec2(s)); + return result; +} + +float fade_by_depth(float depthA, float depthB, float max_offset) { + return 1. 
- smoothstep(0., max_offset, abs(depthA - depthB)); +} diff --git a/ref/vk/vk_cvar.c b/ref/vk/vk_cvar.c index 64e4e0c9b4..cfdb58c4bc 100644 --- a/ref/vk/vk_cvar.c +++ b/ref/vk/vk_cvar.c @@ -40,6 +40,10 @@ void VK_LoadCvarsAfterInit( void ) if (vk_core.rtx) { rt_enable = gEngine.Cvar_Get( "rt_enable", "1", FCVAR_GLCONFIG, "Enable or disable Ray Tracing mode" ); rt_bounces = gEngine.Cvar_Get( "rt_bounces", "3", FCVAR_GLCONFIG, "Path tracing ray bounces" ); + rt_only_diffuse_gi = gEngine.Cvar_Get("rt_only_diffuse_gi", "0", FCVAR_GLCONFIG, "Make global illumination only diffuse"); + rt_separated_reflection = gEngine.Cvar_Get("rt_separated_reflection", "0", FCVAR_GLCONFIG, "Add separated high quality reflection pass"); + rt_denoise_gi_by_sh = gEngine.Cvar_Get("rt_denoise_gi_by_sh", "0", FCVAR_GLCONFIG, "Denoise global illumination by spherical harmonics"); + rt_disable_gi = gEngine.Cvar_Get("rt_disable_gi", "0", FCVAR_GLCONFIG, "Disable global illumination calculation"); } else { rt_enable = gEngine.Cvar_Get( "rt_enable", "0", FCVAR_READ_ONLY, "DISABLED: Ray tracing is not supported by your hardware/drivers" ); } diff --git a/ref/vk/vk_cvar.h b/ref/vk/vk_cvar.h index c15f2f8657..e9dac33ea9 100644 --- a/ref/vk/vk_cvar.h +++ b/ref/vk/vk_cvar.h @@ -26,6 +26,10 @@ void VK_LoadCvarsAfterInit( void ); X(rt_force_disable) \ X(rt_enable) \ X(rt_bounces) \ + X(rt_only_diffuse_gi) \ + X(rt_separated_reflection) \ + X(rt_denoise_gi_by_sh) \ + X(rt_disable_gi) \ #define EXTERN_CVAR(cvar) extern cvar_t *cvar; DECLARE_CVAR(EXTERN_CVAR) diff --git a/ref/vk/vk_descriptor.c b/ref/vk/vk_descriptor.c index 93727e26a1..86cafab59e 100644 --- a/ref/vk/vk_descriptor.c +++ b/ref/vk/vk_descriptor.c @@ -173,7 +173,7 @@ void VK_DescriptorsCreate(vk_descriptors_t *desc) void VK_DescriptorsWrite(const vk_descriptors_t *desc, int set_slot) { - VkWriteDescriptorSet wds[32]; + VkWriteDescriptorSet wds[64]; ASSERT(ARRAYSIZE(wds) >= desc->num_bindings); for (int i = 0; i < desc->num_bindings; ++i){ const 
VkDescriptorSetLayoutBinding *binding = desc->bindings + i; diff --git a/ref/vk/vk_devmem.c b/ref/vk/vk_devmem.c index 29e9dcd614..d82a3af207 100644 --- a/ref/vk/vk_devmem.c +++ b/ref/vk/vk_devmem.c @@ -2,7 +2,7 @@ #include "alolcator.h" #include "r_speeds.h" -#define MAX_DEVMEM_ALLOC_SLOTS 32 +#define MAX_DEVMEM_ALLOC_SLOTS 128 #define DEFAULT_ALLOCATION_SIZE (64 * 1024 * 1024) #define MODULE_NAME "devmem" diff --git a/ref/vk/vk_meatpipe.c b/ref/vk/vk_meatpipe.c index 3203472319..8145caead0 100644 --- a/ref/vk/vk_meatpipe.c +++ b/ref/vk/vk_meatpipe.c @@ -169,7 +169,7 @@ static struct ray_pass_s *pipelineLoadRT(load_context_t *ctx, int i, const char return ret; } -#define MAX_BINDINGS 32 +#define MAX_BINDINGS 64 static qboolean readBindings(load_context_t *ctx, VkDescriptorSetLayoutBinding *bindings, vk_meatpipe_pass_t* pass ) { pass->resource_map = NULL; int write_from = -1; diff --git a/ref/vk/vk_rtx.c b/ref/vk/vk_rtx.c index 96a47d3761..4c51229232 100644 --- a/ref/vk/vk_rtx.c +++ b/ref/vk/vk_rtx.c @@ -52,7 +52,7 @@ enum { ExternalResource_COUNT, }; -#define MAX_RESOURCES 32 +#define MAX_RESOURCES 128 typedef struct { char name[64]; @@ -266,6 +266,15 @@ static void prepareUniformBuffer( const vk_ray_frame_render_args_t *args, int fr ubo->debug_flags = g_rtx.debug.rt_debug_flags_value; ubo->random_seed = getRandomSeed(); + +#define SET_RENDERER_FLAG(cvar,flag) (CVAR_TO_BOOL(cvar) ? flag : 0) + + ubo->renderer_flags = SET_RENDERER_FLAG(rt_only_diffuse_gi, RENDERER_FLAG_ONLY_DIFFUSE_GI) | + SET_RENDERER_FLAG(rt_separated_reflection, RENDERER_FLAG_SEPARATED_REFLECTION) | + SET_RENDERER_FLAG(rt_denoise_gi_by_sh, RENDERER_FLAG_DENOISE_GI_BY_SH) | + SET_RENDERER_FLAG(rt_disable_gi, RENDERER_FLAG_DISABLE_GI); + +#undef SET_RENDERER_FLAG } typedef struct {