diff --git a/.editorconfig b/.editorconfig index 77807af0d6..84af569000 100644 --- a/.editorconfig +++ b/.editorconfig @@ -2,7 +2,7 @@ root = true [*] -charset = latin1 +charset = utf-8 end_of_line = lf indent_style = tab insert_final_newline = true diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 4728662711..1b397fc94a 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -1,5 +1,5 @@ name: Build & Deploy Engine -on: +on: push: paths-ignore: - '**.md' @@ -21,10 +21,10 @@ jobs: fail-fast: false matrix: include: - - os: ubuntu-20.04 + - os: ubuntu-22.04 targetos: linux targetarch: amd64 - - os: ubuntu-20.04 + - os: ubuntu-22.04 targetos: linux targetarch: i386 # TODO enable and test ref_vk for it too @@ -57,7 +57,7 @@ jobs: targetarch: i386 env: SDL_VERSION: 2.26.2 - VULKAN_SDK_VERSION: 1.3.239 + VULKAN_SDK_VERSION: 1.3.296 GH_CPU_ARCH: ${{ matrix.targetarch }} ANDROID_SDK_TOOLS_VER: 4333796 steps: diff --git a/.gitignore b/.gitignore index 994693c8e7..ef9d884634 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ # Other *.save +prefix/ # Qt Creator for some reason creates *.user.$version files, so exclude it too *.user* diff --git a/engine/platform/sdl/vid_sdl.c b/engine/platform/sdl/vid_sdl.c index a27eceaec6..61326f735a 100644 --- a/engine/platform/sdl/vid_sdl.c +++ b/engine/platform/sdl/vid_sdl.c @@ -861,7 +861,11 @@ qboolean VID_CreateWindow( int width, int height, window_mode_t window_mode ) if( !GL_UpdateContext( )) return false; - + } + else if( glw_state.context_type == REF_VULKAN ) + { + // FIXME this is probably not correct place or way to do it, just copypasting GL stuff + VID_StartupGamma(); } #else // SDL_VERSION_ATLEAST( 2, 0, 0 ) diff --git a/ref/vk/NOTES.md b/ref/vk/NOTES.md index adc0d7006a..216b505ccf 100644 --- a/ref/vk/NOTES.md +++ b/ref/vk/NOTES.md @@ -1177,3 +1177,26 @@ Cons: ridiculous texture explosion - `performTracing()` write resource desc values passed from outside on each call - new resources are added in `reloadMainpipe()` - resource with zero refcount are destroyed in `cleanupResources()` + + +# 2024-11-26 +`./waf clangdb` produces `compile_commands.json` file inside of the build directory. All the paths in the file are relative to that directory. +If the build directory is something 2nd level, like `build/amd64-debug`, and the file is then symlinked to (as nvim/lsp/clangd only looks for the file in the root and in the `./build` dir), then it confuses nvim/lsp/clangd. +Solution: make build dir literally just `./build`. + + +# 2024-11-27 E381 +## Removing staging flush + +### vk_scene.c/reloadPatches() +- Can ignore for now + +### Staging full +- (I) Just allocate another buffer for staging +- (II) Figure out why the hell do we need so much staging memory + - PBR/remastered textures + - possible solution: lazy/ondemand loading + +### vk_brush.c / collect emissive surfaces +- (I) try to merge emissive collection with surface loading +- (II) convert from pushing material data to pulling. Not really clear how to do easily. 
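A rough sketch of option (I), assuming the emissive check is folded straight into the per-surface loop of `fillBrushSurfaces()`. This mirrors what the `surfaceHandleEmissive()` change later in this patch ends up doing; the control flow is simplified and omits surface-type filtering and patch-surface overrides:

```c
// Inside the per-surface loop of fillBrushSurfaces():
// decide emissiveness right where the surface geometry is loaded, instead of
// a separate R_VkBrushModelCollectEmissiveSurfaces() pass after the fact.
vec3_t emissive;
if (RT_GetEmissiveForTexture(emissive, tex_id)) {
	rt_light_add_polygon_t polylight;
	if (loadPolyLight(&polylight, mod, surface_index, surf, emissive)) {
		if (is_static)
			RT_LightAddPolygon(&polylight); // static: added once at load time
		else // movable: re-added every frame from R_BrushModelDraw()
			arrayDynamicAppendT(&bmodel->dynamic_polylights, &polylight);
	}
}
```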
diff --git a/ref/vk/TODO.md b/ref/vk/TODO.md index 6d04930d65..2065dafcd9 100644 --- a/ref/vk/TODO.md +++ b/ref/vk/TODO.md @@ -1,7 +1,59 @@ ## Next + +## Upcoming +- [ ] framectl frame tracking, e.g.: + - [ ] wait for frame fence only really before actually starting to build combuf in R_BeginFrame() + - why: there should be nothing to synchronize with + - why: more straightforward dependency tracking + - why not: waiting on frame fence allows freeing up staging and other temp memory +- [ ] Remove second semaphore from submit, replace it with explicit barriers for e.g. geom buffer + - [x] why: best practice validation complains about too wide ALL_COMMANDS semaphore + - why: explicit barriers are more clear, better perf possible too + - [ ] Do not lose barrier-tracking state between frames - [ ] Render graph - [ ] performance profiling and comparison +## 2024-12-17 E385 +- [x] fix rendering on amdgpu+radv +### After stream +- [x] cleanup TLAS creation and building code + +## 2024-12-12 E384 +- [x] track image sync state with the image object itself (and not with vk_resource) + +### After stream +- [x] Proper staging-vs-frame tracking, replace tag with something sensitive + - currently assert fails because there's 1 frame latency, not one. + - [x] comment for future: full staging might want to wait for previous frame to finish +- [x] zero vkCmdPipelineBarriers calls + - [x] grep for anything else + +## 2024-12-10 E383 +- [x] Add transfer stage to submit semaphore separating command buffer: fixes sync for rt +- [x] Issue staging commit for a bunch of RT buffers (likely not all of them) +- [x] move destination buffer tracking to outside of staging: + - [x] vk_geometry + - [x] vk_light: grid, metadata + - [x] vk_ray_accel: TLAS geometries + - [x] vk_ray_model: kusochki +- [x] staging should not be aware of cmdbuf either + - [x] `R_VkStagingCommit()`: -- removed + - [x] `R_VkStagingGetCommandBuffer()` -- removed +- [x] Go through all staged buffers and make sure that they are committed +- [x] Commit staging in right places for right buffers +- [x] Add mode staging debug tracking/logs + +### After stream +- [x] Fix glitch geometry + - [x] Which specific models produce it? Use nsight btw + +## 2024-05-24 E379 +- [ ] refactor staging: + - [ ] move destination image tracking to outside of staging + - [x] vk_image ← vk_texture (E380) + - [x] implement generic staging regions (E380) + - [ ] implement stricter staging regions tracking + ## 2024-05-07 E376 - [ ] resource manager - [x] extract all resource mgmt from vk_rtx into a designated file diff --git a/ref/vk/alolcator.c b/ref/vk/alolcator.c index 544e0b7248..120da4bc95 100644 --- a/ref/vk/alolcator.c +++ b/ref/vk/alolcator.c @@ -287,7 +287,7 @@ uint32_t aloRingAlloc(alo_ring_t* ring, uint32_t size, uint32_t alignment) { // 1. 
Check if we have enough space immediately in front of head if (pos + size <= ring->size) { - ring->head = (pos + size) % ring->size; + ring->head = pos + size; return pos; } diff --git a/ref/vk/arrays.c b/ref/vk/arrays.c new file mode 100644 index 0000000000..fe6d970c86 --- /dev/null +++ b/ref/vk/arrays.c @@ -0,0 +1,52 @@ +#include "arrays.h" + +#include "vk_core.h" // Mem_Malloc + +#include // NULL + + +void arrayDynamicInit(array_dynamic_t *array, int item_size) { + array->items = NULL; + array->count = 0; + array->capacity = 0; + array->item_size = item_size; +} + +void arrayDynamicDestroy(array_dynamic_t *array) { + if (array->items) + Mem_Free(array->items); +} + +static void arrayDynamicEnsureCapacity(array_dynamic_t *array, int min_capacity) { + if (array->capacity >= min_capacity) + return; + + if (array->capacity == 0) + array->capacity = 2; + + while (array->capacity < min_capacity) + array->capacity = array->capacity * 3 / 2; + + void *new_buffer = Mem_Malloc(vk_core.pool, array->capacity * array->item_size); + if (array->items) { + memcpy(new_buffer, array->items, array->count * array->item_size); + Mem_Free(array->items); + } + array->items = new_buffer; +} + +void arrayDynamicResize(array_dynamic_t *array, int count) { + arrayDynamicEnsureCapacity(array, count); + array->count = count; +} + +void arrayDynamicAppend(array_dynamic_t *array, void *item) { + const int new_count = array->count + 1; + arrayDynamicEnsureCapacity(array, new_count); + + if (item) + memcpy((char*)array->items + array->count * array->item_size, item, array->item_size); + + array->count = new_count; +} + diff --git a/ref/vk/arrays.h b/ref/vk/arrays.h new file mode 100644 index 0000000000..560cf879c7 --- /dev/null +++ b/ref/vk/arrays.h @@ -0,0 +1,87 @@ +#pragma once + +#include // size_t + +#define VIEW_DECLARE_CONST(TYPE, NAME) \ + struct { \ + const TYPE *items; \ + int count; \ + } NAME + +// Array with compile-time maximum size +#define BOUNDED_ARRAY_DECLARE(TYPE, NAME, MAX_SIZE) \ + struct { \ + TYPE items[MAX_SIZE]; \ + int count; \ + } NAME + +#define BOUNDED_ARRAY(TYPE, NAME, MAX_SIZE) \ + BOUNDED_ARRAY_DECLARE(TYPE, NAME, MAX_SIZE) = {0} + +#define BOUNDED_ARRAY_HAS_SPACE(array_, space_) \ + ((COUNTOF((array_).items) - (array_).count) >= space_) + +#define BOUNDED_ARRAY_APPEND_UNSAFE(array_) \ + ((array_).items[(array_).count++]) + +#define BOUNDED_ARRAY_APPEND_ITEM(var, item) \ + do { \ + ASSERT(BOUNDED_ARRAY_HAS_SPACE(var, 1)); \ + var.items[var.count++] = item; \ + } while(0) + + +// Dynamically-sized array +// I. 
Type-agnostic + +typedef struct array_dynamic_s { + void *items; + size_t count, capacity; + size_t item_size; +} array_dynamic_t; + +void arrayDynamicInit(array_dynamic_t *array, int item_size); +void arrayDynamicDestroy(array_dynamic_t *array); + +void arrayDynamicReserve(array_dynamic_t *array, int capacity); +void arrayDynamicAppend(array_dynamic_t *array, void *item); +#define arrayDynamicAppendItem(array, item) \ + do { \ + ASSERT((array)->item_size == sizeof(&(item))); \ + arrayDynamicAppend(array, item); \ + } while (0) +/* void *arrayDynamicGet(array_dynamic_t *array, int index); */ +/* #define arrayDynamicAt(array, type, index) \ */ +/* (ASSERT((array)->item_size == sizeof(type)), \ */ +/* ASSERT((array)->count > (index)), \ */ +/* arrayDynamicGet(array, index)) */ +void arrayDynamicResize(array_dynamic_t *array, int count); +//void arrayDynamicErase(array_dynamic_t *array, int begin, int end); + +//void arrayDynamicInsert(array_dynamic_t *array, int before, int count, void *items); + +// II. Type-specific +#define ARRAY_DYNAMIC_DECLARE(TYPE, NAME) \ + struct { \ + TYPE *items; \ + size_t count, capacity; \ + size_t item_size; \ + } NAME + +#define arrayDynamicInitT(array) \ + arrayDynamicInit((array_dynamic_t*)array, sizeof((array)->items[0])) + +#define arrayDynamicDestroyT(array) \ + arrayDynamicDestroy((array_dynamic_t*)array) + +#define arrayDynamicResizeT(array, size) \ + arrayDynamicResize((array_dynamic_t*)(array), (size)) + +#define arrayDynamicAppendT(array, item) \ + arrayDynamicAppend((array_dynamic_t*)(array), (item)) + +#define arrayDynamicInsertT(array, before, count, items) \ + arrayDynamicInsert((array_dynamic_t*)(array), before, count, items) + +#define arrayDynamicAppendManyT(array, items_count, items) \ + arrayDynamicInsert((array_dynamic_t*)(array), (array)->count, items_count, items) diff --git a/ref/vk/r_block.c b/ref/vk/r_block.c index 8924464a4c..470a73d25a 100644 --- a/ref/vk/r_block.c +++ b/ref/vk/r_block.c @@ -14,7 +14,7 @@ typedef struct r_blocks_block_s { // <--- pool --><-- ring ---> // offset ? 
---> -int allocMetablock(r_blocks_t *blocks) { +static int allocMetablock(r_blocks_t *blocks) { return aloIntPoolAlloc(&blocks->blocks.freelist); // TODO grow if needed } diff --git a/ref/vk/r_speeds.c b/ref/vk/r_speeds.c index 64c772ae12..d3bde354aa 100644 --- a/ref/vk/r_speeds.c +++ b/ref/vk/r_speeds.c @@ -946,6 +946,7 @@ void R_SpeedsDisplayMore(uint32_t prev_frame_index, const struct vk_combuf_scope speedsPrintf( "Driver: %u.%u.%u, Vulkan: %u.%u.%u\n", XVK_PARSE_VERSION(vk_core.physical_device.properties.driverVersion), XVK_PARSE_VERSION(vk_core.physical_device.properties.apiVersion)); + speedsPrintf( "Resolution: %ux%u\n", vk_frame.width, vk_frame.height); } const uint32_t events = g_aprof.events_last_frame - prev_frame_index; diff --git a/ref/vk/r_textures.c b/ref/vk/r_textures.c index 4a581d267c..55a7611c2e 100644 --- a/ref/vk/r_textures.c +++ b/ref/vk/r_textures.c @@ -177,14 +177,15 @@ static void createDefaultTextures( void ) // emo-texture from quake1 pic = Common_FakeImage( 16, 16, 1, IMAGE_HAS_COLOR ); + uint *const buffer = PTR_CAST(uint, pic->buffer); for( y = 0; y < 16; y++ ) { for( x = 0; x < 16; x++ ) { if(( y < 8 ) ^ ( x < 8 )) - ((uint *)pic->buffer)[y*16+x] = 0xFFFF00FF; - else ((uint *)pic->buffer)[y*16+x] = 0xFF000000; + buffer[y*16+x] = 0xFFFF00FF; + else buffer[y*16+x] = 0xFF000000; } } @@ -211,19 +212,19 @@ static void createDefaultTextures( void ) // white texture pic = Common_FakeImage( 4, 4, 1, IMAGE_HAS_COLOR ); for( x = 0; x < 16; x++ ) - ((uint *)pic->buffer)[x] = 0xFFFFFFFF; + buffer[x] = 0xFFFFFFFF; tglob.whiteTexture = R_TextureUploadFromBufferNew( REF_WHITE_TEXTURE, pic, TF_COLORMAP ); // gray texture pic = Common_FakeImage( 4, 4, 1, IMAGE_HAS_COLOR ); for( x = 0; x < 16; x++ ) - ((uint *)pic->buffer)[x] = 0xFF7F7F7F; + buffer[x] = 0xFF7F7F7F; tglob.grayTexture = R_TextureUploadFromBufferNew( REF_GRAY_TEXTURE, pic, TF_COLORMAP ); // black texture pic = Common_FakeImage( 4, 4, 1, IMAGE_HAS_COLOR ); for( x = 0; x < 16; x++ ) - ((uint *)pic->buffer)[x] = 0xFF000000; + buffer[x] = 0xFF000000; tglob.blackTexture = R_TextureUploadFromBufferNew( REF_BLACK_TEXTURE, pic, TF_COLORMAP ); // cinematic dummy diff --git a/ref/vk/ray_pass.c b/ref/vk/ray_pass.c index 6b985aa404..1a7e48387d 100644 --- a/ref/vk/ray_pass.c +++ b/ref/vk/ray_pass.c @@ -270,14 +270,24 @@ void RayPassPerform(struct ray_pass_s *pass, vk_combuf_t *combuf, ray_pass_perfo const qboolean write = i >= pass->desc.write_from; R_VkResourceAddToBarrier(res, write, pass->pipeline_type, &barrier); + } + + DEBUG_BEGIN(combuf->cmdbuf, pass->debug_name); + R_VkBarrierCommit(combuf, &barrier, pass->pipeline_type); + + for (int i = 0; i < num_bindings; ++i) { + const int index = args.resources_map ? args.resources_map[i] : i; + vk_resource_t* const res = args.resources[index]; const vk_descriptor_value_t *const src_value = &res->value; vk_descriptor_value_t *const dst_value = pass->desc.riptors.values + i; + // layout is only known after barrier + // FIXME this is not true, it can be known earlier if (res->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { dst_value->image = (VkDescriptorImageInfo) { - .imageLayout = write ? 
res->write.image_layout : res->read.image_layout, - .imageView = src_value->image_object->view, + .imageLayout = res->ref.image->sync.layout, + .imageView = res->ref.image->view, .sampler = VK_NULL_HANDLE, }; } else { @@ -287,9 +297,6 @@ void RayPassPerform(struct ray_pass_s *pass, vk_combuf_t *combuf, ray_pass_perfo VK_DescriptorsWrite(&pass->desc.riptors, args.frame_set_slot); - DEBUG_BEGIN(combuf->cmdbuf, pass->debug_name); - R_VkBarrierCommit(combuf->cmdbuf, &barrier, pass->pipeline_type); - switch (pass->type) { case RayPassType_Tracing: { diff --git a/ref/vk/unordered_roadmap.c b/ref/vk/unordered_roadmap.c index e98935a7d5..50917a5360 100644 --- a/ref/vk/unordered_roadmap.c +++ b/ref/vk/unordered_roadmap.c @@ -10,6 +10,7 @@ #define ERR(msg, ...) fprintf(stderr, msg, ##__VA_ARGS__) #define ASSERT(...) assert(__VA_ARGS__) #define COUNTOF(a) (sizeof(a)/sizeof(a[0])) +#define PTR_CAST(type, ptr) ((type*)(void*)(ptr)) #endif #if defined(_WIN32) && !defined(strcasecmp) @@ -49,7 +50,7 @@ void urmomInit(const urmom_desc_t* desc) { ASSERT((desc->count & (desc->count - 1)) == 0); for (int i = 0; i < desc->count; ++i) { - urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * i); + urmom_header_t *hdr = PTR_CAST(urmom_header_t, ptr + desc->item_size * i); hdr->state = 0; hdr->hash = 0; } @@ -92,7 +93,7 @@ int urmomFind(const urmom_desc_t* desc, const char* key) { const int start_index = hash & mask; for (int index = start_index;;) { - const urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * index); + const urmom_header_t *hdr = PTR_CAST(const urmom_header_t, ptr + desc->item_size * index); if (URMOM_IS_OCCUPIED(*hdr)) { if (hdr->hash == hash && sameKey(desc->type, key, hdr->key)) @@ -122,7 +123,7 @@ urmom_insert_t urmomInsert(const urmom_desc_t* desc, const char *key) { int index = start_index; int first_available = -1; for (;;) { - const urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * index); + const urmom_header_t *hdr = PTR_CAST(const urmom_header_t, ptr + desc->item_size * index); if (URMOM_IS_OCCUPIED(*hdr)) { if (hdr->hash == hash && sameKey(desc->type, key, hdr->key)) @@ -149,7 +150,7 @@ urmom_insert_t urmomInsert(const urmom_desc_t* desc, const char *key) { if (first_available < 0) return (urmom_insert_t){.index = -1, .created = 0}; - urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * first_available); + urmom_header_t *hdr = PTR_CAST(urmom_header_t, ptr + desc->item_size * first_available); hdr->hash = hash; hdr->state = 1; @@ -169,7 +170,7 @@ int urmomRemove(const urmom_desc_t* desc, const char *key) { void urmomRemoveByIndex(const urmom_desc_t* desc, int index) { char *ptr = desc->array; - urmom_header_t *hdr = (urmom_header_t*)(ptr + desc->item_size * index); + urmom_header_t *hdr = PTR_CAST(urmom_header_t, ptr + desc->item_size * index); if (!URMOM_IS_OCCUPIED(*hdr)) { ERR("Hashmap=%p(is=%d, n=%d): lot %d is not occupied", desc->array, desc->item_size, desc->count, index); diff --git a/ref/vk/vk_brush.c b/ref/vk/vk_brush.c index 6e9d552e09..64c073839a 100644 --- a/ref/vk/vk_brush.c +++ b/ref/vk/vk_brush.c @@ -1,7 +1,6 @@ #include "vk_brush.h" #include "vk_core.h" -#include "vk_const.h" #include "vk_math.h" #include "r_textures.h" #include "vk_lightmap.h" @@ -10,9 +9,9 @@ #include "vk_light.h" #include "vk_mapents.h" #include "r_speeds.h" -#include "vk_staging.h" #include "vk_logs.h" #include "profiler.h" +#include "arrays.h" #include #include @@ -59,8 +58,7 @@ typedef struct vk_brush_model_s { vk_vertex_t *conveyors_vertices; // 
Polylights which need to be added per-frame dynamically - struct rt_light_add_polygon_s *dynamic_polylights; - int dynamic_polylights_count; + ARRAY_DYNAMIC_DECLARE(struct rt_light_add_polygon_s, dynamic_polylights); } vk_brush_model_t; typedef struct { @@ -814,8 +812,8 @@ void R_BrushModelDraw( const cl_entity_t *ent, int render_mode, float blend, con render_mode = bmodel->patch_rendermode; // Add dynamic polylights if any - for (int i = 0; i < bmodel->dynamic_polylights_count; ++i) { - rt_light_add_polygon_t *const polylight = bmodel->dynamic_polylights + i; + for (int i = 0; i < bmodel->dynamic_polylights.count; ++i) { + rt_light_add_polygon_t *const polylight = bmodel->dynamic_polylights.items + i; polylight->transform_row = (const matrix3x4*)transform; polylight->dynamic = true; RT_LightAddPolygon(polylight); @@ -1031,7 +1029,9 @@ typedef struct { vk_render_geometry_t *out_geometries; vk_vertex_t *out_vertices; uint16_t *out_indices; + const xvk_mapent_func_any_t *func_any; qboolean is_worldmodel; + qboolean is_static; } fill_geometries_args_t; static void getSurfaceNormal( const msurface_t *surf, vec3_t out_normal) { @@ -1273,12 +1273,102 @@ static const xvk_mapent_func_any_t *getModelFuncAnyPatch( const model_t *const m return NULL; } +typedef struct { + const model_t *mod; + const xvk_mapent_func_any_t *func_any; + qboolean is_static; + vk_brush_model_t *bmodel; + const msurface_t *surf; + int surface_index; + brush_surface_type_e type; + int tex_id; + const xvk_patch_surface_t *psurf; + vk_render_geometry_t *model_geometry; + int *emissive_surfaces_count; +} SurfaceHandleEmissiveArgs; + +static void surfaceHandleEmissive(SurfaceHandleEmissiveArgs args) { + VectorClear(args.model_geometry->emissive); + + switch (args.type) { + case BrushSurface_Regular: + case BrushSurface_Water: + // No known cases, also needs to be dynamic case BrushSurface_WaterSide: + break; + // Animated textures are enumerated in `R_BrushModelDraw()` and are added as dynamic lights + // when their current frame is emissive. Do not add such surfaces here to avoid adding them twice. + // TODO: Most of the animated surfaces are techically static: i.e. they don't really move. + // Make a special case for static lights that can be off. + case BrushSurface_Animated: + default: + return; + } + + vec3_t emissive; + if (args.psurf && (args.psurf->flags & Patch_Surface_Emissive)) { + VectorCopy(args.psurf->emissive, emissive); + } else if (RT_GetEmissiveForTexture(emissive, args.tex_id)) { + // emissive + } else { + // not emissive, continue to the next + return; + } + + DEBUG("emissive[%d] surf_index=%d tex_id=%d patch=%d(%#x) => emissive=(%f,%f,%f)", + *args.emissive_surfaces_count, args.surface_index, args.tex_id, !!args.psurf, args.psurf?args.psurf->flags:0, emissive[0], emissive[1], emissive[2]); + + (*args.emissive_surfaces_count)++; + + /* const qboolean is_water = type == BrushSurface_Water; */ + VectorCopy(emissive, args.model_geometry->emissive); + + rt_light_add_polygon_t polylight; + if (!loadPolyLight(&polylight, args.mod, args.surface_index, args.surf, emissive)) + return; + + // func_any surfaces do not really belong to BSP+PVS system, so they can't be used + // for lights visibility calculation directly. 
+ if (args.func_any && args.func_any->origin_patched) { + // TODO this is not really dynamic, but this flag signals using MovingSurface visibility calc + polylight.dynamic = true; + matrix3x4 m; + Matrix3x4_LoadIdentity(m); + Matrix3x4_SetOrigin(m, args.func_any->origin[0], args.func_any->origin[1], args.func_any->origin[2]); + polylight.transform_row = &m; + } + + // Static emissive surfaces are added immediately, as they are drawn all the time. + // Non-static ones will be applied later when the model is actually rendered + // Non-static brush models may move around and so must have their emissive surfaces treated as dynamic + if (args.is_static) { + RT_LightAddPolygon(&polylight); + + /* TODO figure out when this is needed. + * This is needed in cases where we can dive into emissive acid, which should illuminate what's under it + * Likely, this is not a correct fix, though, see https://github.com/w23/xash3d-fwgs/issues/56 + if (is_water) { + // Add backside for water + for (int i = 0; i < polylight.num_vertices; ++i) { + vec3_t tmp; + VectorCopy(polylight.vertices[i], tmp); + VectorCopy(polylight.vertices[polylight.num_vertices-1-i], polylight.vertices[i]); + VectorCopy(tmp, polylight.vertices[polylight.num_vertices-1-i]); + RT_LightAddPolygon(&polylight); + } + } + */ + } else { + arrayDynamicAppendT(&args.bmodel->dynamic_polylights, &polylight); + } +} + static qboolean fillBrushSurfaces(fill_geometries_args_t args) { int vertex_offset = 0; int num_geometries = 0; int animated_count = 0; int conveyors_count = 0; int conveyors_vertices_count = 0; + int emissive_surfaces_count = 0; vk_vertex_t *p_vert = args.out_vertices; uint16_t *p_ind = args.out_indices; @@ -1316,6 +1406,24 @@ static qboolean fillBrushSurfaces(fill_geometries_args_t args) { const xvk_patch_surface_t *const psurf = R_VkPatchGetSurface(surface_index); const brush_surface_type_e type = getSurfaceType(surf, surface_index, args.is_worldmodel); + + // Check whether this surface is emissive early, before bailing out on surface type. + // TODO consider moving this to outside of this loop, as it still might skip some surfaces + // e.g. if the model doesn't have any static surfaces at all. + surfaceHandleEmissive((SurfaceHandleEmissiveArgs){ + .mod = args.mod, + .func_any = args.func_any, + .is_static = args.is_static, + .bmodel = args.bmodel, + .surf = surf, + .surface_index = surface_index, + .type = type, + .tex_id = tex_id, + .psurf = psurf, + .model_geometry = model_geometry, + .emissive_surfaces_count = &emissive_surfaces_count, + }); + switch (type) { case BrushSurface_Water: case BrushSurface_WaterSide: @@ -1403,8 +1511,6 @@ static qboolean fillBrushSurfaces(fill_geometries_args_t args) { if (type == BrushSurface_Animated) model_geometry->ye_olde_texture = -1; - VectorClear(model_geometry->emissive); - model_geometry->surf_deprecate = surf; model_geometry->vertex_offset = args.base_vertex_offset; @@ -1542,7 +1648,7 @@ static qboolean fillBrushSurfaces(fill_geometries_args_t args) { if (area2 <= 0.) { // Do not produce triangle if it has zero area // NOTE: this is suboptimal in the sense that points that might be necessary for proper - // normal smoothing might be skippedk. In case that this causes undesirable rendering + // normal smoothing might be skipped. In case that this causes undesirable rendering // artifacts, a more proper triangulation algorithm, that doesn't skip points, would // be needed. E.g. ear clipping. 
/* diagnostics @@ -1582,6 +1688,12 @@ static qboolean fillBrushSurfaces(fill_geometries_args_t args) { } // for mod->nummodelsurfaces } + // Apply all emissive surfaces found + if (emissive_surfaces_count > 0) { + INFO("Loaded %d polylights, %d dynamic for %s model %s", + emissive_surfaces_count, (int)args.bmodel->dynamic_polylights.count, args.is_static ? "static" : "movable", args.mod->name); + } + ASSERT(args.sizes.num_surfaces == num_geometries); ASSERT(args.sizes.animated_count == animated_count); ASSERT(args.sizes.conveyors_count == conveyors_count); @@ -1615,6 +1727,8 @@ static qboolean createRenderModel( const model_t *mod, vk_brush_model_t *bmodel, } const r_geometry_range_lock_t geom_lock = R_GeometryRangeLock(&bmodel->geometry); + const xvk_mapent_func_any_t *func_any = getModelFuncAnyPatch(mod); + const qboolean is_static = is_worldmodel || (func_any && func_any->origin_patched); const qboolean fill_result = fillBrushSurfaces((fill_geometries_args_t){ .mod = mod, @@ -1625,7 +1739,9 @@ static qboolean createRenderModel( const model_t *mod, vk_brush_model_t *bmodel, .out_geometries = geometries, .out_vertices = geom_lock.vertices, .out_indices = geom_lock.indices, + .func_any = func_any, .is_worldmodel = is_worldmodel, + .is_static = is_static, }); R_GeometryRangeUnlock( &geom_lock ); @@ -1671,6 +1787,8 @@ qboolean R_BrushModelLoad( model_t *mod, qboolean is_worldmodel ) { Matrix4x4_LoadIdentity(bmodel->prev_transform); bmodel->prev_time = gpGlobals->time; + arrayDynamicInitT(&bmodel->dynamic_polylights); + const model_sizes_t sizes = computeSizes( mod, is_worldmodel ); if (is_worldmodel) { @@ -1719,8 +1837,7 @@ static void R_BrushModelDestroy( vk_brush_model_t *bmodel ) { ASSERT(bmodel->engine_model->cache.data == bmodel); ASSERT(bmodel->engine_model->type == mod_brush); - if (bmodel->dynamic_polylights) - Mem_Free(bmodel->dynamic_polylights); + arrayDynamicDestroyT(&bmodel->dynamic_polylights); if (bmodel->conveyors_vertices) Mem_Free(bmodel->conveyors_vertices); @@ -1814,163 +1931,6 @@ static qboolean loadPolyLight(rt_light_add_polygon_t *out_polygon, const model_t return true; } -void R_VkBrushModelCollectEmissiveSurfaces( const struct model_s *mod, qboolean is_worldmodel ) { - vk_brush_model_t *const bmodel = mod->cache.data; - ASSERT(bmodel); - - const xvk_mapent_func_any_t *func_any = getModelFuncAnyPatch(mod); - const qboolean is_static = is_worldmodel || (func_any && func_any->origin_patched); - - typedef struct { - int model_surface_index; - int surface_index; - const msurface_t *surf; - vec3_t emissive; - qboolean is_water; - } emissive_surface_t; - emissive_surface_t emissive_surfaces[MAX_SURFACE_LIGHTS]; - int geom_indices[MAX_SURFACE_LIGHTS]; - int emissive_surfaces_count = 0; - - // Load list of all emissive surfaces - for( int i = 0; i < mod->nummodelsurfaces; ++i) { - const int surface_index = mod->firstmodelsurface + i; - const msurface_t *surf = mod->surfaces + surface_index; - const brush_surface_type_e type = getSurfaceType(surf, surface_index, is_worldmodel); - - switch (type) { - case BrushSurface_Regular: - case BrushSurface_Water: - // No known cases, also needs to be dynamic case BrushSurface_WaterSide: - break; - // Animated textures are enumerated in `R_BrushModelDraw()` and are added as dynamic lights - // when their current frame is emissive. Do not add such surfaces here to avoid adding them twice. - // TODO: Most of the animated surfaces are techically static: i.e. they don't really move. - // Make a special case for static lights that can be off. 
- case BrushSurface_Animated: - default: - continue; - } - - const int tex_id = surf->texinfo->texture->gl_texturenum; // TODO animation? - - vec3_t emissive; - const xvk_patch_surface_t *const psurf = R_VkPatchGetSurface(surface_index); - if (psurf && (psurf->flags & Patch_Surface_Emissive)) { - VectorCopy(psurf->emissive, emissive); - } else if (RT_GetEmissiveForTexture(emissive, tex_id)) { - // emissive - } else { - // not emissive, continue to the next - continue; - } - - DEBUG("%d: i=%d surf_index=%d tex_id=%d patch=%d(%#x) => emissive=(%f,%f,%f)", emissive_surfaces_count, i, surface_index, tex_id, !!psurf, psurf?psurf->flags:0, emissive[0], emissive[1], emissive[2]); - - if (emissive_surfaces_count == MAX_SURFACE_LIGHTS) { - ERR("Too many emissive surfaces for model %s: max=%d", mod->name, MAX_SURFACE_LIGHTS); - break; - } - - emissive_surface_t* const surface = &emissive_surfaces[emissive_surfaces_count++]; - surface->model_surface_index = i; - surface->surface_index = surface_index; - surface->surf = surf; - surface->is_water = type == BrushSurface_Water; - VectorCopy(emissive, surface->emissive); - } - - // Clear old per-geometry emissive values. The new emissive values will be assigned by the loop below only to the relevant geoms - // This is relevant for updating lights during development - for (int i = 0; i < bmodel->render_model.num_geometries; ++i) { - vk_render_geometry_t *const geom = bmodel->render_model.geometries + i; - VectorClear(geom->emissive); - } - - // Non-static brush models may move around and so must have their emissive surfaces treated as dynamic - if (!is_static) { - if (bmodel->dynamic_polylights) - Mem_Free(bmodel->dynamic_polylights); - bmodel->dynamic_polylights_count = 0; - bmodel->dynamic_polylights = Mem_Malloc(vk_core.pool, sizeof(bmodel->dynamic_polylights[0]) * emissive_surfaces_count); - } - - // Apply all emissive surfaces found - int geom_indices_count = 0; - for (int i = 0; i < emissive_surfaces_count; ++i) { - const emissive_surface_t* const s = emissive_surfaces + i; - rt_light_add_polygon_t polylight; - if (!loadPolyLight(&polylight, mod, s->surface_index, s->surf, s->emissive)) - continue; - - // func_any surfaces do not really belong to BSP+PVS system, so they can't be used - // for lights visibility calculation directly. - if (func_any && func_any->origin_patched) { - // TODO this is not really dynamic, but this flag signals using MovingSurface visibility calc - polylight.dynamic = true; - matrix3x4 m; - Matrix3x4_LoadIdentity(m); - Matrix3x4_SetOrigin(m, func_any->origin[0], func_any->origin[1], func_any->origin[2]); - polylight.transform_row = &m; - } - - // Static emissive surfaces are added immediately, as they are drawn all the time. - // Non-static ones will be applied later when the model is actually rendered - if (is_static) { - RT_LightAddPolygon(&polylight); - - /* TODO figure out when this is needed. 
- * This is needed in cases where we can dive into emissive acid, which should illuminate what's under it - * Likely, this is not a correct fix, though, see https://github.com/w23/xash3d-fwgs/issues/56 - if (s->is_water) { - // Add backside for water - for (int i = 0; i < polylight.num_vertices; ++i) { - vec3_t tmp; - VectorCopy(polylight.vertices[i], tmp); - VectorCopy(polylight.vertices[polylight.num_vertices-1-i], polylight.vertices[i]); - VectorCopy(tmp, polylight.vertices[polylight.num_vertices-1-i]); - RT_LightAddPolygon(&polylight); - } - } - */ - } else { - ASSERT(bmodel->dynamic_polylights_count < emissive_surfaces_count); - bmodel->dynamic_polylights[bmodel->dynamic_polylights_count++] = polylight; - } - - // Assign the emissive value to the right geometry - if (bmodel->surface_to_geometry_index) { // Can be absent for water-only models - const int geom_index = bmodel->surface_to_geometry_index[s->model_surface_index]; - if (geom_index != -1) { // can be missing for water surfaces - ASSERT(geom_index >= 0); - ASSERT(geom_index < bmodel->render_model.num_geometries); - ASSERT(geom_indices_count < COUNTOF(geom_indices)); - geom_indices[geom_indices_count++] = geom_index; - VectorCopy(polylight.emissive, bmodel->render_model.geometries[geom_index].emissive); - } - } - } - - if (emissive_surfaces_count > 0) { - // Update emissive values in kusochki. This is required because initial R_BrushModelLoad happens before we've read - // RAD data in vk_light.c, so the emissive values are empty. This is the place and time where we actually get to - // know them, so let's fixup things. - // TODO minor optimization: sort geom_indices to have a better chance for them to be sequential - - { - // Make sure that staging has been flushed. - // Updating materials leads to staging an upload to the same memory that we've just staged an upload to. - // This doesn't please the validator. - // Ensure that these uploads are not mixed into the same unsynchronized stream. - // TODO this might be not great for performance (extra waiting for GPU), so a better solution should be considered. E.g. tracking and barrier-syncing regions to-be-reuploaded. - R_VkStagingFlushSync(); - } - - R_RenderModelUpdateMaterials(&bmodel->render_model, geom_indices, geom_indices_count); - INFO("Loaded %d polylights for %s model %s", emissive_surfaces_count, is_static ? 
"static" : "movable", mod->name); - } -} - void R_BrushUnloadTextures( model_t *mod ) { int i; diff --git a/ref/vk/vk_brush.h b/ref/vk/vk_brush.h index 66c44afd73..03884c7355 100644 --- a/ref/vk/vk_brush.h +++ b/ref/vk/vk_brush.h @@ -18,6 +18,4 @@ void R_BrushModelDraw( const cl_entity_t *ent, int render_mode, float blend, con const texture_t *R_TextureAnimation( const cl_entity_t *ent, const msurface_t *s ); -void R_VkBrushModelCollectEmissiveSurfaces( const struct model_s *mod, qboolean is_worldmodel ); - void R_BrushUnloadTextures( model_t *mod ); diff --git a/ref/vk/vk_buffer.c b/ref/vk/vk_buffer.c index 9de7524b0d..44eea986d9 100644 --- a/ref/vk/vk_buffer.c +++ b/ref/vk/vk_buffer.c @@ -1,4 +1,10 @@ #include "vk_buffer.h" +#include "vk_logs.h" +#include "vk_combuf.h" + +#include "arrays.h" + +#define LOG_MODULE buf qboolean VK_BufferCreate(const char *debug_name, vk_buffer_t *buf, uint32_t size, VkBufferUsageFlags usage, VkMemoryPropertyFlags flags) { @@ -28,13 +34,17 @@ qboolean VK_BufferCreate(const char *debug_name, vk_buffer_t *buf, uint32_t size XVK_CHECK(vkBindBufferMemory(vk_core.device, buf->buffer, buf->devmem.device_memory, buf->devmem.offset)); buf->mapped = buf->devmem.mapped; - buf->size = size; + buf->name = debug_name; + + INFO("Created buffer=%llx, name=\"%s\", size=%u", (unsigned long long)buf->buffer, debug_name, size); return true; } void VK_BufferDestroy(vk_buffer_t *buf) { + // FIXME destroy staging slot + if (buf->buffer) { vkDestroyBuffer(vk_core.device, buf->buffer, NULL); buf->buffer = VK_NULL_HANDLE; @@ -116,3 +126,120 @@ uint32_t R_DEBuffer_Alloc(r_debuffer_t* debuf, r_lifetime_t lifetime, uint32_t s void R_DEBuffer_Flip(r_debuffer_t* debuf) { R_FlippingBuffer_Flip(&debuf->dynamic); } + +#define MAX_STAGING_BUFFERS 16 +#define MAX_STAGING_ENTRIES 2048 + +// TODO this should be part of the vk_buffer_t object itself +typedef struct { + vk_buffer_t *buffer; + r_vkstaging_user_handle_t staging_handle; + VkBuffer staging_buffer; + BOUNDED_ARRAY_DECLARE(VkBufferCopy, regions, MAX_STAGING_ENTRIES); +} r_vk_staging_buffer_t; + +// TODO remove this when staging is tracked by the buffer object itself +static struct { + BOUNDED_ARRAY_DECLARE(r_vk_staging_buffer_t, staging, MAX_STAGING_BUFFERS); +} g_buf; + +static r_vk_staging_buffer_t *findExistingStagingSlotForBuffer(vk_buffer_t *buf) { + for (int i = 0; i < g_buf.staging.count; ++i) { + r_vk_staging_buffer_t *const stb = g_buf.staging.items + i; + if (stb->buffer == buf) + return stb; + } + + return NULL; +} + +static void stagingBufferPush(void* userptr, struct vk_combuf_s *combuf, uint32_t pending) { + r_vk_staging_buffer_t *const stb = userptr; + ASSERT(pending == stb->regions.count); + R_VkBufferStagingCommit(stb->buffer, combuf); +} + +static r_vk_staging_buffer_t *findOrCreateStagingSlotForBuffer(vk_buffer_t *buf) { + r_vk_staging_buffer_t *stb = findExistingStagingSlotForBuffer(buf); + if (stb) + return stb; + + ASSERT(BOUNDED_ARRAY_HAS_SPACE(g_buf.staging, 1)); + stb = &BOUNDED_ARRAY_APPEND_UNSAFE(g_buf.staging); + stb->staging_buffer = VK_NULL_HANDLE; + stb->buffer = buf; + stb->regions.count = 0; + stb->staging_handle = R_VkStagingUserCreate((r_vkstaging_user_create_t){ + .name = buf->name, + .userptr = stb, + .push = stagingBufferPush, + }); + return stb; +} + +vk_buffer_locked_t R_VkBufferLock(vk_buffer_t *buf, vk_buffer_lock_t lock) { + //DEBUG("Lock buf=%p size=%d region=%d..%d", buf, lock.size, lock.offset, lock.offset + lock.size); + + r_vk_staging_buffer_t *const stb = 
findOrCreateStagingSlotForBuffer(buf); + ASSERT(stb); + + r_vkstaging_region_t staging_lock = R_VkStagingLock(stb->staging_handle, lock.size); + ASSERT(staging_lock.ptr); + + // TODO perf: adjacent region coalescing + + ASSERT(BOUNDED_ARRAY_HAS_SPACE(stb->regions, 1)); + BOUNDED_ARRAY_APPEND_UNSAFE(stb->regions) = (VkBufferCopy){ + .srcOffset = staging_lock.offset, + .dstOffset = lock.offset, + .size = lock.size, + }; + + if (stb->staging_buffer != VK_NULL_HANDLE) + // TODO implement this if staging ever grows to multiple buffers + ASSERT(stb->staging_buffer == staging_lock.buffer); + else + stb->staging_buffer = staging_lock.buffer; + + return (vk_buffer_locked_t) { + .ptr = staging_lock.ptr, + .impl_ = { + .buf = buf, + }, + }; +} + +void R_VkBufferUnlock(vk_buffer_locked_t lock) { + //DEBUG("buf=%llx staging pending++", (unsigned long long)lock.impl_.buf->buffer); + // Nothing to do? +} + +void R_VkBufferStagingCommit(vk_buffer_t *buf, struct vk_combuf_s *combuf) { + r_vk_staging_buffer_t *const stb = findExistingStagingSlotForBuffer(buf); + if (!stb || stb->regions.count == 0) + return; + + const r_vkcombuf_barrier_buffer_t barrier[] = {{ + .buffer = buf, + .access = VK_ACCESS_TRANSFER_WRITE_BIT, + }}; + + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t) { + .stage = VK_PIPELINE_STAGE_2_COPY_BIT, + .buffers = { barrier, COUNTOF(barrier) }, + .images = { NULL, 0 }, + }); + + //TODO const int begin_index = R_VkCombufScopeBegin(combuf, g_staging.buffer_upload_scope_id); + + const VkCommandBuffer cmdbuf = combuf->cmdbuf; + DEBUG_NV_CHECKPOINTF(cmdbuf, "staging dst_buffer=%p count=%d", buf->buffer, stb->regions.count); + //DEBUG("buffer=%p copy %d regions from staging buffer=%p", buf->buffer, stb->regions.count, stb->staging); + vkCmdCopyBuffer(cmdbuf, stb->staging_buffer, buf->buffer, stb->regions.count, stb->regions.items); + + DEBUG("buf=%llx staging pending-=%u", (unsigned long long)buf->buffer, stb->regions.count); + R_VkStagingUnlockBulk(stb->staging_handle, stb->regions.count); + stb->regions.count = 0; + + //TODO R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_TRANSFER_BIT); +} diff --git a/ref/vk/vk_buffer.h b/ref/vk/vk_buffer.h index 5e977fe022..9404c9cd57 100644 --- a/ref/vk/vk_buffer.h +++ b/ref/vk/vk_buffer.h @@ -2,15 +2,23 @@ #include "vk_core.h" #include "vk_devmem.h" +#include "vk_staging.h" #include "r_flipping.h" -#include "alolcator.h" + +typedef struct { + uint32_t combuf_tag; + r_vksync_scope_t write, read; +} r_vksync_state_t; typedef struct vk_buffer_s { + const char *name; // static vk_devmem_t devmem; VkBuffer buffer; void *mapped; uint32_t size; + + r_vksync_state_t sync; } vk_buffer_t; qboolean VK_BufferCreate(const char *debug_name, vk_buffer_t *buf, uint32_t size, VkBufferUsageFlags usage, VkMemoryPropertyFlags flags); @@ -31,3 +39,24 @@ typedef enum { void R_DEBuffer_Init(r_debuffer_t *debuf, uint32_t static_size, uint32_t dynamic_size); uint32_t R_DEBuffer_Alloc(r_debuffer_t* debuf, r_lifetime_t lifetime, uint32_t size, uint32_t align); void R_DEBuffer_Flip(r_debuffer_t* debuf); + +typedef struct { + void *ptr; + + struct { + vk_buffer_t *buf; + } impl_; +} vk_buffer_locked_t; + +typedef struct { + uint32_t offset; + uint32_t size; +} vk_buffer_lock_t; + +vk_buffer_locked_t R_VkBufferLock(vk_buffer_t *buf, vk_buffer_lock_t lock); + +void R_VkBufferUnlock(vk_buffer_locked_t lock); + +// Commits any staged regions for the specified buffer +struct vk_combuf_s; +void R_VkBufferStagingCommit(vk_buffer_t *buf, struct vk_combuf_s *combuf); diff --git 
a/ref/vk/vk_combuf.c b/ref/vk/vk_combuf.c index ba7339a03b..1460d74f56 100644 --- a/ref/vk/vk_combuf.c +++ b/ref/vk/vk_combuf.c @@ -1,11 +1,19 @@ #include "vk_combuf.h" #include "vk_commandpool.h" +#include "vk_buffer.h" +#include "vk_logs.h" +#include "vk_image.h" #include "profiler.h" +#define LOG_MODULE combuf + #define MAX_COMMANDBUFFERS 6 #define MAX_QUERY_COUNT 128 +#define MAX_BUFFER_BARRIERS 16 +#define MAX_IMAGE_BARRIERS 16 + #define BEGIN_INDEX_TAG 0x10000000 typedef struct { @@ -16,6 +24,8 @@ typedef struct { int scopes[MAX_GPU_SCOPES]; int scopes_count; } profiler; + + uint32_t tag; } vk_combuf_impl_t; static struct { @@ -31,6 +41,8 @@ static struct { int scopes_count; int entire_combuf_scope_id; + + uint32_t tag; } g_combuf; qboolean R_VkCombuf_Init( void ) { @@ -58,6 +70,7 @@ qboolean R_VkCombuf_Init( void ) { } g_combuf.entire_combuf_scope_id = R_VkGpuScope_Register("GPU"); + g_combuf.tag = 1; // Do not start with special value of zero return true; } @@ -94,6 +107,13 @@ void R_VkCombufClose( vk_combuf_t* pub ) { void R_VkCombufBegin( vk_combuf_t* pub ) { vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)pub; + g_combuf.tag++; + // Skip zero as special initial value for objects meaning "not yet used in combuf" + if (g_combuf.tag == 0) + g_combuf.tag = 1; + + cb->tag = g_combuf.tag; + cb->profiler.scopes_count = 0; const VkCommandBufferBeginInfo beginfo = { @@ -120,6 +140,344 @@ static const char* myStrdup(const char *src) { return ret; } +#define ACCESS_WRITE_BITS (0 \ + | VK_ACCESS_2_SHADER_WRITE_BIT \ + | VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT \ + | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT \ + | VK_ACCESS_2_TRANSFER_WRITE_BIT \ + | VK_ACCESS_2_HOST_WRITE_BIT \ + | VK_ACCESS_2_MEMORY_WRITE_BIT \ + | VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT \ + | VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR \ + ) + +#define ACCESS_READ_BITS (0 \ + | VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT \ + | VK_ACCESS_2_INDEX_READ_BIT \ + | VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT \ + | VK_ACCESS_2_UNIFORM_READ_BIT \ + | VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT \ + | VK_ACCESS_2_SHADER_READ_BIT \ + | VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT \ + | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT \ + | VK_ACCESS_2_TRANSFER_READ_BIT \ + | VK_ACCESS_2_HOST_READ_BIT \ + | VK_ACCESS_2_MEMORY_READ_BIT \ + | VK_ACCESS_2_SHADER_SAMPLED_READ_BIT \ + | VK_ACCESS_2_SHADER_STORAGE_READ_BIT \ + | VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR \ + ) + +#define ACCESS_KNOWN_BITS (ACCESS_WRITE_BITS | ACCESS_READ_BITS) + +#define PRINT_FLAG(mask, flag) \ + if ((flag) & (mask)) DEBUG("%s%s", prefix, #flag) +static void printAccessMask(const char *prefix, VkAccessFlags2 access) { + PRINT_FLAG(access, VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_INDEX_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_UNIFORM_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_SHADER_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_SHADER_WRITE_BIT); + PRINT_FLAG(access, VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT); + PRINT_FLAG(access, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); + PRINT_FLAG(access, VK_ACCESS_2_TRANSFER_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_TRANSFER_WRITE_BIT); + PRINT_FLAG(access, VK_ACCESS_2_HOST_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_HOST_WRITE_BIT); + 
PRINT_FLAG(access, VK_ACCESS_2_MEMORY_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_MEMORY_WRITE_BIT); + PRINT_FLAG(access, VK_ACCESS_2_SHADER_SAMPLED_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_SHADER_STORAGE_READ_BIT); + PRINT_FLAG(access, VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT); + PRINT_FLAG(access, VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR); + PRINT_FLAG(access, VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR); + PRINT_FLAG(access, VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR); + PRINT_FLAG(access, VK_ACCESS_2_VIDEO_ENCODE_WRITE_BIT_KHR); + PRINT_FLAG(access, VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_NV); + PRINT_FLAG(access, VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV); + PRINT_FLAG(access, VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR); + PRINT_FLAG(access, VK_ACCESS_2_SHADING_RATE_IMAGE_READ_BIT_NV); + PRINT_FLAG(access, VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR); + PRINT_FLAG(access, VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR); + PRINT_FLAG(access, VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_NV); + PRINT_FLAG(access, VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_NV); + PRINT_FLAG(access, VK_ACCESS_2_FRAGMENT_DENSITY_MAP_READ_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_INVOCATION_MASK_READ_BIT_HUAWEI); + PRINT_FLAG(access, VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR); + PRINT_FLAG(access, VK_ACCESS_2_MICROMAP_READ_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_MICROMAP_WRITE_BIT_EXT); + PRINT_FLAG(access, VK_ACCESS_2_OPTICAL_FLOW_READ_BIT_NV); + PRINT_FLAG(access, VK_ACCESS_2_OPTICAL_FLOW_WRITE_BIT_NV); +} + +static void printStageMask(const char *prefix, VkPipelineStageFlags2 stages) { + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_HOST_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_COPY_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_RESOLVE_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_BLIT_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_CLEAR_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT); + 
PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_SHADING_RATE_IMAGE_BIT_NV); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_FRAGMENT_DENSITY_PROCESS_BIT_EXT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_SUBPASS_SHADER_BIT_HUAWEI); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_INVOCATION_MASK_BIT_HUAWEI); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_MICROMAP_BUILD_BIT_EXT); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_CLUSTER_CULLING_SHADER_BIT_HUAWEI); + PRINT_FLAG(stages, VK_PIPELINE_STAGE_2_OPTICAL_FLOW_BIT_NV); +} + +static qboolean makeBufferBarrier(VkBufferMemoryBarrier2* out_bmb, const r_vkcombuf_barrier_buffer_t *const bufbar, VkPipelineStageFlags2 dst_stage, uint32_t cb_tag) { + vk_buffer_t *const buf = bufbar->buffer; + const qboolean is_write = (bufbar->access & ACCESS_WRITE_BITS) != 0; + const qboolean is_read = (bufbar->access & ACCESS_READ_BITS) != 0; + ASSERT((bufbar->access & ~(ACCESS_KNOWN_BITS)) == 0); + + if (buf->sync.combuf_tag != cb_tag) { + // This buffer hasn't been yet used in this command buffer, no need to issue a barrier + buf->sync.combuf_tag = cb_tag; + buf->sync.write = is_write + ? (r_vksync_scope_t){.access = bufbar->access & ACCESS_WRITE_BITS, .stage = dst_stage} + : (r_vksync_scope_t){.access = 0, .stage = 0 }; + buf->sync.read = is_read + ? (r_vksync_scope_t){.access = bufbar->access & ACCESS_READ_BITS, .stage = dst_stage} + : (r_vksync_scope_t){.access = 0, .stage = 0 }; + return false; + } + + *out_bmb = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .pNext = NULL, + .buffer = buf->buffer, + .offset = 0, + .size = VK_WHOLE_SIZE, + .dstStageMask = dst_stage, + .dstAccessMask = bufbar->access, + }; + + // TODO: support read-and-write scenarios + ASSERT(is_read ^ is_write); + if (is_write) { + // Write is synchronized with previous reads and writes + out_bmb->srcStageMask = buf->sync.write.stage | buf->sync.read.stage; + out_bmb->srcAccessMask = buf->sync.write.access | buf->sync.read.access; + + // Store where write happened + buf->sync.write.access = bufbar->access; + buf->sync.write.stage = dst_stage; + + // If there were no previous reads or writes, there no reason to synchronize with anything + if (out_bmb->srcStageMask == 0) + return false; + + // Reset read state + // TOOD is_read? 
for read-and-write + buf->sync.read.access = 0; + buf->sync.read.stage = 0; + } + + if (is_read) { + // Read is synchronized with previous writes only + out_bmb->srcStageMask = buf->sync.write.stage; + out_bmb->srcAccessMask = buf->sync.write.access; + + // Check whether this is a new barrier + if ((buf->sync.read.access & bufbar->access) != bufbar->access + && (buf->sync.read.stage & dst_stage) != dst_stage) { + // Remember this read happened + buf->sync.read.access |= bufbar->access; + buf->sync.read.stage |= dst_stage; + } else { + // Already synchronized, no need to do anything + return false; + } + + // Also skip issuing a barrier, if there were no previous writes -- nothing to sync with + // Note that this needs to happen late, as all reads must still be recorded in sync.read fields + if (buf->sync.write.stage == 0) + return false; + } + + if (LOG_VERBOSE) { + DEBUG(" srcAccessMask = %llx", (unsigned long long)out_bmb->srcAccessMask); + printAccessMask(" ", out_bmb->srcAccessMask); + DEBUG(" dstAccessMask = %llx", (unsigned long long)out_bmb->dstAccessMask); + printAccessMask(" ", out_bmb->dstAccessMask); + DEBUG(" srcStageMask = %llx", (unsigned long long)out_bmb->srcStageMask); + printStageMask(" ", out_bmb->srcStageMask); + DEBUG(" dstStageMask = %llx", (unsigned long long)out_bmb->dstStageMask); + printStageMask(" ", out_bmb->dstStageMask); + } + + return true; +} + +static qboolean makeImageBarrier(VkImageMemoryBarrier2* out_imb, const r_vkcombuf_barrier_image_t *const imgbar, VkPipelineStageFlags2 dst_stage) { + r_vk_image_t *const img = imgbar->image; + const qboolean is_write = (imgbar->access & ACCESS_WRITE_BITS) != 0; + const qboolean is_read = (imgbar->access & ACCESS_READ_BITS) != 0; + const VkImageLayout old_layout = (!is_read) ? 
VK_IMAGE_LAYOUT_UNDEFINED : img->sync.layout; + const qboolean is_layout_transfer = imgbar->layout != old_layout; + ASSERT((imgbar->access & ~(ACCESS_KNOWN_BITS)) == 0); + + *out_imb = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = img->sync.write.stage, + .srcAccessMask = img->sync.write.access, + .dstStageMask = dst_stage, + .dstAccessMask = imgbar->access, + .oldLayout = old_layout, + .newLayout = imgbar->layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = img->image, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + + // TODO: support read-and-write scenarios + //ASSERT(is_read ^ is_write); + + if (is_write || is_layout_transfer) { + out_imb->srcStageMask |= img->sync.read.stage; + out_imb->srcAccessMask |= img->sync.read.access; + + img->sync.write.access = imgbar->access; + img->sync.write.stage = dst_stage; + + img->sync.read.access = 0; + img->sync.read.stage = 0; + } + + if (is_read) { + const qboolean same_access = (img->sync.read.access & imgbar->access) != imgbar->access; + const qboolean same_stage = (img->sync.read.stage & dst_stage) != dst_stage; + + if (same_access && same_stage && !is_layout_transfer) + return false; + + img->sync.read.access |= imgbar->access; + img->sync.read.stage |= dst_stage; + } + + if (!is_layout_transfer && out_imb->srcAccessMask == 0 && out_imb->srcStageMask == 0) { + return false; + } + + if (LOG_VERBOSE) { + DEBUG(" srcAccessMask = %llx", (unsigned long long)out_imb->srcAccessMask); + printAccessMask(" ", out_imb->srcAccessMask); + DEBUG(" dstAccessMask = %llx", (unsigned long long)out_imb->dstAccessMask); + printAccessMask(" ", out_imb->dstAccessMask); + DEBUG(" srcStageMask = %llx", (unsigned long long)out_imb->srcStageMask); + printStageMask(" ", out_imb->srcStageMask); + DEBUG(" dstStageMask = %llx", (unsigned long long)out_imb->dstStageMask); + printStageMask(" ", out_imb->dstStageMask); + DEBUG(" oldLayout = %s (%llx)", R_VkImageLayoutName(out_imb->oldLayout), (unsigned long long)out_imb->oldLayout); + DEBUG(" newLayout = %s (%llx)", R_VkImageLayoutName(out_imb->newLayout), (unsigned long long)out_imb->newLayout); + } + + // Store new layout + img->sync.layout = imgbar->layout; + + return true; +} + +void R_VkCombufIssueBarrier(vk_combuf_t* combuf, r_vkcombuf_barrier_t bar) { + vk_combuf_impl_t *const cb = (vk_combuf_impl_t*)combuf; + + BOUNDED_ARRAY(VkBufferMemoryBarrier2, buffer_barriers, MAX_BUFFER_BARRIERS); + for (int i = 0; i < bar.buffers.count; ++i) { + const r_vkcombuf_barrier_buffer_t *const bufbar = bar.buffers.items + i; + if (LOG_VERBOSE) { + DEBUG(" buf[%d]: buf=%llx barrier:", i, (unsigned long long)bufbar->buffer->buffer); + } + + VkBufferMemoryBarrier2 bmb; + if (!makeBufferBarrier(&bmb, bufbar, bar.stage, cb->tag)) { + continue; + } + + BOUNDED_ARRAY_APPEND_ITEM(buffer_barriers, bmb); + } + + BOUNDED_ARRAY(VkImageMemoryBarrier2, image_barriers, MAX_IMAGE_BARRIERS); + for (int i = 0; i < bar.images.count; ++i) { + const r_vkcombuf_barrier_image_t *const imgbar = bar.images.items + i; + if (LOG_VERBOSE) { + DEBUG(" img[%d]: img=%llx (%s) barrier:", i, (unsigned long long)imgbar->image->image, imgbar->image->name); + } + + VkImageMemoryBarrier2 imb; + if (!makeImageBarrier(&imb, imgbar, bar.stage)) { + continue; + } + + 
BOUNDED_ARRAY_APPEND_ITEM(image_barriers, imb); + } + + if (buffer_barriers.count == 0 && image_barriers.count == 0) + return; + + vkCmdPipelineBarrier2(combuf->cmdbuf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pNext = NULL, + .dependencyFlags = 0, + .bufferMemoryBarrierCount = buffer_barriers.count, + .pBufferMemoryBarriers = buffer_barriers.items, + .imageMemoryBarrierCount = image_barriers.count, + .pImageMemoryBarriers = image_barriers.items, + }); +} + + int R_VkGpuScope_Register(const char *name) { // Find existing scope with the same name for (int i = 0; i < g_combuf.scopes_count; ++i) { diff --git a/ref/vk/vk_combuf.h b/ref/vk/vk_combuf.h index 783e49d719..1ca6980b07 100644 --- a/ref/vk/vk_combuf.h +++ b/ref/vk/vk_combuf.h @@ -1,6 +1,7 @@ #pragma once #include "vk_core.h" +#include "arrays.h" #define MAX_GPU_SCOPES 64 @@ -18,6 +19,30 @@ void R_VkCombufBegin( vk_combuf_t* ); void R_VkCombufEnd( vk_combuf_t* ); +struct vk_buffer_s; +typedef struct { + struct vk_buffer_s *buffer; + VkAccessFlags2 access; +} r_vkcombuf_barrier_buffer_t; + +struct r_vk_image_s; +typedef struct { + struct r_vk_image_s *image; + VkImageLayout layout; + VkAccessFlags2 access; +} r_vkcombuf_barrier_image_t; + +typedef struct { + VkPipelineStageFlags2 stage; + VIEW_DECLARE_CONST(r_vkcombuf_barrier_buffer_t, buffers); + VIEW_DECLARE_CONST(r_vkcombuf_barrier_image_t, images); +} r_vkcombuf_barrier_t; + +// Immediately issues a barrier for the set of resources given desired usage and resources states +void R_VkCombufIssueBarrier(vk_combuf_t*, r_vkcombuf_barrier_t); + + +// TODO rename consistently int R_VkGpuScope_Register(const char *name); int R_VkCombufScopeBegin(vk_combuf_t*, int scope_id); diff --git a/ref/vk/vk_common.h b/ref/vk/vk_common.h index 9fb8d36faf..4ed94c8035 100644 --- a/ref/vk/vk_common.h +++ b/ref/vk/vk_common.h @@ -7,7 +7,7 @@ #include "com_strings.h" #include "crtlib.h" -#define ASSERT(x) if(!( x )) gEngine.Host_Error( "assert %s failed at %s:%d\n", #x, __FILE__, __LINE__ ) +#define ASSERT(x) do { if(!( x )) gEngine.Host_Error( "assert %s failed at %s:%d\n", #x, __FILE__, __LINE__ ); } while (0) // TODO ASSERTF(x, fmt, ...) 
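The `do { ... } while (0)` wrapper is the usual C macro-hygiene fix: the previous single-`if` form misbehaves when the macro is used as an unbraced `if`/`else` body. A minimal illustration (the caller and `handleNull()` are hypothetical, not from this patch):

```c
if (ptr)
	ASSERT(ptr->size > 0);
else
	handleNull();
// Old form: ASSERT expands to a bare `if (!(x)) gEngine.Host_Error(...);`,
// so the `else` binds to ASSERT's internal `if`. handleNull() then runs when
// ptr is non-NULL and the assertion passes, and never runs for a NULL ptr.
// The do/while(0) form expands to a single statement, keeps the `else` bound
// to `if (ptr)`, and still requires the trailing ';' at the call site.
```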
#define Mem_Malloc( pool, size ) gEngine._Mem_Alloc( pool, size, false, __FILE__, __LINE__ ) @@ -22,6 +22,10 @@ #define COUNTOF(a) (sizeof(a)/sizeof((a)[0])) +// Silences -Werror=cast-align +// TODO assert for proper alignment for type_ +#define PTR_CAST(type_, ptr_) ((type_*)(void*)(ptr_)) + inline static int clampi32(int v, int min, int max) { if (v < min) return min; if (v > max) return max; @@ -30,19 +34,3 @@ inline static int clampi32(int v, int min, int max) { extern ref_api_t gEngine; extern ref_globals_t *gpGlobals; - -// TODO improve and make its own file -#define BOUNDED_ARRAY_DECLARE(NAME, TYPE, MAX_SIZE) \ - struct { \ - TYPE items[MAX_SIZE]; \ - int count; \ - } NAME - -#define BOUNDED_ARRAY(NAME, TYPE, MAX_SIZE) \ - BOUNDED_ARRAY_DECLARE(NAME, TYPE, MAX_SIZE) = {0} - -#define BOUNDED_ARRAY_APPEND(var, item) \ - do { \ - ASSERT(var.count < COUNTOF(var.items)); \ - var.items[var.count++] = item; \ - } while(0) diff --git a/ref/vk/vk_core.c b/ref/vk/vk_core.c index e1c7fcb1fd..48763d5152 100644 --- a/ref/vk/vk_core.c +++ b/ref/vk/vk_core.c @@ -3,7 +3,7 @@ #include "vk_common.h" #include "r_textures.h" #include "vk_overlay.h" -#include "vk_renderstate.h" +#include "vk_image.h" #include "vk_staging.h" #include "vk_framectl.h" #include "vk_brush.h" @@ -24,6 +24,7 @@ #include "vk_combuf.h" #include "vk_entity_data.h" #include "vk_logs.h" +#include "arrays.h" // FIXME move this rt-specific stuff out #include "vk_light.h" @@ -39,7 +40,6 @@ #include "debugbreak.h" #include -#include #define LOG_MODULE core @@ -114,7 +114,7 @@ static const char* device_extensions_extra[] = { VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME, }; -VkBool32 VKAPI_PTR debugCallback( +static VkBool32 VKAPI_PTR debugCallback( VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageTypes, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, @@ -123,16 +123,9 @@ VkBool32 VKAPI_PTR debugCallback( (void)(messageTypes); (void)(messageSeverity); - if (Q_strcmp(pCallbackData->pMessageIdName, "VUID-vkMapMemory-memory-00683") == 0) - return VK_FALSE; - - /* if (messageSeverity != VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { */ - /* gEngine.Con_Printf(S_WARN "Validation: %s\n", pCallbackData->pMessage); */ - /* } */ - - // TODO better messages, not only errors, what are other arguments for, ... if (messageSeverity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { - gEngine.Con_Printf(S_ERROR "%s\n", pCallbackData->pMessage); + gEngine.Con_Printf(S_ERROR "vk/dbg: %s\n", pCallbackData->pMessage); #ifdef _MSC_VER __debugbreak(); #else @@ -140,9 +133,9 @@ VkBool32 VKAPI_PTR debugCallback( #endif } else { if (Q_strcmp(pCallbackData->pMessageIdName, "UNASSIGNED-DEBUG-PRINTF") == 0) { - gEngine.Con_Printf(S_ERROR "%s\n", pCallbackData->pMessage); + gEngine.Con_Printf(S_ERROR "vk/dbg: %s\n", pCallbackData->pMessage); } else { - gEngine.Con_Printf(S_WARN "%s\n", pCallbackData->pMessage); + gEngine.Con_Printf(S_WARN "vk/dbg: %s\n", pCallbackData->pMessage); } } @@ -184,19 +177,19 @@ static qboolean createInstance( void ) // TODO support versions 1.0 and 1.1 for simple traditional rendering // This would require using older physical device features and props query structures // .apiVersion = vk_core.rtx ?
VK_API_VERSION_1_2 : VK_API_VERSION_1_1, - .apiVersion = VK_API_VERSION_1_2, + .apiVersion = VK_API_VERSION_1_3, .applicationVersion = VK_MAKE_VERSION(0, 0, 0), // TODO .engineVersion = VK_MAKE_VERSION(0, 0, 0), .pApplicationName = "", .pEngineName = "xash3d-fwgs", }; - BOUNDED_ARRAY(validation_features, VkValidationFeatureEnableEXT, 8); - BOUNDED_ARRAY_APPEND(validation_features, VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT); - BOUNDED_ARRAY_APPEND(validation_features, VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT); + BOUNDED_ARRAY(VkValidationFeatureEnableEXT, validation_features, 8); + BOUNDED_ARRAY_APPEND_ITEM(validation_features, VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT); + BOUNDED_ARRAY_APPEND_ITEM(validation_features, VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT); if (!!gEngine.Sys_CheckParm("-vkdbg_shaderprintf")) - BOUNDED_ARRAY_APPEND(validation_features, VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT); + BOUNDED_ARRAY_APPEND_ITEM(validation_features, VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT); const VkValidationFeaturesEXT validation_ext = { .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, @@ -567,6 +560,13 @@ static qboolean createDevice( void ) { head = NULL; } + VkPhysicalDeviceVulkan13Features vk13_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, + .pNext = head, + .synchronization2 = VK_TRUE, + }; + head = &vk13_features; + VkPhysicalDeviceFeatures2 features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = head, @@ -729,6 +729,7 @@ static const r_vk_module_t *const modules[] = { qboolean R_VkInit( void ) { // FIXME !!!! handle initialization errors properly: destroy what has already been created + INFO("R_VkInit"); vk_core.validate = !!gEngine.Sys_CheckParm("-vkvalidate"); vk_core.debug = vk_core.validate || !!(gEngine.Sys_CheckParm("-vkdebug") || gEngine.Sys_CheckParm("-gldebug")); @@ -793,6 +794,9 @@ qboolean R_VkInit( void ) VK_LoadCvarsAfterInit(); + if (!R_VkImageInit()) + return false; + if (!R_VkCombuf_Init()) return false; @@ -847,6 +851,7 @@ qboolean R_VkInit( void ) R_SpriteInit(); R_BeamInit(); + INFO("R_VkInit done"); return true; } diff --git a/ref/vk/vk_core.h b/ref/vk/vk_core.h index 55112fa0aa..41ab363ea5 100644 --- a/ref/vk/vk_core.h +++ b/ref/vk/vk_core.h @@ -69,6 +69,7 @@ const char *R_VkResultName(VkResult result); const char *R_VkPresentModeName(VkPresentModeKHR present_mode); const char *R_VkFormatName(VkFormat format); const char *R_VkColorSpaceName(VkColorSpaceKHR colorspace); +const char *R_VkImageLayoutName(VkImageLayout); #define SET_DEBUG_NAME(object, type, name) \ do { \ @@ -224,6 +225,7 @@ do { \ X(vkGetImageMemoryRequirements) \ X(vkBindImageMemory) \ X(vkCmdPipelineBarrier) \ + X(vkCmdPipelineBarrier2) \ X(vkCmdCopyBufferToImage) \ X(vkCmdCopyBuffer) \ X(vkQueueWaitIdle) \ @@ -276,3 +278,9 @@ do { \ INSTANCE_FUNCS(X) INSTANCE_DEBUG_FUNCS(X) #undef X + +// TODO is there a better place for this, vk_utils.h? 
+typedef struct { + VkAccessFlags2 access; + VkPipelineStageFlagBits2 stage; +} r_vksync_scope_t; diff --git a/ref/vk/vk_cvar.c b/ref/vk/vk_cvar.c index 64e4e0c9b4..d04e39b647 100644 --- a/ref/vk/vk_cvar.c +++ b/ref/vk/vk_cvar.c @@ -29,6 +29,7 @@ void VK_LoadCvars( void ) vk_device_target_id = gEngine.Cvar_Get( "vk_device_target_id", "", FCVAR_GLCONFIG, "Selected video device id" ); vk_debug_log = gEngine.Cvar_Get("vk_debug_log_", "", FCVAR_GLCONFIG | FCVAR_READ_ONLY, ""); + R_LogSetVerboseModules( vk_debug_log->string ); gEngine.Cmd_AddCommand("vk_debug_log", setDebugLog, "Set modules to enable debug logs for"); } diff --git a/ref/vk/vk_descriptor.h b/ref/vk/vk_descriptor.h index 8464ba3b79..93e4c9fde2 100644 --- a/ref/vk/vk_descriptor.h +++ b/ref/vk/vk_descriptor.h @@ -29,7 +29,6 @@ typedef union { VkDescriptorImageInfo image; const VkDescriptorImageInfo *image_array; VkWriteDescriptorSetAccelerationStructureKHR accel; - const struct r_vk_image_s *image_object; } vk_descriptor_value_t; typedef struct { diff --git a/ref/vk/vk_framectl.c b/ref/vk/vk_framectl.c index a7e43176f2..3b7ac39acd 100644 --- a/ref/vk/vk_framectl.c +++ b/ref/vk/vk_framectl.c @@ -10,7 +10,12 @@ #include "vk_staging.h" #include "vk_commandpool.h" #include "vk_combuf.h" +#include "vk_logs.h" +#include "vk_buffer.h" +#include "vk_geometry.h" + +#include "arrays.h" #include "profiler.h" #include "r_speeds.h" @@ -18,6 +23,8 @@ #include +#define LOG_MODULE fctl + extern ref_globals_t *gpGlobals; vk_framectl_t vk_frame = {0}; @@ -43,7 +50,7 @@ typedef struct { // so we can't reuse the same one for two purposes and need to mnozhit sunchnosti VkSemaphore sem_done2; - vk_combuf_t *staging_combuf; + uint32_t staging_frame_tag; } vk_framectl_frame_t; static struct { @@ -141,7 +148,7 @@ static VkRenderPass createRenderPass( VkFormat depth_format, qboolean ray_tracin .pDepthStencilAttachment = &depth_attachment, }; - BOUNDED_ARRAY(dependencies, VkSubpassDependency, 2); + BOUNDED_ARRAY(VkSubpassDependency, dependencies, 2); if (vk_core.rtx) { const VkSubpassDependency color = { .srcSubpass = VK_SUBPASS_EXTERNAL, @@ -152,7 +159,7 @@ static VkRenderPass createRenderPass( VkFormat depth_format, qboolean ray_tracin .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, }; - BOUNDED_ARRAY_APPEND(dependencies, color); + BOUNDED_ARRAY_APPEND_ITEM(dependencies, color); } else { const VkSubpassDependency color = { .srcSubpass = VK_SUBPASS_EXTERNAL, @@ -163,7 +170,7 @@ static VkRenderPass createRenderPass( VkFormat depth_format, qboolean ray_tracin .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, }; - BOUNDED_ARRAY_APPEND(dependencies, color); + BOUNDED_ARRAY_APPEND_ITEM(dependencies, color); } const VkSubpassDependency depth = { @@ -175,7 +182,7 @@ static VkRenderPass createRenderPass( VkFormat depth_format, qboolean ray_tracin .dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, .dependencyFlags = 0, }; - BOUNDED_ARRAY_APPEND(dependencies, depth); + BOUNDED_ARRAY_APPEND_ITEM(dependencies, depth); const VkRenderPassCreateInfo rpci = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, @@ -257,12 +264,8 @@ void R_BeginFrame( qboolean clearScene ) { APROF_SCOPE_BEGIN(begin_frame); { - const vk_combuf_scopes_t gpurofl[] = { - frame->staging_combuf ? 
R_VkCombufScopesGet(frame->staging_combuf) : (vk_combuf_scopes_t){.entries_count=0}, - R_VkCombufScopesGet(frame->combuf), - }; - - R_SpeedsDisplayMore(prev_frame_event_index, frame->staging_combuf ? gpurofl : gpurofl + 1, frame->staging_combuf ? 2 : 1); + const vk_combuf_scopes_t gpurofl[] = { R_VkCombufScopesGet(frame->combuf) }; + R_SpeedsDisplayMore(prev_frame_event_index, gpurofl, COUNTOF(gpurofl)); } if (vk_core.rtx && FBitSet( rt_enable->flags, FCVAR_CHANGED )) { @@ -274,11 +277,12 @@ void R_BeginFrame( qboolean clearScene ) { ASSERT(!g_frame.current.framebuffer.framebuffer); - R_VkStagingFrameBegin(); + // TODO explicit frame dependency synced on frame-end-event/sema + R_VkStagingFrameCompleted(frame->staging_frame_tag); g_frame.current.framebuffer = R_VkSwapchainAcquire( frame->sem_framebuffer_ready ); - vk_frame.width = g_frame.current.framebuffer.width; - vk_frame.height = g_frame.current.framebuffer.height; + vk_frame.width = g_frame.current.framebuffer.image.width; + vk_frame.height = g_frame.current.framebuffer.image.height; VK_RenderBegin( vk_frame.rtx_enabled ); @@ -297,25 +301,60 @@ void VK_RenderFrame( const struct ref_viewpass_s *rvp ) static void enqueueRendering( vk_combuf_t* combuf, qboolean draw ) { APROF_SCOPE_DECLARE_BEGIN(enqueue, __FUNCTION__); - const VkClearValue clear_value[] = { - {.color = {{1., 0., 0., 0.}}}, - {.depthStencil = {1., 0.}} // TODO reverse-z - }; + const uint32_t frame_width = g_frame.current.framebuffer.image.width; + const uint32_t frame_height = g_frame.current.framebuffer.image.height; ASSERT(g_frame.current.phase == Phase_FrameBegan); + // TODO: should be done by rendering when it requests textures + R_VkImageUploadCommit(combuf, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | (vk_frame.rtx_enabled ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT : 0)); + const VkCommandBuffer cmdbuf = combuf->cmdbuf; - VK_Render_FIXME_Barrier(cmdbuf); - if (vk_frame.rtx_enabled) - VK_RenderEndRTX( combuf, g_frame.current.framebuffer.view, g_frame.current.framebuffer.image, g_frame.current.framebuffer.width, g_frame.current.framebuffer.height ); + if (vk_frame.rtx_enabled) { + VK_RenderEndRTX( combuf, &g_frame.current.framebuffer.image ); + } else { + // FIXME: how to do this properly before render pass? + // Needed to avoid VUID-vkCmdCopyBuffer-renderpass + vk_buffer_t* const geom = R_GeometryBuffer_Get(); + R_VkBufferStagingCommit(geom, combuf); + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){ + .stage = VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT, + .buffers = { + .count = 1, + .items = &(r_vkcombuf_barrier_buffer_t){ + .buffer = geom, + .access = VK_ACCESS_2_INDEX_READ_BIT | VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT, + }, + }, + }); + } if (draw) { + const r_vkcombuf_barrier_image_t dst_use[] = {{ + .image = &g_frame.current.framebuffer.image, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .access = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, + }}; + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t) { + .stage = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, + .images = { + .items = dst_use, + .count = COUNTOF(dst_use), + }, + }); + + const VkClearValue clear_value[] = { + // *_UNORM is float + {.color = {.float32 = {1.f, 0.f, 0.f, 0.f}}}, + {.depthStencil = {1., 0.}} // TODO reverse-z + }; const VkRenderPassBeginInfo rpbi = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .renderPass = vk_frame.rtx_enabled ? 
vk_frame.render_pass.after_ray_tracing : vk_frame.render_pass.raster, - .renderArea.extent.width = g_frame.current.framebuffer.width, - .renderArea.extent.height = g_frame.current.framebuffer.height, + .renderArea.extent.width = frame_width, + .renderArea.extent.height = frame_height, .clearValueCount = ARRAYSIZE(clear_value), .pClearValues = clear_value, .framebuffer = g_frame.current.framebuffer.framebuffer, @@ -324,11 +363,11 @@ static void enqueueRendering( vk_combuf_t* combuf, qboolean draw ) { { const VkViewport viewport[] = { - {0.f, 0.f, (float)g_frame.current.framebuffer.width, (float)g_frame.current.framebuffer.height, 0.f, 1.f}, + {0.f, 0.f, (float)frame_width, (float)frame_height, 0.f, 1.f}, }; const VkRect2D scissor[] = {{ {0, 0}, - {g_frame.current.framebuffer.width, g_frame.current.framebuffer.height}, + {frame_width, frame_height}, }}; vkCmdSetViewport(cmdbuf, 0, ARRAYSIZE(viewport), viewport); @@ -337,16 +376,22 @@ static void enqueueRendering( vk_combuf_t* combuf, qboolean draw ) { } if (!vk_frame.rtx_enabled) - VK_RenderEnd( cmdbuf, draw, - g_frame.current.framebuffer.width, g_frame.current.framebuffer.height, + VK_RenderEnd( combuf, draw, + frame_width, frame_height, g_frame.current.index ); R_VkOverlay_DrawAndFlip( cmdbuf, draw ); - if (draw) + if (draw) { vkCmdEndRenderPass(cmdbuf); + // Render pass's finalLayout transitions the image into this one + g_frame.current.framebuffer.image.sync.read.access = 0; + g_frame.current.framebuffer.image.sync.write.access = 0; + g_frame.current.framebuffer.image.sync.layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + } + g_frame.current.phase = Phase_RenderingEnqueued; APROF_SCOPE_END(enqueue); } @@ -361,45 +406,58 @@ static void submit( vk_combuf_t* combuf, qboolean wait, qboolean draw ) { vk_framectl_frame_t *const frame = g_frame.frames + g_frame.current.index; vk_framectl_frame_t *const prev_frame = g_frame.frames + (g_frame.current.index + 1) % MAX_CONCURRENT_FRAMES; + // Push things from staging that weren't explicitly pulled by frame builder + frame->staging_frame_tag = R_VkStagingFrameEpilogue(combuf); + R_VkCombufEnd(combuf); - frame->staging_combuf = R_VkStagingFrameEnd(); - const VkCommandBuffer cmdbufs[] = { - frame->staging_combuf ? frame->staging_combuf->cmdbuf : NULL, - cmdbuf, - }; + BOUNDED_ARRAY(VkCommandBuffer, cmdbufs, 2); + BOUNDED_ARRAY_APPEND_ITEM(cmdbufs, cmdbuf); { - const VkPipelineStageFlags stageflags[] = { - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - }; - // TODO for RT renderer we only touch framebuffer at the very end of rendering/cmdbuf. - // Can we postpone waitinf for framebuffer semaphore until we actually need it. 
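The semaphore and wait-stage lists below use the bounded-array helpers that this patch moves out of vk_common.h into arrays.h; the declaration argument order is now (TYPE, NAME, CAPACITY) and the append macro is renamed to BOUNDED_ARRAY_APPEND_ITEM. arrays.h itself is not part of this hunk, so the sketch below only assumes it keeps the semantics of the removed vk_common.h macros (a fixed items[]/count pair with a capacity-checked append):

	BOUNDED_ARRAY(VkSemaphore, sems, 2);               // zero-initialized: VkSemaphore items[2]; int count;
	BOUNDED_ARRAY_APPEND_ITEM(sems, frame->sem_done);  // ASSERTs count < capacity, then items[count++] = item
	for (int i = 0; i < sems.count; ++i)
		DEBUG("sem[%d] = %llx", i, (unsigned long long)sems.items[i]);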
- BOUNDED_ARRAY(waitophores, VkSemaphore, 2); - BOUNDED_ARRAY(signalphores, VkSemaphore, 2); + BOUNDED_ARRAY(VkSemaphore, waitophores, 2); + BOUNDED_ARRAY(VkPipelineStageFlags, wait_stageflags, 2); + BOUNDED_ARRAY(VkSemaphore, signalphores, 2); if (draw) { - BOUNDED_ARRAY_APPEND(waitophores, frame->sem_framebuffer_ready); - BOUNDED_ARRAY_APPEND(signalphores, frame->sem_done); + BOUNDED_ARRAY_APPEND_ITEM(waitophores, frame->sem_framebuffer_ready); + BOUNDED_ARRAY_APPEND_ITEM(wait_stageflags, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); + + BOUNDED_ARRAY_APPEND_ITEM(signalphores, frame->sem_done); } - BOUNDED_ARRAY_APPEND(waitophores, prev_frame->sem_done2); - BOUNDED_ARRAY_APPEND(signalphores, frame->sem_done2); + BOUNDED_ARRAY_APPEND_ITEM(waitophores, prev_frame->sem_done2); + // TODO remove this second semaphore altogether, replace it with properly tracked barriers. + // Why: would allow more parallelism between consecutive frames. + BOUNDED_ARRAY_APPEND_ITEM(wait_stageflags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT); + BOUNDED_ARRAY_APPEND_ITEM(signalphores, frame->sem_done2); + + DEBUG("submit: frame=%d, staging_tag=%u, combuf=%p, wait for semaphores[%d]={%llx, %llx}, signal semaphores[%d]={%llx, %llx}", + g_frame.current.index, + frame->staging_frame_tag, + frame->combuf->cmdbuf, + waitophores.count, + (unsigned long long)waitophores.items[0], + (unsigned long long)waitophores.items[1], + signalphores.count, + (unsigned long long)signalphores.items[0], + (unsigned long long)signalphores.items[1] + ); + + ASSERT(waitophores.count == wait_stageflags.count); const VkSubmitInfo subinfo = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = NULL, .waitSemaphoreCount = waitophores.count, .pWaitSemaphores = waitophores.items, - .pWaitDstStageMask = stageflags, - .commandBufferCount = cmdbufs[0] ? 2 : 1, - .pCommandBuffers = cmdbufs[0] ?
cmdbufs : cmdbufs + 1, + .pWaitDstStageMask = wait_stageflags.items, + .commandBufferCount = cmdbufs.count, + .pCommandBuffers = cmdbufs.items, .signalSemaphoreCount = signalphores.count, .pSignalSemaphores = signalphores.items, }; - //gEngine.Con_Printf("SYNC: wait for semaphore %d, signal semaphore %d\n", (g_frame.current.index + 1) % MAX_CONCURRENT_FRAMES, g_frame.current.index); XVK_CHECK(vkQueueSubmit(vk_core.queue, 1, &subinfo, frame->fence_done)); g_frame.current.phase = Phase_Submitted; } @@ -474,7 +532,6 @@ qboolean VK_FrameCtlInit( void ) // Signal first frame semaphore as done { - const VkPipelineStageFlags stageflags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; const VkSubmitInfo subinfo = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = NULL, @@ -482,7 +539,7 @@ qboolean VK_FrameCtlInit( void ) .pCommandBuffers = NULL, .waitSemaphoreCount = 0, .pWaitSemaphores = NULL, - .pWaitDstStageMask = &stageflags, + .pWaitDstStageMask = NULL, .signalSemaphoreCount = 1, .pSignalSemaphores = &g_frame.frames[0].sem_done2, }; @@ -531,15 +588,15 @@ static qboolean canBlitFromSwapchainToFormat( VkFormat dest_format ) { static rgbdata_t *R_VkReadPixels( void ) { const VkFormat dest_format = VK_FORMAT_R8G8B8A8_UNORM; - r_vk_image_t dest_image; - const VkImage frame_image = g_frame.current.framebuffer.image; + r_vk_image_t temp_image; + r_vk_image_t *const framebuffer_image = &g_frame.current.framebuffer.image; rgbdata_t *r_shot = NULL; qboolean blit = canBlitFromSwapchainToFormat( dest_format ); vk_combuf_t *const combuf = g_frame.frames[g_frame.current.index].combuf; const VkCommandBuffer cmdbuf = combuf->cmdbuf; - if (frame_image == VK_NULL_HANDLE) { + if (framebuffer_image->image == VK_NULL_HANDLE) { gEngine.Con_Printf(S_ERROR "no current image, can't take screenshot\n"); return NULL; } @@ -559,63 +616,47 @@ static rgbdata_t *R_VkReadPixels( void ) { .flags = 0, .memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, }; - dest_image = R_VkImageCreate(&xic); + temp_image = R_VkImageCreate(&xic); } // Make sure that all rendering ops are enqueued const qboolean draw = true; enqueueRendering( combuf, draw ); - { - // Barrier 1: dest image - const VkImageMemoryBarrier image_barrier[2] = {{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = dest_image.image, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }}, { // Barrier 2: source swapchain image - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = frame_image, - .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }}}; - - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, 0, NULL, ARRAYSIZE(image_barrier), image_barrier); - } - // Blit/transfer if (blit) { - const VkImageBlit blit = { - .srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - 
.srcSubresource.layerCount = 1, - .dstSubresource.layerCount = 1, - .srcOffsets = {{0}, {vk_frame.width, vk_frame.height, 1}}, - .dstOffsets = {{0}, {vk_frame.width, vk_frame.height, 1}} - }; - vkCmdBlitImage(cmdbuf, - frame_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - dest_image.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blit, VK_FILTER_NEAREST); + R_VkImageBlit(combuf, &(r_vkimage_blit_args){ + .src = { + .image = framebuffer_image, + .width = vk_frame.width, + .height = vk_frame.height, + .depth = 1, + }, + .dst = { + .image = &temp_image, + .width = vk_frame.width, + .height = vk_frame.height, + .depth = 1, + }, + }); } else { + const r_vkcombuf_barrier_image_t image_barriers[] = {{ + .image = &temp_image, + .access = VK_ACCESS_2_TRANSFER_WRITE_BIT, + .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + }, { + .image = framebuffer_image, + .access = VK_ACCESS_2_TRANSFER_READ_BIT, + .layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + }}; + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){ + .stage = VK_PIPELINE_STAGE_2_COPY_BIT, + .images = { + .count = COUNTOF(image_barriers), + .items = image_barriers, + }, + }); + const VkImageCopy copy = { .srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -627,46 +668,31 @@ static rgbdata_t *R_VkReadPixels( void ) { }; vkCmdCopyImage(cmdbuf, - frame_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - dest_image.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©); + framebuffer_image->image, framebuffer_image->sync.layout, + temp_image.image, temp_image.sync.layout, 1, ©); gEngine.Con_Printf(S_WARN "Blit is not supported, screenshot will likely have mixed components; TODO: swizzle in software\n"); } { - // Barrier 1: dest image - VkImageMemoryBarrier image_barrier[2] = {{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = dest_image.image, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }}, { // Barrier 2: source swapchain image - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = frame_image, - .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }}}; - - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - 0, 0, NULL, 0, NULL, ARRAYSIZE(image_barrier), image_barrier); + const r_vkcombuf_barrier_image_t image_barriers[] = {{ + // Temp image: prepare for reading on CPU + .image = &temp_image, + .access = VK_ACCESS_2_MEMORY_READ_BIT, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }, { + // Framebuffer image: prepare for displaying + .image = framebuffer_image, + .access = 0, + .layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + }}; + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){ + .stage = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_HOST_BIT, + .images = { + .count = COUNTOF(image_barriers), + .items = image_barriers, + }, + }); } { @@ -680,8 +706,8 @@ static rgbdata_t 
*R_VkReadPixels( void ) { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, }; VkSubresourceLayout layout; - const char *mapped = dest_image.devmem.mapped; - vkGetImageSubresourceLayout(vk_core.device, dest_image.image, &subres, &layout); + const char *mapped = temp_image.devmem.mapped; + vkGetImageSubresourceLayout(vk_core.device, temp_image.image, &subres, &layout); mapped += layout.offset; @@ -724,7 +750,7 @@ static rgbdata_t *R_VkReadPixels( void ) { } } - R_VkImageDestroy( &dest_image ); + R_VkImageDestroy( &temp_image ); return r_shot; } diff --git a/ref/vk/vk_geometry.c b/ref/vk/vk_geometry.c index d9725381e5..886ca80a1a 100644 --- a/ref/vk/vk_geometry.c +++ b/ref/vk/vk_geometry.c @@ -59,14 +59,12 @@ void R_GeometryRangeFree(const r_geometry_range_t* range) { } r_geometry_range_lock_t R_GeometryRangeLock(const r_geometry_range_t *range) { - const vk_staging_buffer_args_t staging_args = { - .buffer = g_geom.buffer.buffer, + const vk_buffer_lock_t staging_args = { .offset = range->block_handle.offset, .size = range->block_handle.size, - .alignment = 4, }; - const vk_staging_region_t staging = R_VkStagingLockForBuffer(staging_args); + const vk_buffer_locked_t staging = R_VkBufferLock(&g_geom.buffer, staging_args); ASSERT(staging.ptr); const uint32_t vertices_size = range->vertices.count * sizeof(vk_vertex_t); @@ -76,25 +74,23 @@ r_geometry_range_lock_t R_GeometryRangeLock(const r_geometry_range_t *range) { return (r_geometry_range_lock_t){ .vertices = (vk_vertex_t *)staging.ptr, - .indices = (uint16_t *)((char*)staging.ptr + vertices_size), + .indices = PTR_CAST(uint16_t, (char*)staging.ptr + vertices_size), .impl_ = { - .staging_handle = staging.handle, + .staging_handle = staging, }, }; } r_geometry_range_lock_t R_GeometryRangeLockSubrange(const r_geometry_range_t *range, int vertices_offset, int vertices_count ) { - const vk_staging_buffer_args_t staging_args = { - .buffer = g_geom.buffer.buffer, + const vk_buffer_lock_t staging_args = { .offset = range->block_handle.offset + sizeof(vk_vertex_t) * vertices_offset, .size = sizeof(vk_vertex_t) * vertices_count, - .alignment = 4, }; ASSERT(staging_args.offset >= range->block_handle.offset); ASSERT(staging_args.offset + staging_args.size <= range->block_handle.offset + range->block_handle.size); - const vk_staging_region_t staging = R_VkStagingLockForBuffer(staging_args); + const vk_buffer_locked_t staging = R_VkBufferLock(&g_geom.buffer, staging_args); ASSERT(staging.ptr); ASSERT( range->block_handle.offset % sizeof(vk_vertex_t) == 0 ); @@ -103,13 +99,13 @@ r_geometry_range_lock_t R_GeometryRangeLockSubrange(const r_geometry_range_t *ra .vertices = (vk_vertex_t *)staging.ptr, .indices = NULL, .impl_ = { - .staging_handle = staging.handle, + .staging_handle = staging, }, }; } void R_GeometryRangeUnlock(const r_geometry_range_lock_t *lock) { - R_VkStagingUnlock(lock->impl_.staging_handle); + R_VkBufferUnlock(lock->impl_.staging_handle); } qboolean R_GeometryBufferAllocOnceAndLock(r_geometry_buffer_lock_t *lock, int vertex_count, int index_count) { @@ -129,14 +125,12 @@ qboolean R_GeometryBufferAllocOnceAndLock(r_geometry_buffer_lock_t *lock, int ve { const uint32_t vertices_offset = offset / sizeof(vk_vertex_t); const uint32_t indices_offset = (offset + vertices_size) / sizeof(uint16_t); - const vk_staging_buffer_args_t staging_args = { - .buffer = g_geom.buffer.buffer, + const vk_buffer_lock_t staging_args = { .offset = offset, .size = total_size, - .alignment = 4, }; - const vk_staging_region_t staging = R_VkStagingLockForBuffer(staging_args); 
+ const vk_buffer_locked_t staging = R_VkBufferLock(&g_geom.buffer, staging_args); ASSERT(staging.ptr); ASSERT( offset % sizeof(vk_vertex_t) == 0 ); @@ -150,11 +144,11 @@ qboolean R_GeometryBufferAllocOnceAndLock(r_geometry_buffer_lock_t *lock, int ve }, .indices = { .count = index_count, - .ptr = (uint16_t *)((char*)staging.ptr + vertices_size), + .ptr = PTR_CAST(uint16_t, (char*)staging.ptr + vertices_size), .unit_offset = indices_offset, }, .impl_ = { - .staging_handle = staging.handle, + .handle_ = staging, }, }; } @@ -166,7 +160,7 @@ qboolean R_GeometryBufferAllocOnceAndLock(r_geometry_buffer_lock_t *lock, int ve } void R_GeometryBufferUnlock( const r_geometry_buffer_lock_t *lock ) { - R_VkStagingUnlock(lock->impl_.staging_handle); + R_VkBufferUnlock(lock->impl_.handle_); } void R_GeometryBuffer_MapClear( void ) { @@ -203,6 +197,6 @@ void R_GeometryBuffer_Flip(void) { R_BlocksClearOnce(&g_geom.alloc); } -VkBuffer R_GeometryBuffer_Get(void) { - return g_geom.buffer.buffer; +vk_buffer_t* R_GeometryBuffer_Get(void) { + return &g_geom.buffer; } diff --git a/ref/vk/vk_geometry.h b/ref/vk/vk_geometry.h index 7063798801..3e73b44847 100644 --- a/ref/vk/vk_geometry.h +++ b/ref/vk/vk_geometry.h @@ -1,6 +1,7 @@ #pragma once #include "vk_common.h" #include "r_block.h" +#include "vk_buffer.h" // FIXME vk_buffer_locked_t should not be exposed #include "vk_core.h" #include @@ -41,12 +42,15 @@ typedef struct { r_geometry_range_t R_GeometryRangeAlloc(int vertices, int indices); void R_GeometryRangeFree(const r_geometry_range_t*); +// TODO combine with r_geometry_buffer_lock_t typedef struct { vk_vertex_t *vertices; uint16_t *indices; struct { - int staging_handle; + // FIXME hide behind some index in geometry buffer + // Think: what's the max simultaneously locked regions count + vk_buffer_locked_t staging_handle; } impl_; } r_geometry_range_lock_t; @@ -69,7 +73,9 @@ typedef struct { } indices; struct { - int staging_handle; + // FIXME hide behind some index in geometry buffer + // Think: what's the max simultaneously locked regions count + vk_buffer_locked_t handle_; } impl_; } r_geometry_buffer_lock_t; @@ -88,6 +94,4 @@ void R_GeometryBuffer_Shutdown(void); void R_GeometryBuffer_Flip(void); -// FIXME is there a better way? -VkBuffer R_GeometryBuffer_Get(void); - +vk_buffer_t* R_GeometryBuffer_Get(void); diff --git a/ref/vk/vk_image.c b/ref/vk/vk_image.c index c2ede53fc2..4b550468ed 100644 --- a/ref/vk/vk_image.c +++ b/ref/vk/vk_image.c @@ -2,12 +2,15 @@ #include "vk_staging.h" #include "vk_combuf.h" #include "vk_logs.h" +#include "arrays.h" #include "xash3d_mathlib.h" // Q_max // Long type lists functions #include "vk_image_extra.h" +#define LOG_MODULE img + static const VkImageUsageFlags usage_bits_implying_views = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | @@ -108,17 +111,34 @@ r_vk_image_t R_VkImageCreate(const r_vk_image_create_t *create) { } } + Q_strncpy(image.name, create->debug_name, sizeof(image.name)); image.width = create->width; image.height = create->height; image.depth = create->depth; image.mips = create->mips; image.layers = create->layers; image.flags = create->flags; + image.image_size = memreq.size; + image.upload_slot = -1; return image; } +static void cancelUpload( r_vk_image_t *img ); + void R_VkImageDestroy(r_vk_image_t *img) { + // Need to make sure that there are no references to this image anywhere. + // It might have been added to upload queue, but then immediately deleted, leaving references + // in the queue. 
See https://github.com/w23/xash3d-fwgs/issues/464 + cancelUpload(img); + + // Image destroy calls are not explicitly synchronized with rendering. GPU might still be + // processing previous frame. We need to make sure that GPU is done by the time we start + // messing with any VkImage objects. + // TODO: textures are usually destroyed in bulk, so we don't really need to wait for each one. + // TODO: check with framectl for any in-flight frames or any other GPU activity + XVK_CHECK(vkDeviceWaitIdle(vk_core.device)); + if (img->view_unorm != VK_NULL_HANDLE) vkDestroyImageView(vk_core.device, img->view_unorm, NULL); @@ -132,135 +152,306 @@ void R_VkImageDestroy(r_vk_image_t *img) { *img = (r_vk_image_t){0}; } -void R_VkImageClear(VkCommandBuffer cmdbuf, VkImage image) { - const VkImageMemoryBarrier image_barriers[] = { { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = image, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }} }; +void R_VkImageClear(r_vk_image_t *img, struct vk_combuf_s* combuf, const VkClearColorValue* value) { + const VkImageSubresourceRange ranges[] = {{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }}; + const r_vkcombuf_barrier_image_t ib[] = {{ + .image = img, + // Could be VK_IMAGE_LAYOUT_GENERAL too + .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .access = VK_ACCESS_2_TRANSFER_WRITE_BIT, + }}; + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){ + .stage = VK_PIPELINE_STAGE_2_CLEAR_BIT, + .images = { + .items = ib, + .count = COUNTOF(ib), + }, + }); + + const VkClearColorValue zero = {0}; + vkCmdClearColorImage(combuf->cmdbuf, img->image, img->sync.layout, + value ? value : &zero, + COUNTOF(ranges), ranges); +} + +void R_VkImageBlit(struct vk_combuf_s *combuf, const r_vkimage_blit_args *args ) { + const r_vkcombuf_barrier_image_t ib[] = {{ + .image = args->src.image, + .layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .access = VK_ACCESS_2_TRANSFER_READ_BIT, + }, { + .image = args->dst.image, + .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .access = VK_ACCESS_2_TRANSFER_WRITE_BIT, + }}; + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){ + .stage = VK_PIPELINE_STAGE_2_BLIT_BIT, + .images = { + .items = ib, + .count = COUNTOF(ib), + }, + }); - const VkClearColorValue clear_value = {0}; + { + VkImageBlit region = {0}; + region.srcOffsets[1].x = args->src.width ? args->src.width : args->src.image->width; + region.srcOffsets[1].y = args->src.height ? args->src.height : args->src.image->height; + region.srcOffsets[1].z = args->src.depth ? args->src.depth : args->src.image->depth; - vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - 0, NULL, 0, NULL, COUNTOF(image_barriers), image_barriers); + region.dstOffsets[1].x = args->dst.width ? args->dst.width : args->dst.image->width; + region.dstOffsets[1].y = args->dst.height ? args->dst.height : args->dst.image->height; + region.dstOffsets[1].z = args->dst.depth ? 
args->dst.depth : args->dst.image->depth; - vkCmdClearColorImage(cmdbuf, image, VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &image_barriers->subresourceRange); + region.srcSubresource.aspectMask = region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.srcSubresource.layerCount = region.dstSubresource.layerCount = 1; // VK_REMAINING_ARRAY_LAYERS requires maintenance5. No need to use it now. + vkCmdBlitImage(combuf->cmdbuf, + args->src.image->image, args->src.image->sync.layout, + args->dst.image->image, args->dst.image->sync.layout, + 1, ®ion, + VK_FILTER_NEAREST); + } } -void R_VkImageBlit(VkCommandBuffer cmdbuf, const r_vkimage_blit_args *blit_args) { - { - const VkImageMemoryBarrier image_barriers[] = { { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = blit_args->src.image, - .srcAccessMask = blit_args->src.srcAccessMask, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = blit_args->src.oldLayout, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .subresourceRange = - (VkImageSubresourceRange){ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, { +typedef struct { + r_vk_image_t *image; + + struct { + // arena for entire layers * mips image + r_vkstaging_region_t lock; + + // current write offset into the arena + int cursor; + } staging; + + struct { + int begin, cursor, end; + } slices; +} image_upload_t; + +static struct { + r_vkstaging_user_handle_t staging; + + ARRAY_DYNAMIC_DECLARE(image_upload_t, images); + ARRAY_DYNAMIC_DECLARE(VkBufferImageCopy, slices); + ARRAY_DYNAMIC_DECLARE(VkImageMemoryBarrier, barriers); +} g_image_upload; + +static void imageStagingPush(void* userptr, struct vk_combuf_s *combuf, uint32_t allocations) { + (void)userptr; + const VkPipelineStageFlags2 assume_stage + = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + R_VkImageUploadCommit(combuf, assume_stage); +} + +qboolean R_VkImageInit(void) { + arrayDynamicInitT(&g_image_upload.images); + arrayDynamicInitT(&g_image_upload.slices); + arrayDynamicInitT(&g_image_upload.barriers); + + g_image_upload.staging = R_VkStagingUserCreate((r_vkstaging_user_create_t){ + .name = "image", + .userptr = NULL, + .push = imageStagingPush, + }); + + return true; +} + +void R_VkImageShutdown(void) { + ASSERT(g_image_upload.images.count == 0); + R_VkStagingUserDestroy(g_image_upload.staging); + arrayDynamicDestroyT(&g_image_upload.images); + arrayDynamicDestroyT(&g_image_upload.slices); + arrayDynamicDestroyT(&g_image_upload.barriers); +} + +void R_VkImageUploadCommit( struct vk_combuf_s *combuf, VkPipelineStageFlagBits dst_stages ) { + const int images_count = g_image_upload.images.count; + if (images_count == 0) + return; + + DEBUG("Uploading %d images", images_count); + + static int gpu_scope_id = -2; + if (gpu_scope_id == -2) + gpu_scope_id = R_VkGpuScope_Register("image_upload"); + const int gpu_scope_begin = R_VkCombufScopeBegin(combuf, gpu_scope_id); + + // Pre-allocate temp barriers buffer + arrayDynamicResizeT(&g_image_upload.barriers, images_count); + + // 1. 
Phase I: prepare all images to be transferred into + // 1.a Set up barriers for every valid image + int barriers_count = 0; + for (int i = 0; i < images_count; ++i) { + image_upload_t *const up = g_image_upload.images.items + i; + if (!up->image) { + DEBUG("Skipping image upload slot %d", i); + continue; + } + + ASSERT(up->image->upload_slot == i); + + g_image_upload.barriers.items[barriers_count++] = (VkImageMemoryBarrier) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = blit_args->dst.image, - .srcAccessMask = blit_args->dst.srcAccessMask, + .image = up->image->image, + .srcAccessMask = 0, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = blit_args->dst.oldLayout, + .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .subresourceRange = - (VkImageSubresourceRange){ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - } }; - - vkCmdPipelineBarrier(cmdbuf, - blit_args->in_stage, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, 0, NULL, COUNTOF(image_barriers), image_barriers); + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = up->image->mips, + .baseArrayLayer = 0, + .layerCount = up->image->layers, + }, + }; } - { - VkImageBlit region = {0}; - region.srcOffsets[1].x = blit_args->src.width; - region.srcOffsets[1].y = blit_args->src.height; - region.srcOffsets[1].z = 1; - region.dstOffsets[1].x = blit_args->dst.width; - region.dstOffsets[1].y = blit_args->dst.height; - region.dstOffsets[1].z = 1; - region.srcSubresource.aspectMask = region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - region.srcSubresource.layerCount = region.dstSubresource.layerCount = 1; - vkCmdBlitImage(cmdbuf, - blit_args->src.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - blit_args->dst.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - 1, ®ion, - VK_FILTER_NEAREST); + // 1.b Invoke the barriers + vkCmdPipelineBarrier(combuf->cmdbuf, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, // TODO VK_PIPELINE_STAGE_2_COPY_BIT + 0, 0, NULL, 0, NULL, + barriers_count, g_image_upload.barriers.items + ); + + // 2. Phase 2: issue copy commands for each valid image + for (int i = 0; i < images_count; ++i) { + image_upload_t *const up = g_image_upload.images.items + i; + if (!up->image) + continue; + + const int slices_count = up->slices.end - up->slices.begin; + DEBUG("Uploading image \"%s\": buffer=%08llx slices=%d", up->image->name, (unsigned long long)up->staging.lock.buffer, slices_count); + + ASSERT(up->staging.lock.buffer != VK_NULL_HANDLE); + ASSERT(up->slices.end == up->slices.cursor); + ASSERT(slices_count > 0); + + for (int j = 0; j < slices_count; ++j) { + const VkBufferImageCopy *const slice = g_image_upload.slices.items + up->slices.begin + j; + DEBUG(" slice[%d]: off=%llu rowl=%d height=%d off=(%d,%d,%d) ext=(%d,%d,%d)", + j, (unsigned long long)slice->bufferOffset, slice->bufferRowLength, slice->bufferImageHeight, + slice->imageOffset.x, + slice->imageOffset.y, + slice->imageOffset.z, + slice->imageExtent.width, + slice->imageExtent.height, + slice->imageExtent.depth + ); + } + + vkCmdCopyBufferToImage(combuf->cmdbuf, + up->staging.lock.buffer, + up->image->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + slices_count, + g_image_upload.slices.items + up->slices.begin); } - { - VkImageMemoryBarrier image_barriers[] = { - { + // 3. 
Phase 3: change all images layout to shader read only optimal + // 3.a Set up barriers for layout transition + barriers_count = 0; + for (int i = 0; i < images_count; ++i) { + image_upload_t *const up = g_image_upload.images.items + i; + if (!up->image) + continue; + + // Update image tracking state + up->image->sync.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + up->image->sync.read.access = VK_ACCESS_SHADER_READ_BIT; + up->image->sync.read.stage = dst_stages; + up->image->sync.write.access = VK_ACCESS_TRANSFER_WRITE_BIT; + up->image->sync.write.stage = VK_PIPELINE_STAGE_2_TRANSFER_BIT; + + g_image_upload.barriers.items[barriers_count++] = (VkImageMemoryBarrier) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = blit_args->dst.image, + .image = up->image->image, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - .subresourceRange = - (VkImageSubresourceRange){ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }}; - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - 0, 0, NULL, 0, NULL, COUNTOF(image_barriers), image_barriers); + .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = up->image->mips, + .baseArrayLayer = 0, + .layerCount = up->image->layers, + }, + }; + + // Mark image as uploaded + up->image->upload_slot = -1; + up->image = NULL; + + // TODO it would be nice to track uploading status further: + // 1. When uploading cmdbuf has been submitted to the GPU + // 2. When that cmdbuf has been processed. + // But that would entail quite a bit more state tracking, etc etc. Discomfort. 
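For context on the sync fields recorded in the loop above: they seed the per-image state that makeImageBarrier() in vk_combuf.c consumes, so after the commit the image is tracked as transfer-written and shader-readable in VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL. A later pass then only declares its own usage and gets a correct transfer-to-shader dependency; a sketch of such a consumer (tex_image and the fragment-shader stage are illustrative, not part of the patch):

	R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){
		.stage = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
		.images = {
			.count = 1,
			.items = &(r_vkcombuf_barrier_image_t){
				.image = tex_image,
				.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
				.access = VK_ACCESS_2_SHADER_READ_BIT,
			},
		},
	});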
} + + // 3.b Submit the barriers + // It's a massive set of barriers (1e3+), so using manual barriers instead of automatic combuf ones + vkCmdPipelineBarrier(combuf->cmdbuf, + VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages, + 0, 0, NULL, 0, NULL, + barriers_count, (VkImageMemoryBarrier*)g_image_upload.barriers.items + ); + + R_VkStagingUnlockBulk(g_image_upload.staging, barriers_count); + + R_VkCombufScopeEnd(combuf, gpu_scope_begin, VK_PIPELINE_STAGE_TRANSFER_BIT); + + // Clear out image upload queue + arrayDynamicResizeT(&g_image_upload.images, 0); + arrayDynamicResizeT(&g_image_upload.slices, 0); + arrayDynamicResizeT(&g_image_upload.barriers, 0); } void R_VkImageUploadBegin( r_vk_image_t *img ) { - const VkImageMemoryBarrier image_barrier = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = img->image, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = img->mips, - .baseArrayLayer = 0, - .layerCount = img->layers, - } - }; + ASSERT(img->upload_slot == -1); - // Command buffer might be invalidated on any slice load - const VkCommandBuffer cmdbuf = R_VkStagingGetCommandBuffer(); - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, 0, NULL, 1, &image_barrier); + /* TODO compute staging slices sizes properly + const uint32_t texel_block_size = R_VkImageFormatTexelBlockSize(img->format); + for (int layer = 0; layer < img->layers; ++layer) { + for (int mip = 0; mip < img->mips; ++mip) { + const int width = Q_max( 1, ( img->width >> mip )); + const int height = Q_max( 1, ( img->height >> mip )); + const int depth = Q_max( 1, ( img->depth >> mip )); + const size_t mip_size = CalcImageSize( pic->type, width, height, depth ); + } + } + */ + const size_t staging_size = img->image_size; + + // This is done speculatively to preserve internal image_upload invariant. + // Speculation: we might end up with staging implementation that, upon discovering that it ran out of free memory, + // would notify other modules that they'd need to commit their staging data, and thus we'd return to this module's + // R_VkImageUploadCommit(), which needs to see valid data. Therefore, don't touch its state until + // R_VkStagingLock returns. 
+ const r_vkstaging_region_t staging_lock = R_VkStagingLock(g_image_upload.staging, staging_size); + + img->upload_slot = g_image_upload.images.count; + arrayDynamicAppendT(&g_image_upload.images, NULL); + image_upload_t *const up = g_image_upload.images.items + img->upload_slot; + + up->image = img; + up->staging.lock = staging_lock; + up->staging.cursor = 0; + + const int slices = img->layers * img->mips; + up->slices.begin = up->slices.cursor = g_image_upload.slices.count; + up->slices.end = up->slices.begin + slices; + + //arrayDynamicAppendManyT(&g_image_upload.slices, slices, NULL); + arrayDynamicResizeT(&g_image_upload.slices, g_image_upload.slices.count + slices); } void R_VkImageUploadSlice( r_vk_image_t *img, int layer, int mip, int size, const void *data ) { @@ -269,63 +460,65 @@ void R_VkImageUploadSlice( r_vk_image_t *img, int layer, int mip, int size, cons const uint32_t depth = Q_max(1, img->depth >> mip); const uint32_t texel_block_size = R_VkImageFormatTexelBlockSize(img->format); - const vk_staging_image_args_t staging_args = { - .image = img->image, - .region = (VkBufferImageCopy) { - .bufferOffset = 0, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource = (VkImageSubresourceLayers){ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = mip, - .baseArrayLayer = layer, - .layerCount = 1, - }, - .imageExtent = (VkExtent3D){ - .width = width, - .height = height, - .depth = depth, - }, + ASSERT(img->upload_slot >= 0); + ASSERT(img->upload_slot < g_image_upload.images.count); + + image_upload_t *const up = g_image_upload.images.items + img->upload_slot; + ASSERT(up->image == img); + + ASSERT(up->slices.cursor < up->slices.end); + ASSERT(up->staging.cursor < img->image_size); + ASSERT(img->image_size - up->staging.cursor >= size); + + memcpy((char*)up->staging.lock.ptr + up->staging.cursor, data, size); + + g_image_upload.slices.items[up->slices.cursor] = (VkBufferImageCopy) { + .bufferOffset = up->staging.lock.offset + up->staging.cursor, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = (VkImageSubresourceLayers){ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = mip, + .baseArrayLayer = layer, + .layerCount = 1, + }, + .imageExtent = (VkExtent3D){ + .width = width, + .height = height, + .depth = depth, }, - .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .size = size, - .alignment = texel_block_size, }; - { - const vk_staging_region_t staging = R_VkStagingLockForImage(staging_args); - ASSERT(staging.ptr); - memcpy(staging.ptr, data, size); - R_VkStagingUnlock(staging.handle); - } + up->staging.cursor += size; + up->slices.cursor += 1; } void R_VkImageUploadEnd( r_vk_image_t *img ) { - // TODO Don't change layout here. Alternatively: - // I. Attach layout metadata to the image, and request its change next time it is used. - // II. Build-in layout transfer to staging commit and do it there on commit. 
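The removed end-of-upload barrier below is what the queue-then-commit flow above replaces: R_VkImageUploadBegin() locks one staging arena sized for the whole image, each R_VkImageUploadSlice() memcpy()s one layer/mip and records its VkBufferImageCopy, R_VkImageUploadEnd() only validates the bookkeeping, and the actual copies plus layout transitions are issued once in R_VkImageUploadCommit(). A caller-side sketch of that flow (slice_size()/slice_data() are placeholders for however the caller produces the pixel data):

	R_VkImageUploadBegin(img);
	for (int layer = 0; layer < img->layers; ++layer)
		for (int mip = 0; mip < img->mips; ++mip)
			R_VkImageUploadSlice(img, layer, mip, slice_size(layer, mip), slice_data(layer, mip));
	R_VkImageUploadEnd(img);
	// ...later, once per frame, before anything samples the new textures:
	R_VkImageUploadCommit(combuf, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);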
- - const VkImageMemoryBarrier image_barrier = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = img->image, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = img->mips, - .baseArrayLayer = 0, - .layerCount = img->layers, - } - }; + ASSERT(img->upload_slot >= 0); + ASSERT(img->upload_slot < g_image_upload.images.count); + + image_upload_t *const up = g_image_upload.images.items + img->upload_slot; + ASSERT(up->image == img); + + ASSERT(up->slices.cursor == up->slices.end); + ASSERT(up->staging.cursor <= img->image_size); +} + +static void cancelUpload( r_vk_image_t *img ) { + // Skip already uploaded (or never uploaded) images + if (img->upload_slot < 0) + return; + + WARN("Canceling uploading image \"%s\"", img->name); + + image_upload_t *const up = g_image_upload.images.items + img->upload_slot; + ASSERT(up->image == img); + + // Technically we won't need that staging region anymore at all, but it doesn't matter, + // it's just easier to mark it to be freed this way. + R_VkStagingUnlockBulk(g_image_upload.staging, 1); - // Commit is needed to make sure that all previous image loads have been submitted to cmdbuf - const VkCommandBuffer cmdbuf = R_VkStagingCommit()->cmdbuf; - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - // FIXME incorrect, we also use them in compute and potentially ray tracing shaders - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - 0, 0, NULL, 0, NULL, 1, &image_barrier); + // Mark upload slot as unused, and image as not subject to uploading + up->image = NULL; + img->upload_slot = -1; } diff --git a/ref/vk/vk_image.h b/ref/vk/vk_image.h index 55da9f9243..7d62217b0c 100644 --- a/ref/vk/vk_image.h +++ b/ref/vk/vk_image.h @@ -2,7 +2,12 @@ #include "vk_core.h" #include "vk_devmem.h" +qboolean R_VkImageInit(void); +void R_VkImageShutdown(void); + typedef struct r_vk_image_s { + char name[64]; + vk_devmem_t devmem; VkImage image; VkImageView view; @@ -15,6 +20,14 @@ typedef struct r_vk_image_s { int mips, layers; VkFormat format; uint32_t flags; + uint32_t image_size; + + int upload_slot; + + struct { + VkImageLayout layout; + r_vksync_scope_t write, read; + } sync; } r_vk_image_t; enum { @@ -37,22 +50,24 @@ typedef struct { r_vk_image_t R_VkImageCreate(const r_vk_image_create_t *create); void R_VkImageDestroy(r_vk_image_t *img); -void R_VkImageClear(VkCommandBuffer cmdbuf, VkImage image); +struct vk_combuf_s; +void R_VkImageClear(r_vk_image_t *img, struct vk_combuf_s* combuf, const VkClearColorValue*); typedef struct { - VkPipelineStageFlags in_stage; struct { - VkImage image; - int width, height; - VkImageLayout oldLayout; - VkAccessFlags srcAccessMask; + r_vk_image_t *image; + int width, height, depth; } src, dst; } r_vkimage_blit_args; -void R_VkImageBlit( VkCommandBuffer cmdbuf, const r_vkimage_blit_args *blit_args ); +void R_VkImageBlit(struct vk_combuf_s *combuf, const r_vkimage_blit_args *blit_args ); uint32_t R_VkImageFormatTexelBlockSize( VkFormat format ); +// Expects *img to be pinned and valid until either cancel or commit is called void R_VkImageUploadBegin( r_vk_image_t *img ); void R_VkImageUploadSlice( r_vk_image_t *img, int layer, int mip, int size, const void *data ); void R_VkImageUploadEnd( r_vk_image_t *img ); + +// Upload all enqueued images using
the given command buffer +void R_VkImageUploadCommit( struct vk_combuf_s *combuf, VkPipelineStageFlagBits dst_stages ); diff --git a/ref/vk/vk_light.c b/ref/vk/vk_light.c index a7549696c5..21b0e839ef 100644 --- a/ref/vk/vk_light.c +++ b/ref/vk/vk_light.c @@ -1200,11 +1200,10 @@ static void uploadGridRange( int begin, int end ) { ASSERT( count > 0 ); const int size = count * sizeof(struct LightCluster); - const vk_staging_region_t locked = R_VkStagingLockForBuffer( (vk_staging_buffer_args_t) { - .buffer = g_lights_.buffer.buffer, - .offset = sizeof(struct LightsMetadata) + begin * sizeof(struct LightCluster), - .size = size, - .alignment = 16, // WHY? + const vk_buffer_locked_t locked = R_VkBufferLock(&g_lights_.buffer, + (vk_buffer_lock_t) { + .offset = sizeof(struct LightsMetadata) + begin * sizeof(struct LightCluster), + .size = size, } ); ASSERT(locked.ptr); @@ -1222,7 +1221,7 @@ static void uploadGridRange( int begin, int end ) { memcpy(dst->polygons, src->polygons, sizeof(uint8_t) * src->num_polygons); } - R_VkStagingUnlock( locked.handle ); + R_VkBufferUnlock( locked ); g_lights_.stats.ranges_uploaded++; } @@ -1296,13 +1295,12 @@ static void uploadPointLights( struct LightsMetadata *metadata ) { } } -vk_lights_bindings_t VK_LightsUpload( void ) { +vk_lights_bindings_t VK_LightsUpload( struct vk_combuf_s *combuf ) { APROF_SCOPE_DECLARE_BEGIN(upload, __FUNCTION__); - const vk_staging_region_t locked = R_VkStagingLockForBuffer( (vk_staging_buffer_args_t) { - .buffer = g_lights_.buffer.buffer, - .offset = 0, - .size = sizeof(struct LightsMetadata), - .alignment = 16, // WHY? + const vk_buffer_locked_t locked = R_VkBufferLock(&g_lights_.buffer, + (vk_buffer_lock_t) { + .offset = 0, + .size = sizeof(struct LightsMetadata), } ); ASSERT(locked.ptr); @@ -1316,7 +1314,7 @@ vk_lights_bindings_t VK_LightsUpload( void ) { uploadPolygonLights( metadata ); uploadPointLights( metadata ); - R_VkStagingUnlock( locked.handle ); + R_VkBufferUnlock( locked ); uploadGrid(); @@ -1324,8 +1322,10 @@ vk_lights_bindings_t VK_LightsUpload( void ) { APROF_SCOPE_END(upload); + R_VkBufferStagingCommit(&g_lights_.buffer, combuf); + return (vk_lights_bindings_t){ - .buffer = g_lights_.buffer.buffer, + .buffer = &g_lights_.buffer, .metadata = { .offset = 0, .size = sizeof(struct LightsMetadata), diff --git a/ref/vk/vk_light.h b/ref/vk/vk_light.h index e800075c67..d127b312be 100644 --- a/ref/vk/vk_light.h +++ b/ref/vk/vk_light.h @@ -1,5 +1,6 @@ #pragma once +#include "vk_buffer.h" #include "vk_const.h" #include "vk_core.h" @@ -81,12 +82,13 @@ void RT_LightsFrameBegin( void ); void RT_LightsFrameEnd( void ); typedef struct { - VkBuffer buffer; + vk_buffer_t *buffer; struct { uint32_t offset, size; } metadata, grid; } vk_lights_bindings_t; -vk_lights_bindings_t VK_LightsUpload( void ); +struct vk_combuf_s; +vk_lights_bindings_t VK_LightsUpload( struct vk_combuf_s* ); qboolean RT_GetEmissiveForTexture( vec3_t out, int texture_id ); diff --git a/ref/vk/vk_logs.h b/ref/vk/vk_logs.h index 8f427776f5..91e671ed7d 100644 --- a/ref/vk/vk_logs.h +++ b/ref/vk/vk_logs.h @@ -15,6 +15,11 @@ X(rt) \ X(rmain) \ X(sprite) \ + X(img) \ + X(staging) \ + X(buf) \ + X(fctl) \ + X(combuf) \ enum { #define X(m) LogModule_##m, diff --git a/ref/vk/vk_misc.c b/ref/vk/vk_misc.c index afa1230b8e..2506b6602a 100644 --- a/ref/vk/vk_misc.c +++ b/ref/vk/vk_misc.c @@ -331,3 +331,39 @@ const char *R_VkColorSpaceName(VkColorSpaceKHR colorspace) { default: return "UNKNOWN"; } } + +const char *R_VkImageLayoutName(VkImageLayout layout) { + switch (layout) 
{ + case VK_IMAGE_LAYOUT_UNDEFINED: return "VK_IMAGE_LAYOUT_UNDEFINED"; + case VK_IMAGE_LAYOUT_GENERAL: return "VK_IMAGE_LAYOUT_GENERAL"; + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: return "VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL"; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: return "VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL"; + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: return "VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL"; + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: return "VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL"; + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: return "VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL"; + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: return "VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL"; + case VK_IMAGE_LAYOUT_PREINITIALIZED: return "VK_IMAGE_LAYOUT_PREINITIALIZED"; + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL: return "VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL"; + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL: return "VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL"; + case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL: return "VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL"; + case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL: return "VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL"; + case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL: return "VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL"; + case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL: return "VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL"; + case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL: return "VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL"; + case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL: return "VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL"; + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: return "VK_IMAGE_LAYOUT_PRESENT_SRC_KHR"; + case VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR: return "VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR"; + case VK_IMAGE_LAYOUT_VIDEO_DECODE_SRC_KHR: return "VK_IMAGE_LAYOUT_VIDEO_DECODE_SRC_KHR"; + case VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR: return "VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR"; + case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR: return "VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR"; + case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT: return "VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT"; + case VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR: return "VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR"; + case VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR: return "VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR"; + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_DST_KHR: return "VK_IMAGE_LAYOUT_VIDEO_ENCODE_DST_KHR"; + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR: return "VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR"; + case VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR: return "VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR"; + case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT: return "VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT"; + case VK_IMAGE_LAYOUT_MAX_ENUM: break; + } + return "UNKNOWN"; +} diff --git a/ref/vk/vk_ray_accel.c b/ref/vk/vk_ray_accel.c index ef7cf98af1..705384207e 100644 --- a/ref/vk/vk_ray_accel.c +++ b/ref/vk/vk_ray_accel.c @@ -5,12 +5,12 @@ #include "vk_ray_internal.h" #include "r_speeds.h" #include "vk_combuf.h" -#include "vk_staging.h" #include "vk_math.h" #include "vk_geometry.h" #include "vk_render.h" #include "vk_logs.h" +#include "arrays.h" #include "profiler.h" #include "xash3d_mathlib.h" @@ -18,12 +18,19 @@ #define MODULE_NAME "accel" #define LOG_MODULE rt +#define MAX_SCRATCH_BUFFER (32*1024*1024) +// FIXME compute this by lazily allocating #define 
MAX_ACCELS_BUFFER (128*1024*1024) +#define MAX_ACCELS_BUFFER (256*1024*1024) + typedef struct rt_blas_s { const char *debug_name; rt_blas_usage_e usage; VkAccelerationStructureKHR blas; + // Zero if not built + VkDeviceAddress address; + // Max dynamic geoms for usage == kBlasBuildDynamicFast int max_geoms; @@ -33,7 +40,8 @@ typedef struct rt_blas_s { VkAccelerationStructureGeometryKHR *geoms; uint32_t *max_prim_counts; VkAccelerationStructureBuildRangeInfoKHR *ranges; - qboolean built; + + qboolean is_built, needs_to_be_built; } build; } rt_blas_t; @@ -44,13 +52,14 @@ static struct { // TODO: unify this with render buffer -- really? // Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT vk_buffer_t accels_buffer; + VkDeviceAddress accels_buffer_addr; struct alo_pool_s *accels_buffer_alloc; // Temp: lives only during a single frame (may have many in flight) // Used for building ASes; // Needs: AS_STORAGE_BIT, SHADER_DEVICE_ADDRESS_BIT vk_buffer_t scratch_buffer; - VkDeviceAddress accels_buffer_addr, scratch_buffer_addr; + VkDeviceAddress scratch_buffer_addr; // Temp-ish: used for making TLAS, contains addressed to all used BLASes // Lifetime and nature of usage similar to scratch_buffer @@ -60,11 +69,21 @@ static struct { VkDeviceAddress tlas_geom_buffer_addr; r_flipping_buffer_t tlas_geom_buffer_alloc; - // TODO need several TLASes for N frames in flight - VkAccelerationStructureKHR tlas; + struct { + VkAccelerationStructureKHR handle; + + VkAccelerationStructureGeometryKHR geometry; + uint32_t max_prim_count; + VkAccelerationStructureBuildRangeInfoKHR range_info; + VkAccelerationStructureBuildGeometryInfoKHR geometry_info; + VkAccelerationStructureBuildSizesInfoKHR sizes_info; + } tlas; + // Per-frame data that is accumulated between RayFrameBegin and End calls struct { + BOUNDED_ARRAY_DECLARE(rt_draw_instance_t, instances, MAX_INSTANCES); + uint32_t scratch_offset; // for building dynamic blases } frame; @@ -74,9 +93,8 @@ static struct { } stats; struct { - // TODO two arrays for a single vkCmdBuildAccelerationStructuresKHR() call - // FIXME This is for testing only - BOUNDED_ARRAY_DECLARE(blas, rt_blas_t*, 256); + BOUNDED_ARRAY_DECLARE(VkAccelerationStructureBuildGeometryInfoKHR, geometry_infos, MAX_INSTANCES); + BOUNDED_ARRAY_DECLARE(VkAccelerationStructureBuildRangeInfoKHR*, range_infos, MAX_INSTANCES); } build; cvar_t *cv_force_culling; @@ -123,35 +141,69 @@ static VkDeviceAddress getAccelAddress(VkAccelerationStructureKHR as) { return vkGetAccelerationStructureDeviceAddressKHR(vk_core.device, &asdai); } -static qboolean buildAccel(VkBuffer geometry_buffer, VkAccelerationStructureBuildGeometryInfoKHR *build_info, uint32_t scratch_buffer_size, const VkAccelerationStructureBuildRangeInfoKHR *build_ranges) { - // FIXME this is definitely not the right place. 
We should upload everything in bulk, and only then build blases in bulk too - vk_combuf_t *const combuf = R_VkStagingCommit(); +static void tlasCreate(void) { + g_accel.tlas.geometry = (VkAccelerationStructureGeometryKHR) { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR, + .geometry.instances = + (VkAccelerationStructureGeometryInstancesDataKHR){ + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, + .data.deviceAddress = 0, + .arrayOfPointers = VK_FALSE, + }, + }; + g_accel.tlas.max_prim_count = MAX_INSTANCES; + g_accel.tlas.range_info = (VkAccelerationStructureBuildRangeInfoKHR) { + .primitiveCount = g_accel.frame.instances.count, + }; + g_accel.tlas.geometry_info = (VkAccelerationStructureBuildGeometryInfoKHR) { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, + .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, + .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .geometryCount = 1, + .pGeometries = &g_accel.tlas.geometry, + .srcAccelerationStructure = VK_NULL_HANDLE, + }; + g_accel.tlas.sizes_info = getAccelSizes(&g_accel.tlas.geometry_info, &g_accel.tlas.max_prim_count); + g_accel.tlas.handle = createAccel("TLAS", g_accel.tlas.geometry_info.type, g_accel.tlas.sizes_info.accelerationStructureSize); + ASSERT(g_accel.tlas.handle != VK_NULL_HANDLE); + g_accel.tlas.geometry_info.dstAccelerationStructure = g_accel.tlas.handle; +} + +static void tlasBuild(vk_combuf_t *combuf, VkDeviceAddress instances_addr) { + R_VkBufferStagingCommit(&g_accel.tlas_geom_buffer, combuf); { - const VkBufferMemoryBarrier bmb[] = { { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT, // FIXME - .buffer = geometry_buffer, - .offset = 0, // FIXME - .size = VK_WHOLE_SIZE, // FIXME - } }; - vkCmdPipelineBarrier(combuf->cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - //VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, - 0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL); + const r_vkcombuf_barrier_buffer_t buffers[] = {{ + .buffer = &g_accel.accels_buffer, + .access = VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR, // TODO? WRITE? 
we're writing tlas here too + }, { + .buffer = &g_accel.tlas_geom_buffer, + .access = VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR, + }}; + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){ + .stage = VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + .buffers = { + .count = COUNTOF(buffers), + .items = buffers, + }, + }); } + const uint32_t scratch_buffer_size = g_accel.tlas.sizes_info.buildScratchSize; + //gEngine.Con_Reportf("sratch offset = %d, req=%d", g_accel.frame.scratch_offset, scratch_buffer_size); if (MAX_SCRATCH_BUFFER < g_accel.frame.scratch_offset + scratch_buffer_size) { ERR("Scratch buffer overflow: left %u bytes, but need %u", MAX_SCRATCH_BUFFER - g_accel.frame.scratch_offset, scratch_buffer_size); - return false; + ASSERT(!"Scratch buffer overflow"); } - build_info->scratchData.deviceAddress = g_accel.scratch_buffer_addr + g_accel.frame.scratch_offset; + g_accel.tlas.geometry.geometry.instances.data.deviceAddress = instances_addr; + g_accel.tlas.range_info.primitiveCount = g_accel.frame.instances.count; + g_accel.tlas.geometry_info.scratchData.deviceAddress = g_accel.scratch_buffer_addr + g_accel.frame.scratch_offset; //uint32_t scratch_offset_initial = g_accel.frame.scratch_offset; g_accel.frame.scratch_offset += scratch_buffer_size; @@ -159,108 +211,20 @@ static qboolean buildAccel(VkBuffer geometry_buffer, VkAccelerationStructureBuil //gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size); - g_accel.stats.accels_built++; - static int scope_id = -2; if (scope_id == -2) - scope_id = R_VkGpuScope_Register("build_as"); + scope_id = R_VkGpuScope_Register("build_tlas"); const int begin_index = R_VkCombufScopeBegin(combuf, scope_id); - const VkAccelerationStructureBuildRangeInfoKHR *p_build_ranges = build_ranges; - vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, build_info, &p_build_ranges); + const VkAccelerationStructureBuildRangeInfoKHR *p_build_ranges = &g_accel.tlas.range_info; + vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, 1, &g_accel.tlas.geometry_info, &p_build_ranges); R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); - - return true; -} - -// TODO split this into smaller building blocks in a separate module -qboolean createOrUpdateAccelerationStructure(vk_combuf_t *combuf, const as_build_args_t *args) { - ASSERT(args->geoms); - ASSERT(args->n_geoms > 0); - ASSERT(args->p_accel); - - const qboolean should_create = *args->p_accel == VK_NULL_HANDLE; - - VkAccelerationStructureBuildGeometryInfoKHR build_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, - .type = args->type, - .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, - .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, - .geometryCount = args->n_geoms, - .pGeometries = args->geoms, - .srcAccelerationStructure = VK_NULL_HANDLE, - }; - - const VkAccelerationStructureBuildSizesInfoKHR build_size = getAccelSizes(&build_info, args->max_prim_counts); - - if (should_create) { - *args->p_accel = createAccel(args->debug_name, args->type, build_size.accelerationStructureSize); - - if (!args->p_accel) - return false; - - if (args->out_accel_addr) - *args->out_accel_addr = getAccelAddress(*args->p_accel); - - if (args->inout_size) - *args->inout_size = build_size.accelerationStructureSize; - - // gEngine.Con_Reportf("AS=%p, n_geoms=%u, build: %#x %d %#x", 
*args->p_accel, args->n_geoms, buffer_offset, asci.size, buffer_offset + asci.size); - } - - // If not enough data for building, just create - if (!combuf || !args->build_ranges) - return true; - - if (args->inout_size) - ASSERT(*args->inout_size >= build_size.accelerationStructureSize); - - build_info.dstAccelerationStructure = *args->p_accel; - const VkBuffer geometry_buffer = R_GeometryBuffer_Get(); - return buildAccel(geometry_buffer, &build_info, build_size.buildScratchSize, args->build_ranges); -} - -static void createTlas( vk_combuf_t *combuf, VkDeviceAddress instances_addr ) { - const VkAccelerationStructureGeometryKHR tl_geom[] = { - { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, - //.flags = VK_GEOMETRY_OPAQUE_BIT, - .geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR, - .geometry.instances = - (VkAccelerationStructureGeometryInstancesDataKHR){ - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, - .data.deviceAddress = instances_addr, - .arrayOfPointers = VK_FALSE, - }, - }, - }; - const uint32_t tl_max_prim_counts[COUNTOF(tl_geom)] = { MAX_INSTANCES }; - const VkAccelerationStructureBuildRangeInfoKHR tl_build_range = { - .primitiveCount = g_ray_model_state.frame.instances_count, - }; - const as_build_args_t asrgs = { - .geoms = tl_geom, - .max_prim_counts = tl_max_prim_counts, - .build_ranges = !combuf ? NULL : &tl_build_range, - .n_geoms = COUNTOF(tl_geom), - .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, - // we can't really rebuild TLAS because instance count changes are not allowed .dynamic = true, - .dynamic = false, - .p_accel = &g_accel.tlas, - .debug_name = "TLAS", - .out_accel_addr = NULL, - .inout_size = NULL, - }; - if (!createOrUpdateAccelerationStructure(combuf, &asrgs)) { - gEngine.Host_Error("Could not create/update TLAS\n"); - return; - } } static qboolean blasPrepareBuild(struct rt_blas_s *blas, VkDeviceAddress geometry_addr) { ASSERT(blas); ASSERT(blas->blas); - if (blas->build.built && blas->usage == kBlasBuildStatic) { + if (blas->build.is_built && blas->usage == kBlasBuildStatic) { ASSERT(!"Attempting to build static BLAS twice"); return false; } @@ -290,89 +254,117 @@ static qboolean blasPrepareBuild(struct rt_blas_s *blas, VkDeviceAddress geometr //gEngine.Con_Reportf("AS=%p, n_geoms=%u, scratch: %#x %d %#x", *args->p_accel, args->n_geoms, scratch_offset_initial, scratch_buffer_size, scratch_offset_initial + scratch_buffer_size); - g_accel.stats.accels_built++; - return true; } -static void buildBlases(vk_combuf_t *combuf) { - (void)(combuf); +static void blasBuildEnqueue(rt_blas_t* blas, VkDeviceAddress geometry_buffer_address) { + // Nothing to do if this BLAS doesn't need to be (re)built + if (!blas->build.needs_to_be_built) + return; + + // FIXME handle: at the very least we could just ignore this BLAS for this frame + ASSERT(blasPrepareBuild(blas, geometry_buffer_address)); - const VkBuffer geometry_buffer = R_GeometryBuffer_Get(); - const VkDeviceAddress geometry_addr = R_VkBufferGetDeviceAddress(geometry_buffer); + // Mark as built and clear the rebuild request + blas->build.is_built = true; + blas->build.needs_to_be_built = false; - // FIXME get rid of this when staging doesn't own copying ops anymore - vk_combuf_t *const combuf_staging_fixme = R_VkStagingCommit(); + BOUNDED_ARRAY_APPEND_ITEM(g_accel.build.geometry_infos, blas->build.info); + BOUNDED_ARRAY_APPEND_ITEM(g_accel.build.range_infos, blas->build.ranges); + ASSERT(g_accel.build.geometry_infos.count == g_accel.build.range_infos.count); +} -
// TODO remove, should be handled by render graph +static void blasBuildPerform(vk_combuf_t *combuf, vk_buffer_t *geom) { + R_VkBufferStagingCommit(geom, combuf); { - const VkBufferMemoryBarrier bmb[] = { { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT, - .buffer = geometry_buffer, - .offset = 0, - .size = VK_WHOLE_SIZE, - } }; - vkCmdPipelineBarrier(combuf_staging_fixme->cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, - 0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL); + const r_vkcombuf_barrier_buffer_t buffers[] = {{ + .buffer = &g_accel.accels_buffer, + .access = VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, + }, { + .buffer = geom, + .access = VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR, + }}; + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){ + .stage = VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + .buffers = { + .count = COUNTOF(buffers), + .items = buffers, + }, + }); } - for (int i = 0; i < g_accel.build.blas.count; ++i) { - rt_blas_t *const blas = g_accel.build.blas.items[i]; - if (!blasPrepareBuild(blas, geometry_addr)) - // FIXME handle - continue; - - static int scope_id = -2; - if (scope_id == -2) - scope_id = R_VkGpuScope_Register("build_as"); - const int begin_index = R_VkCombufScopeBegin(combuf_staging_fixme, scope_id); - const VkAccelerationStructureBuildRangeInfoKHR *p_build_ranges = blas->build.ranges; - // TODO one call to build them all - vkCmdBuildAccelerationStructuresKHR(combuf_staging_fixme->cmdbuf, 1, &blas->build.info, &p_build_ranges); - R_VkCombufScopeEnd(combuf_staging_fixme, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); - - blas->build.built = true; - } + ASSERT(g_accel.build.geometry_infos.count == g_accel.build.range_infos.count); + const uint32_t count = g_accel.build.geometry_infos.count; + if (count == 0) + return; // Nothing to build + + static int scope_id = -2; + if (scope_id == -2) + scope_id = R_VkGpuScope_Register("build_blases"); - g_accel.build.blas.count = 0; + const int begin_index = R_VkCombufScopeBegin(combuf, scope_id); + vkCmdBuildAccelerationStructuresKHR(combuf->cmdbuf, count, + g_accel.build.geometry_infos.items, + (const VkAccelerationStructureBuildRangeInfoKHR* const *)g_accel.build.range_infos.items); + + R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); + + g_accel.stats.accels_built = count; + g_accel.build.geometry_infos.count = 0; + g_accel.build.range_infos.count = 0; } vk_resource_t RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { APROF_SCOPE_DECLARE_BEGIN(prepare, __FUNCTION__); - ASSERT(g_ray_model_state.frame.instances_count > 0); - buildBlases(combuf); + const uint32_t instances_count = g_accel.frame.instances.count; + + if (instances_count == 0) { + APROF_SCOPE_END(prepare); + return (vk_resource_t){ + .type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, + .value = (vk_descriptor_value_t){ + .accel = (VkWriteDescriptorSetAccelerationStructureKHR) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, + .accelerationStructureCount = 0, + .pAccelerationStructures = NULL, + .pNext = NULL, + }, + }, + }; + } DEBUG_BEGIN(combuf->cmdbuf, "prepare tlas"); R_FlippingBuffer_Flip( &g_accel.tlas_geom_buffer_alloc ); - const uint32_t instance_offset = 
R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, g_ray_model_state.frame.instances_count, 1); + const uint32_t instance_offset = R_FlippingBuffer_Alloc(&g_accel.tlas_geom_buffer_alloc, instances_count, 1); ASSERT(instance_offset != ALO_ALLOC_FAILED); + vk_buffer_t* const geom = R_GeometryBuffer_Get(); + const VkDeviceAddress geometry_buffer_address = R_VkBufferGetDeviceAddress(geom->buffer); + // Upload all blas instances references to GPU mem { - const vk_staging_region_t headers_lock = R_VkStagingLockForBuffer((vk_staging_buffer_args_t){ - .buffer = g_ray_model_state.model_headers_buffer.buffer, - .offset = 0, - .size = g_ray_model_state.frame.instances_count * sizeof(struct ModelHeader), - .alignment = 16, + const vk_buffer_locked_t headers_lock = R_VkBufferLock(&g_ray_model_state.model_headers_buffer, + (vk_buffer_lock_t){ + .offset = 0, + .size = instances_count * sizeof(struct ModelHeader), }); ASSERT(headers_lock.ptr); VkAccelerationStructureInstanceKHR* inst = ((VkAccelerationStructureInstanceKHR*)g_accel.tlas_geom_buffer.mapped) + instance_offset; - for (int i = 0; i < g_ray_model_state.frame.instances_count; ++i) { - const rt_draw_instance_t* const instance = g_ray_model_state.frame.instances + i; - ASSERT(instance->blas_addr != 0); + for (uint32_t i = 0; i < instances_count; ++i) { + const rt_draw_instance_t* const instance = g_accel.frame.instances.items + i; + + blasBuildEnqueue(instance->blas, geometry_buffer_address); + + ASSERT(instance->blas->address != 0); inst[i] = (VkAccelerationStructureInstanceKHR){ .instanceCustomIndex = instance->kusochki_offset, .instanceShaderBindingTableRecordOffset = 0, - .accelerationStructureReference = instance->blas_addr, + .accelerationStructureReference = instance->blas->address, }; const VkGeometryInstanceFlagsKHR flags = @@ -419,49 +411,22 @@ vk_resource_t RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { Matrix4x4_ToArrayFloatGL(instance->prev_transform_row, (float*)header->prev_transform); } - R_VkStagingUnlock(headers_lock.handle); + R_VkBufferUnlock(headers_lock); + R_VkBufferStagingCommit(&g_ray_model_state.model_headers_buffer, combuf); } - g_accel.stats.instances_count = g_ray_model_state.frame.instances_count; + g_accel.stats.instances_count = instances_count; - // Barrier for building all BLASes - // BLAS building is now in cmdbuf, need to synchronize with results - { - VkBufferMemoryBarrier bmb[] = {{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, // | VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, - .buffer = g_accel.accels_buffer.buffer, - // FIXME this is completely wrong. Offset ans size are BLAS-specifig - .offset = instance_offset * sizeof(VkAccelerationStructureInstanceKHR), - .size = g_ray_model_state.frame.instances_count * sizeof(VkAccelerationStructureInstanceKHR), - }}; - vkCmdPipelineBarrier(combuf->cmdbuf, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - 0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL); - } + // Build all scheduled BLASes + blasBuildPerform(combuf, geom); // 2. 
Build TLAS - createTlas(combuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR)); + tlasBuild(combuf, g_accel.tlas_geom_buffer_addr + instance_offset * sizeof(VkAccelerationStructureInstanceKHR)); DEBUG_END(combuf->cmdbuf); - // TODO return vk_resource_t with callback to all this "do the preparation and barriers" crap, instead of doing it here - { - const VkBufferMemoryBarrier bmb[] = { { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - // FIXME also incorrect -- here we must barrier on tlas_geom_buffer, not accels_buffer - .buffer = g_accel.accels_buffer.buffer, - .offset = 0, - .size = VK_WHOLE_SIZE, - } }; - vkCmdPipelineBarrier(combuf->cmdbuf, - VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, - VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, NULL, COUNTOF(bmb), bmb, 0, NULL); - } + // Consume instances into this frame, no further instances are expected + g_accel.frame.instances.count = 0; + g_accel.frame.scratch_offset = 0; APROF_SCOPE_END(prepare); return (vk_resource_t){ @@ -470,7 +435,7 @@ vk_resource_t RT_VkAccelPrepareTlas(vk_combuf_t *combuf) { .accel = (VkWriteDescriptorSetAccelerationStructureKHR) { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, .accelerationStructureCount = 1, - .pAccelerationStructures = &g_accel.tlas, + .pAccelerationStructures = &g_accel.tlas.handle, .pNext = NULL, }, }, @@ -517,8 +482,8 @@ qboolean RT_VkAccelInit(void) { } void RT_VkAccelShutdown(void) { - if (g_accel.tlas != VK_NULL_HANDLE) - vkDestroyAccelerationStructureKHR(vk_core.device, g_accel.tlas, NULL); + if (g_accel.tlas.handle != VK_NULL_HANDLE) + vkDestroyAccelerationStructureKHR(vk_core.device, g_accel.tlas.handle, NULL); VK_BufferDestroy(&g_accel.scratch_buffer); VK_BufferDestroy(&g_accel.accels_buffer); @@ -542,20 +507,21 @@ void RT_VkAccelNewMap(void) { // Recreate tlas // Why here and not in init: to make sure that its memory is preserved. Map init will clear all memory regions. 
{ - if (g_accel.tlas != VK_NULL_HANDLE) { - vkDestroyAccelerationStructureKHR(vk_core.device, g_accel.tlas, NULL); - g_accel.tlas = VK_NULL_HANDLE; + if (g_accel.tlas.handle != VK_NULL_HANDLE) { + vkDestroyAccelerationStructureKHR(vk_core.device, g_accel.tlas.handle, NULL); + g_accel.tlas.handle = VK_NULL_HANDLE; } - createTlas(VK_NULL_HANDLE, g_accel.tlas_geom_buffer_addr); + tlasCreate(); } } -void RT_VkAccelFrameBegin(void) { - g_accel.frame.scratch_offset = 0; -} - static void blasFillGeometries(rt_blas_t *blas, const vk_render_geometry_t *geoms, int geoms_count) { + // geoms_count is not constant for dynamic models, and it shouldn't exceed max_geoms by design + ASSERT(geoms_count <= blas->max_geoms); + + blas->build.info.geometryCount = geoms_count; + for (int i = 0; i < geoms_count; ++i) { const vk_render_geometry_t *mg = geoms + i; const uint32_t prim_count = mg->element_count / 3; @@ -593,6 +559,7 @@ struct rt_blas_s* RT_BlasCreate(rt_blas_create_t args) { blas->debug_name = args.name; blas->usage = args.usage; + blas->max_geoms = args.geoms_count; blas->build.info = (VkAccelerationStructureBuildGeometryInfoKHR){ .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, @@ -628,6 +595,7 @@ struct rt_blas_s* RT_BlasCreate(rt_blas_create_t args) { blas->build.sizes = getAccelSizes(&blas->build.info, blas->build.max_prim_counts); blas->blas = createAccel(blas->debug_name, VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, blas->build.sizes.accelerationStructureSize); + blas->address = getAccelAddress(blas->blas); if (!blas->blas) { ERR("Couldn't create vk accel"); @@ -637,8 +605,8 @@ struct rt_blas_s* RT_BlasCreate(rt_blas_create_t args) { blas->build.info.dstAccelerationStructure = blas->blas; blas->max_geoms = blas->build.info.geometryCount; - if (!args.dont_build) - BOUNDED_ARRAY_APPEND(g_accel.build.blas, blas); + blas->build.is_built = false; + blas->build.needs_to_be_built = true; return blas; @@ -660,19 +628,12 @@ void RT_BlasDestroy(struct rt_blas_s* blas) { if (blas->build.ranges) Mem_Free(blas->build.ranges); - /* if (blas->max_prims) */ - /* Mem_Free(blas->max_prims); */ - if (blas->blas) vkDestroyAccelerationStructureKHR(vk_core.device, blas->blas, NULL); Mem_Free(blas); } -VkDeviceAddress RT_BlasGetDeviceAddress(struct rt_blas_s *blas) { - return getAccelAddress(blas->blas); -} - qboolean RT_BlasUpdate(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count) { switch (blas->usage) { case kBlasBuildStatic: @@ -680,7 +641,7 @@ qboolean RT_BlasUpdate(struct rt_blas_s *blas, const struct vk_render_geometry_s break; case kBlasBuildDynamicUpdate: ASSERT(geoms_count == blas->max_geoms); - if (blas->build.built) { + if (blas->build.is_built) { blas->build.info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR; blas->build.info.srcAccelerationStructure = blas->blas; } @@ -704,6 +665,16 @@ qboolean RT_BlasUpdate(struct rt_blas_s *blas, const struct vk_render_geometry_s return false; } - BOUNDED_ARRAY_APPEND(g_accel.build.blas, blas); + blas->build.needs_to_be_built = true; return true; } + +void RT_VkAccelAddDrawInstance(const rt_draw_instance_t* instance) { + const int max_instances = (int)COUNTOF(g_accel.frame.instances.items); + if (g_accel.frame.instances.count >= max_instances) { + gEngine.Con_Printf(S_ERROR "Too many RT draw instances, max = %d\n", max_instances); + return; + } + + BOUNDED_ARRAY_APPEND_UNSAFE(g_accel.frame.instances) = *instance; +} diff --git a/ref/vk/vk_ray_accel.h b/ref/vk/vk_ray_accel.h index 
21bbed7e83..3e246f88b9 100644 --- a/ref/vk/vk_ray_accel.h +++ b/ref/vk/vk_ray_accel.h @@ -6,7 +6,18 @@ qboolean RT_VkAccelInit(void); void RT_VkAccelShutdown(void); void RT_VkAccelNewMap(void); -void RT_VkAccelFrameBegin(void); struct vk_combuf_s; vk_resource_t RT_VkAccelPrepareTlas(struct vk_combuf_s *combuf); + +typedef struct rt_draw_instance_t { + struct rt_blas_s *blas; + uint32_t kusochki_offset; + matrix3x4 transform_row; + matrix4x4 prev_transform_row; + vec4_t color; + uint32_t material_mode; // MATERIAL_MODE_ from ray_interop.h + uint32_t material_flags; // material_flag_bits_e +} rt_draw_instance_t; + +void RT_VkAccelAddDrawInstance(const rt_draw_instance_t*); diff --git a/ref/vk/vk_ray_internal.h b/ref/vk/vk_ray_internal.h index b9a3132712..fe86ef7cb0 100644 --- a/ref/vk/vk_ray_internal.h +++ b/ref/vk/vk_ray_internal.h @@ -1,8 +1,6 @@ #pragma once -#include "vk_core.h" #include "vk_buffer.h" -#include "vk_const.h" #include "vk_rtx.h" #define MAX_INSTANCES 2048 @@ -13,37 +11,6 @@ typedef struct Kusok vk_kusok_data_t; -typedef struct rt_draw_instance_s { - VkDeviceAddress blas_addr; - uint32_t kusochki_offset; - matrix3x4 transform_row; - matrix4x4 prev_transform_row; - vec4_t color; - uint32_t material_mode; // MATERIAL_MODE_ from ray_interop.h - uint32_t material_flags; // material_flag_bits_e -} rt_draw_instance_t; - -typedef struct { - const char *debug_name; - VkAccelerationStructureKHR *p_accel; - const VkAccelerationStructureGeometryKHR *geoms; - const uint32_t *max_prim_counts; - const VkAccelerationStructureBuildRangeInfoKHR *build_ranges; - uint32_t n_geoms; - VkAccelerationStructureTypeKHR type; - qboolean dynamic; - - VkDeviceAddress *out_accel_addr; - uint32_t *inout_size; -} as_build_args_t; - -struct vk_combuf_s; -qboolean createOrUpdateAccelerationStructure(struct vk_combuf_s *combuf, const as_build_args_t *args); - -#define MAX_SCRATCH_BUFFER (32*1024*1024) -// FIXME compute this by lazily allocating #define MAX_ACCELS_BUFFER (128*1024*1024) -#define MAX_ACCELS_BUFFER (256*1024*1024) - typedef struct { // Geometry metadata. Lifetime is similar to geometry lifetime itself. 
// Semantically close to render buffer (describes layout for those objects) @@ -56,14 +23,6 @@ typedef struct { // Model header // Array of struct ModelHeader: color, material_mode, prev_transform vk_buffer_t model_headers_buffer; - - // Per-frame data that is accumulated between RayFrameBegin and End calls - struct { - rt_draw_instance_t instances[MAX_INSTANCES]; - int instances_count; - - uint32_t scratch_offset; // for building dynamic blases - } frame; } xvk_ray_model_state_t; extern xvk_ray_model_state_t g_ray_model_state; @@ -79,7 +38,6 @@ typedef struct { rt_blas_usage_e usage; const struct vk_render_geometry_s *geoms; int geoms_count; - qboolean dont_build; // for dynamic models } rt_blas_create_t; // Creates BLAS and schedules it to be built next frame @@ -90,22 +48,6 @@ void RT_BlasDestroy(struct rt_blas_s* blas); // Update dynamic BLAS, schedule it for build/update qboolean RT_BlasUpdate(struct rt_blas_s *blas, const struct vk_render_geometry_s *geoms, int geoms_count); -// TODO blas struct can have its addr field known -VkDeviceAddress RT_BlasGetDeviceAddress(struct rt_blas_s *blas); - -typedef struct rt_kusochki_s { - uint32_t offset; - int count; - int internal_index__; -} rt_kusochki_t; - -rt_kusochki_t RT_KusochkiAllocLong(int count); -uint32_t RT_KusochkiAllocOnce(int count); -void RT_KusochkiFree(const rt_kusochki_t*); - -//struct vk_render_geometry_s; -//qboolean RT_KusochkiUpload(uint32_t kusochki_offset, const struct vk_render_geometry_s *geoms, int geoms_count, int override_texture_id, const vec4_t *override_color); - qboolean RT_DynamicModelInit(void); void RT_DynamicModelShutdown(void); diff --git a/ref/vk/vk_ray_model.c b/ref/vk/vk_ray_model.c index 9f62e2fb03..0016cf6621 100644 --- a/ref/vk/vk_ray_model.c +++ b/ref/vk/vk_ray_model.c @@ -3,20 +3,24 @@ #include "vk_rtx.h" #include "vk_materials.h" #include "vk_render.h" -#include "vk_staging.h" #include "vk_logs.h" +#include "vk_ray_accel.h" #include "profiler.h" -#include "eiface.h" #include "xash3d_mathlib.h" #include xvk_ray_model_state_t g_ray_model_state; +typedef struct rt_kusochki_s { + uint32_t offset; + int count; + int internal_index__; +} rt_kusochki_t; + typedef struct rt_model_s { struct rt_blas_s *blas; - VkDeviceAddress blas_addr; rt_kusochki_t kusochki; } rt_model_t; @@ -147,11 +151,10 @@ void RT_RayModel_Clear(void) { } void XVK_RayModel_ClearForNextFrame( void ) { - g_ray_model_state.frame.instances_count = 0; R_DEBuffer_Flip(&g_ray_model_state.kusochki_alloc); } -rt_kusochki_t RT_KusochkiAllocLong(int count) { +static rt_kusochki_t kusochkiAllocLong(int count) { // TODO Proper block allocator, not just double-ended buffer uint32_t kusochki_offset = R_DEBuffer_Alloc(&g_ray_model_state.kusochki_alloc, LifetimeStatic, count, 1); @@ -167,7 +170,7 @@ rt_kusochki_t RT_KusochkiAllocLong(int count) { }; } -uint32_t RT_KusochkiAllocOnce(int count) { +static uint32_t kusochkiAllocOnce(int count) { // TODO Proper block allocator uint32_t kusochki_offset = R_DEBuffer_Alloc(&g_ray_model_state.kusochki_alloc, LifetimeDynamic, count, 1); @@ -179,38 +182,36 @@ uint32_t RT_KusochkiAllocOnce(int count) { return kusochki_offset; } -void RT_KusochkiFree(const rt_kusochki_t *kusochki) { +static void kusochkiFree(const rt_kusochki_t *kusochki) { // TODO block alloc PRINT_NOT_IMPLEMENTED(); } // TODO this function can't really fail. It'd mean that staging is completely broken. 
-qboolean RT_KusochkiUpload(uint32_t kusochki_offset, const struct vk_render_geometry_s *geoms, int geoms_count, const r_vk_material_t *override_material, const vec4_t *override_colors) { - const vk_staging_buffer_args_t staging_args = { - .buffer = g_ray_model_state.kusochki_buffer.buffer, +qboolean kusochkiUpload(uint32_t kusochki_offset, const struct vk_render_geometry_s *geoms, int geoms_count, const r_vk_material_t *override_material, const vec4_t *override_colors) { + const vk_buffer_lock_t lock_args = { .offset = kusochki_offset * sizeof(vk_kusok_data_t), .size = geoms_count * sizeof(vk_kusok_data_t), - .alignment = 16, }; - const vk_staging_region_t kusok_staging = R_VkStagingLockForBuffer(staging_args); + const vk_buffer_locked_t lock = R_VkBufferLock(&g_ray_model_state.kusochki_buffer, lock_args); - if (!kusok_staging.ptr) { + if (!lock.ptr) { gEngine.Con_Printf(S_ERROR "Couldn't allocate staging for %d kusochkov\n", geoms_count); return false; } - vk_kusok_data_t *const p = kusok_staging.ptr; + vk_kusok_data_t *const p = lock.ptr; for (int i = 0; i < geoms_count; ++i) { const vk_render_geometry_t *geom = geoms + i; applyMaterialToKusok(p + i, geom, override_material, override_colors ? override_colors[i] : NULL); } - R_VkStagingUnlock(kusok_staging.handle); + R_VkBufferUnlock(lock); return true; } struct rt_model_s *RT_ModelCreate(rt_model_create_t args) { - const rt_kusochki_t kusochki = RT_KusochkiAllocLong(args.geometries_count); + const rt_kusochki_t kusochki = kusochkiAllocLong(args.geometries_count); if (kusochki.count == 0) { gEngine.Con_Printf(S_ERROR "Cannot allocate kusochki for %s\n", args.debug_name); return NULL; @@ -228,12 +229,11 @@ struct rt_model_s *RT_ModelCreate(rt_model_create_t args) { } // Invokes staging, so this should be after all resource creation - RT_KusochkiUpload(kusochki.offset, args.geometries, args.geometries_count, NULL, NULL); + kusochkiUpload(kusochki.offset, args.geometries, args.geometries_count, NULL, NULL); { rt_model_t *const ret = Mem_Malloc(vk_core.pool, sizeof(*ret)); ret->blas = blas; - ret->blas_addr = RT_BlasGetDeviceAddress(ret->blas); ret->kusochki = kusochki; return ret; } @@ -243,7 +243,7 @@ struct rt_model_s *RT_ModelCreate(rt_model_create_t args) { RT_BlasDestroy(blas); if (kusochki.count) - RT_KusochkiFree(&kusochki); + kusochkiFree(&kusochki); return NULL; } @@ -256,7 +256,7 @@ void RT_ModelDestroy(struct rt_model_s* model) { RT_BlasDestroy(model->blas); if (model->kusochki.count) - RT_KusochkiFree(&model->kusochki); + kusochkiFree(&model->kusochki); Mem_Free(model); } @@ -274,7 +274,7 @@ qboolean RT_ModelUpdate(struct rt_model_s *model, const struct vk_render_geometr return false; // Also update materials - RT_KusochkiUpload(model->kusochki.offset, geometries, geometries_count, NULL, NULL); + kusochkiUpload(model->kusochki.offset, geometries, geometries_count, NULL, NULL); return true; } @@ -294,7 +294,7 @@ qboolean RT_ModelUpdateMaterials(struct rt_model_s *model, const struct vk_rende const int offset = geom_indices[begin]; const int count = i - begin; ASSERT(offset + count <= geometries_count); - if (!RT_KusochkiUpload(model->kusochki.offset + offset, geometries + offset, count, NULL, NULL)) { + if (!kusochkiUpload(model->kusochki.offset + offset, geometries + offset, count, NULL, NULL)) { APROF_SCOPE_END(update_materials); return false; } @@ -307,7 +307,7 @@ qboolean RT_ModelUpdateMaterials(struct rt_model_s *model, const struct vk_rende const int offset = geom_indices[begin]; const int count = geom_indices_count - 
begin; ASSERT(offset + count <= geometries_count); - if (!RT_KusochkiUpload(model->kusochki.offset + offset, geometries + offset, count, NULL, NULL)) { + if (!kusochkiUpload(model->kusochki.offset + offset, geometries + offset, count, NULL, NULL)) { APROF_SCOPE_END(update_materials); return false; @@ -318,15 +318,6 @@ qboolean RT_ModelUpdateMaterials(struct rt_model_s *model, const struct vk_rende return true; } -rt_draw_instance_t *getDrawInstance(void) { - if (g_ray_model_state.frame.instances_count >= ARRAYSIZE(g_ray_model_state.frame.instances)) { - gEngine.Con_Printf(S_ERROR "Too many RT draw instances, max = %d\n", (int)(ARRAYSIZE(g_ray_model_state.frame.instances))); - return NULL; - } - - return g_ray_model_state.frame.instances + (g_ray_model_state.frame.instances_count++); -} - static qboolean isLegacyBlendingMode(int material_mode) { switch (material_mode) { case MATERIAL_MODE_BLEND_ADD: @@ -374,33 +365,33 @@ void RT_FrameAddModel( struct rt_model_s *model, rt_frame_add_model_t args ) { uint32_t kusochki_offset = model->kusochki.offset; if (args.override.material != NULL) { - kusochki_offset = RT_KusochkiAllocOnce(args.override.geoms_count); + kusochki_offset = kusochkiAllocOnce(args.override.geoms_count); if (kusochki_offset == ALO_ALLOC_FAILED) return; - if (!RT_KusochkiUpload(kusochki_offset, args.override.geoms, args.override.geoms_count, args.override.material, NULL)) { + if (!kusochkiUpload(kusochki_offset, args.override.geoms, args.override.geoms_count, args.override.material, NULL)) { gEngine.Con_Printf(S_ERROR "Couldn't upload kusochki for instanced model\n"); return; } } - rt_draw_instance_t *const draw_instance = getDrawInstance(); - if (!draw_instance) - return; - - draw_instance->blas_addr = model->blas_addr; - draw_instance->kusochki_offset = kusochki_offset; - draw_instance->material_mode = args.material_mode; - draw_instance->material_flags = args.material_flags; + rt_draw_instance_t draw_instance = { + .blas = model->blas, + .kusochki_offset = kusochki_offset, + .material_mode = args.material_mode, + .material_flags = args.material_flags, + }; // Legacy blending is done in sRGB-γ space if (isLegacyBlendingMode(args.material_mode)) - Vector4Copy(*args.color_srgb, draw_instance->color); + Vector4Copy(*args.color_srgb, draw_instance.color); else - sRGBtoLinearVec4(*args.color_srgb, draw_instance->color); + sRGBtoLinearVec4(*args.color_srgb, draw_instance.color); - Matrix3x4_Copy(draw_instance->transform_row, args.transform); - Matrix4x4_Copy(draw_instance->prev_transform_row, args.prev_transform); + Matrix3x4_Copy(draw_instance.transform_row, args.transform); + Matrix4x4_Copy(draw_instance.prev_transform_row, args.prev_transform); + + RT_VkAccelAddDrawInstance(&draw_instance); } #define MAX_RT_DYNAMIC_GEOMETRIES 256 @@ -441,7 +432,6 @@ qboolean RT_DynamicModelInit(void) { .usage = kBlasBuildDynamicFast, .geoms = fake_geoms, .geoms_count = MAX_RT_DYNAMIC_GEOMETRIES, - .dont_build = true, }); if (!blas) { @@ -451,7 +441,6 @@ qboolean RT_DynamicModelInit(void) { } g_dyn.groups[i].blas = blas; - g_dyn.groups[i].blas_addr = RT_BlasGetDeviceAddress(blas); } Mem_Free(fake_geoms); @@ -470,17 +459,18 @@ void RT_DynamicModelProcessFrame(void) { APROF_SCOPE_DECLARE_BEGIN(process, __FUNCTION__); for (int i = 0; i < MATERIAL_MODE_COUNT; ++i) { rt_dynamic_t *const dyn = g_dyn.groups + i; + rt_draw_instance_t draw_instance; + if (!dyn->geometries_count) continue; - rt_draw_instance_t* draw_instance; - const uint32_t kusochki_offset = RT_KusochkiAllocOnce(dyn->geometries_count); + 
const uint32_t kusochki_offset = kusochkiAllocOnce(dyn->geometries_count); if (kusochki_offset == ALO_ALLOC_FAILED) { gEngine.Con_Printf(S_ERROR "Couldn't allocate kusochki once for %d geoms of %s, skipping\n", dyn->geometries_count, group_names[i]); goto tail; } - if (!RT_KusochkiUpload(kusochki_offset, dyn->geometries, dyn->geometries_count, NULL, dyn->colors)) { + if (!kusochkiUpload(kusochki_offset, dyn->geometries, dyn->geometries_count, NULL, dyn->colors)) { gEngine.Con_Printf(S_ERROR "Couldn't build blas for %d geoms of %s, skipping\n", dyn->geometries_count, group_names[i]); goto tail; } @@ -490,17 +480,20 @@ void RT_DynamicModelProcessFrame(void) { goto tail; } - draw_instance = getDrawInstance(); - if (!draw_instance) - goto tail; + draw_instance = (rt_draw_instance_t){ + .blas = dyn->blas, + .kusochki_offset = kusochki_offset, + .material_mode = i, + .material_flags = 0, + .color = {1, 1, 1, 1}, + }; + + // xash3d_mathlib is weird, can't just assign these + // TODO: make my own mathlib of perfectly assignable structs + Matrix3x4_LoadIdentity(draw_instance.transform_row); + Matrix4x4_LoadIdentity(draw_instance.prev_transform_row); - draw_instance->blas_addr = dyn->blas_addr; - draw_instance->kusochki_offset = kusochki_offset; - draw_instance->material_mode = i; - draw_instance->material_flags = 0; - Vector4Set(draw_instance->color, 1, 1, 1, 1); - Matrix3x4_LoadIdentity(draw_instance->transform_row); - Matrix4x4_LoadIdentity(draw_instance->prev_transform_row); + RT_VkAccelAddDrawInstance(&draw_instance); tail: dyn->geometries_count = 0; diff --git a/ref/vk/vk_render.c b/ref/vk/vk_render.c index 5ad72af9bd..037ffd25b2 100644 --- a/ref/vk/vk_render.c +++ b/ref/vk/vk_render.c @@ -3,6 +3,7 @@ #include "vk_core.h" #include "vk_buffer.h" #include "vk_geometry.h" +#include "vk_combuf.h" #include "vk_const.h" #include "vk_common.h" #include "vk_cvar.h" @@ -57,8 +58,6 @@ enum { kVkPipeline_AT, // no blend, depth RW, alpha test kVkPipeline_1_1_R, // blend: src + dst, depth test - // Special pipeline for skybox (tex = TEX_BASE_SKYBOX) - //kVkPipeline_Sky, kVkPipeline_COUNT, }; @@ -181,12 +180,6 @@ static qboolean createSkyboxPipeline( void ) { static qboolean createPipelines( void ) { - /* VkPushConstantRange push_const = { */ - /* .offset = 0, */ - /* .size = sizeof(AVec3f), */ - /* .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, */ - /* }; */ - VkDescriptorSetLayout descriptor_layouts[] = { vk_desc_fixme.one_uniform_buffer_layout, vk_desc_fixme.one_texture_layout, @@ -198,8 +191,6 @@ static qboolean createPipelines( void ) .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = ARRAYSIZE(descriptor_layouts), .pSetLayouts = descriptor_layouts, - /* .pushConstantRangeCount = 1, */ - /* .pPushConstantRanges = &push_const, */ }; // FIXME store layout separately @@ -552,7 +543,7 @@ static uint32_t getUboOffset_FIXME( void ) { if (g_render_state.current_ubo_offset_FIXME == ALO_ALLOC_FAILED) return UINT32_MAX; - uniform_data_t *const ubo = (uniform_data_t*)((byte*)g_render.uniform_buffer.mapped + g_render_state.current_ubo_offset_FIXME); + uniform_data_t *const ubo = PTR_CAST(uniform_data_t, (byte*)g_render.uniform_buffer.mapped + g_render_state.current_ubo_offset_FIXME); memcpy(&g_render_state.current_uniform_data, &g_render_state.dirty_uniform_data, sizeof(g_render_state.dirty_uniform_data)); memcpy(ubo, &g_render_state.current_uniform_data, sizeof(*ubo)); g_render_state.uniform_data_set_mask |= UNIFORM_UPLOADED; @@ -613,7 +604,7 @@ static uint32_t writeDlightsToUBO( void ) 
gEngine.Con_Printf(S_ERROR "Cannot allocate UBO for DLights\n"); return UINT32_MAX; } - ubo_lights = (vk_ubo_lights_t*)((byte*)(g_render.uniform_buffer.mapped) + ubo_lights_offset); + ubo_lights = PTR_CAST(vk_ubo_lights_t, (byte*)(g_render.uniform_buffer.mapped) + ubo_lights_offset); // TODO this should not be here (where? vk_scene?) for (int i = 0; i < MAX_DLIGHTS && num_lights < ARRAYSIZE(ubo_lights->light); ++i) { @@ -641,55 +632,13 @@ static uint32_t writeDlightsToUBO( void ) return ubo_lights_offset; } -/* -static void debugBarrier( VkCommandBuffer cmdbuf, VkBuffer buf) { - const VkBufferMemoryBarrier bmb[] = { { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - .buffer = buf, - .offset = 0, - .size = VK_WHOLE_SIZE, - } }; - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); -} -*/ - -void VK_Render_FIXME_Barrier( VkCommandBuffer cmdbuf ) { - const VkBuffer geom_buffer = R_GeometryBuffer_Get(); - //debugBarrier(cmdbuf, geom_buffer); - // FIXME: this should be automatic and dynamically depend on actual usage, resolving this with render graph - { - const VkBufferMemoryBarrier bmb[] = { { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask - = VK_ACCESS_INDEX_READ_BIT - | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT - | (vk_core.rtx ? ( VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_SHADER_READ_BIT) : 0), - .buffer = geom_buffer, - .offset = 0, - .size = VK_WHOLE_SIZE, - } }; - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | (vk_core.rtx - ? 
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR - | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR - | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT - : 0), - 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); - } -} - -void VK_RenderEnd( VkCommandBuffer cmdbuf, qboolean draw, uint32_t width, uint32_t height, int frame_index ) +void VK_RenderEnd( vk_combuf_t* combuf, qboolean draw, uint32_t width, uint32_t height, int frame_index ) { if (!draw) return; + VkCommandBuffer cmdbuf = combuf->cmdbuf; + // TODO we can sort collected draw commands for more efficient and correct rendering // that requires adding info about distance to camera for correct order-dependent blending @@ -712,10 +661,10 @@ void VK_RenderEnd( VkCommandBuffer cmdbuf, qboolean draw, uint32_t width, uint32 ASSERT(!g_render_state.current_frame_is_ray_traced); { - const VkBuffer geom_buffer = R_GeometryBuffer_Get(); + vk_buffer_t* const geom = R_GeometryBuffer_Get(); const VkDeviceSize offset = 0; - vkCmdBindVertexBuffers(cmdbuf, 0, 1, &geom_buffer, &offset); - vkCmdBindIndexBuffer(cmdbuf, geom_buffer, 0, VK_INDEX_TYPE_UINT16); + vkCmdBindVertexBuffers(cmdbuf, 0, 1, &geom->buffer, &offset); + vkCmdBindIndexBuffer(cmdbuf, geom->buffer, 0, VK_INDEX_TYPE_UINT16); } for (int i = 0; i < g_render_state.num_draw_commands; ++i) { @@ -747,7 +696,7 @@ void VK_RenderEnd( VkCommandBuffer cmdbuf, qboolean draw, uint32_t width, uint32 // Compute and upload UBO stuff { - sky_uniform_data_t* const sky_ubo = (sky_uniform_data_t*)((byte*)g_render.uniform_buffer.mapped + ubo_offset); + sky_uniform_data_t* const sky_ubo = PTR_CAST(sky_uniform_data_t, (byte*)g_render.uniform_buffer.mapped + ubo_offset); // FIXME model matrix Matrix4x4_ToArrayFloatGL(g_render_state.projection_view, (float*)sky_ubo->mvp); @@ -847,26 +796,22 @@ void VK_RenderDebugLabelEnd( void ) drawCmdPushDebugLabelEnd(); } -void VK_RenderEndRTX( struct vk_combuf_s* combuf, VkImageView img_dst_view, VkImage img_dst, uint32_t w, uint32_t h ) -{ - const VkBuffer geom_buffer = R_GeometryBuffer_Get(); +void VK_RenderEndRTX( struct vk_combuf_s* combuf, struct r_vk_image_s *dst) { + vk_buffer_t *const geom = R_GeometryBuffer_Get(); ASSERT(vk_core.rtx); + R_VkBufferStagingCommit(geom, combuf); + { const vk_ray_frame_render_args_t args = { .combuf = combuf, - .dst = { - .image_view = img_dst_view, - .image = img_dst, - .width = w, - .height = h, - }, + .dst = dst, .projection = &g_render_state.vk_projection, .view = &g_camera.viewMatrix, .geometry_data = { - .buffer = geom_buffer, + .buffer = geom, .size = VK_WHOLE_SIZE, }, diff --git a/ref/vk/vk_render.h b/ref/vk/vk_render.h index e9a5747949..b17d397df2 100644 --- a/ref/vk/vk_render.h +++ b/ref/vk/vk_render.h @@ -165,8 +165,9 @@ void VK_RenderDebugLabelBegin( const char *label ); void VK_RenderDebugLabelEnd( void ); void VK_RenderBegin( qboolean ray_tracing ); -void VK_RenderEnd( VkCommandBuffer cmdbuf, qboolean draw, uint32_t width, uint32_t height, int frame_index ); + struct vk_combuf_s; -void VK_RenderEndRTX( struct vk_combuf_s* combuf, VkImageView img_dst_view, VkImage img_dst, uint32_t w, uint32_t h ); +void VK_RenderEnd( struct vk_combuf_s*, qboolean draw, uint32_t width, uint32_t height, int frame_index ); -void VK_Render_FIXME_Barrier( VkCommandBuffer cmdbuf ); +struct r_vk_image_s; +void VK_RenderEndRTX( struct vk_combuf_s* combuf, struct r_vk_image_s *dst); diff --git a/ref/vk/vk_resources.c b/ref/vk/vk_resources.c index e9ea71aca7..1e9f74d085 100644 --- a/ref/vk/vk_resources.c +++ b/ref/vk/vk_resources.c @@ -3,6 +3,8 @@ #include 
"vk_image.h" #include "vk_common.h" #include "vk_logs.h" +#include "vk_combuf.h" +#include "arrays.h" #define LOG_MODULE rt @@ -100,9 +102,10 @@ void R_VkResourcesSetBuiltinFIXME(r_vk_resources_builtin_fixme_t args) { #define RES_SET_BUFFER(name, type_, source_, offset_, size_) \ g_res.res[ExternalResource_##name].resource = (vk_resource_t){ \ .type = type_, \ + .ref.buffer = (source_), \ .value = (vk_descriptor_value_t) { \ .buffer = (VkDescriptorBufferInfo) { \ - .buffer = (source_), \ + .buffer = (source_)->buffer, \ .offset = (offset_), \ .range = (size_), \ } \ @@ -112,15 +115,15 @@ void R_VkResourcesSetBuiltinFIXME(r_vk_resources_builtin_fixme_t args) { RES_SET_BUFFER(ubo, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, args.uniform_buffer, args.frame_index * args.uniform_unit_size, sizeof(struct UniformBuffer)); #define RES_SET_SBUFFER_FULL(name, source_) \ - RES_SET_BUFFER(name, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, source_.buffer, 0, source_.size) + RES_SET_BUFFER(name, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, source_, 0, (source_)->size) // TODO move this to ray model producer - RES_SET_SBUFFER_FULL(kusochki, g_ray_model_state.kusochki_buffer); - RES_SET_SBUFFER_FULL(model_headers, g_ray_model_state.model_headers_buffer); + RES_SET_SBUFFER_FULL(kusochki, &g_ray_model_state.kusochki_buffer); + RES_SET_SBUFFER_FULL(model_headers, &g_ray_model_state.model_headers_buffer); // TODO move these to vk_geometry - RES_SET_SBUFFER_FULL(indices, args.geometry_data); - RES_SET_SBUFFER_FULL(vertices, args.geometry_data); + RES_SET_SBUFFER_FULL(indices, args.geometry_data.buffer); + RES_SET_SBUFFER_FULL(vertices, args.geometry_data.buffer); // TODO move this to lights RES_SET_BUFFER(lights, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, args.light_bindings->buffer, args.light_bindings->metadata.offset, args.light_bindings->metadata.size); @@ -130,7 +133,7 @@ void R_VkResourcesSetBuiltinFIXME(r_vk_resources_builtin_fixme_t args) { } // FIXME not even sure what this functions is supposed to do in the end -void R_VkResourcesFrameBeginStateChangeFIXME(VkCommandBuffer cmdbuf, qboolean discontinuity) { +void R_VkResourcesFrameBeginStateChangeFIXME(vk_combuf_t* combuf, qboolean discontinuity) { // Transfer previous frames before they had a chance of their resource-barrier metadata overwritten (as there's no guaranteed order for them) for (int i = ExternalResource_COUNT; i < MAX_RESOURCES; ++i) { rt_resource_t* const res = g_res.res + i; @@ -153,13 +156,10 @@ void R_VkResourcesFrameBeginStateChangeFIXME(VkCommandBuffer cmdbuf, qboolean di src->image = tmp_img; // If there was no initial state, prepare it. (this should happen only for the first frame) - if (discontinuity || res->resource.write.pipelines == 0) { + if (discontinuity || res->image.sync.write.stage == 0) { // TODO is there a better way? Can image be cleared w/o explicit clear op? 
- DEBUG("discontinuity: %s", res->name); - R_VkImageClear( cmdbuf, res->image.image ); - res->resource.write.pipelines = VK_PIPELINE_STAGE_TRANSFER_BIT; - res->resource.write.image_layout = VK_IMAGE_LAYOUT_GENERAL; - res->resource.write.access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + WARN("discontinuity: %s", res->name); + R_VkImageClear( &res->image, combuf, NULL ); } } @@ -169,98 +169,62 @@ void R_VkResourcesFrameBeginStateChangeFIXME(VkCommandBuffer cmdbuf, qboolean di if (!res->name[0] || !res->image.image || res->source_index_plus_1 > 0) continue; - //res->resource.read = res->resource.write = (ray_resource_state_t){0}; - res->resource.write = (ray_resource_state_t){0}; + // 2024-12-12 E384 1:56:00 Commented out: Try not clearing this state. Could be beneficial for later barrier-based extra-cmdbuf sync + //res->resource.deprecate.write = (ray_resource_state_t){0}; } } -void R_VkResourceAddToBarrier(vk_resource_t *res, qboolean write, VkPipelineStageFlags dst_stage_mask, r_vk_barrier_t *barrier) { - if (res->type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { - // TODO - return; - } - - if (write) { - // No reads are happening - //ASSERT(res->read.pipelines == 0); - - const ray_resource_state_t new_state = { - .pipelines = dst_stage_mask, - .access_mask = VK_ACCESS_SHADER_WRITE_BIT, - .image_layout = VK_IMAGE_LAYOUT_GENERAL, - }; - - R_VkBarrierAddImage(barrier, (r_vk_barrier_image_t){ - .image = res->value.image_object->image, - .src_stage_mask = res->read.pipelines | res->write.pipelines, - // FIXME MEMORY_WRITE is needed to silence write-after-write layout-transition validation hazard - .src_access_mask = res->read.access_mask | res->write.access_mask | VK_ACCESS_MEMORY_WRITE_BIT, - .dst_access_mask = new_state.access_mask, - .old_layout = VK_IMAGE_LAYOUT_UNDEFINED, - .new_layout = new_state.image_layout, - }); - - // Mark that read would need a transition - res->read = (ray_resource_state_t){0}; - res->write = new_state; - } else { - // Write happened - ASSERT(res->write.pipelines != 0); - - // Check if no more barriers needed - if ((res->read.pipelines & dst_stage_mask) == dst_stage_mask) - return; - - res->read = (ray_resource_state_t) { - .pipelines = res->read.pipelines | dst_stage_mask, - .access_mask = VK_ACCESS_SHADER_READ_BIT, - .image_layout = VK_IMAGE_LAYOUT_GENERAL, - }; - - R_VkBarrierAddImage(barrier, (r_vk_barrier_image_t){ - .image = res->value.image_object->image, - .src_stage_mask = res->write.pipelines, - .src_access_mask = res->write.access_mask, - .dst_access_mask = res->read.access_mask, - .old_layout = res->write.image_layout, - .new_layout = res->read.image_layout, - }); - } +static void barrierAddBuffer(r_vk_barrier_t *barrier, vk_buffer_t *buf, VkAccessFlags access) { + const r_vkcombuf_barrier_buffer_t bb = { + .buffer = buf, + .access = access, + }; + BOUNDED_ARRAY_APPEND_ITEM(barrier->buffers, bb); } -void R_VkBarrierAddImage(r_vk_barrier_t *barrier, r_vk_barrier_image_t image) { - barrier->src_stage_mask |= image.src_stage_mask; - const VkImageMemoryBarrier ib = (VkImageMemoryBarrier) { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .image = image.image, - .srcAccessMask = image.src_access_mask, - .dstAccessMask = image.dst_access_mask, - .oldLayout = image.old_layout, - .newLayout = image.new_layout, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - BOUNDED_ARRAY_APPEND(barrier->images, ib); +void 
R_VkResourceAddToBarrier(vk_resource_t *res, qboolean write, VkPipelineStageFlags2 dst_stage_mask, r_vk_barrier_t *barrier) { + switch (res->type) { + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + { + const r_vkcombuf_barrier_image_t image_barrier = { + .image = res->ref.image, + // Image must remain in GENERAL layout regardless of r/w. + // Storage image reads still require GENERAL, not SHADER_READ_ONLY_OPTIMAL + .layout = VK_IMAGE_LAYOUT_GENERAL, + .access = write ? VK_ACCESS_2_SHADER_WRITE_BIT : VK_ACCESS_2_SHADER_READ_BIT, + }; + BOUNDED_ARRAY_APPEND_ITEM(barrier->images, image_barrier); + } + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + ASSERT(!write); + barrierAddBuffer(barrier, res->ref.buffer, VK_ACCESS_2_SHADER_READ_BIT); + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + // nothing for now, as all textures are static at this point + break; + case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + // nop + break; + default: + ASSERT(!"Unsupported descriptor type"); + } } -void R_VkBarrierCommit(VkCommandBuffer cmdbuf, r_vk_barrier_t *barrier, VkPipelineStageFlags dst_stage_mask) { - if (barrier->images.count == 0) +void R_VkBarrierCommit(vk_combuf_t* combuf, r_vk_barrier_t *barrier, VkPipelineStageFlags2 dst_stage_mask) { + if (barrier->images.count == 0 && barrier->buffers.count == 0) return; - // TODO vkCmdPipelineBarrier2() - vkCmdPipelineBarrier(cmdbuf, - barrier->src_stage_mask == 0 - ? VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT - : barrier->src_stage_mask, - dst_stage_mask, - 0, 0, NULL, 0, NULL, barrier->images.count, barrier->images.items); + R_VkCombufIssueBarrier(combuf, (r_vkcombuf_barrier_t){ + .stage = dst_stage_mask, + .buffers.items = barrier->buffers.items, + .buffers.count = barrier->buffers.count, + .images.items = barrier->images.items, + .images.count = barrier->images.count, + }); // Mark as used - barrier->src_stage_mask = 0; barrier->images.count = 0; + barrier->buffers.count = 0; } diff --git a/ref/vk/vk_resources.h b/ref/vk/vk_resources.h index 909c9d3fb4..75db73f79f 100644 --- a/ref/vk/vk_resources.h +++ b/ref/vk/vk_resources.h @@ -3,6 +3,8 @@ #include "vk_core.h" #include "vk_descriptor.h" #include "vk_image.h" +#include "vk_combuf.h" // r_vkcombuf_barrier_buffer_t +#include "arrays.h" // TODO remove #include "vk_light.h" @@ -28,17 +30,14 @@ enum { ExternalResource_COUNT, }; -typedef struct { - VkAccessFlags access_mask; - VkImageLayout image_layout; - VkPipelineStageFlagBits pipelines; -} ray_resource_state_t; - struct xvk_image_s; typedef struct vk_resource_s { VkDescriptorType type; - ray_resource_state_t write, read; vk_descriptor_value_t value; + union { + vk_buffer_t *buffer; + r_vk_image_t *image; + } ref; } vk_resource_t; typedef struct vk_resource_s *vk_resource_p; @@ -47,6 +46,7 @@ typedef struct { char name[64]; vk_resource_t resource; r_vk_image_t image; + vk_buffer_t *buffer; int refcount; int source_index_plus_1; } rt_resource_t; @@ -65,37 +65,25 @@ void R_VkResourcesCleanup(void); typedef struct { uint32_t frame_index; - VkBuffer uniform_buffer; + vk_buffer_t *uniform_buffer; uint32_t uniform_unit_size; struct { - VkBuffer buffer; // must be the same as in vk_ray_model_create_t TODO: validate or make impossible to specify incorrectly + vk_buffer_t *buffer; // must be the same as in vk_ray_model_create_t TODO: validate or make impossible to specify incorrectly uint64_t size; } geometry_data; const vk_lights_bindings_t *light_bindings; } r_vk_resources_builtin_fixme_t; void 
R_VkResourcesSetBuiltinFIXME(r_vk_resources_builtin_fixme_t builtin); -void R_VkResourcesFrameBeginStateChangeFIXME(VkCommandBuffer cmdbuf, qboolean discontinuity); - +struct vk_combuf_s; +void R_VkResourcesFrameBeginStateChangeFIXME(struct vk_combuf_s* combuf, qboolean discontinuity); typedef struct { - // TODO VK_KHR_synchronization2, has a slightly different (better) semantics - VkPipelineStageFlags src_stage_mask; - BOUNDED_ARRAY_DECLARE(images, VkImageMemoryBarrier, 16); - //BOUNDED_ARRAY_DECLARE(buffers, VkBufferMemoryBarrier, 16); + BOUNDED_ARRAY_DECLARE(r_vkcombuf_barrier_image_t, images, 32); + BOUNDED_ARRAY_DECLARE(r_vkcombuf_barrier_buffer_t, buffers, 16); } r_vk_barrier_t; -typedef struct { - VkImage image; - VkPipelineStageFlags src_stage_mask; - VkAccessFlags src_access_mask; - VkAccessFlags dst_access_mask; - VkImageLayout old_layout; - VkImageLayout new_layout; -} r_vk_barrier_image_t; - -void R_VkBarrierAddImage(r_vk_barrier_t *barrier, r_vk_barrier_image_t image); -void R_VkBarrierCommit(VkCommandBuffer cmdbuf, r_vk_barrier_t *barrier, VkPipelineStageFlags dst_stage_mask); - -void R_VkResourceAddToBarrier(vk_resource_t *res, qboolean write, VkPipelineStageFlags dst_stage_mask, r_vk_barrier_t *barrier); +void R_VkBarrierCommit(struct vk_combuf_s* combuf, r_vk_barrier_t *barrier, VkPipelineStageFlags2 dst_stage_mask); + +void R_VkResourceAddToBarrier(vk_resource_t *res, qboolean write, VkPipelineStageFlags2 dst_stage_mask, r_vk_barrier_t *barrier); diff --git a/ref/vk/vk_rmain.c b/ref/vk/vk_rmain.c index 507c1de576..2ae89423ba 100644 --- a/ref/vk/vk_rmain.c +++ b/ref/vk/vk_rmain.c @@ -693,5 +693,8 @@ int EXPORT GetRefAPI( int version, ref_interface_t *funcs, ref_api_t *engfuncs, memcpy( &gEngine, engfuncs, sizeof( ref_api_t )); gpGlobals = globals; + INFO("GetRefAPI version=%d (REF_API_VERSION=%d) funcs=%p engfuncs=%p globals=%p", + version, REF_API_VERSION, funcs, engfuncs, globals); + return REF_API_VERSION; } diff --git a/ref/vk/vk_rtx.c b/ref/vk/vk_rtx.c index 31a631a437..83bd6353b2 100644 --- a/ref/vk/vk_rtx.c +++ b/ref/vk/vk_rtx.c @@ -19,7 +19,6 @@ #include "profiler.h" -#include "eiface.h" #include "xash3d_mathlib.h" #include @@ -69,6 +68,8 @@ static struct { } g_rtx = {0}; void VK_RayNewMapBegin( void ) { + // TODO it seems like these are unnecessary leftovers. Moreover, they are actively harmful, + // as they recreate things that are in fact pretty much static. Untangle this. 
RT_VkAccelNewMap(); RT_RayModel_Clear(); } @@ -76,7 +77,6 @@ void VK_RayNewMapBegin( void ) { void VK_RayFrameBegin( void ) { ASSERT(vk_core.rtx); - RT_VkAccelFrameBegin(); XVK_RayModel_ClearForNextFrame(); RT_LightsFrameBegin(); } @@ -165,7 +165,7 @@ static uint32_t getRandomSeed( void ) { } static void prepareUniformBuffer( const vk_ray_frame_render_args_t *args, int frame_index, uint32_t frame_counter, float fov_angle_y, int frame_width, int frame_height ) { - struct UniformBuffer *ubo = (struct UniformBuffer*)((char*)g_rtx.uniform_buffer.mapped + frame_index * g_rtx.uniform_unit_size); + struct UniformBuffer *ubo = PTR_CAST(struct UniformBuffer, (char*)g_rtx.uniform_buffer.mapped + frame_index * g_rtx.uniform_unit_size); matrix4x4 proj_inv, view_inv; Matrix4x4_Invert_Full(proj_inv, *args->projection); @@ -216,38 +216,14 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a R_VkResourcesSetBuiltinFIXME((r_vk_resources_builtin_fixme_t){ .frame_index = args->frame_index, - .uniform_buffer = g_rtx.uniform_buffer.buffer, + .uniform_buffer = &g_rtx.uniform_buffer, .uniform_unit_size = g_rtx.uniform_unit_size, .geometry_data.buffer = args->render_args->geometry_data.buffer, .geometry_data.size = args->render_args->geometry_data.size, .light_bindings = args->light_bindings, }); - // Upload kusochki updates - { - const VkBufferMemoryBarrier bmb[] = { { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, - .buffer = g_ray_model_state.kusochki_buffer.buffer, - .offset = 0, - .size = VK_WHOLE_SIZE, - }, { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .buffer = g_ray_model_state.model_headers_buffer.buffer, - .offset = 0, - .size = VK_WHOLE_SIZE, - } }; - - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, NULL, ARRAYSIZE(bmb), bmb, 0, NULL); - } - - R_VkResourcesFrameBeginStateChangeFIXME(cmdbuf, g_rtx.discontinuity); + R_VkResourcesFrameBeginStateChangeFIXME(combuf, g_rtx.discontinuity); if (g_rtx.discontinuity) { DEBUG("discontinuity => false"); g_rtx.discontinuity = false; @@ -255,31 +231,8 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a DEBUG_BEGIN(cmdbuf, "yay tracing"); - // FIXME move this to "TLAS producer" - { - rt_resource_t *const tlas = R_VkResourceGetByIndex(ExternalResource_tlas); - tlas->resource = RT_VkAccelPrepareTlas(combuf); - } - prepareUniformBuffer(args->render_args, args->frame_index, args->frame_counter, args->fov_angle_y, args->frame_width, args->frame_height); - { // FIXME this should be done automatically inside meatpipe, TODO - //const uint32_t size = sizeof(struct Lights); - //const uint32_t size = sizeof(struct LightsMetadata); // + 8 * sizeof(uint32_t); - const VkBufferMemoryBarrier bmb[] = {{ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .buffer = args->light_bindings->buffer, - .offset = 0, - .size = VK_WHOLE_SIZE, - }}; - vkCmdPipelineBarrier(cmdbuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, NULL, 
ARRAYSIZE(bmb), bmb, 0, NULL); - } - // Update image resource links after the prev_-related swap above // TODO Preserve the indexes somewhere to avoid searching // FIXME I don't really get why we need this, the pointers should have been preserved ?! @@ -289,7 +242,7 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a const qboolean create = !!(mr->flags & MEATPIPE_RES_CREATE); if (create && mr->descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) // THIS FAILS WHY?! ASSERT(g_rtx.mainpipe_resources[i]->value.image_object == &res->image); - g_rtx.mainpipe_resources[i]->value.image_object = &res->image; + g_rtx.mainpipe_resources[i]->ref.image = &res->image; } R_VkMeatpipePerform(g_rtx.mainpipe, combuf, (vk_meatpipe_perfrom_args_t) { @@ -299,31 +252,6 @@ static void performTracing( vk_combuf_t *combuf, const perform_tracing_args_t* a .resources = g_rtx.mainpipe_resources, }); - { - const r_vkimage_blit_args blit_args = { - .in_stage = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - .src = { - .image = g_rtx.mainpipe_out->image.image, - .width = args->frame_width, - .height = args->frame_height, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - }, - .dst = { - .image = args->render_args->dst.image, - .width = args->render_args->dst.width, - .height = args->render_args->dst.height, - .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .srcAccessMask = 0, - }, - }; - - R_VkImageBlit( cmdbuf, &blit_args ); - - // TODO this is to make sure we remember image layout after image_blit - // The proper way to do this would be to teach R_VkImageBlit to properly track the image metadata (i.e. vk_resource_t state) - g_rtx.mainpipe_out->resource.write.image_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - } DEBUG_END(cmdbuf); APROF_SCOPE_END(perform); @@ -410,7 +338,10 @@ static void reloadMainpipe(void) { .tiling = VK_IMAGE_TILING_OPTIMAL, // TODO figure out how to detect this need properly. prev_dest is not defined as "output" //.usage = VK_IMAGE_USAGE_STORAGE_BIT | (output ? 
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT : 0), - .usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + .usage = VK_IMAGE_USAGE_STORAGE_BIT + //| VK_IMAGE_USAGE_SAMPLED_BIT // required by VK_IMAGE_LAYOUT_SHADER_READ_OPTIMAL + | VK_IMAGE_USAGE_TRANSFER_SRC_BIT + | VK_IMAGE_USAGE_TRANSFER_DST_BIT, .flags = 0, }; res->image = R_VkImageCreate(&create); @@ -422,11 +353,11 @@ static void reloadMainpipe(void) { if (create) { if (mr->descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { - newpipe_resources[i]->value.image_object = &res->image; + newpipe_resources[i]->ref.image = &res->image; } // TODO full r/w initialization - res->resource.write.pipelines = 0; + // FIXME not sure if not needed res->resource.deprecate.write.pipelines = 0; res->resource.type = mr->descriptor_type; } else { // TODO no assert, complain and exit @@ -505,7 +436,7 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args) // FIXME pass these matrices explicitly to let RTX module handle ubo itself RT_LightsFrameEnd(); - const vk_lights_bindings_t light_bindings = VK_LightsUpload(); + const vk_lights_bindings_t light_bindings = VK_LightsUpload(args->combuf); g_rtx.frame_number++; @@ -514,15 +445,15 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args) qboolean need_reload = g_rtx.reload_pipeline; - if (g_rtx.max_frame_width < args->dst.width) { - g_rtx.max_frame_width = ALIGN_UP(args->dst.width, 16); + if (g_rtx.max_frame_width < args->dst->width) { + g_rtx.max_frame_width = ALIGN_UP(args->dst->width, 16); WARN("Increasing max_frame_width to %d", g_rtx.max_frame_width); // TODO only reload resources, no need to reload the entire pipeline need_reload = true; } - if (g_rtx.max_frame_height < args->dst.height) { - g_rtx.max_frame_height = ALIGN_UP(args->dst.height, 16); + if (g_rtx.max_frame_height < args->dst->height) { + g_rtx.max_frame_height = ALIGN_UP(args->dst->height, 16); WARN("Increasing max_frame_height to %d", g_rtx.max_frame_height); // TODO only reload resources, no need to reload the entire pipeline need_reload = true; @@ -542,38 +473,30 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args) // Feed tlas with dynamic data RT_DynamicModelProcessFrame(); - ASSERT(args->dst.width <= g_rtx.max_frame_width); - ASSERT(args->dst.height <= g_rtx.max_frame_height); + // FIXME what's the right place for this? + // This needs to happen every frame where we might've locked staging for kusochki + // - After dynamic stuff (might upload kusochki) + // - Before performTracing(), even if it is not called + // See ~3:00:00-3:40:00 of stream E383 about push-vs-pull models and their boundaries. 
+ R_VkBufferStagingCommit(&g_ray_model_state.kusochki_buffer, args->combuf); + + ASSERT(args->dst->width <= g_rtx.max_frame_width); + ASSERT(args->dst->height <= g_rtx.max_frame_height); // TODO dynamic scaling based on perf - const int frame_width = args->dst.width; - const int frame_height = args->dst.height; + const int frame_width = args->dst->width; + const int frame_height = args->dst->height; + + rt_resource_t *const tlas = R_VkResourceGetByIndex(ExternalResource_tlas); // Do not draw when we have no swapchain - if (args->dst.image_view == VK_NULL_HANDLE) + if (!args->dst->image) goto tail; - if (g_ray_model_state.frame.instances_count == 0) { - const r_vkimage_blit_args blit_args = { - .in_stage = VK_PIPELINE_STAGE_TRANSFER_BIT, - .src = { - .image = g_rtx.mainpipe_out->image.image, - .width = frame_width, - .height = frame_height, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - }, - .dst = { - .image = args->dst.image, - .width = args->dst.width, - .height = args->dst.height, - .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .srcAccessMask = 0, - }, - }; - - R_VkImageClear( cmdbuf, g_rtx.mainpipe_out->image.image ); - R_VkImageBlit( cmdbuf, &blit_args ); + // TODO move this to "TLAS producer" + tlas->resource = RT_VkAccelPrepareTlas(args->combuf); + if (tlas->resource.value.accel.accelerationStructureCount == 0) { + R_VkImageClear( &g_rtx.mainpipe_out->image, args->combuf, NULL ); } else { const perform_tracing_args_t trace_args = { .render_args = args, @@ -587,6 +510,21 @@ void VK_RayFrameEnd(const vk_ray_frame_render_args_t* args) performTracing( args->combuf, &trace_args ); } + { + const r_vkimage_blit_args blit_args = { + .src = { + .image = &g_rtx.mainpipe_out->image, + .width = frame_width, + .height = frame_height, + }, + .dst = { + .image = args->dst, + }, + }; + + R_VkImageBlit( args->combuf, &blit_args ); + } + tail: APROF_SCOPE_END(ray_frame_end); } diff --git a/ref/vk/vk_rtx.h b/ref/vk/vk_rtx.h index f3288033e9..b7669c9398 100644 --- a/ref/vk/vk_rtx.h +++ b/ref/vk/vk_rtx.h @@ -5,21 +5,19 @@ void VK_RayFrameBegin( void ); +struct vk_combuf_s; +struct r_vk_image_s; typedef struct { struct vk_combuf_s *combuf; - struct { - VkImageView image_view; - VkImage image; - uint32_t width, height; - } dst; + struct r_vk_image_s *dst; const matrix4x4 *projection, *view; // Buffer holding vertex and index data // TODO remove struct { - VkBuffer buffer; // must be the same as in vk_ray_model_create_t TODO: validate or make impossible to specify incorrectly + vk_buffer_t* buffer; // must be the same as in vk_ray_model_create_t TODO: validate or make impossible to specify incorrectly uint64_t size; } geometry_data; diff --git a/ref/vk/vk_scene.c b/ref/vk/vk_scene.c index 1c057d901b..6de06bbaeb 100644 --- a/ref/vk/vk_scene.c +++ b/ref/vk/vk_scene.c @@ -1,6 +1,5 @@ #include "vk_scene.h" #include "vk_brush.h" -#include "vk_staging.h" #include "vk_studio.h" #include "vk_lightmap.h" #include "vk_const.h" @@ -65,28 +64,6 @@ static struct { draw_list_t *draw_list; } g_lists; -static void loadLights( const model_t *const map ) { - RT_LightsLoadBegin(map); - - const int num_models = gEngine.EngineGetParm( PARM_NUMMODELS, 0 ); - for( int i = 0; i < num_models; i++ ) { - const model_t *const mod = gEngine.pfnGetModelByIndex( i + 1 ); - - if (!mod) - continue; - - if( mod->type != mod_brush ) - continue; - - const qboolean is_worldmodel = i == 0; - R_VkBrushModelCollectEmissiveSurfaces(mod, is_worldmodel); - } - - // Load static map lights - // Reads surfaces 
from loaded brush models (must happen after all brushes are loaded) - RT_LightsLoadEnd(); -} - static void preloadModels( void ) { const int num_models = gEngine.EngineGetParm( PARM_NUMMODELS, 0 ); @@ -138,6 +115,7 @@ static void loadMap(const model_t* const map, qboolean force_reload) { RT_LightsNewMap(map); + // TODO doesn't really need to exist: sprite instance models are static R_SpriteNewMapFIXME(); // Load light entities and patch data prior to loading map brush model @@ -150,13 +128,14 @@ static void loadMap(const model_t* const map, qboolean force_reload) { // Depends on loaded materials. Must preceed loading brush models. XVK_ParseMapPatches(); + RT_LightsLoadBegin(map); preloadModels(); + // Marks all loaded lights as static. Should happen after preloadModels(), where brush models are loaded. + RT_LightsLoadEnd(); // Can only do after preloadModels(), as we need to know whether there are SURF_DRAWSKY R_TextureSetupSky( gEngine.pfnGetMoveVars()->skyName, force_reload ); - loadLights(map); - // TODO should we do something like R_BrushEndLoad? VK_UploadLightmap(); } @@ -164,7 +143,7 @@ static void loadMap(const model_t* const map, qboolean force_reload) { static void reloadPatches( void ) { INFO("Reloading patches and materials"); - R_VkStagingFlushSync(); + // FIXME R_VkStagingFlushSync(); XVK_CHECK(vkDeviceWaitIdle( vk_core.device )); @@ -173,8 +152,6 @@ static void reloadPatches( void ) { const model_t *const map = gEngine.pfnGetModelByIndex( 1 ); const qboolean force_reload = true; loadMap(map, force_reload); - - R_VkStagingFlushSync(); } void VK_SceneInit( void ) diff --git a/ref/vk/vk_sprite.c b/ref/vk/vk_sprite.c index f85549cfe7..244af16feb 100644 --- a/ref/vk/vk_sprite.c +++ b/ref/vk/vk_sprite.c @@ -174,7 +174,7 @@ static mspriteframe_t *R_GetSpriteFrame( const model_t *pModel, int frame, float } else if( psprite->frames[frame].type == SPR_GROUP ) { - pspritegroup = (mspritegroup_t *)psprite->frames[frame].frameptr; + pspritegroup = PTR_CAST(mspritegroup_t, psprite->frames[frame].frameptr); pintervals = pspritegroup->intervals; numframes = pspritegroup->numframes; fullinterval = pintervals[numframes-1]; @@ -198,7 +198,7 @@ static mspriteframe_t *R_GetSpriteFrame( const model_t *pModel, int frame, float gEngine.Con_Printf(S_WARN "VK FIXME: %s doesn't know about viewangles\n", __FUNCTION__); // e.g. 
doom-style sprite monsters - pspritegroup = (mspritegroup_t *)psprite->frames[frame].frameptr; + pspritegroup = PTR_CAST(mspritegroup_t, psprite->frames[frame].frameptr); pspriteframe = pspritegroup->frames[angleframe]; } @@ -261,7 +261,7 @@ static const dframetype_t *VK_SpriteLoadFrame( model_t *mod, const void *pin, ms pspriteframe->gl_texturenum = gl_texturenum; *ppframe = pspriteframe; - return ( const dframetype_t* )(( const byte* )pin + sizeof( dspriteframe_t ) + pinframe.width * pinframe.height * bytes ); + return PTR_CAST(const dframetype_t, ( const byte* )pin + sizeof( dspriteframe_t ) + pinframe.width * pinframe.height * bytes ); } static const dframetype_t *VK_SpriteLoadGroup( model_t *mod, const void *pin, mspriteframe_t **ppframe, int framenum, const SpriteLoadContext *ctx ) @@ -609,7 +609,7 @@ static float R_GetSpriteFrameInterpolant( cl_entity_t *ent, mspriteframe_t **old } else if( psprite->frames[frame].type == FRAME_GROUP ) { - pspritegroup = (mspritegroup_t *)psprite->frames[frame].frameptr; + pspritegroup = PTR_CAST(mspritegroup_t, psprite->frames[frame].frameptr); pintervals = pspritegroup->intervals; numframes = pspritegroup->numframes; fullinterval = pintervals[numframes-1]; @@ -681,10 +681,10 @@ static float R_GetSpriteFrameInterpolant( cl_entity_t *ent, mspriteframe_t **old lerpFrac = 1.0f; } - pspritegroup = (mspritegroup_t *)psprite->frames[ent->latched.prevblending[0]].frameptr; + pspritegroup = PTR_CAST(mspritegroup_t, psprite->frames[ent->latched.prevblending[0]].frameptr); if( oldframe ) *oldframe = pspritegroup->frames[angleframe]; - pspritegroup = (mspritegroup_t *)psprite->frames[frame].frameptr; + pspritegroup = PTR_CAST(mspritegroup_t, psprite->frames[frame].frameptr); if( curframe ) *curframe = pspritegroup->frames[angleframe]; } @@ -1099,7 +1099,7 @@ void Mod_SpriteUnloadTextures( void *data ) } else { - pspritegroup = (mspritegroup_t *)psprite->frames[i].frameptr; + pspritegroup = PTR_CAST(mspritegroup_t, psprite->frames[i].frameptr); for( j = 0; j < pspritegroup->numframes; j++ ) { diff --git a/ref/vk/vk_staging.c b/ref/vk/vk_staging.c index 4e7ceee05f..55b2806f1b 100644 --- a/ref/vk/vk_staging.c +++ b/ref/vk/vk_staging.c @@ -1,352 +1,147 @@ #include "vk_staging.h" + #include "vk_buffer.h" -#include "alolcator.h" -#include "vk_commandpool.h" -#include "profiler.h" -#include "r_speeds.h" #include "vk_combuf.h" #include "vk_logs.h" +#include "r_speeds.h" + +#include "alolcator.h" +#include "arrays.h" #include #define MODULE_NAME "staging" #define LOG_MODULE staging -#define DEFAULT_STAGING_SIZE (128*1024*1024) -#define MAX_STAGING_ALLOCS (2048) -#define MAX_CONCURRENT_FRAMES 2 -#define COMMAND_BUFFER_COUNT (MAX_CONCURRENT_FRAMES + 1) // to accommodate two frames in flight plus something trying to upload data before waiting for the next frame to complete +// FIXME decrease size to something reasonable, see https://github.com/w23/xash3d-fwgs/issues/746 +#define DEFAULT_STAGING_SIZE (4*128*1024*1024) -typedef struct { - VkImage image; - VkImageLayout layout; - size_t size; // for stats only -} staging_image_t; - -static struct { - vk_buffer_t buffer; - r_flipping_buffer_t buffer_alloc; +#define MAX_STAGING_USERS 8 - struct { - VkBuffer dest[MAX_STAGING_ALLOCS]; - VkBufferCopy copy[MAX_STAGING_ALLOCS]; - int count; - } buffers; +typedef struct r_vkstaging_user_t { + r_vkstaging_user_create_t info; + uint32_t locked_count; struct { - staging_image_t dest[MAX_STAGING_ALLOCS]; - VkBufferImageCopy copy[MAX_STAGING_ALLOCS]; - int count; - } images; + int 
allocs; + int size; + } stats; +} r_vkstaging_user_t; - vk_combuf_t *combuf[3]; +static struct { + vk_buffer_t buffer; + alo_ring_t buffer_alloc_ring; - // Currently opened command buffer, ready to accept new commands - vk_combuf_t *current; + BOUNDED_ARRAY_DECLARE(r_vkstaging_user_t, users, MAX_STAGING_USERS); struct { int total_size; - int buffers_size; - int images_size; - int buffer_chunks; - int images; + int total_chunks; } stats; - int buffer_upload_scope_id; - int image_upload_scope_id; + //int buffer_upload_scope_id; + //int image_upload_scope_id; } g_staging = {0}; qboolean R_VkStagingInit(void) { - if (!VK_BufferCreate("staging", &g_staging.buffer, DEFAULT_STAGING_SIZE, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) + if (!VK_BufferCreate("staging", &g_staging.buffer, DEFAULT_STAGING_SIZE, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) return false; - g_staging.combuf[0] = R_VkCombufOpen(); - g_staging.combuf[1] = R_VkCombufOpen(); - g_staging.combuf[2] = R_VkCombufOpen(); - - R_FlippingBuffer_Init(&g_staging.buffer_alloc, DEFAULT_STAGING_SIZE); + aloRingInit(&g_staging.buffer_alloc_ring, g_staging.buffer.size); R_SPEEDS_COUNTER(g_staging.stats.total_size, "total_size", kSpeedsMetricBytes); - R_SPEEDS_COUNTER(g_staging.stats.buffers_size, "buffers_size", kSpeedsMetricBytes); - R_SPEEDS_COUNTER(g_staging.stats.images_size, "images_size", kSpeedsMetricBytes); + R_SPEEDS_COUNTER(g_staging.stats.total_chunks, "total_chunks", kSpeedsMetricBytes); - R_SPEEDS_COUNTER(g_staging.stats.buffer_chunks, "buffer_chunks", kSpeedsMetricCount); - R_SPEEDS_COUNTER(g_staging.stats.images, "images", kSpeedsMetricCount); - - g_staging.buffer_upload_scope_id = R_VkGpuScope_Register("staging_buffers"); - g_staging.image_upload_scope_id = R_VkGpuScope_Register("staging_images"); + //g_staging.buffer_upload_scope_id = R_VkGpuScope_Register("staging_buffers"); + //g_staging.image_upload_scope_id = R_VkGpuScope_Register("staging_images"); return true; } void R_VkStagingShutdown(void) { + // TODO ASSERT(g_staging.users.count == 0); VK_BufferDestroy(&g_staging.buffer); } -// FIXME There's a severe race condition here. Submitting things manually and prematurely (before framectl had a chance to synchronize with the previous frame) -// may lead to data races and memory corruption (e.g. writing into memory that's being read in some pipeline stage still going) -void R_VkStagingFlushSync( void ) { - APROF_SCOPE_DECLARE_BEGIN(function, __FUNCTION__); - - vk_combuf_t *combuf = R_VkStagingCommit(); - if (!combuf) - goto end; +r_vkstaging_user_t *R_VkStagingUserCreate(r_vkstaging_user_create_t info) { + ASSERT(g_staging.users.count < MAX_STAGING_USERS); - R_VkCombufEnd(combuf); - g_staging.current = NULL; - - //gEngine.Con_Reportf(S_WARN "flushing staging buffer img count=%d\n", g_staging.images.count); - - { - const VkSubmitInfo subinfo = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .commandBufferCount = 1, - .pCommandBuffers = &combuf->cmdbuf, - }; - - // TODO wait for previous command buffer completion. 
Why: we might end up writing into the same dst - - XVK_CHECK(vkQueueSubmit(vk_core.queue, 1, &subinfo, VK_NULL_HANDLE)); - - // TODO wait for fence, not this - XVK_CHECK(vkQueueWaitIdle(vk_core.queue)); - } - - g_staging.buffers.count = 0; - g_staging.images.count = 0; - R_FlippingBuffer_Clear(&g_staging.buffer_alloc); - -end: - APROF_SCOPE_END(function); -}; - -static uint32_t allocateInRing(uint32_t size, uint32_t alignment) { - alignment = alignment < 1 ? 1 : alignment; + r_vkstaging_user_t *const user = g_staging.users.items + (g_staging.users.count++); + *user = (r_vkstaging_user_t) { + .info = info, + }; - const uint32_t offset = R_FlippingBuffer_Alloc(&g_staging.buffer_alloc, size, alignment ); - if (offset != ALO_ALLOC_FAILED) - return offset; + char buf[64]; + snprintf(buf, sizeof(buf), "%s.size", info.name); + R_SPEEDS_COUNTER(user->stats.size, buf, kSpeedsMetricBytes); - R_VkStagingFlushSync(); + snprintf(buf, sizeof(buf), "%s.allocs", info.name); + R_SPEEDS_COUNTER(user->stats.allocs, buf, kSpeedsMetricCount); - return R_FlippingBuffer_Alloc(&g_staging.buffer_alloc, size, alignment ); + return user; } -vk_staging_region_t R_VkStagingLockForBuffer(vk_staging_buffer_args_t args) { - if ( g_staging.buffers.count >= MAX_STAGING_ALLOCS ) - R_VkStagingFlushSync(); - - const uint32_t offset = allocateInRing(args.size, args.alignment); - if (offset == ALO_ALLOC_FAILED) - return (vk_staging_region_t){0}; - - const int index = g_staging.buffers.count; - - g_staging.buffers.dest[index] = args.buffer; - g_staging.buffers.copy[index] = (VkBufferCopy){ - .srcOffset = offset, - .dstOffset = args.offset, - .size = args.size, - }; - - g_staging.buffers.count++; - - return (vk_staging_region_t){ - .ptr = (char*)g_staging.buffer.mapped + offset, - .handle = index, - }; +void R_VkStagingUserDestroy(r_vkstaging_user_t *user) { + ASSERT(user->locked_count == 0); + // TODO remove from the table } -vk_staging_region_t R_VkStagingLockForImage(vk_staging_image_args_t args) { - if ( g_staging.images.count >= MAX_STAGING_ALLOCS ) - R_VkStagingFlushSync(); +r_vkstaging_region_t R_VkStagingLock(r_vkstaging_user_t* user, uint32_t size) { + const uint32_t alignment = 4; + const uint32_t offset = aloRingAlloc(&g_staging.buffer_alloc_ring, size, alignment); + ASSERT(offset != ALO_ALLOC_FAILED && "FIXME: workaround: increase staging buffer size"); - const uint32_t offset = allocateInRing(args.size, args.alignment); - if (offset == ALO_ALLOC_FAILED) - return (vk_staging_region_t){0}; + DEBUG("Lock alignment=%d size=%d region=%d..%d", alignment, size, offset, offset + size); - const int index = g_staging.images.count; - staging_image_t *const dest = g_staging.images.dest + index; + user->locked_count++; - dest->image = args.image; - dest->layout = args.layout; - dest->size = args.size; - g_staging.images.copy[index] = args.region; - g_staging.images.copy[index].bufferOffset += offset; + user->stats.allocs++; + user->stats.size += size; - g_staging.images.count++; + g_staging.stats.total_chunks++; + g_staging.stats.total_size += size; - return (vk_staging_region_t){ + return (r_vkstaging_region_t){ + .offset = offset, + .buffer = g_staging.buffer.buffer, .ptr = (char*)g_staging.buffer.mapped + offset, - .handle = index + MAX_STAGING_ALLOCS, }; } -void R_VkStagingUnlock(staging_handle_t handle) { - ASSERT(handle >= 0); - ASSERT(handle < MAX_STAGING_ALLOCS * 2); - - // FIXME mark and check ready +void R_VkStagingUnlockBulk(r_vkstaging_user_t* user, uint32_t count) { + ASSERT(user->locked_count >= count); + 
user->locked_count -= count; } -static void commitBuffers(vk_combuf_t *combuf) { - if (!g_staging.buffers.count) - return; - - const VkCommandBuffer cmdbuf = g_staging.current->cmdbuf; - const int begin_index = R_VkCombufScopeBegin(combuf, g_staging.buffer_upload_scope_id); - - // TODO better coalescing: - // - upload once per buffer - // - join adjacent regions - - BOUNDED_ARRAY(barriers, VkBufferMemoryBarrier, 4); - - for (int i = 0; i < g_staging.buffers.count; i++) { - const VkBuffer dst_buf = g_staging.buffers.dest[i]; - for (int j = 0;; ++j) { - if (j == COUNTOF(barriers.items)) { - ERR("Ran out of buffer barrier slots, oh no"); - break; - } - - // Instert last - if (j == barriers.count) { - barriers.count++; - barriers.items[j] = (VkBufferMemoryBarrier){ - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .buffer = dst_buf, - .offset = 0, - .size = VK_WHOLE_SIZE, - }; - break; - } - - // Already inserted - if (barriers.items[j].buffer == dst_buf) - break; - } - } - - if (barriers.count) { - vkCmdPipelineBarrier(cmdbuf, - // FIXME this should be more concrete. Will need to pass buffer "state" around. - // For now it works, but makes validation uhappy. - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, barriers.count, barriers.items, 0, NULL); - } - - VkBuffer prev_buffer = VK_NULL_HANDLE; - int first_copy = 0; - for (int i = 0; i < g_staging.buffers.count; i++) { - /* { */ - /* const VkBufferCopy *const copy = g_staging.buffers.copy + i; */ - /* gEngine.Con_Reportf(" %d: [%08llx, %08llx) => [%08llx, %08llx)\n", i, copy->srcOffset, copy->srcOffset + copy->size, copy->dstOffset, copy->dstOffset + copy->size); */ - /* } */ - - if (prev_buffer == g_staging.buffers.dest[i]) +uint32_t R_VkStagingFrameEpilogue(vk_combuf_t* combuf) { + for (int i = 0; i < g_staging.users.count; ++i) { + r_vkstaging_user_t *const user = g_staging.users.items + i; + if (user->locked_count == 0) continue; - if (prev_buffer != VK_NULL_HANDLE) { - DEBUG_NV_CHECKPOINTF(cmdbuf, "staging dst_buffer=%p count=%d", prev_buffer, i-first_copy); - g_staging.stats.buffer_chunks++; - vkCmdCopyBuffer(cmdbuf, g_staging.buffer.buffer, - prev_buffer, - i - first_copy, g_staging.buffers.copy + first_copy); - } - - g_staging.stats.buffers_size += g_staging.buffers.copy[i].size; - - prev_buffer = g_staging.buffers.dest[i]; - first_copy = i; - } - - if (prev_buffer != VK_NULL_HANDLE) { - DEBUG_NV_CHECKPOINTF(cmdbuf, "staging dst_buffer=%p count=%d", prev_buffer, g_staging.buffers.count-first_copy); - g_staging.stats.buffer_chunks++; - vkCmdCopyBuffer(cmdbuf, g_staging.buffer.buffer, - prev_buffer, - g_staging.buffers.count - first_copy, g_staging.buffers.copy + first_copy); + WARN("%s has %u locked staging items, pushing", user->info.name, user->locked_count); + user->info.push(user->info.userptr, combuf, user->locked_count); + ASSERT(user->locked_count == 0); } - g_staging.buffers.count = 0; + // TODO it would be nice to attach a finalization callback to combuf + // So that when the combuf is done on GPU, the callback is called and we can clean its memory + // instead of depending on framectl calling Completed function manually. 
- R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_TRANSFER_BIT); + return g_staging.buffer_alloc_ring.head; } -static void commitImages(vk_combuf_t *combuf) { - if (!g_staging.images.count) - return; - - const VkCommandBuffer cmdbuf = g_staging.current->cmdbuf; - const int begin_index = R_VkCombufScopeBegin(combuf, g_staging.image_upload_scope_id); - for (int i = 0; i < g_staging.images.count; i++) { - /* { */ - /* const VkBufferImageCopy *const copy = g_staging.images.copy + i; */ - /* gEngine.Con_Reportf(" i%d: [%08llx, ?) => %p\n", i, copy->bufferOffset, g_staging.images.dest[i].image); */ - /* } */ - - g_staging.stats.images++; - g_staging.stats.images_size += g_staging.images.dest[i].size; - - vkCmdCopyBufferToImage(cmdbuf, g_staging.buffer.buffer, - g_staging.images.dest[i].image, - g_staging.images.dest[i].layout, - 1, g_staging.images.copy + i); - } - - g_staging.images.count = 0; - R_VkCombufScopeEnd(combuf, begin_index, VK_PIPELINE_STAGE_TRANSFER_BIT); -} +void R_VkStagingFrameCompleted(uint32_t frame_boundary_addr) { + // Note that these stats are for latest frame, not the one for which the frame boundary is. + g_staging.stats.total_size = 0; + g_staging.stats.total_chunks = 0; -static vk_combuf_t *getCurrentCombuf(void) { - if (!g_staging.current) { - g_staging.current = g_staging.combuf[0]; - R_VkCombufBegin(g_staging.current); + for (int i = 0; i < g_staging.users.count; ++i) { + r_vkstaging_user_t *const user = g_staging.users.items + i; + user->stats.allocs = 0; + user->stats.size = 0; } - return g_staging.current; -} - -VkCommandBuffer R_VkStagingGetCommandBuffer(void) { - return getCurrentCombuf()->cmdbuf; -} - -vk_combuf_t *R_VkStagingCommit(void) { - if (!g_staging.images.count && !g_staging.buffers.count && !g_staging.current) - return VK_NULL_HANDLE; - - getCurrentCombuf(); - commitBuffers(g_staging.current); - commitImages(g_staging.current); - return g_staging.current; -} - -void R_VkStagingFrameBegin(void) { - R_FlippingBuffer_Flip(&g_staging.buffer_alloc); - - g_staging.buffers.count = 0; - g_staging.images.count = 0; -} - -vk_combuf_t *R_VkStagingFrameEnd(void) { - R_VkStagingCommit(); - vk_combuf_t *current = g_staging.current; - - if (current) { - R_VkCombufEnd(g_staging.current); - } - - g_staging.current = NULL; - vk_combuf_t *const tmp = g_staging.combuf[0]; - g_staging.combuf[0] = g_staging.combuf[1]; - g_staging.combuf[1] = g_staging.combuf[2]; - g_staging.combuf[2] = tmp; - - g_staging.stats.total_size = g_staging.stats.images_size + g_staging.stats.buffers_size; - - return current; + aloRingFree(&g_staging.buffer_alloc_ring, frame_boundary_addr); } diff --git a/ref/vk/vk_staging.h b/ref/vk/vk_staging.h index 2caa14230f..601fa68b0a 100644 --- a/ref/vk/vk_staging.h +++ b/ref/vk/vk_staging.h @@ -5,49 +5,50 @@ qboolean R_VkStagingInit(void); void R_VkStagingShutdown(void); -typedef int staging_handle_t; +struct vk_combuf_s; +typedef void (r_vkstaging_push_f)(void* userptr, struct vk_combuf_s *combuf, uint32_t pending); typedef struct { - void *ptr; - staging_handle_t handle; -} vk_staging_region_t; + // Expected to be static, stored as a pointer + const char *name; -// Allocate region for uploadting to buffer -typedef struct { - VkBuffer buffer; - uint32_t offset; - uint32_t size; - uint32_t alignment; -} vk_staging_buffer_args_t; -vk_staging_region_t R_VkStagingLockForBuffer(vk_staging_buffer_args_t args); + void *userptr; + r_vkstaging_push_f *push; +} r_vkstaging_user_create_t; + +struct r_vkstaging_user_t; +typedef struct r_vkstaging_user_t 
*r_vkstaging_user_handle_t; +r_vkstaging_user_handle_t R_VkStagingUserCreate(r_vkstaging_user_create_t); +void R_VkStagingUserDestroy(r_vkstaging_user_handle_t); -// Allocate region for uploading to image typedef struct { - VkImage image; - VkImageLayout layout; - VkBufferImageCopy region; - uint32_t size; - uint32_t alignment; -} vk_staging_image_args_t; -vk_staging_region_t R_VkStagingLockForImage(vk_staging_image_args_t args); - -// Mark allocated region as ready for upload -void R_VkStagingUnlock(staging_handle_t handle); - -// Append copy commands to command buffer. -struct vk_combuf_s* R_VkStagingCommit(void); - -// Mark previous frame data as uploaded and safe to use. -void R_VkStagingFrameBegin(void); - -// Uploads staging contents and returns the command buffer ready to be submitted. -// Can return NULL if there's nothing to upload. -struct vk_combuf_s *R_VkStagingFrameEnd(void); - -// Gets the current command buffer. -// WARNING: Can be invalidated by any of the Lock calls -VkCommandBuffer R_VkStagingGetCommandBuffer(void); - -// Commit all staging data into current cmdbuf, submit it and wait for completion. -// Needed for CPU-GPU sync -void R_VkStagingFlushSync( void ); + // CPU-accessible memory + void *ptr; + + // GPU buffer to copy from + VkBuffer buffer; + VkDeviceSize offset; +} r_vkstaging_region_t; + +// Allocate CPU-accessible memory in staging buffer +r_vkstaging_region_t R_VkStagingLock(r_vkstaging_user_handle_t, uint32_t size); + +// Notify staging that this amount of regions are about to be consumed when the next combuf ends +// I.e. they're "free" from the staging standpoint +void R_VkStagingUnlockBulk(r_vkstaging_user_handle_t, uint32_t count); + +// This gets called just before the combuf is ended and submitted. +// Gives the last chance for the users that haven't yet used their data. +// This is a workaround to patch up the impedance mismatch between top-down push model, +// where the engine "pushes down" the data to be rendered, and "bottom-up" pull model, +// where the frame is constructed based on render graph dependency tree. Not all pushed +// resources could be used, and this gives the opportunity to at least ingest the data +// to make sure that it remains complete, in case it might be needed in the future. +// Returns current frame tag to be closed in the R_VkStagingCombufCompleted() function. +uint32_t R_VkStagingFrameEpilogue(struct vk_combuf_s*); + +// This function is called when a frame is finished. It allows staging to free all the +// data used in that frame. +// TODO make this dependency more explicit, i.e. combuf should track when it's done +// and what finalization functions it should call when it's done (there are many). 
+void R_VkStagingFrameCompleted(uint32_t tag); diff --git a/ref/vk/vk_studio.c b/ref/vk/vk_studio.c index 174649bdaa..cc2eab0623 100644 --- a/ref/vk/vk_studio.c +++ b/ref/vk/vk_studio.c @@ -228,7 +228,7 @@ static qboolean R_StudioComputeBBox( vec3_t bbox[8] ) if( e->curstate.sequence < 0 || e->curstate.sequence >= m_pStudioHeader->numseq ) e->curstate.sequence = 0; - pseqdesc = (mstudioseqdesc_t *)((byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + e->curstate.sequence; + pseqdesc = PTR_CAST(mstudioseqdesc_t, (byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + e->curstate.sequence; // add sequence box to the model box AddPointToBounds( pseqdesc->bbmin, mins, maxs ); @@ -520,7 +520,7 @@ void R_StudioCalcBoneAdj( float dadt, float *adj, const byte *pcontroller1, cons float value = 0.0f; int i, j; - pbonecontroller = (mstudiobonecontroller_t *)((byte *)m_pStudioHeader + m_pStudioHeader->bonecontrollerindex); + pbonecontroller = PTR_CAST(mstudiobonecontroller_t, (byte *)m_pStudioHeader + m_pStudioHeader->bonecontrollerindex); for( j = 0; j < m_pStudioHeader->numbonecontrollers; j++ ) { @@ -599,7 +599,7 @@ void R_StudioCalcRotations( cl_entity_t *e, float pos[][3], vec4_t *q, mstudiose s = (f - frame); // add in programtic controllers - pbone = (mstudiobone_t *)((byte *)m_pStudioHeader + m_pStudioHeader->boneindex); + pbone = PTR_CAST(mstudiobone_t, (byte *)m_pStudioHeader + m_pStudioHeader->boneindex); R_StudioCalcBoneAdj( dadt, adj, e->curstate.controller, e->latched.prevcontroller, e->mouth.mouthopen ); @@ -628,13 +628,13 @@ void R_StudioMergeBones( cl_entity_t *e, model_t *m_pSubModel ) if( e->curstate.sequence >= m_pStudioHeader->numseq ) e->curstate.sequence = 0; - pseqdesc = (mstudioseqdesc_t *)((byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + e->curstate.sequence; + pseqdesc = PTR_CAST(mstudioseqdesc_t, (byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + e->curstate.sequence; f = R_StudioEstimateFrame( e, pseqdesc, g_studio.time ); panim = gEngine.R_StudioGetAnim( m_pStudioHeader, m_pSubModel, pseqdesc ); R_StudioCalcRotations( e, pos, q, pseqdesc, panim, f ); - pbones = (mstudiobone_t *)((byte *)m_pStudioHeader + m_pStudioHeader->boneindex); + pbones = PTR_CAST(mstudiobone_t, (byte *)m_pStudioHeader + m_pStudioHeader->boneindex); for( i = 0; i < m_pStudioHeader->numbones; i++ ) { @@ -688,7 +688,7 @@ void R_StudioSetupBones( cl_entity_t *e ) if( e->curstate.sequence >= m_pStudioHeader->numseq ) e->curstate.sequence = 0; - pseqdesc = (mstudioseqdesc_t *)((byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + e->curstate.sequence; + pseqdesc = PTR_CAST(mstudioseqdesc_t, (byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + e->curstate.sequence; f = R_StudioEstimateFrame( e, pseqdesc, g_studio.time ); @@ -731,7 +731,7 @@ void R_StudioSetupBones( cl_entity_t *e ) static vec4_t q1b[MAXSTUDIOBONES]; float s; - pseqdesc = (mstudioseqdesc_t *)((byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + e->latched.prevsequence; + pseqdesc = PTR_CAST(mstudioseqdesc_t, (byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + e->latched.prevsequence; panim = gEngine.R_StudioGetAnim( m_pStudioHeader, RI.currentmodel, pseqdesc ); // clip prevframe @@ -770,7 +770,7 @@ void R_StudioSetupBones( cl_entity_t *e ) e->latched.prevframe = f; } - pbones = (mstudiobone_t *)((byte *)m_pStudioHeader + m_pStudioHeader->boneindex); + pbones = PTR_CAST(mstudiobone_t, (byte *)m_pStudioHeader + m_pStudioHeader->boneindex); // calc gait animation if( m_pPlayerInfo && m_pPlayerInfo->gaitsequence != 0 ) @@ 
-780,7 +780,7 @@ void R_StudioSetupBones( cl_entity_t *e ) if( m_pPlayerInfo->gaitsequence >= m_pStudioHeader->numseq ) m_pPlayerInfo->gaitsequence = 0; - pseqdesc = (mstudioseqdesc_t *)((byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + m_pPlayerInfo->gaitsequence; + pseqdesc = PTR_CAST(mstudioseqdesc_t, (byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + m_pPlayerInfo->gaitsequence; panim = gEngine.R_StudioGetAnim( m_pStudioHeader, RI.currentmodel, pseqdesc ); R_StudioCalcRotations( e, pos2, q2, pseqdesc, panim, m_pPlayerInfo->gaitframe ); @@ -824,7 +824,7 @@ static void R_StudioSaveBones( void ) mstudiobone_t *pbones; int i; - pbones = (mstudiobone_t *)((byte *)m_pStudioHeader + m_pStudioHeader->boneindex); + pbones = PTR_CAST(mstudiobone_t, (byte *)m_pStudioHeader + m_pStudioHeader->boneindex); g_studio.cached_numbones = m_pStudioHeader->numbones; for( i = 0; i < m_pStudioHeader->numbones; i++ ) @@ -861,8 +861,8 @@ void R_StudioBuildNormalTable( void ) { short *ptricmds; - pmesh = (mstudiomesh_t *)((byte *)m_pStudioHeader + m_pSubModel->meshindex) + j; - ptricmds = (short *)((byte *)m_pStudioHeader + pmesh->triindex); + pmesh = PTR_CAST(mstudiomesh_t, (byte *)m_pStudioHeader + m_pSubModel->meshindex) + j; + ptricmds = PTR_CAST(short, (byte *)m_pStudioHeader + pmesh->triindex); while(( i = *( ptricmds++ ))) { @@ -912,8 +912,8 @@ void R_StudioGenerateNormals( void ) { short *ptricmds; - pmesh = (mstudiomesh_t *)((byte *)m_pStudioHeader + m_pSubModel->meshindex) + j; - ptricmds = (short *)((byte *)m_pStudioHeader + pmesh->triindex); + pmesh = PTR_CAST(mstudiomesh_t, (byte *)m_pStudioHeader + m_pSubModel->meshindex) + j; + ptricmds = PTR_CAST(short, (byte *)m_pStudioHeader + pmesh->triindex); while(( i = *( ptricmds++ ))) { @@ -1058,7 +1058,7 @@ static void R_StudioCalcAttachments( void ) int i; // calculate attachment points - pAtt = (mstudioattachment_t *)((byte *)m_pStudioHeader + m_pStudioHeader->attachmentindex); + pAtt = PTR_CAST(mstudioattachment_t, (byte *)m_pStudioHeader + m_pStudioHeader->attachmentindex); for( i = 0; i < Q_min( MAXSTUDIOATTACHMENTS, m_pStudioHeader->numattachments ); i++ ) { @@ -1075,12 +1075,12 @@ static void R_StudioSetupModel( int bodypart, void **ppbodypart, void **ppsubmod g_studio_current.bodypart_index = bodypart; - m_pBodyPart = (mstudiobodyparts_t *)((byte *)m_pStudioHeader + m_pStudioHeader->bodypartindex) + bodypart; + m_pBodyPart = PTR_CAST(mstudiobodyparts_t, (byte *)m_pStudioHeader + m_pStudioHeader->bodypartindex) + bodypart; index = RI.currententity->curstate.body / m_pBodyPart->base; index = index % m_pBodyPart->nummodels; - m_pSubModel = (mstudiomodel_t *)((byte *)m_pStudioHeader + m_pBodyPart->modelindex) + index; + m_pSubModel = PTR_CAST(mstudiomodel_t, (byte *)m_pStudioHeader + m_pBodyPart->modelindex) + index; if( ppbodypart ) *ppbodypart = m_pBodyPart; if( ppsubmodel ) *ppsubmodel = m_pSubModel; @@ -1543,7 +1543,7 @@ static int R_StudioSetupSkin( studiohdr_t *ptexturehdr, int index ) // NOTE: user may ignore to call StudioRemapColors and remap_info will be unavailable if( m_fDoRemap ) ptexture = gEngine.CL_GetRemapInfoForEntity( RI.currententity )->ptexture; - if( !ptexture ) ptexture = (mstudiotexture_t *)((byte *)ptexturehdr + ptexturehdr->textureindex); // fallback + if( !ptexture ) ptexture = PTR_CAST(mstudiotexture_t, (byte *)ptexturehdr + ptexturehdr->textureindex); // fallback /* FIXME VK if( r_lightmap->value && !r_fullbright->value ) @@ -1572,7 +1572,7 @@ mstudiotexture_t *R_StudioGetTexture( cl_entity_t *e ) if( !thdr ) 
return NULL; if( m_fDoRemap ) ptexture = gEngine.CL_GetRemapInfoForEntity( e )->ptexture; - else ptexture = (mstudiotexture_t *)((byte *)thdr + thdr->textureindex); + else ptexture = PTR_CAST(mstudiotexture_t, (byte *)thdr + thdr->textureindex); return ptexture; } @@ -1909,14 +1909,14 @@ static void buildStudioSubmodelGeometry(build_submodel_geometry_t args) { // safety bounding the skinnum const int m_skinnum = bound( 0, RI.currententity->curstate.skin, ( m_pStudioHeader->numskinfamilies - 1 )); - const mstudiotexture_t *const ptexture = (const mstudiotexture_t *)((const byte *)m_pStudioHeader + m_pStudioHeader->textureindex); + const mstudiotexture_t *const ptexture = PTR_CAST(const mstudiotexture_t, (const byte *)m_pStudioHeader + m_pStudioHeader->textureindex); const byte *const pvertbone = ((const byte *)m_pStudioHeader + m_pSubModel->vertinfoindex); const byte *pnormbone = ((const byte *)m_pStudioHeader + m_pSubModel->norminfoindex); - const vec3_t *pstudioverts = (const vec3_t *)((const byte *)m_pStudioHeader + m_pSubModel->vertindex); - const vec3_t *pstudionorms = (const vec3_t *)((const byte *)m_pStudioHeader + m_pSubModel->normindex); + const vec3_t *pstudioverts = PTR_CAST(const vec3_t, (const byte *)m_pStudioHeader + m_pSubModel->vertindex); + const vec3_t *pstudionorms = PTR_CAST(const vec3_t, (const byte *)m_pStudioHeader + m_pSubModel->normindex); - const short *pskinref = (short *)((byte *)m_pStudioHeader + m_pStudioHeader->skinindex); + const short *pskinref = PTR_CAST(const short, (byte *)m_pStudioHeader + m_pStudioHeader->skinindex); if( m_skinnum != 0 ) pskinref += (m_skinnum * m_pStudioHeader->numskinref); // Compute inverse entity matrix, as we need vertices to be in local model space instead of global world space. @@ -1973,7 +1973,7 @@ static void buildStudioSubmodelGeometry(build_submodel_geometry_t args) { R_StudioGenerateNormals(); - const mstudiomesh_t *const pmesh = (mstudiomesh_t *)((byte *)m_pStudioHeader + m_pSubModel->meshindex); + const mstudiomesh_t *const pmesh = PTR_CAST(const mstudiomesh_t, (byte *)m_pStudioHeader + m_pSubModel->meshindex); qboolean need_sort = false; for( int j = 0, k = 0; j < m_pSubModel->nummesh; j++ ) @@ -2024,12 +2024,12 @@ static void buildStudioSubmodelGeometry(build_submodel_geometry_t args) { */ // NOTE: rewind normals at start - pstudionorms = (const vec3_t *)((const byte *)m_pStudioHeader + m_pSubModel->normindex); + pstudionorms = PTR_CAST(const vec3_t, (const byte *)m_pStudioHeader + m_pSubModel->normindex); int vertices_offset = 0, indices_offset = 0; for( int j = 0; j < m_pSubModel->nummesh; j++ ) { const mstudiomesh_t *const pmesh = g_studio.meshes[j].mesh; - const short *const ptricmds = (short *)((byte *)m_pStudioHeader + pmesh->triindex); + const short *const ptricmds = PTR_CAST(const short, (byte *)m_pStudioHeader + pmesh->triindex); const int face_flags = ptexture[pskinref[pmesh->skinref]].flags | g_nForceFaceFlags; @@ -2113,9 +2113,9 @@ static qboolean studioSubmodelRenderInit(r_studio_submodel_render_t *render_subm // TODO should this be part of r_studio_model_info_t? 
int vertex_count = 0, index_count = 0; { - const mstudiomesh_t *const pmesh = (mstudiomesh_t *)((byte *)m_pStudioHeader + m_pSubModel->meshindex); + const mstudiomesh_t *const pmesh = PTR_CAST(const mstudiomesh_t, (byte *)m_pStudioHeader + m_pSubModel->meshindex); for(int i = 0; i < submodel->nummesh; i++) { - const short* const ptricmds = (short *)((byte *)m_pStudioHeader + pmesh[i].triindex); + const short* const ptricmds = PTR_CAST(const short, (byte *)m_pStudioHeader + pmesh[i].triindex); addVerticesIndicesCounts(ptricmds, &vertex_count, &index_count); } @@ -2449,7 +2449,7 @@ int R_GetEntityRenderMode( cl_entity_t *ent ) } return ent->curstate.rendermode; } - ptexture = (mstudiotexture_t *)((byte *)phdr + phdr->textureindex); + ptexture = PTR_CAST(mstudiotexture_t, (byte *)phdr + phdr->textureindex); for( opaque = trans = i = 0; i < phdr->numtextures; i++, ptexture++ ) { @@ -2506,7 +2506,7 @@ static void R_StudioClientEvents( void ) } sequence = bound( 0, e->curstate.sequence, m_pStudioHeader->numseq - 1 ); - pseqdesc = (mstudioseqdesc_t *)((byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + sequence; + pseqdesc = PTR_CAST(mstudioseqdesc_t, (byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + sequence; // no events for this animation if( pseqdesc->numevents == 0 ) @@ -2514,7 +2514,7 @@ static void R_StudioClientEvents( void ) end = R_StudioEstimateFrame( e, pseqdesc, g_studio.time ); start = end - e->curstate.framerate * gpGlobals->frametime * pseqdesc->fps; - pevent = (mstudioevent_t *)((byte *)m_pStudioHeader + pseqdesc->eventindex); + pevent = PTR_CAST(mstudioevent_t, (byte *)m_pStudioHeader + pseqdesc->eventindex); if( e->latched.sequencetime == e->curstate.animtime ) { @@ -2572,7 +2572,7 @@ static void R_StudioSetupRenderer( int rendermode ) if( phdr && FBitSet( phdr->flags, STUDIO_HAS_BONEINFO )) { // NOTE: extended boneinfo goes immediately after bones - mstudioboneinfo_t *boneinfo = (mstudioboneinfo_t *)((byte *)phdr + phdr->boneindex + phdr->numbones * sizeof( mstudiobone_t )); + mstudioboneinfo_t *boneinfo = PTR_CAST(mstudioboneinfo_t, (byte *)phdr + phdr->boneindex + phdr->numbones * sizeof( mstudiobone_t )); for( i = 0; i < phdr->numbones; i++ ) Matrix3x4_ConcatTransforms( g_studio.worldtransform[i], g_studio.bonestransform[i], boneinfo[i].poseToBone ); @@ -2625,8 +2625,8 @@ static void R_StudioDrawPointsShadow( void ) { short *ptricmds; - pmesh = (mstudiomesh_t *)((byte *)m_pStudioHeader + m_pSubModel->meshindex) + k; - ptricmds = (short *)((byte *)m_pStudioHeader + pmesh->triindex); + pmesh = PTR_CAST(mstudiomesh_t, (byte *)m_pStudioHeader + m_pSubModel->meshindex) + k; + ptricmds = PTR_CAST(short, (byte *)m_pStudioHeader + pmesh->triindex); /* FIXME VK r_stats.c_studio_polys += pmesh->numtris; @@ -2815,7 +2815,7 @@ void R_StudioProcessGait( entity_state_t *pplayer ) dt = bound( 0.0f, g_studio.frametime, 1.0f ); - pseqdesc = (mstudioseqdesc_t *)((byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + RI.currententity->curstate.sequence; + pseqdesc = PTR_CAST(mstudioseqdesc_t, (byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + RI.currententity->curstate.sequence; R_StudioPlayerBlend( pseqdesc, &iBlend, &RI.currententity->angles[PITCH] ); @@ -2861,7 +2861,7 @@ void R_StudioProcessGait( entity_state_t *pplayer ) if( pplayer->gaitsequence >= m_pStudioHeader->numseq ) pplayer->gaitsequence = 0; - pseqdesc = (mstudioseqdesc_t *)((byte *)m_pStudioHeader + m_pStudioHeader->seqindex) + pplayer->gaitsequence; + pseqdesc = PTR_CAST(mstudioseqdesc_t, (byte *)m_pStudioHeader + 
m_pStudioHeader->seqindex) + pplayer->gaitsequence; // calc gait frame if( pseqdesc->linearmovement[0] > 0 ) @@ -3403,7 +3403,7 @@ void Mod_StudioLoadTextures( model_t *mod, void *data ) if( !phdr ) return; - ptexture = (mstudiotexture_t *)(((byte *)phdr) + phdr->textureindex); + ptexture = PTR_CAST(mstudiotexture_t, ((byte *)phdr) + phdr->textureindex); if( phdr->textureindex > 0 && phdr->numtextures <= MAXSTUDIOSKINS ) { for( i = 0; i < phdr->numtextures; i++ ) @@ -3420,7 +3420,7 @@ void Mod_StudioUnloadTextures( void *data ) if( !phdr ) return; - ptexture = (mstudiotexture_t *)(((byte *)phdr) + phdr->textureindex); + ptexture = PTR_CAST(mstudiotexture_t, ((byte *)phdr) + phdr->textureindex); // release all textures for( i = 0; i < phdr->numtextures; i++ ) @@ -3538,12 +3538,12 @@ static void pfnGetAliasScale( float *x, float *y ) static float ****pfnStudioGetBoneTransform( void ) { - return (float ****)g_studio.bonestransform; + return PTR_CAST(float ***, g_studio.bonestransform); } static float ****pfnStudioGetLightTransform( void ) { - return (float ****)g_studio.lighttransform; + return PTR_CAST(float ***, g_studio.lighttransform); } static float ***pfnStudioGetAliasTransform( void ) @@ -3553,7 +3553,7 @@ static float ***pfnStudioGetAliasTransform( void ) static float ***pfnStudioGetRotationMatrix( void ) { - return (float ***)g_studio.rotationmatrix; + return PTR_CAST(float **, g_studio.rotationmatrix); } static engine_studio_api_t gStudioAPI = diff --git a/ref/vk/vk_studio_model.c b/ref/vk/vk_studio_model.c index aaddafaeaa..7c0c480df5 100644 --- a/ref/vk/vk_studio_model.c +++ b/ref/vk/vk_studio_model.c @@ -119,7 +119,7 @@ static qboolean isBoneSame(int b) { /* } */ static void studioModelProcessBonesAnimations(const model_t *const model, const studiohdr_t *const hdr, r_studio_submodel_info_t *submodels, int submodels_count) { - const mstudiobone_t* const pbone = (mstudiobone_t *)((byte *)hdr + hdr->boneindex); + const mstudiobone_t* const pbone = PTR_CAST(const mstudiobone_t, (byte *)hdr + hdr->boneindex); /* for (int i = 0; i < hdr->numbones; ++i) { */ /* const mstudiobone_t* const bone = pbone + i; */ @@ -127,7 +127,7 @@ static void studioModelProcessBonesAnimations(const model_t *const model, const /* } */ for (int i = 0; i < hdr->numseq; ++i) { - const mstudioseqdesc_t *const pseqdesc = (mstudioseqdesc_t *)((byte *)hdr + hdr->seqindex) + i; + const mstudioseqdesc_t *const pseqdesc = PTR_CAST(const mstudioseqdesc_t, (byte *)hdr + hdr->seqindex) + i; const mstudioanim_t* const panim = gEngine.R_StudioGetAnim( (studiohdr_t*)hdr, (model_t*)model, (mstudioseqdesc_t*)pseqdesc ); @@ -187,11 +187,11 @@ static void studioModelProcessBonesAnimations(const model_t *const model, const static int studioModelGetSubmodels(const studiohdr_t *hdr, r_studio_submodel_info_t *out_submodels) { int count = 0; for (int i = 0; i < hdr->numbodyparts; ++i) { - const mstudiobodyparts_t* const bodypart = (mstudiobodyparts_t *)((byte *)hdr + hdr->bodypartindex) + i; + const mstudiobodyparts_t* const bodypart = PTR_CAST(const mstudiobodyparts_t, (byte *)hdr + hdr->bodypartindex) + i; if (out_submodels) { DEBUG(" Bodypart %d/%d: %s (nummodels=%d)", i, hdr->numbodyparts - 1, bodypart->name, bodypart->nummodels); for (int j = 0; j < bodypart->nummodels; ++j) { - const mstudiomodel_t * const submodel = (mstudiomodel_t *)((byte *)hdr + bodypart->modelindex) + j; + const mstudiomodel_t * const submodel = PTR_CAST(const mstudiomodel_t, (byte *)hdr + bodypart->modelindex) + j; DEBUG(" Submodel %d: %s", j, 
submodel->name); out_submodels[count++].submodel_key = submodel; } @@ -212,7 +212,7 @@ const r_studio_model_info_t* R_StudioModelPreload(model_t *mod) { DEBUG("Studio model %p(%s) hdr=%p(%s), sequences=%d:", mod, mod->name, hdr, hdr->name, hdr->numseq); for (int i = 0; i < hdr->numseq; ++i) { - const mstudioseqdesc_t *const pseqdesc = (mstudioseqdesc_t *)((byte *)hdr + hdr->seqindex) + i; + const mstudioseqdesc_t *const pseqdesc = PTR_CAST(const mstudioseqdesc_t, (byte *)hdr + hdr->seqindex) + i; DEBUG(" %d: fps=%f numframes=%d", i, pseqdesc->fps, pseqdesc->numframes); } diff --git a/ref/vk/vk_swapchain.c b/ref/vk/vk_swapchain.c index f0238d5c45..605abb6d0d 100644 --- a/ref/vk/vk_swapchain.c +++ b/ref/vk/vk_swapchain.c @@ -259,11 +259,34 @@ r_vk_swapchain_framebuffer_t R_VkSwapchainAcquire( VkSemaphore sem_image_availa break; } + // This is temporary non-owning placeholder object. + // It is used only for combuf barrier tracking. + ret.image = (r_vk_image_t) { + .image = g_swapchain.images[ret.index], + .view = g_swapchain.image_views[ret.index], + .width = g_swapchain.width, + .height = g_swapchain.height, + .depth = 1, + .mips = 1, + .layers = 1, + + .format = g_swapchain.image_format, + // TODO? .image_size = ??? + + .sync = { + .layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + .write = { + .access = VK_ACCESS_2_NONE, + .stage = VK_PIPELINE_STAGE_2_NONE, + }, + .read = { + .access = VK_ACCESS_2_NONE, + .stage = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, + }, + }, + }; + snprintf(ret.image.name, sizeof(ret.image.name), "framebuffer[%u]", ret.index); ret.framebuffer = g_swapchain.framebuffers[ret.index]; - ret.width = g_swapchain.width; - ret.height = g_swapchain.height; - ret.image = g_swapchain.images[ret.index]; - ret.view = g_swapchain.image_views[ret.index]; finalize: APROF_SCOPE_END(function); diff --git a/ref/vk/vk_swapchain.h b/ref/vk/vk_swapchain.h index 84bcadffd3..7dfc34d990 100644 --- a/ref/vk/vk_swapchain.h +++ b/ref/vk/vk_swapchain.h @@ -1,4 +1,5 @@ #include "vk_core.h" +#include "vk_image.h" // TODO this needs to be negotiated by swapchain creation // however, currently render pass also needs it so ugh @@ -11,10 +12,9 @@ void R_VkSwapchainShutdown( void ); typedef struct { uint32_t index; - uint32_t width, height; - VkFramebuffer framebuffer; // TODO move out - VkImage image; - VkImageView view; + // Non-owned image mostly for for sync/barrier tracking purposes + r_vk_image_t image; + VkFramebuffer framebuffer; } r_vk_swapchain_framebuffer_t; r_vk_swapchain_framebuffer_t R_VkSwapchainAcquire( VkSemaphore sem_image_available ); diff --git a/ref/vk/vk_textures.c b/ref/vk/vk_textures.c index 80600e0caa..bb2e76a565 100644 --- a/ref/vk/vk_textures.c +++ b/ref/vk/vk_textures.c @@ -2,7 +2,6 @@ #include "vk_core.h" #include "vk_descriptor.h" -#include "vk_staging.h" #include "vk_logs.h" #include "r_textures.h" #include "r_speeds.h" @@ -59,7 +58,7 @@ static void generateFallbackNoiseTextures( const rgbdata_t *pic ) { ERR("Generating bad quality regular noise textures as a fallback for blue noise textures"); const int blue_noise_count = pic->size / sizeof(uint32_t); - uint32_t *const scratch = (uint32_t*)pic->buffer; + uint32_t *const scratch = PTR_CAST(uint32_t, pic->buffer); // Fill with random data { @@ -349,9 +348,9 @@ static qboolean uploadRawKtx2( int tex_index, vk_texture_t *tex, const rgbdata_t const ktx2_index_t* index; const ktx2_level_t* levels; - header = (const ktx2_header_t*)(data + KTX2_IDENTIFIER_SIZE); - index = (const ktx2_index_t*)(data + KTX2_IDENTIFIER_SIZE + 
sizeof(ktx2_header_t)); - levels = (const ktx2_level_t*)(data + KTX2_IDENTIFIER_SIZE + sizeof(ktx2_header_t) + sizeof(ktx2_index_t)); + header = PTR_CAST(const ktx2_header_t, data + KTX2_IDENTIFIER_SIZE); + index = PTR_CAST(const ktx2_index_t, data + KTX2_IDENTIFIER_SIZE + sizeof(ktx2_header_t)); + levels = PTR_CAST(const ktx2_level_t, data + KTX2_IDENTIFIER_SIZE + sizeof(ktx2_header_t) + sizeof(ktx2_index_t)); DEBUG(" header:"); #define X(field) DEBUG(" " # field "=%d", header->field); @@ -395,6 +394,7 @@ static qboolean uploadRawKtx2( int tex_index, vk_texture_t *tex, const rgbdata_t .height = header->pixelHeight, .depth = Q_max(1, header->pixelDepth), .mips = header->levelCount, + // header->layerCount? header->faceCount? .layers = 1, // TODO or 6 for cubemap; header->faceCount .format = header->vkFormat, .tiling = VK_IMAGE_TILING_OPTIMAL, @@ -408,7 +408,6 @@ static qboolean uploadRawKtx2( int tex_index, vk_texture_t *tex, const rgbdata_t { R_VkImageUploadBegin(&tex->vk.image); - // TODO layers for (int mip = 0; mip < header->levelCount; ++mip) { const ktx2_level_t* const level = levels + mip; const size_t mip_size = level->byteLength; @@ -615,12 +614,6 @@ void R_VkTextureDestroy( int index, vk_texture_t *tex ) { if (tex->vk.image.image == VK_NULL_HANDLE) return; - // Need to make sure that there are no references to this texture anywhere. - // It might have been added to staging and then immediately deleted, leaving references to its vkimage - // in the staging command buffer. See https://github.com/w23/xash3d-fwgs/issues/464 - R_VkStagingFlushSync(); - XVK_CHECK(vkDeviceWaitIdle(vk_core.device)); - R_VkImageDestroy(&tex->vk.image); g_vktextures.stats.size_total -= tex->total_size; g_vktextures.stats.count--; diff --git a/scripts/gha/deps_linux.sh b/scripts/gha/deps_linux.sh index f0a05a2b82..29cb9e1a96 100755 --- a/scripts/gha/deps_linux.sh +++ b/scripts/gha/deps_linux.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -x cd $GITHUB_WORKSPACE @@ -30,7 +31,7 @@ mv SDL2-$SDL_VERSION SDL2_src # ref_vk requires Vulkan SDK wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - -sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-${VULKAN_SDK_VERSION}-focal.list https://packages.lunarg.com/vulkan/${VULKAN_SDK_VERSION}/lunarg-vulkan-${VULKAN_SDK_VERSION}-focal.list +sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-${VULKAN_SDK_VERSION}-jammy.list https://packages.lunarg.com/vulkan/${VULKAN_SDK_VERSION}/lunarg-vulkan-${VULKAN_SDK_VERSION}-jammy.list sudo apt update [ "$ARCH" = "i386" ] && SUFFIX=":i386" || SUFFIX="" sudo apt install -y vulkan-sdk"$SUFFIX"
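
Call-pattern sketch for the reworked barrier helpers in vk_resources.h/.c above. This is a minimal, hypothetical example: `passAddBarriers`, `resources` and `writes` are illustrative names and not part of the patch; only `r_vk_barrier_t`, `R_VkResourceAddToBarrier()` and `R_VkBarrierCommit()` come from the tree. It assumes a pass that touches a set of resources before a compute dispatch.

```c
#include "vk_resources.h"

// Accumulate per-resource barriers, then flush them as one sync2 barrier into the combuf.
static void passAddBarriers(struct vk_combuf_s *combuf, vk_resource_t *const *resources, const qboolean *writes, int count) {
	r_vk_barrier_t barrier = {0};

	for (int i = 0; i < count; ++i)
		R_VkResourceAddToBarrier(resources[i], writes[i], VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, &barrier);

	// Issues a single R_VkCombufIssueBarrier() with all collected image and buffer
	// barriers, then resets the arrays so the same r_vk_barrier_t can be reused.
	R_VkBarrierCommit(combuf, &barrier, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
}
```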
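
Usage sketch for the new per-user staging API declared in vk_staging.h above. The `my_*` names, the pending-copy array and `myCommit()` are illustrative and not part of the patch; only the `R_VkStaging*` calls, `vk_buffer_t` and `vk_combuf_t` come from the tree. The assumed flow: lock a region, write into it, record the actual `vkCmdCopyBuffer` while building the frame combuf, then release the regions with `R_VkStagingUnlockBulk()`; the `push` callback is the last-chance path invoked from `R_VkStagingFrameEpilogue()` for regions that were locked but never recorded.

```c
#include "vk_staging.h"
#include "vk_buffer.h"
#include "vk_combuf.h"

#include <string.h>

static struct {
	r_vkstaging_user_handle_t staging;
	vk_buffer_t buffer;        // destination GPU buffer owned by this module
	VkBuffer staging_buffer;   // staging VkBuffer the locked regions point into
	VkBufferCopy pending[64];  // regions locked but not yet recorded into a combuf
	uint32_t pending_count;
} my_module;

// Record all pending copies into the given combuf and release the staging regions.
static void myCommit(struct vk_combuf_s *combuf) {
	if (my_module.pending_count == 0)
		return;

	vkCmdCopyBuffer(combuf->cmdbuf, my_module.staging_buffer, my_module.buffer.buffer,
		my_module.pending_count, my_module.pending);

	R_VkStagingUnlockBulk(my_module.staging, my_module.pending_count);
	my_module.pending_count = 0;
}

// Last-chance path: staging invokes this from R_VkStagingFrameEpilogue() if this
// user still holds locked regions when the frame combuf is about to be submitted.
static void myPush(void *userptr, struct vk_combuf_s *combuf, uint32_t pending) {
	(void)userptr; (void)pending;
	myCommit(combuf);
}

qboolean myInit(void) {
	my_module.staging = R_VkStagingUserCreate((r_vkstaging_user_create_t){
		.name = "my_module",
		.userptr = NULL,
		.push = myPush,
	});

	return VK_BufferCreate("my_module", &my_module.buffer, 1024 * 1024,
		VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
		VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}

// Upload path: lock a staging region, write the data, remember the copy for myCommit().
void myUpload(const void *data, uint32_t size, uint32_t dst_offset) {
	const r_vkstaging_region_t region = R_VkStagingLock(my_module.staging, size);
	memcpy(region.ptr, data, size);

	// All regions come from the single shared staging buffer.
	my_module.staging_buffer = region.buffer;
	my_module.pending[my_module.pending_count++] = (VkBufferCopy){
		.srcOffset = region.offset,
		.dstOffset = dst_offset,
		.size = size,
	};
}
```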
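
A related sketch of how a frame loop might pair `R_VkStagingFrameEpilogue()` with `R_VkStagingFrameCompleted()`, following the comments in vk_staging.h above. The `my_frame_t` wrapper and the submit/complete hooks are hypothetical stand-ins for whatever framectl actually does; the point is only that the returned tag travels with the frame and is handed back once that frame's GPU work is known to be finished, so the staging ring can be freed up to that boundary.

```c
#include "vk_staging.h"
#include "vk_combuf.h"

typedef struct {
	uint32_t staging_tag; // staging ring position at submit time
	// ... fence, combuf, etc.
} my_frame_t;

// Called right before ending and submitting the frame combuf.
static void myFrameSubmit(my_frame_t *frame, struct vk_combuf_s *combuf) {
	// Gives staging users a last chance to record their pending uploads,
	// and remembers how far the staging ring has advanced for this frame.
	frame->staging_tag = R_VkStagingFrameEpilogue(combuf);
	// ... end the combuf and submit it to the queue here ...
}

// Called after this frame's fence has signaled.
static void myFrameCompleted(my_frame_t *frame) {
	// Frees everything in the staging ring up to this frame's boundary.
	R_VkStagingFrameCompleted(frame->staging_tag);
}
```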