ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: ad11111303575137d78ca4118bb1601bcc11b69d
Parent: 920a6bc097353f8f2c05edc2fe12b3d2406d26b7
Author: Randy Palamar
Date:   Tue,  3 Mar 2026 09:33:00 -0700

core: allow shaders to access buffers through descriptors

Depending on usage a buffer reference pointer can result in
significant performance degradation. This is presumably because
the compiler needs to insert additional busy work math
instructions to access items. In the case of DAS this also meant
that 4 global load instructions were needed when performing cubic
interpolation instead of 2 buffer load instructions when using a
binding. This caused a slowdown of ~40% for my 2D test benchmark
(presumably worse for 3D). There still seems to be a minor
slowdown over the OpenGL code but this is more than made up for in
other stages and that can also be corrected later.

Currently I only added this to the DAS shader for both the ping
pong buffer and the output buffer and the Filter shader for the
ping pong buffer. I will likely do it for other portions as well
later.

closes #43

Diffstat:
Mbeamformer.c | 9+++++++++
Mbeamformer.meta | 53++++++++++++++++++++++++++++++++++++++---------------
Mbeamformer_core.c | 52+++++++++++++++++++++++++++++++++++-----------------
Mbeamformer_internal.h | 10+++++++++-
Mgenerated/beamformer.meta.c | 57++++++++++++++++++++++++++++++++++++++++++---------------
Mshaders/das.glsl | 43++++++++++++++++++++-----------------------
Mshaders/filter.glsl | 26+++++++++++++-------------
Mvulkan.c | 181+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
Mvulkan.h | 100++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
9 files changed, 405 insertions(+), 126 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -223,6 +223,15 @@ beamformer_init(BeamformerInput *input) // NOTE(rnp): if this becomes an issue we may be able to get by in some other way fatal(s8("Failed to allocate space for beamformed data\n")); } + + BeamformerShaderResourceInfo shader_resource_infos[] = { + { + .kind = BeamformerShaderResourceKind_Buffer, + .handle = cs->backlog.buffer->handle, + .slot = BeamformerShaderBufferSlot_BeamformedData, + }, + }; + vk_bind_shader_resources(shader_resource_infos, countof(shader_resource_infos)); } beamformer_load_cuda_library(ctx, input->cuda_library_handle, memory); diff --git a/beamformer.meta b/beamformer.meta @@ -7,6 +7,17 @@ @Constant(16) MaxParameterBlocks @Constant(3) MaxRawDataFramesInFlight +@Enumeration ShaderResourceKind +{ + Buffer +} + +@Enumeration ShaderBufferSlot +{ + BeamformedData + PingPong +} + @Enumeration DecodeMode { None @@ -255,6 +266,14 @@ `read_only global s8 beamformer_interpolation_mode_strings[] = {` @Expand(InterpolationMode) ` s8_comp("$(name)"),` `};` + `` + `read_only global s8 beamformer_shader_resource_kind_strings[] = {` + @Expand(ShaderResourceKind) ` s8_comp("$(name)"),` + `};` + `` + `read_only global s8 game_shader_buffer_slot_strings[] = {` + @Expand(ShaderBufferSlot) ` s8_comp("$(name)"),` + `};` } @ShaderGroup Compute @@ -296,6 +315,8 @@ @Shader(filter.glsl) Filter { @Enumeration DataKind + @Enumeration ShaderBufferSlot + @Enumeration ShaderResourceKind @ShaderAlias Demodulate @@ -321,9 +342,9 @@ @PushConstants { - [input_data U64] - [output_data U64] - [filter_coefficients U64] + [input_data U64] + [filter_coefficients U64] + [output_element_offset U32] } } @@ -335,6 +356,8 @@ @Enumeration DataKind @Enumeration InterpolationMode @Enumeration RCAOrientation + @Enumeration ShaderBufferSlot + @Enumeration ShaderResourceKind @Struct DASArrayParameters @@ -364,18 +387,18 @@ @PushConstants { - [xdc_transform M4] - [voxel_transform M4] - [xdc_element_pitch V2] - [rf_data U64] - 
[output_data U64] - [incoherent_output U64] - [array_parameters U64] - [output_size_x U32] - [output_size_y U32] - [output_size_z U32] - [cycle_t U32] - [channel_offset S32] + [xdc_transform M4] + [voxel_transform M4] + [xdc_element_pitch V2] + [array_parameters U64] + [rf_element_offset U32] + [output_element_offset U32] + [incoherent_element_offset U32] + [output_size_x U32] + [output_size_y U32] + [output_size_z U32] + [cycle_t U32] + [channel_offset S32] } } diff --git a/beamformer_core.c b/beamformer_core.c @@ -741,6 +741,15 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp, if (ctx->compute_context.ping_pong_buffer.size < buffer_size) { GPUBufferAllocateInfo allocate_info = {.size = buffer_size, .label = s8("PingPongBuffer")}; vk_buffer_allocate(&ctx->compute_context.ping_pong_buffer, &allocate_info); + + BeamformerShaderResourceInfo shader_resource_infos[] = { + { + .kind = BeamformerShaderResourceKind_Buffer, + .handle = ctx->compute_context.ping_pong_buffer.handle, + .slot = BeamformerShaderBufferSlot_PingPong, + }, + }; + vk_bind_shader_resources(shader_resource_infos, countof(shader_resource_infos)); // TODO(rnp): figure out how to share with CUDA } @@ -863,11 +872,14 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c case BeamformerShaderKind_Filter: case BeamformerShaderKind_Demodulate: { + u64 element_size = beamformer_data_kind_byte_size[cp->shader_descriptors[shader_slot].bake.Filter.data_kind]; + b32 demod = cp->pipeline.shaders[shader_slot] == BeamformerShaderKind_Demodulate; + u32 filter_slot = cp->pipeline.parameters[shader_slot].filter_slot; BeamformerFilterPushConstants pc = { - .filter_coefficients = cp->filters[filter_slot].buffer.gpu_pointer, - .output_data = pp_output_pointer, - .input_data = shader_slot == 0 ? rf_pointer : pp_input_pointer, + .filter_coefficients = cp->filters[filter_slot].buffer.gpu_pointer, + .input_data = shader_slot == 0 ? 
rf_pointer : pp_input_pointer, + .output_element_offset = output_index * pp_size / element_size / (demod ? 2 : 1), }; GPUMemoryBarrierInfo barrier = { @@ -888,20 +900,26 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c GPUBuffer *b = cc->backlog.buffer; - u64 frame_size = beamformer_frame_byte_size(frame->points, frame->data_kind); - u64 incoherent_size = frame_size / beamformer_data_kind_element_count[frame->data_kind]; + + u64 frame_element_size = beamformer_data_kind_byte_size[frame->data_kind]; + u64 frame_size = beamformer_frame_byte_size(frame->points, frame->data_kind); + u64 iframe_element_size = beamformer_data_kind_byte_size[frame->data_kind] + / beamformer_data_kind_element_count[frame->data_kind]; + u64 iframe_size = frame_size / beamformer_data_kind_element_count[frame->data_kind]; + + u64 element_size = beamformer_data_kind_byte_size[cp->shader_descriptors[shader_slot].bake.DAS.data_kind]; BeamformerDASPushConstants pc = { - .xdc_element_pitch = cp->xdc_element_pitch, - .rf_data = pp_input_pointer, - .output_data = b->gpu_pointer + frame->buffer_offset, - .incoherent_output = b->gpu_pointer + b->size - incoherent_size, - .array_parameters = cp->array_parameters.gpu_pointer + offsetof(BeamformerDASArrayParameters, focal_vectors), - .output_size_x = cp->output_points.x, - .output_size_y = cp->output_points.y, - .output_size_z = cp->output_points.z, - .cycle_t = das_cycle_t++, - .channel_offset = channel_offset, + .xdc_element_pitch = cp->xdc_element_pitch, + .rf_element_offset = input_index * pp_size / element_size, + .output_element_offset = frame->buffer_offset / frame_element_size, + .incoherent_element_offset = (b->size - iframe_size) / iframe_element_size, + .output_size_x = cp->output_points.x, + .output_size_y = cp->output_points.y, + .output_size_z = cp->output_points.z, + .cycle_t = das_cycle_t++, + .channel_offset = channel_offset, + .array_parameters = cp->array_parameters.gpu_pointer + 
offsetof(BeamformerDASArrayParameters, focal_vectors), }; mem_copy(pc.voxel_transform.E, cp->voxel_transform.E, sizeof(pc.voxel_transform)); mem_copy(pc.xdc_transform.E, cp->xdc_transform.E, sizeof(pc.xdc_transform)); @@ -916,8 +934,8 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c }, { .gpu_buffer = b, - .offset = pc.incoherent_output - b->gpu_pointer, - .size = incoherent_size, + .offset = pc.incoherent_element_offset * iframe_element_size, + .size = iframe_size, }, }; diff --git a/beamformer_internal.h b/beamformer_internal.h @@ -59,7 +59,7 @@ typedef struct { } VulkanPipelineCreateInfo; typedef struct { - VulkanHandle buffer; + VulkanHandle handle; u64 gpu_pointer; i64 size; @@ -125,6 +125,12 @@ typedef struct { u32 normals_offset; } RenderModel; +typedef struct { + BeamformerShaderResourceKind kind; + VulkanHandle handle; + u32 slot; +} BeamformerShaderResourceInfo; + #include "threads.c" #include "util_os.c" @@ -148,6 +154,8 @@ DEBUG_IMPORT void vk_render_model_allocate(GPUBuffer *, void *indices, u64 index DEBUG_IMPORT void vk_render_model_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal); DEBUG_IMPORT void vk_render_model_release(GPUBuffer *); +DEBUG_IMPORT void vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count); + /* NOTE: Pipelines do not have bindings. Data should be passed using push constants. * In particular the push constants should contain pointers to gpu memory using the * BufferDeviceAddress extension. 
*/ diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -15,6 +15,17 @@ // NOTE: Constants (Float) typedef enum { + BeamformerShaderResourceKind_Buffer = 0, + BeamformerShaderResourceKind_Count, +} BeamformerShaderResourceKind; + +typedef enum { + BeamformerShaderBufferSlot_BeamformedData = 0, + BeamformerShaderBufferSlot_PingPong = 1, + BeamformerShaderBufferSlot_Count, +} BeamformerShaderBufferSlot; + +typedef enum { BeamformerDecodeMode_None = 0, BeamformerDecodeMode_Hadamard = 1, BeamformerDecodeMode_Count, @@ -181,18 +192,18 @@ typedef struct { typedef struct { u64 input_data; - u64 output_data; u64 filter_coefficients; + u32 output_element_offset; } BeamformerFilterPushConstants; typedef struct { m4 xdc_transform; m4 voxel_transform; v2 xdc_element_pitch; - u64 rf_data; - u64 output_data; - u64 incoherent_output; u64 array_parameters; + u32 rf_element_offset; + u32 output_element_offset; + u32 incoherent_element_offset; u32 output_size_x; u32 output_size_y; u32 output_size_z; @@ -474,6 +485,15 @@ read_only global s8 beamformer_interpolation_mode_strings[] = { s8_comp("Cubic"), }; +read_only global s8 beamformer_shader_resource_kind_strings[] = { + s8_comp("Buffer"), +}; + +read_only global s8 game_shader_buffer_slot_strings[] = { + s8_comp("BeamformedData"), + s8_comp("PingPong"), +}; + read_only global s8 beamformer_shader_names[] = { s8_comp("CudaDecode"), s8_comp("CudaHilbert"), @@ -567,10 +587,17 @@ read_only global s8 beamformer_shader_global_header_strings[] = { "};\n" "\n"), s8_comp("" + "#define ShaderBufferSlot_BeamformedData 0\n" + "#define ShaderBufferSlot_PingPong 1\n" + "\n"), + s8_comp("" + "#define ShaderResourceKind_Buffer 0\n" + "\n"), + s8_comp("" "layout(push_constant, std430) uniform PushConstants {\n" " uint64_t input_data;\n" - " uint64_t output_data;\n" " uint64_t filter_coefficients;\n" + " uint32_t output_element_offset;\n" "};\n" "\n"), s8_comp("#define MaxChannelCount (256)\n\n"), @@ -610,10 +637,10 @@ read_only 
global s8 beamformer_shader_global_header_strings[] = { " f32mat4 xdc_transform;\n" " f32mat4 voxel_transform;\n" " f32vec2 xdc_element_pitch;\n" - " uint64_t rf_data;\n" - " uint64_t output_data;\n" - " uint64_t incoherent_output;\n" " uint64_t array_parameters;\n" + " uint32_t rf_element_offset;\n" + " uint32_t output_element_offset;\n" + " uint32_t incoherent_element_offset;\n" " uint32_t output_size_x;\n" " uint32_t output_size_y;\n" " uint32_t output_size_z;\n" @@ -690,19 +717,19 @@ read_only global b8 beamformer_shader_primitive_is_vertex[] = { read_only global i32 *beamformer_shader_header_vectors[] = { (i32 []){0, 1, 2}, - (i32 []){0, 3}, - (i32 []){4, 5, 0, 6, 7, 8, 9}, - (i32 []){0, 10}, + (i32 []){0, 3, 4, 5}, + (i32 []){6, 7, 0, 8, 9, 3, 4, 10, 11}, + (i32 []){0, 12}, 0, - (i32 []){0, 11}, - (i32 []){12}, (i32 []){0, 13}, + (i32 []){14}, + (i32 []){0, 15}, }; read_only global i32 beamformer_shader_header_vector_lengths[] = { 3, - 2, - 7, + 4, + 9, 2, 0, 2, diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -6,7 +6,6 @@ #define RESULT_INCOHERENT_CAST(a) (a).y #endif #define SAMPLE_TYPE float - #define SAMPLE_BYTES 4 #elif DataKind == DataKind_Float32Complex #if CoherencyWeighting #define RESULT_TYPE vec3 @@ -14,7 +13,6 @@ #define RESULT_INCOHERENT_CAST(a) (a).z #endif #define SAMPLE_TYPE vec2 - #define SAMPLE_BYTES 8 #else #error DataKind unsupported for DAS #endif @@ -33,16 +31,16 @@ #define RESULT_STORE(a) (a) #endif -layout(std430, buffer_reference, buffer_reference_align = 64) restrict readonly buffer RF { - SAMPLE_TYPE values[]; +layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_PingPong) readonly buffer RF { + SAMPLE_TYPE rf[]; }; -layout(std430, buffer_reference, buffer_reference_align = 64) restrict buffer Output { - SAMPLE_TYPE values[]; +layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_BeamformedData) buffer Output { + SAMPLE_TYPE output_data[]; }; -layout(std430, buffer_reference, buffer_reference_align = 
64) restrict buffer IncoherentOutput { - float values[]; +layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_BeamformedData) buffer IncoherentOutput { + float incoherent_data[]; }; layout(std430, buffer_reference) restrict readonly buffer ArrayParameters { @@ -68,7 +66,7 @@ vec2 rotate_iq(const vec2 iq, const float time) #endif /* NOTE: See: https://cubic.org/docs/hermite.htm */ -SAMPLE_TYPE cubic(const RF rf, const float t) +SAMPLE_TYPE cubic(const int offset, const float t) { const mat4 h = mat4( 2, -3, 0, 1, @@ -78,10 +76,10 @@ SAMPLE_TYPE cubic(const RF rf, const float t) ); SAMPLE_TYPE samples[4] = { - rf.values[0], - rf.values[1], - rf.values[2], - rf.values[3], + rf[offset + 0], + rf[offset + 1], + rf[offset + 2], + rf[offset + 3], }; vec4 S = vec4(t * t * t, t * t, t, 1); @@ -103,24 +101,23 @@ SAMPLE_TYPE cubic(const RF rf, const float t) SAMPLE_TYPE sample_rf(const int rf_offset, const float index) { SAMPLE_TYPE result = SAMPLE_TYPE(0); - RF rf = RF(rf_data + SAMPLE_BYTES * rf_offset); switch (InterpolationMode) { case InterpolationMode_Nearest:{ if (int(index) >= 0 && int(round(index)) < SampleCount) - result = rotate_iq(rf.values[int(round(index))], index / SamplingFrequency); + result = rotate_iq(rf[rf_offset + int(round(index))], index / SamplingFrequency); }break; case InterpolationMode_Linear:{ if (int(index) >= 0 && int(index) < SampleCount - 1) { float tk, t = modf(index, tk); - int n = int(tk); - result = (1 - t) * rf.values[n] + t * rf.values[n + 1]; + int n = rf_offset + int(tk); + result = (1 - t) * rf[n] + t * rf[n + 1]; result = rotate_iq(result, index / SamplingFrequency); } }break; case InterpolationMode_Cubic:{ if (int(index) > 0 && int(index) < SampleCount - 2) { float tk, t = modf(index, tk); - result = rotate_iq(cubic(RF(rf_data + SAMPLE_BYTES * (rf_offset + int(index))), t), index / SamplingFrequency); + result = rotate_iq(cubic(rf_offset + int(index), t), index / SamplingFrequency); } }break; } @@ -215,7 +212,7 @@ 
RESULT_TYPE RCA(const vec3 world_point) vec2 xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows); float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation); - int rf_offset = acquisition * SampleCount; + int rf_offset = int(rf_element_offset) + acquisition * SampleCount; rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); for (int chunk_channel = 0; chunk_channel < ChannelChunkCount; chunk_channel++) { int rx_channel = channel_offset + chunk_channel; @@ -250,7 +247,7 @@ RESULT_TYPE HERCULES(const vec3 world_point) RESULT_TYPE result = RESULT_TYPE(0); for (float chunk_channel = 0; chunk_channel < float(ChannelChunkCount); chunk_channel += 1.0f) { float rx_channel = float(channel_offset) + chunk_channel; - int rf_offset = int(chunk_channel) * SampleCount * AcquisitionCount + Sparse * SampleCount; + int rf_offset = int(rf_element_offset) + int(chunk_channel) * SampleCount * AcquisitionCount + Sparse * SampleCount; rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); // NOTE(rnp): this wouldn't be so messy if we just forced an orientation like with FORCES @@ -302,7 +299,7 @@ RESULT_TYPE FORCES(const vec3 xdc_world_point) float a_arg = abs(FNumber * receive_x_delta / xdc_world_point.z); if (a_arg < 0.5f) { - int rf_offset = int(chunk_channel) * SampleCount * AcquisitionCount + Sparse * SampleCount; + int rf_offset = int(rf_element_offset) + int(chunk_channel) * SampleCount * AcquisitionCount + Sparse * SampleCount; rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); float receive_index = sample_index(sqrt(receive_x_delta * receive_x_delta + z_delta_squared)); @@ -356,8 +353,8 @@ void main() } #if CoherencyWeighting - IncoherentOutput(incoherent_output).values[out_index] += RESULT_INCOHERENT_CAST(sum); + incoherent_data[incoherent_element_offset + out_index] += RESULT_INCOHERENT_CAST(sum); #endif - Output(output_data).values[out_index] += 
RESULT_COHERENT_CAST(sum); + output_data[output_element_offset + out_index] += RESULT_COHERENT_CAST(sum); } diff --git a/shaders/filter.glsl b/shaders/filter.glsl @@ -32,26 +32,26 @@ layout(std430, buffer_reference, buffer_reference_align = 64) restrict readonly DATA_TYPE values[]; }; -layout(std430, buffer_reference, buffer_reference_align = 64) restrict writeonly buffer Output { - OUT_DATA_TYPE values[]; +layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_PingPong) buffer Output { + OUT_DATA_TYPE output_data[]; }; layout(std430, buffer_reference, buffer_reference_align = 64) restrict readonly buffer Filter { FILTER_TYPE values[FilterLength]; }; -vec2 complex_mul(vec2 a, vec2 b) +SAMPLE_TYPE complex_mul(SAMPLE_TYPE a, SAMPLE_TYPE b) { mat2 m = mat2(b.x, b.y, -b.y, b.x); - vec2 result = m * a; + SAMPLE_TYPE result = SAMPLE_TYPE(m * a); return result; } #if Demodulate -vec2 rotate_iq(vec2 iq, uint index) +SAMPLE_TYPE rotate_iq(SAMPLE_TYPE iq, uint index) { - float arg = radians(360) * DemodulationFrequency * index / SamplingFrequency; - vec2 result = complex_mul(iq, vec2(cos(arg), -sin(arg))); + float arg = radians(360) * DemodulationFrequency * index / SamplingFrequency; + SAMPLE_TYPE result = complex_mul(iq, SAMPLE_TYPE(cos(arg), -sin(arg))); return result; } #endif @@ -70,11 +70,7 @@ void main() uint channel = gl_GlobalInvocationID.y; uint transmit = gl_GlobalInvocationID.z; - uint in_offset = InputChannelStride * channel + InputTransmitStride * transmit; - uint out_offset = OutputChannelStride * channel + - OutputTransmitStride * transmit + - OutputSampleStride * out_sample; - + uint in_offset = InputChannelStride * channel + InputTransmitStride * transmit; uint thread_index = gl_LocalInvocationIndex; uint thread_count = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; ///////////////////////// @@ -110,6 +106,10 @@ void main() uint offset = DecimationRate * thread_index; for (uint j = 0; j < FilterLength; j++) result += 
apply_filter(rf[offset + j], Filter(filter_coefficients).values[j]); - Output(output_data).values[out_offset] = RESULT_TYPE_CAST(result); + + uint out_offset = OutputChannelStride * channel + + OutputTransmitStride * transmit + + OutputSampleStride * out_sample; + output_data[output_element_offset + out_offset] = RESULT_TYPE_CAST(result); } } diff --git a/vulkan.c b/vulkan.c @@ -18,6 +18,9 @@ #define MaxCommandBuffersInFlight BeamformerMaxRawDataFramesInFlight #define MaxCommandBufferTimestamps (1024) +// TODO(rnp): labelling +#define vk_label_object(...) + typedef enum { VulkanQueueKind_Graphics, VulkanQueueKind_Compute, @@ -35,6 +38,7 @@ typedef enum { typedef struct { VkDeviceMemory memory; VkBuffer buffer; + u64 memory_size; void * host_pointer; @@ -127,6 +131,12 @@ typedef struct { VkDevice device; VkPhysicalDevice physical_device; + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout descriptor_set_layouts[BeamformerShaderResourceKind_Count]; + VkDescriptorSet descriptor_sets[BeamformerShaderResourceKind_Count]; + // NOTE(rnp): must store these if we want to allow partial updates easily + VkDescriptorBufferInfo descriptor_buffer_infos[BeamformerShaderBufferSlot_Count]; + // NOTE(rnp): fallback for when a shader fails to compile VulkanPipeline default_compute_pipeline; VulkanPipeline default_graphics_pipeline; @@ -504,6 +514,8 @@ vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_con VkPipelineLayoutCreateInfo pipeline_layout_create_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = countof(vulkan_context->descriptor_set_layouts), + .pSetLayouts = vulkan_context->descriptor_set_layouts, .pushConstantRangeCount = push_constants_size ? 1 : 0, .pPushConstantRanges = push_constants_size ? 
&push_constant_range : 0, }; @@ -522,6 +534,11 @@ vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_con }; vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline); + + vk_label_object(PIPELINE, result.pipeline, name, s8("Pipeline")); + vk_label_object(PIPELINE_LAYOUT, result.layout, name, s8("Pipeline Layout")); + vk_label_object(SHADER_MODULE, module, name, s8("Module")); + vkDestroyShaderModule(vulkan_context->device, module, 0); } if (result.pipeline == 0) result = vulkan_context->default_compute_pipeline; @@ -553,6 +570,8 @@ vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u3 VkPipelineLayoutCreateInfo pipeline_layout_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = countof(vulkan_context->descriptor_set_layouts), + .pSetLayouts = vulkan_context->descriptor_set_layouts, .pushConstantRangeCount = push_constants_size ? 1 : 0, .pPushConstantRanges = push_constants_size ? 
&pcr : 0, }; @@ -673,6 +692,19 @@ vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u3 }; vkCreateGraphicsPipelines(vulkan_context->device, 0, 1, &pci,0, &result.pipeline); + + s8 extras[] = { + [VulkanShaderKind_Vertex] = s8_comp("Vertex Module"), + [VulkanShaderKind_Mesh] = s8_comp("Mesh Module"), + [VulkanShaderKind_Fragment] = s8_comp("Fragment Module"), + }; + assert(infos[0].kind < countof(extras)); + assert(infos[1].kind < countof(extras)); + + vk_label_object(PIPELINE, result.pipeline, infos[0].name, s8("Pipeline")); + vk_label_object(PIPELINE_LAYOUT, result.layout, infos[0].name, s8("Pipeline Layout")); + //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[0], infos[0].name, extras[infos[0].kind]); + //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[1], infos[1].name, extras[infos[1].kind]); } if (modules[0]) vkDestroyShaderModule(vulkan_context->device, modules[0], 0); @@ -831,7 +863,7 @@ vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai) VkBufferCreateInfo buffer_create_info = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + .usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT|VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .size = size, .sharingMode = ai->queue_family_count > 1 ? 
VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = ai->queue_family_count, @@ -878,6 +910,7 @@ vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai) if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, 0)) { result = 1; ai->gpu_buffer->size = size; + vb->memory_size = size; vb->index_type = ai->index_type; @@ -1488,6 +1521,79 @@ vk_load_graphics(void) } } +function void +vk_load_descriptor_block(void) +{ + // NOTE(rnp): + // * One Descriptor Pool + // * One Descriptor Set Per Resource Kind + // * Shaders know the ResourceKind enumeration + // * Shaders know the per set binding points + + VulkanContext *vk = vulkan_context; + + // NOTE(rnp): Pool + VkDescriptorPoolSize pool_sizes[] = { + { + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = BeamformerShaderBufferSlot_Count, + }, + }; + static_assert(countof(pool_sizes) == BeamformerShaderResourceKind_Count, ""); + + VkDescriptorPoolCreateInfo pool_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .maxSets = BeamformerShaderResourceKind_Count, + .poolSizeCount = countof(pool_sizes), + .pPoolSizes = pool_sizes, + }; + + vkCreateDescriptorPool(vk->device, &pool_create_info, 0, &vk->descriptor_pool); + + // NOTE(rnp): Set Layouts + VkDescriptorSetLayoutCreateInfo layout_create_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + }; + + { + VkDescriptorSetLayoutBinding layout_bindings[BeamformerShaderBufferSlot_Count]; + for EachEnumValue(BeamformerShaderBufferSlot, it) { + layout_bindings[it] = (VkDescriptorSetLayoutBinding){ + .binding = it, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_ALL, + }; + } + layout_create_info.bindingCount = countof(layout_bindings), + layout_create_info.pBindings = layout_bindings, + vkCreateDescriptorSetLayout(vk->device, &layout_create_info, 0, 
+ vk->descriptor_set_layouts + BeamformerShaderResourceKind_Buffer); + } + + // NOTE(rnp): Sets + VkDescriptorSetAllocateInfo set_allocate_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = vk->descriptor_pool, + .descriptorSetCount = countof(vk->descriptor_sets), + .pSetLayouts = vk->descriptor_set_layouts, + }; + static_assert(countof(vk->descriptor_set_layouts) == countof(vk->descriptor_sets), ""); + vkAllocateDescriptorSets(vk->device, &set_allocate_info, vk->descriptor_sets); + + vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, s8("Beamformer Resources"), s8("Pool")); + + DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) { + Arena scratch = vk->arena; + for EachElement(vk->descriptor_sets, it) { + Stream sb = arena_stream(scratch); + stream_append_s8s(&sb, s8("Beamformer "), beamformer_shader_resource_kind_strings[it], s8("s")); + vk_label_object(DESCRIPTOR_SET, vk->descriptor_sets[it], stream_to_s8(&sb), s8("Set")); + vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_s8(&sb), s8("Set Layout")); + } + } +} + /////////////////////// // NOTE(rnp): User API @@ -1511,6 +1617,7 @@ vk_load(OSLibrary vulkan_library_handle, Arena *memory, Stream *err) vk_load_physical_device(vulkan_context->arena, err); vk_load_queues(&vulkan_context->arena, err); vk_load_graphics(); + vk_load_descriptor_block(); read_only local_persist s8 default_compute_shader = s8("" "#version 430 core\n" @@ -1561,7 +1668,7 @@ vk_gpu_info(void) } function void -vk_vulkan_buffer_release(VulkanBuffer *vb, u64 size) +vk_vulkan_buffer_release(VulkanBuffer *vb) { VulkanContext *vk = vulkan_context; VulkanEntity *e = (VulkanEntity *)((u8 *)vb - offsetof(VulkanEntity, as)); @@ -1572,15 +1679,15 @@ vk_vulkan_buffer_release(VulkanBuffer *vb, u64 size) if (vb->buffer) vkDestroyBuffer(vk->device, vb->buffer, 0); - vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? 
size : 0); + vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? vb->memory_size : 0); vk_entity_release(e); } DEBUG_IMPORT void vk_buffer_release(GPUBuffer *b) { - if ValidVulkanHandle(b->buffer) - vk_vulkan_buffer_release(vk_entity_data(b->buffer, VulkanEntityKind_Buffer), b->size); + if ValidVulkanHandle(b->handle) + vk_vulkan_buffer_release(vk_entity_data(b->handle, VulkanEntityKind_Buffer)); zero_struct(b); } @@ -1614,7 +1721,7 @@ vk_buffer_allocate(GPUBuffer *b, GPUBufferAllocateInfo *info) } if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) { - b->buffer.value[0] = (u64)e; + b->handle.value[0] = (u64)e; } else { vk_entity_release(e); } @@ -1624,8 +1731,8 @@ DEBUG_IMPORT b32 vk_buffer_needs_sync(GPUBuffer *b) { b32 result = 0; - if ValidVulkanHandle(b->buffer) { - VulkanBuffer *vb = vk_entity_data(b->buffer, VulkanEntityKind_Buffer); + if ValidVulkanHandle(b->handle) { + VulkanBuffer *vb = vk_entity_data(b->handle, VulkanEntityKind_Buffer); // TODO(rnp): not correct check. 
need to check if we used transfer queue result = vb->memory_kind != VulkanMemoryKind_BAR; @@ -1718,7 +1825,7 @@ vk_buffer_buffer_copy(VulkanBuffer *destination, VulkanBuffer *source, u64 desti DEBUG_IMPORT void vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal) { - VulkanBuffer *db = vk_entity_data(b->buffer, VulkanEntityKind_Buffer); + VulkanBuffer *db = vk_entity_data(b->handle, VulkanEntityKind_Buffer); VulkanBuffer sb = { .host_pointer = data, .memory_kind = VulkanMemoryKind_Host, @@ -1729,7 +1836,7 @@ vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_t DEBUG_IMPORT void vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 size, b32 non_temporal) { - VulkanBuffer *sb = vk_entity_data(source->buffer, VulkanEntityKind_Buffer); + VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer); VulkanBuffer db = { .host_pointer = destination, .memory_kind = VulkanMemoryKind_Host, @@ -1740,8 +1847,8 @@ vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 s DEBUG_IMPORT void vk_render_model_release(GPUBuffer *model) { - if ValidVulkanHandle(model->buffer) - vk_vulkan_buffer_release(vk_entity_data(model->buffer, VulkanEntityKind_RenderModel), model->size); + if ValidVulkanHandle(model->handle) + vk_vulkan_buffer_release(vk_entity_data(model->handle, VulkanEntityKind_RenderModel)); zero_struct(model); } @@ -1772,7 +1879,7 @@ vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 m .queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family, }; if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) { - model->buffer.value[0] = (u64)e; + model->handle.value[0] = (u64)e; model->index_count = index_count; model->gpu_pointer += indices_size; @@ -1790,7 +1897,7 @@ vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 m DEBUG_IMPORT void 
vk_render_model_range_upload(GPUBuffer *model, void *data, u64 offset, u64 size, b32 non_temporal) { - VulkanBuffer *db = vk_entity_data(model->buffer, VulkanEntityKind_RenderModel); + VulkanBuffer *db = vk_entity_data(model->handle, VulkanEntityKind_RenderModel); VulkanBuffer sb = { .host_pointer = data, .memory_kind = VulkanMemoryKind_Host, @@ -2030,6 +2137,36 @@ vk_pipeline_release(VulkanHandle h) } } +DEBUG_IMPORT void +vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count) +{ + VulkanContext *vk = vulkan_context; + + VkWriteDescriptorSet write_sets[BeamformerShaderResourceKind_Count] = {0}; + + for EachIndex(info_count, it) { + switch (infos[it].kind) { + case BeamformerShaderResourceKind_Buffer:{ + VulkanBuffer *vb = vk_entity_data(infos[it].handle, VulkanEntityKind_Buffer); + vk->descriptor_buffer_infos[infos[it].slot].buffer = vb->buffer; + vk->descriptor_buffer_infos[infos[it].slot].offset = 0; + vk->descriptor_buffer_infos[infos[it].slot].range = vb->memory_size; + }break; + + InvalidDefaultCase; + } + } + + write_sets[BeamformerShaderResourceKind_Buffer].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_sets[BeamformerShaderResourceKind_Buffer].dstSet = vk->descriptor_sets[BeamformerShaderResourceKind_Buffer]; + write_sets[BeamformerShaderResourceKind_Buffer].dstBinding = 0; + write_sets[BeamformerShaderResourceKind_Buffer].descriptorCount = countof(vk->descriptor_buffer_infos); + write_sets[BeamformerShaderResourceKind_Buffer].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + write_sets[BeamformerShaderResourceKind_Buffer].pBufferInfo = vk->descriptor_buffer_infos; + + vkUpdateDescriptorSets(vk->device, countof(write_sets), write_sets, 0, 0); +} + DEBUG_IMPORT VulkanHandle vk_command_begin(VulkanTimeline timeline) { @@ -2095,6 +2232,8 @@ vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline) assert(bind_point != (VkPipelineBindPoint)-1); vkCmdBindPipeline(vcp->buffers[vcb->buffer_index], bind_point, 
vp->pipeline); + vkCmdBindDescriptorSets(vcp->buffers[vcb->buffer_index], bind_point, vp->layout, + 0, countof(vk->descriptor_sets), vk->descriptor_sets, 0, 0); vcp->bound_pipeline = vp; } } @@ -2114,9 +2253,9 @@ vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *ba u32 valid_count = 0; VkBufferMemoryBarrier2 *memory_barriers = push_array(&arena, VkBufferMemoryBarrier2, count); for (u64 it = 0; it < count; it++) { - if ValidVulkanHandle(barriers[it].gpu_buffer->buffer) { + if ValidVulkanHandle(barriers[it].gpu_buffer->handle) { u32 index = valid_count++; - VulkanBuffer *vb = vk_entity_data(barriers[it].gpu_buffer->buffer, VulkanEntityKind_Buffer); + VulkanBuffer *vb = vk_entity_data(barriers[it].gpu_buffer->handle, VulkanEntityKind_Buffer); memory_barriers[index].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2; memory_barriers[index].srcStageMask = vq->pipeline_stage_flags; memory_barriers[index].srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT; @@ -2409,9 +2548,9 @@ vk_command_begin_rendering(VulkanHandle command, GPUImage *colour, GPUImage *dep DEBUG_IMPORT void vk_command_draw(VulkanHandle command, GPUBuffer *model) { - if (ValidVulkanHandle(command) && ValidVulkanHandle(model->buffer)) { + if (ValidVulkanHandle(command) && ValidVulkanHandle(model->handle)) { VkCommandBuffer cmd = vk_command_buffer(command); - VulkanBuffer *vb = vk_entity_data(model->buffer, VulkanEntityKind_RenderModel); + VulkanBuffer *vb = vk_entity_data(model->handle, VulkanEntityKind_RenderModel); vkCmdBindIndexBuffer2(cmd, vb->buffer, 0, vk_index_size(vb->index_type) * model->index_count, vb->index_type); vkCmdDrawIndexed(cmd, model->index_count, 1, 0, 0, 0); } @@ -2447,10 +2586,10 @@ DEBUG_IMPORT void vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination, GPUBuffer *restrict source, u64 source_offset, i64 size) { - if (ValidVulkanHandle(command) && ValidVulkanHandle(destination->buffer) && ValidVulkanHandle(source->buffer)) { + if 
(ValidVulkanHandle(command) && ValidVulkanHandle(destination->handle) && ValidVulkanHandle(source->handle)) { VkCommandBuffer cmd = vk_command_buffer(command); - VulkanBuffer *db = vk_entity_data(destination->buffer, VulkanEntityKind_Buffer); - VulkanBuffer *sb = vk_entity_data(source->buffer, VulkanEntityKind_Buffer); + VulkanBuffer *db = vk_entity_data(destination->handle, VulkanEntityKind_Buffer); + VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer); VkBufferCopy2 buffer_copy = { .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2, diff --git a/vulkan.h b/vulkan.h @@ -29,8 +29,11 @@ typedef uint32_t VkSampleMask; typedef uint64_t VkDeviceAddress; typedef uint64_t VkDeviceSize; VK_HANDLE(VkBuffer); +VK_HANDLE(VkBufferView); VK_HANDLE(VkCommandBuffer); VK_HANDLE(VkCommandPool); +VK_HANDLE(VkDescriptorPool); +VK_HANDLE(VkDescriptorSet); VK_HANDLE(VkDescriptorSetLayout); VK_HANDLE(VkDevice); VK_HANDLE(VkDeviceMemory); @@ -87,8 +90,11 @@ typedef enum { VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 28, VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 29, VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 30, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 32, + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 33, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO = 34, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 35, VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 37, - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 38, VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 39, VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 40, VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 42, @@ -633,12 +639,6 @@ typedef enum { } VkBlendOp; typedef enum { - VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001, - VK_FENCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF -} VkFenceCreateFlagBits; -typedef VkFlags VkFenceCreateFlags; - -typedef enum { VK_QUERY_POOL_CREATE_RESET_BIT_KHR = 0x00000001, VK_QUERY_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkQueryPoolCreateFlagBits; @@ -1450,6 +1450,16 @@ 
typedef VkFlags VkPipelineRasterizationStateCreateFlags; typedef VkFlags VkPipelineMultisampleStateCreateFlags; typedef enum { + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT = 0x00000001, + VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT = 0x00000002, + VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT = 0x00000004, + VK_DESCRIPTOR_POOL_CREATE_ALLOW_OVERALLOCATION_SETS_BIT_NV = 0x00000008, + VK_DESCRIPTOR_POOL_CREATE_ALLOW_OVERALLOCATION_POOLS_BIT_NV = 0x00000010, + VK_DESCRIPTOR_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkDescriptorPoolCreateFlagBits; +typedef VkFlags VkDescriptorPoolCreateFlags; + +typedef enum { VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT = 0x00000001, VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT = 0x00000002, VK_DESCRIPTOR_SET_LAYOUT_CREATE_HOST_ONLY_POOL_BIT_EXT = 0x00000004, @@ -2216,12 +2226,6 @@ typedef struct { } VkSurfaceFormatKHR; typedef struct { - VkStructureType sType; - const void * pNext; - VkFenceCreateFlags flags; -} VkFenceCreateInfo; - -typedef struct { VkStructureType sType; const void * pNext; VkQueryPoolCreateFlags flags; @@ -2827,14 +2831,18 @@ typedef struct { } VkMemoryGetFdInfoKHR; typedef struct { - VkStructureType sType; - const void * pNext; - VkRenderPass renderPass; - VkFramebuffer framebuffer; - VkRect2D renderArea; - uint32_t clearValueCount; - const VkClearValue * pClearValues; -} VkRenderPassBeginInfo; + VkDescriptorType type; + uint32_t descriptorCount; +} VkDescriptorPoolSize; + +typedef struct { + VkStructureType sType; + const void * pNext; + VkDescriptorPoolCreateFlags flags; + uint32_t maxSets; + uint32_t poolSizeCount; + const VkDescriptorPoolSize * pPoolSizes; +} VkDescriptorPoolCreateInfo; typedef struct { uint32_t binding; @@ -2852,6 +2860,51 @@ typedef struct { const VkDescriptorSetLayoutBinding * pBindings; } VkDescriptorSetLayoutCreateInfo; +typedef struct { + VkStructureType sType; + const void * pNext; + VkDescriptorPool descriptorPool; + uint32_t descriptorSetCount; + 
const VkDescriptorSetLayout * pSetLayouts; +} VkDescriptorSetAllocateInfo; + +typedef struct { + VkStructureType sType; + const void * pNext; + VkDescriptorSet srcSet; + uint32_t srcBinding; + uint32_t srcArrayElement; + VkDescriptorSet dstSet; + uint32_t dstBinding; + uint32_t dstArrayElement; + uint32_t descriptorCount; +} VkCopyDescriptorSet; + +typedef struct { + VkBuffer buffer; + VkDeviceSize offset; + VkDeviceSize range; +} VkDescriptorBufferInfo; + +typedef struct { + VkSampler sampler; + VkImageView imageView; + VkImageLayout imageLayout; +} VkDescriptorImageInfo; + +typedef struct { + VkStructureType sType; + const void * pNext; + VkDescriptorSet dstSet; + uint32_t dstBinding; + uint32_t dstArrayElement; + uint32_t descriptorCount; + VkDescriptorType descriptorType; + const VkDescriptorImageInfo * pImageInfo; + const VkDescriptorBufferInfo * pBufferInfo; + const VkBufferView * pTexelBufferView; +} VkWriteDescriptorSet; + typedef enum { VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, @@ -2907,12 +2960,15 @@ typedef struct { /* X(name, ret, params) */ #define VkDeviceProcedureList \ X(vkAllocateCommandBuffers, VkResult, (VkDevice device, const VkCommandBufferAllocateInfo *pAllocateInfo, VkCommandBuffer *pCommandBuffers)) \ + X(vkAllocateDescriptorSets, VkResult, (VkDevice device, const VkDescriptorSetAllocateInfo *pAllocateInfo, VkDescriptorSet *pDescriptorSets)) \ X(vkAllocateMemory, VkResult, (VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo, const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMemory)) \ X(vkBindBufferMemory, VkResult, (VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset)) \ X(vkBindImageMemory, VkResult, (VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset)) \ X(vkCreateBuffer, VkResult, (VkDevice device, const VkBufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, 
VkBuffer *pBuffer)) \ X(vkCreateCommandPool, VkResult, (VkDevice device, const VkCommandPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkCommandPool *pCommandPool)) \ X(vkCreateComputePipelines, VkResult, (VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)) \ + X(vkCreateDescriptorPool, VkResult, (VkDevice device, const VkDescriptorPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool)) \ + X(vkCreateDescriptorSetLayout, VkResult, (VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDescriptorSetLayout *pSetLayout)) \ X(vkCreateGraphicsPipelines, VkResult, (VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)) \ X(vkCreateImage, VkResult, (VkDevice device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkImage *pImage)) \ X(vkCreateImageView, VkResult, (VkDevice device, const VkImageViewCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkImageView *pView)) \ @@ -2941,9 +2997,11 @@ typedef struct { X(vkMapMemory, VkResult, (VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void **ppData)) \ X(vkSignalSemaphore, VkResult, (VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo)) \ X(vkUnmapMemory, void, (VkDevice device, VkDeviceMemory memory)) \ + X(vkUpdateDescriptorSets, void, (VkDevice device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies)) \ X(vkWaitSemaphores, VkResult, (VkDevice device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)) \ 
X(vkBeginCommandBuffer, VkResult, (VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo)) \ X(vkCmdBeginRendering, void, (VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)) \ + X(vkCmdBindDescriptorSets, void, (VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount, const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t *pDynamicOffsets)) \ X(vkCmdBindIndexBuffer2, void, (VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, VkIndexType indexType)) \ X(vkCmdBindPipeline, void, (VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline)) \ X(vkCmdCopyBuffer2, void, (VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)) \