ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 6fabb5b95ffffb95026f62b9e907ca408f5e65af
Parent: b82062dbbe56afc62a623d067c5800a681f08b8e
Author: Randy Palamar
Date:   Tue, 10 Mar 2026 12:44:12 -0600

core/das: switch back to buffer reference for output data

The Adreno X1-85 has this very cool feature where you can make
giant buffer allocations but only the first 128MB are visible via
a buffer binding. If you want to continue using the buffer binding
you have to rebind with a different offset instead of just
offsetting in the shader. Instead, buffer references (shader
pointers) can access the whole allocation. Very nice design Qualcomm ;).

This doesn't have any negative effect elsewhere because those
pointers are only touched twice per shader invocation (in contrast,
the rf data is touched 16K times per invocation, or 4x that if cubic).

Diffstat:
M beamformer.meta             | 24 ++++++++++++------------
M beamformer_core.c           | 41 +++++++++++++++++++++++------------------
M generated/beamformer.meta.c |  8 ++++----
M shaders/das.glsl            | 17 +++++++++--------
M vulkan.c                    |  1 -
5 files changed, 48 insertions(+), 43 deletions(-)

diff --git a/beamformer.meta b/beamformer.meta @@ -387,18 +387,18 @@ @PushConstants { - [xdc_transform M4] - [voxel_transform M4] - [xdc_element_pitch V2] - [array_parameters U64] - [rf_element_offset U32] - [output_element_offset U32] - [incoherent_element_offset U32] - [output_size_x U32] - [output_size_y U32] - [output_size_z U32] - [cycle_t U32] - [channel_offset S32] + [xdc_transform M4] + [voxel_transform M4] + [xdc_element_pitch V2] + [array_parameters U64] + [output_frame U64] + [incoherent_frame U64] + [rf_element_offset U32] + [output_size_x U32] + [output_size_y U32] + [output_size_z U32] + [cycle_t U32] + [channel_offset S32] } } diff --git a/beamformer_core.c b/beamformer_core.c @@ -583,7 +583,16 @@ stream_append_shader_header(Stream *s, i32 reloadable_index, BeamformerShaderDes stream_append_s8s(s, s8("#version 460 core\n\n" "#extension GL_EXT_buffer_reference : require\n" "#extension GL_EXT_shader_16bit_storage : require\n" - "#extension GL_EXT_shader_explicit_arithmetic_types : require\n\n")); + "#extension GL_EXT_shader_explicit_arithmetic_types : require\n\n" + "#define f32 float32_t\n" + "#define f16 float16_t\n" + "#define s32 int32_t\n" + "#define u32 uint32_t\n" + "#define s16 int16_t\n" + "#define u16 uint16_t\n" + "#define s32vec2 i32vec2\n" + "#define s16vec2 i16vec2\n" + "\n")); i32 header_vector_length = beamformer_shader_header_vector_lengths[reloadable_index]; i32 *header_vector = beamformer_shader_header_vectors[reloadable_index]; @@ -901,25 +910,21 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c GPUBuffer *b = cc->backlog.buffer; - u64 frame_element_size = beamformer_data_kind_byte_size[frame->data_kind]; - u64 frame_size = beamformer_frame_byte_size(frame->points, frame->data_kind); - u64 iframe_element_size = beamformer_data_kind_byte_size[frame->data_kind] - / beamformer_data_kind_element_count[frame->data_kind]; - u64 iframe_size = frame_size / 
beamformer_data_kind_element_count[frame->data_kind]; - + u64 frame_size = beamformer_frame_byte_size(frame->points, frame->data_kind); + u64 iframe_size = frame_size / beamformer_data_kind_element_count[frame->data_kind]; u64 element_size = beamformer_data_kind_byte_size[cp->shader_descriptors[shader_slot].bake.DAS.data_kind]; BeamformerDASPushConstants pc = { - .xdc_element_pitch = cp->xdc_element_pitch, - .rf_element_offset = input_index * pp_size / element_size, - .output_element_offset = frame->buffer_offset / frame_element_size, - .incoherent_element_offset = (b->size - iframe_size) / iframe_element_size, - .output_size_x = cp->output_points.x, - .output_size_y = cp->output_points.y, - .output_size_z = cp->output_points.z, - .cycle_t = das_cycle_t++, - .channel_offset = channel_offset, - .array_parameters = cp->array_parameters.gpu_pointer + offsetof(BeamformerDASArrayParameters, focal_vectors), + .xdc_element_pitch = cp->xdc_element_pitch, + .rf_element_offset = input_index * pp_size / element_size, + .output_frame = b->gpu_pointer + frame->buffer_offset, + .incoherent_frame = b->gpu_pointer + b->size - iframe_size, + .output_size_x = cp->output_points.x, + .output_size_y = cp->output_points.y, + .output_size_z = cp->output_points.z, + .cycle_t = das_cycle_t++, + .channel_offset = channel_offset, + .array_parameters = cp->array_parameters.gpu_pointer + offsetof(BeamformerDASArrayParameters, focal_vectors), }; mem_copy(pc.voxel_transform.E, cp->voxel_transform.E, sizeof(pc.voxel_transform)); mem_copy(pc.xdc_transform.E, cp->xdc_transform.E, sizeof(pc.xdc_transform)); @@ -934,7 +939,7 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c }, { .gpu_buffer = b, - .offset = pc.incoherent_element_offset * iframe_element_size, + .offset = pc.incoherent_frame - b->gpu_pointer, .size = iframe_size, }, }; diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -201,9 +201,9 @@ typedef struct { m4 voxel_transform; v2 
xdc_element_pitch; u64 array_parameters; + u64 output_frame; + u64 incoherent_frame; u32 rf_element_offset; - u32 output_element_offset; - u32 incoherent_element_offset; u32 output_size_x; u32 output_size_y; u32 output_size_z; @@ -638,9 +638,9 @@ read_only global s8 beamformer_shader_global_header_strings[] = { " f32mat4 voxel_transform;\n" " f32vec2 xdc_element_pitch;\n" " uint64_t array_parameters;\n" + " uint64_t output_frame;\n" + " uint64_t incoherent_frame;\n" " uint32_t rf_element_offset;\n" - " uint32_t output_element_offset;\n" - " uint32_t incoherent_element_offset;\n" " uint32_t output_size_x;\n" " uint32_t output_size_y;\n" " uint32_t output_size_z;\n" diff --git a/shaders/das.glsl b/shaders/das.glsl @@ -35,18 +35,19 @@ layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_PingPong) rea SAMPLE_TYPE rf[]; }; -layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_BeamformedData) buffer Output { - SAMPLE_TYPE output_data[]; +layout(std430, buffer_reference) restrict readonly buffer ArrayParameters { + DASArrayParameters data; }; -layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_BeamformedData) buffer IncoherentOutput { - float incoherent_data[]; +layout(std430, buffer_reference) buffer Output { + SAMPLE_TYPE x[]; }; -layout(std430, buffer_reference) restrict readonly buffer ArrayParameters { - DASArrayParameters data; +layout(std430, buffer_reference) buffer IncoherentOutput { + f32 x[]; }; + #define RX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 0, 4) #define TX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 4, 4) @@ -353,8 +354,8 @@ void main() } #if CoherencyWeighting - incoherent_data[incoherent_element_offset + out_index] += RESULT_INCOHERENT_CAST(sum); + IncoherentOutput(incoherent_frame).x[out_index] += RESULT_INCOHERENT_CAST(sum); #endif - output_data[output_element_offset + out_index] += RESULT_COHERENT_CAST(sum); + Output(output_frame).x[out_index] += RESULT_COHERENT_CAST(sum); } diff --git a/vulkan.c 
b/vulkan.c @@ -2,7 +2,6 @@ // TODO(rnp) // [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays // [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float -// [ ]: synchronization is busted when there is only one unique queue #include "beamformer_internal.h" #include "vulkan.h"