Commit: 6fabb5b95ffffb95026f62b9e907ca408f5e65af
Parent: b82062dbbe56afc62a623d067c5800a681f08b8e
Author: Randy Palamar
Date: Tue, 10 Mar 2026 12:44:12 -0600
core/das: switch back to buffer reference for output data
The Adreno X1-85 has this very cool feature where you can make
giant buffer allocations but only the first 128MB is visible via
a buffer binding. If you want to keep using the buffer binding
you have to rebind with a different offset instead of just
offsetting in the shader. Buffer references (shader pointers), on
the other hand, can access the whole allocation. Very nice design,
Qualcomm ;).
This doesn't have any negative effect elsewhere because the output
pointers are only touched twice per shader invocation (the rf data,
which stays a buffer binding, is touched 16K times, * 4 if cubic).
Diffstat:
5 files changed, 48 insertions(+), 43 deletions(-)
diff --git a/beamformer.meta b/beamformer.meta
@@ -387,18 +387,18 @@
@PushConstants
{
- [xdc_transform M4]
- [voxel_transform M4]
- [xdc_element_pitch V2]
- [array_parameters U64]
- [rf_element_offset U32]
- [output_element_offset U32]
- [incoherent_element_offset U32]
- [output_size_x U32]
- [output_size_y U32]
- [output_size_z U32]
- [cycle_t U32]
- [channel_offset S32]
+ [xdc_transform M4]
+ [voxel_transform M4]
+ [xdc_element_pitch V2]
+ [array_parameters U64]
+ [output_frame U64]
+ [incoherent_frame U64]
+ [rf_element_offset U32]
+ [output_size_x U32]
+ [output_size_y U32]
+ [output_size_z U32]
+ [cycle_t U32]
+ [channel_offset S32]
}
}
diff --git a/beamformer_core.c b/beamformer_core.c
@@ -583,7 +583,16 @@ stream_append_shader_header(Stream *s, i32 reloadable_index, BeamformerShaderDes
stream_append_s8s(s, s8("#version 460 core\n\n"
"#extension GL_EXT_buffer_reference : require\n"
"#extension GL_EXT_shader_16bit_storage : require\n"
- "#extension GL_EXT_shader_explicit_arithmetic_types : require\n\n"));
+ "#extension GL_EXT_shader_explicit_arithmetic_types : require\n\n"
+ "#define f32 float32_t\n"
+ "#define f16 float16_t\n"
+ "#define s32 int32_t\n"
+ "#define u32 uint32_t\n"
+ "#define s16 int16_t\n"
+ "#define u16 uint16_t\n"
+ "#define s32vec2 i32vec2\n"
+ "#define s16vec2 i16vec2\n"
+ "\n"));
i32 header_vector_length = beamformer_shader_header_vector_lengths[reloadable_index];
i32 *header_vector = beamformer_shader_header_vectors[reloadable_index];
@@ -901,25 +910,21 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c
GPUBuffer *b = cc->backlog.buffer;
- u64 frame_element_size = beamformer_data_kind_byte_size[frame->data_kind];
- u64 frame_size = beamformer_frame_byte_size(frame->points, frame->data_kind);
- u64 iframe_element_size = beamformer_data_kind_byte_size[frame->data_kind]
- / beamformer_data_kind_element_count[frame->data_kind];
- u64 iframe_size = frame_size / beamformer_data_kind_element_count[frame->data_kind];
-
+ u64 frame_size = beamformer_frame_byte_size(frame->points, frame->data_kind);
+ u64 iframe_size = frame_size / beamformer_data_kind_element_count[frame->data_kind];
u64 element_size = beamformer_data_kind_byte_size[cp->shader_descriptors[shader_slot].bake.DAS.data_kind];
BeamformerDASPushConstants pc = {
- .xdc_element_pitch = cp->xdc_element_pitch,
- .rf_element_offset = input_index * pp_size / element_size,
- .output_element_offset = frame->buffer_offset / frame_element_size,
- .incoherent_element_offset = (b->size - iframe_size) / iframe_element_size,
- .output_size_x = cp->output_points.x,
- .output_size_y = cp->output_points.y,
- .output_size_z = cp->output_points.z,
- .cycle_t = das_cycle_t++,
- .channel_offset = channel_offset,
- .array_parameters = cp->array_parameters.gpu_pointer + offsetof(BeamformerDASArrayParameters, focal_vectors),
+ .xdc_element_pitch = cp->xdc_element_pitch,
+ .rf_element_offset = input_index * pp_size / element_size,
+ .output_frame = b->gpu_pointer + frame->buffer_offset,
+ .incoherent_frame = b->gpu_pointer + b->size - iframe_size,
+ .output_size_x = cp->output_points.x,
+ .output_size_y = cp->output_points.y,
+ .output_size_z = cp->output_points.z,
+ .cycle_t = das_cycle_t++,
+ .channel_offset = channel_offset,
+ .array_parameters = cp->array_parameters.gpu_pointer + offsetof(BeamformerDASArrayParameters, focal_vectors),
};
mem_copy(pc.voxel_transform.E, cp->voxel_transform.E, sizeof(pc.voxel_transform));
mem_copy(pc.xdc_transform.E, cp->xdc_transform.E, sizeof(pc.xdc_transform));
@@ -934,7 +939,7 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c
},
{
.gpu_buffer = b,
- .offset = pc.incoherent_element_offset * iframe_element_size,
+ .offset = pc.incoherent_frame - b->gpu_pointer,
.size = iframe_size,
},
};
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -201,9 +201,9 @@ typedef struct {
m4 voxel_transform;
v2 xdc_element_pitch;
u64 array_parameters;
+ u64 output_frame;
+ u64 incoherent_frame;
u32 rf_element_offset;
- u32 output_element_offset;
- u32 incoherent_element_offset;
u32 output_size_x;
u32 output_size_y;
u32 output_size_z;
@@ -638,9 +638,9 @@ read_only global s8 beamformer_shader_global_header_strings[] = {
" f32mat4 voxel_transform;\n"
" f32vec2 xdc_element_pitch;\n"
" uint64_t array_parameters;\n"
+ " uint64_t output_frame;\n"
+ " uint64_t incoherent_frame;\n"
" uint32_t rf_element_offset;\n"
- " uint32_t output_element_offset;\n"
- " uint32_t incoherent_element_offset;\n"
" uint32_t output_size_x;\n"
" uint32_t output_size_y;\n"
" uint32_t output_size_z;\n"
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -35,18 +35,19 @@ layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_PingPong) rea
SAMPLE_TYPE rf[];
};
-layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_BeamformedData) buffer Output {
- SAMPLE_TYPE output_data[];
+layout(std430, buffer_reference) restrict readonly buffer ArrayParameters {
+ DASArrayParameters data;
};
-layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_BeamformedData) buffer IncoherentOutput {
- float incoherent_data[];
+layout(std430, buffer_reference) buffer Output {
+ SAMPLE_TYPE x[];
};
-layout(std430, buffer_reference) restrict readonly buffer ArrayParameters {
- DASArrayParameters data;
+layout(std430, buffer_reference) buffer IncoherentOutput {
+ f32 x[];
};
+
#define RX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 0, 4)
#define TX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 4, 4)
@@ -353,8 +354,8 @@ void main()
}
#if CoherencyWeighting
- incoherent_data[incoherent_element_offset + out_index] += RESULT_INCOHERENT_CAST(sum);
+ IncoherentOutput(incoherent_frame).x[out_index] += RESULT_INCOHERENT_CAST(sum);
#endif
- output_data[output_element_offset + out_index] += RESULT_COHERENT_CAST(sum);
+ Output(output_frame).x[out_index] += RESULT_COHERENT_CAST(sum);
}
diff --git a/vulkan.c b/vulkan.c
@@ -2,7 +2,6 @@
// TODO(rnp)
// [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays
// [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float
-// [ ]: synchronization is busted when there is only one unique queue
#include "beamformer_internal.h"
#include "vulkan.h"