ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 9b71d439faf4ff9a5e4f5a92a036e0ffd0be29b9
Parent: 4747c52afb2223adac069e10294f00eb7df75c79
Author: Randy Palamar
Date:   Mon, 16 Mar 2026 13:27:51 -0600

core/decode: default to processing 1 transmit with decode

Unlike when OpenGL was compiling the shader, processing a single
element here just performs better. The option is kept around
because I haven't remeasured decoding with 48 transmits.

Diffstat:
M beamformer_core.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/beamformer_core.c b/beamformer_core.c @@ -363,6 +363,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) } db->dilate_output = run_cuda_hilbert; + db->to_process = 1; if (db->decode_mode == BeamformerDecodeMode_None) { sd->layout = (uv3){{subgroup_size, 1, 1}}; @@ -372,7 +373,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) sd->dispatch.z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / (f32)sd->layout.z); } else if (db->transmit_count > 40) { db->use_shared_memory = 1; - db->to_process = 2; if (db->transmit_count == 48) db->to_process = db->transmit_count / 16; @@ -385,8 +385,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) sd->dispatch.y = (u32)ceil_f32((f32)channel_chunk_count / (f32)sd->layout.y); sd->dispatch.z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / (f32)sd->layout.z / (f32)db->to_process); } else { - db->to_process = 1; - /* NOTE(rnp): register caching. using more threads will cause the compiler to do * contortions to avoid spilling registers. using less gives higher performance */ sd->layout = (uv3){{subgroup_size / 2, 1, 1}};