ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 4f052280896bf6c43608992e9c14721ab2acbd97
Parent: 423409c940fc65a55e6306c9c2a482aec33f5d98
Author: Randy Palamar
Date:   Wed, 29 Apr 2026 14:41:43 -0600

core/api: drop Cuda shader prefix, remove CudaDecode

I would prefer that the way these operations run be an implementation
detail rather than an API contract. If in the future there is a
compelling reason to use CUDA for decoding, its use should be
decided internally and not by the API user.

Diffstat:
M beamformer.meta | 5 ++---
M beamformer_core.c | 21 ++++++++-------------
M beamformer_internal.h | 5 -----
M beamformer_shared_memory.c | 2 +-
M generated/beamformer.meta.c | 35 ++++++++++++++++-------------------
M lib/ogl_beamformer_lib.c | 2 +-
M tests/decode.c | 13 +++----------
M tests/throughput.c | 10 ++--------
M ui.c | 6 +++---
9 files changed, 36 insertions(+), 63 deletions(-)

diff --git a/beamformer.meta b/beamformer.meta @@ -286,9 +286,6 @@ @ShaderGroup Compute { - @Shader CudaDecode - @Shader CudaHilbert - @Shader(decode.glsl) Decode { @Enumeration DecodeMode @@ -415,6 +412,8 @@ } @Shader(min_max.glsl) MinMax + + @Shader Hilbert } // NOTE: shaders which need to be baked into the beamforming pipeline diff --git a/beamformer_core.c b/beamformer_core.c @@ -297,18 +297,18 @@ push_compute_graph_node(BeamformerComputeGraphNode *root, BeamformerShaderKind k function void plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, Arena scratch) { - b32 run_cuda_hilbert = 0; - b32 demodulate = 0; + b32 run_hilbert = 0; + b32 demodulate = 0; for (u32 i = 0; i < pb->pipeline.shader_count; i++) { switch (pb->pipeline.shaders[i]) { - case BeamformerShaderKind_CudaHilbert:{ run_cuda_hilbert = 1; }break; - case BeamformerShaderKind_Demodulate:{ demodulate = 1; }break; + case BeamformerShaderKind_Hilbert:{run_hilbert = 1;}break; + case BeamformerShaderKind_Demodulate:{demodulate = 1;}break; default:{}break; } } - if (demodulate) run_cuda_hilbert = 0; + if (demodulate) run_hilbert = 0; f32 sampling_frequency = pb->parameters.sampling_frequency; u32 input_sample_count = pb->parameters.sample_count; @@ -330,7 +330,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A sampling_frequency /= (2 * decimation_rate); } - cp->iq_pipeline = beamformer_data_kind_complex[input_data_kind] || run_cuda_hilbert; + cp->iq_pipeline = beamformer_data_kind_complex[input_data_kind] || run_hilbert; BeamformerDataKind das_data_kind = cp->iq_pipeline ? 
BeamformerDataKind_Float32Complex : BeamformerDataKind_Float32; @@ -367,14 +367,13 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A for EachIndex(pb->pipeline.shader_count, it) { // NOTE(rnp): skip unnecessary shaders switch (pb->pipeline.shaders[it]) { - case BeamformerShaderKind_CudaHilbert:{if (!run_cuda_hilbert) continue;}break; + case BeamformerShaderKind_Hilbert:{if (!run_hilbert) continue;}break; case BeamformerShaderKind_Decode:{ if (pb->parameters.decode_mode == BeamformerDecodeMode_None) continue; }break; - case BeamformerShaderKind_CudaDecode: case BeamformerShaderKind_Sum: case BeamformerShaderKind_MinMax: { @@ -1030,11 +1029,7 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c cc->ping_pong_input_index = !cc->ping_pong_input_index; }break; - case BeamformerShaderKind_CudaDecode:{ - cuda_decode(0, output_index, 0); - cc->ping_pong_input_index = !cc->ping_pong_input_index; - }break; - case BeamformerShaderKind_CudaHilbert:{ + case BeamformerShaderKind_Hilbert:{ cuda_hilbert(input_index, output_index); cc->ping_pong_input_index = !cc->ping_pong_input_index; }break; diff --git a/beamformer_internal.h b/beamformer_internal.h @@ -225,10 +225,6 @@ CUDA_INIT_FN(cuda_init_stub) {} typedef CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_fn); CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_stub) {} -#define CUDA_DECODE_FN(name) void name(size_t input_offset, u32 output_buffer_idx, u32 rf_channel_offset) -typedef CUDA_DECODE_FN(cuda_decode_fn); -CUDA_DECODE_FN(cuda_decode_stub) {} - #define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx) typedef CUDA_HILBERT_FN(cuda_hilbert_fn); CUDA_HILBERT_FN(cuda_hilbert_stub) {} @@ -238,7 +234,6 @@ typedef CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_fn); CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_stub) {} #define CUDALibraryProcedureList \ - X(decode, "cuda_decode") \ X(hilbert, "cuda_hilbert") \ X(init, 
"init_cuda_configuration") \ X(register_buffers, "register_cuda_buffers") \ diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c @@ -1,5 +1,5 @@ /* See LICENSE for license details. */ -#define BEAMFORMER_SHARED_MEMORY_VERSION (29UL) +#define BEAMFORMER_SHARED_MEMORY_VERSION (30UL) typedef enum { BeamformerWorkKind_Compute, diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c @@ -96,23 +96,22 @@ typedef enum { } BeamformerAcquisitionKind; typedef enum { - BeamformerShaderKind_CudaDecode = 0, - BeamformerShaderKind_CudaHilbert = 1, - BeamformerShaderKind_Decode = 2, - BeamformerShaderKind_Filter = 3, - BeamformerShaderKind_Demodulate = 4, - BeamformerShaderKind_DAS = 5, - BeamformerShaderKind_Sum = 6, - BeamformerShaderKind_MinMax = 7, - BeamformerShaderKind_CoherencyWeighting = 8, - BeamformerShaderKind_Reshape = 9, - BeamformerShaderKind_BufferClear = 10, - BeamformerShaderKind_RenderBeamformed = 11, + BeamformerShaderKind_Decode = 0, + BeamformerShaderKind_Filter = 1, + BeamformerShaderKind_Demodulate = 2, + BeamformerShaderKind_DAS = 3, + BeamformerShaderKind_Sum = 4, + BeamformerShaderKind_MinMax = 5, + BeamformerShaderKind_Hilbert = 6, + BeamformerShaderKind_CoherencyWeighting = 7, + BeamformerShaderKind_Reshape = 8, + BeamformerShaderKind_BufferClear = 9, + BeamformerShaderKind_RenderBeamformed = 10, BeamformerShaderKind_Count, - BeamformerShaderKind_ComputeFirst = BeamformerShaderKind_CudaDecode, - BeamformerShaderKind_ComputeLast = BeamformerShaderKind_MinMax, - BeamformerShaderKind_ComputeCount = 8, + BeamformerShaderKind_ComputeFirst = BeamformerShaderKind_Decode, + BeamformerShaderKind_ComputeLast = BeamformerShaderKind_Hilbert, + BeamformerShaderKind_ComputeCount = 7, BeamformerShaderKind_ComputeHelpersFirst = BeamformerShaderKind_CoherencyWeighting, BeamformerShaderKind_ComputeHelpersLast = BeamformerShaderKind_Reshape, BeamformerShaderKind_ComputeHelpersCount = 2, @@ -525,14 +524,13 @@ read_only global s8 
game_shader_buffer_slot_strings[] = { }; read_only global s8 beamformer_shader_names[] = { - s8_comp("CudaDecode"), - s8_comp("CudaHilbert"), s8_comp("Decode"), s8_comp("Filter"), s8_comp("Demodulate"), s8_comp("DAS"), s8_comp("Sum"), s8_comp("MinMax"), + s8_comp("Hilbert"), s8_comp("CoherencyWeighting"), s8_comp("Reshape"), s8_comp("BufferClear"), @@ -564,14 +562,13 @@ read_only global s8 *beamformer_reloadable_shader_files[] = { }; read_only global i32 beamformer_shader_reloadable_index_by_shader[] = { - -1, - -1, 0, 1, 1, 2, 3, 4, + -1, 5, 6, 7, diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c @@ -654,7 +654,7 @@ beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t da b32 complex = 0; for (u32 stage = 0; stage < bp->compute_stages_count; stage++) { BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage]; - complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert; + complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_Hilbert; } u64 output_size = output_points.x * output_points.y * output_points.z * sizeof(f32); diff --git a/tests/decode.c b/tests/decode.c @@ -17,7 +17,6 @@ read_only global u32 decode_transmit_counts[] = { typedef struct { b32 loop; - b32 cuda; b32 once; b32 dump; b32 full_aperture; @@ -77,11 +76,10 @@ os_make_directory(char *name) function void usage(char *argv0) { - die("%s [--loop] [--once] [--full-aperture] [--cuda] [--warmup n] [--dump dir]\n" + die("%s [--loop] [--once] [--full-aperture] [--warmup n] [--dump dir]\n" " --loop: reupload data forever\n" " --once: only run a single frame\n" " --full-aperture: recieve on full 256 channel aperture\n" - " --cuda: use cuda for decoding\n" " --warmup: warmup with n runs\n" " --dump: dump output stats files to dir\n", argv0); @@ -103,8 +101,6 @@ parse_argv(i32 argc, char *argv[]) result.loop = 1; } else if (s8_equal(arg, s8("--full-aperture"))) { 
result.full_aperture = 1; - } else if (s8_equal(arg, s8("--cuda"))) { - result.cuda = 1; } else if (s8_equal(arg, s8("--dump"))) { if (argc) { result.outdir = *argv; @@ -181,7 +177,6 @@ dump_stats(BeamformerComputeStatsTable *stats, Options *options, u32 transmit_co char path_buffer[1024]; Stream sb = {.data = (u8 *)path_buffer, .cap = sizeof(path_buffer)}; stream_append_s8s(&sb, c_str_to_s8(options->outdir), s8(OS_PATH_SEPARATOR), s8("decode_")); - if (options->cuda) stream_append_s8(&sb, s8("cuda_")); stream_append_u64(&sb, transmit_count); stream_append_s8(&sb, s8(".bin")); stream_append_byte(&sb, 0); @@ -223,10 +218,8 @@ send_parameters(Options *options, u32 transmit_count) }; beamformer_push_channel_mapping(channel_mapping, countof(channel_mapping)); - i32 shader_stages[1]; - if (options->cuda) shader_stages[0] = BeamformerShaderKind_CudaDecode; - else shader_stages[0] = BeamformerShaderKind_Decode; - beamformer_push_pipeline(shader_stages, 1, BeamformerDataKind_Int16); + i32 shader_stages = BeamformerShaderKind_Decode; + beamformer_push_pipeline(&shader_stages, 1, BeamformerDataKind_Int16); beamformer_set_global_timeout(1000); } diff --git a/tests/throughput.c b/tests/throughput.c @@ -21,7 +21,6 @@ global f32 g_f_number = 0.5f; typedef struct { b32 loop; - b32 cuda; u32 frame_number; char **remaining; @@ -377,9 +376,8 @@ beamformer_simple_parameters_from_zbp_file(BeamformerSimpleParameters *bp, char function void usage(char *argv0) { - die("%s [--loop] [--cuda] [--frame n] parameters_file\n" + die("%s [--loop] [--frame n] parameters_file\n" " --loop: reupload data forever\n" - " --cuda: use cuda for decoding\n" " --frame n: use frame n of the data for display\n", argv0); } @@ -398,9 +396,6 @@ parse_argv(i32 argc, char *argv[]) if (s8_equal(arg, s8("--loop"))) { shift(argv, argc); result.loop = 1; - } else if (s8_equal(arg, s8("--cuda"))) { - shift(argv, argc); - result.cuda = 1; } else if (s8_equal(arg, s8("--frame"))) { shift(argv, argc); if (argc) { @@ 
-459,8 +454,7 @@ execute_study(Arena arena, Stream path, Options *options) { bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Demodulate; } - if (options->cuda) bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_CudaDecode; - else bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Decode; + bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Decode; bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_DAS; { diff --git a/ui.c b/ui.c @@ -2928,9 +2928,9 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v cell_rect.size.w = t->widths[column]; text_spec.limits.size.w = r.size.w - (cell_rect.pos.x - it->start_x); - if (column == 0 && row_index < stages && vk_pipeline_valid(cp->vulkan_pipelines[row_index]) == 0 && - stats->table.shader_ids[row_index] != BeamformerShaderKind_CudaHilbert && - stats->table.shader_ids[row_index] != BeamformerShaderKind_CudaDecode) + if (column == 0 && row_index < stages && + vk_pipeline_valid(cp->vulkan_pipelines[row_index]) == 0 && + stats->table.shader_ids[row_index] != BeamformerShaderKind_Hilbert) { text_spec.colour = v4_lerp(FG_COLOUR, FOCUSED_COLOUR, ease_in_out_quartic(csv->blink.t)); } else {