Commit: 4f052280896bf6c43608992e9c14721ab2acbd97
Parent: 423409c940fc65a55e6306c9c2a482aec33f5d98
Author: Randy Palamar
Date: Wed, 29 Apr 2026 14:41:43 -0600
core/api: drop Cuda shader prefix, remove CudaDecode
I would prefer that how these shaders operate be an implementation
detail rather than an API contract. If in the future there is a
compelling reason to use CUDA for decoding, that choice should be
made internally and not by the API user.
Diffstat:
9 files changed, 36 insertions(+), 63 deletions(-)
diff --git a/beamformer.meta b/beamformer.meta
@@ -286,9 +286,6 @@
@ShaderGroup Compute
{
- @Shader CudaDecode
- @Shader CudaHilbert
-
@Shader(decode.glsl) Decode
{
@Enumeration DecodeMode
@@ -415,6 +412,8 @@
}
@Shader(min_max.glsl) MinMax
+
+ @Shader Hilbert
}
// NOTE: shaders which need to be baked into the beamforming pipeline
diff --git a/beamformer_core.c b/beamformer_core.c
@@ -297,18 +297,18 @@ push_compute_graph_node(BeamformerComputeGraphNode *root, BeamformerShaderKind k
function void
plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, Arena scratch)
{
- b32 run_cuda_hilbert = 0;
- b32 demodulate = 0;
+ b32 run_hilbert = 0;
+ b32 demodulate = 0;
for (u32 i = 0; i < pb->pipeline.shader_count; i++) {
switch (pb->pipeline.shaders[i]) {
- case BeamformerShaderKind_CudaHilbert:{ run_cuda_hilbert = 1; }break;
- case BeamformerShaderKind_Demodulate:{ demodulate = 1; }break;
+ case BeamformerShaderKind_Hilbert:{run_hilbert = 1;}break;
+ case BeamformerShaderKind_Demodulate:{demodulate = 1;}break;
default:{}break;
}
}
- if (demodulate) run_cuda_hilbert = 0;
+ if (demodulate) run_hilbert = 0;
f32 sampling_frequency = pb->parameters.sampling_frequency;
u32 input_sample_count = pb->parameters.sample_count;
@@ -330,7 +330,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A
sampling_frequency /= (2 * decimation_rate);
}
- cp->iq_pipeline = beamformer_data_kind_complex[input_data_kind] || run_cuda_hilbert;
+ cp->iq_pipeline = beamformer_data_kind_complex[input_data_kind] || run_hilbert;
BeamformerDataKind das_data_kind = cp->iq_pipeline ? BeamformerDataKind_Float32Complex
: BeamformerDataKind_Float32;
@@ -367,14 +367,13 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A
for EachIndex(pb->pipeline.shader_count, it) {
// NOTE(rnp): skip unnecessary shaders
switch (pb->pipeline.shaders[it]) {
- case BeamformerShaderKind_CudaHilbert:{if (!run_cuda_hilbert) continue;}break;
+ case BeamformerShaderKind_Hilbert:{if (!run_hilbert) continue;}break;
case BeamformerShaderKind_Decode:{
if (pb->parameters.decode_mode == BeamformerDecodeMode_None)
continue;
}break;
- case BeamformerShaderKind_CudaDecode:
case BeamformerShaderKind_Sum:
case BeamformerShaderKind_MinMax:
{
@@ -1030,11 +1029,7 @@ do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *c
cc->ping_pong_input_index = !cc->ping_pong_input_index;
}break;
- case BeamformerShaderKind_CudaDecode:{
- cuda_decode(0, output_index, 0);
- cc->ping_pong_input_index = !cc->ping_pong_input_index;
- }break;
- case BeamformerShaderKind_CudaHilbert:{
+ case BeamformerShaderKind_Hilbert:{
cuda_hilbert(input_index, output_index);
cc->ping_pong_input_index = !cc->ping_pong_input_index;
}break;
diff --git a/beamformer_internal.h b/beamformer_internal.h
@@ -225,10 +225,6 @@ CUDA_INIT_FN(cuda_init_stub) {}
typedef CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_fn);
CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_stub) {}
-#define CUDA_DECODE_FN(name) void name(size_t input_offset, u32 output_buffer_idx, u32 rf_channel_offset)
-typedef CUDA_DECODE_FN(cuda_decode_fn);
-CUDA_DECODE_FN(cuda_decode_stub) {}
-
#define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx)
typedef CUDA_HILBERT_FN(cuda_hilbert_fn);
CUDA_HILBERT_FN(cuda_hilbert_stub) {}
@@ -238,7 +234,6 @@ typedef CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_fn);
CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_stub) {}
#define CUDALibraryProcedureList \
- X(decode, "cuda_decode") \
X(hilbert, "cuda_hilbert") \
X(init, "init_cuda_configuration") \
X(register_buffers, "register_cuda_buffers") \
diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c
@@ -1,5 +1,5 @@
/* See LICENSE for license details. */
-#define BEAMFORMER_SHARED_MEMORY_VERSION (29UL)
+#define BEAMFORMER_SHARED_MEMORY_VERSION (30UL)
typedef enum {
BeamformerWorkKind_Compute,
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -96,23 +96,22 @@ typedef enum {
} BeamformerAcquisitionKind;
typedef enum {
- BeamformerShaderKind_CudaDecode = 0,
- BeamformerShaderKind_CudaHilbert = 1,
- BeamformerShaderKind_Decode = 2,
- BeamformerShaderKind_Filter = 3,
- BeamformerShaderKind_Demodulate = 4,
- BeamformerShaderKind_DAS = 5,
- BeamformerShaderKind_Sum = 6,
- BeamformerShaderKind_MinMax = 7,
- BeamformerShaderKind_CoherencyWeighting = 8,
- BeamformerShaderKind_Reshape = 9,
- BeamformerShaderKind_BufferClear = 10,
- BeamformerShaderKind_RenderBeamformed = 11,
+ BeamformerShaderKind_Decode = 0,
+ BeamformerShaderKind_Filter = 1,
+ BeamformerShaderKind_Demodulate = 2,
+ BeamformerShaderKind_DAS = 3,
+ BeamformerShaderKind_Sum = 4,
+ BeamformerShaderKind_MinMax = 5,
+ BeamformerShaderKind_Hilbert = 6,
+ BeamformerShaderKind_CoherencyWeighting = 7,
+ BeamformerShaderKind_Reshape = 8,
+ BeamformerShaderKind_BufferClear = 9,
+ BeamformerShaderKind_RenderBeamformed = 10,
BeamformerShaderKind_Count,
- BeamformerShaderKind_ComputeFirst = BeamformerShaderKind_CudaDecode,
- BeamformerShaderKind_ComputeLast = BeamformerShaderKind_MinMax,
- BeamformerShaderKind_ComputeCount = 8,
+ BeamformerShaderKind_ComputeFirst = BeamformerShaderKind_Decode,
+ BeamformerShaderKind_ComputeLast = BeamformerShaderKind_Hilbert,
+ BeamformerShaderKind_ComputeCount = 7,
BeamformerShaderKind_ComputeHelpersFirst = BeamformerShaderKind_CoherencyWeighting,
BeamformerShaderKind_ComputeHelpersLast = BeamformerShaderKind_Reshape,
BeamformerShaderKind_ComputeHelpersCount = 2,
@@ -525,14 +524,13 @@ read_only global s8 game_shader_buffer_slot_strings[] = {
};
read_only global s8 beamformer_shader_names[] = {
- s8_comp("CudaDecode"),
- s8_comp("CudaHilbert"),
s8_comp("Decode"),
s8_comp("Filter"),
s8_comp("Demodulate"),
s8_comp("DAS"),
s8_comp("Sum"),
s8_comp("MinMax"),
+ s8_comp("Hilbert"),
s8_comp("CoherencyWeighting"),
s8_comp("Reshape"),
s8_comp("BufferClear"),
@@ -564,14 +562,13 @@ read_only global s8 *beamformer_reloadable_shader_files[] = {
};
read_only global i32 beamformer_shader_reloadable_index_by_shader[] = {
- -1,
- -1,
0,
1,
1,
2,
3,
4,
+ -1,
5,
6,
7,
diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c
@@ -654,7 +654,7 @@ beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t da
b32 complex = 0;
for (u32 stage = 0; stage < bp->compute_stages_count; stage++) {
BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage];
- complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert;
+ complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_Hilbert;
}
u64 output_size = output_points.x * output_points.y * output_points.z * sizeof(f32);
diff --git a/tests/decode.c b/tests/decode.c
@@ -17,7 +17,6 @@ read_only global u32 decode_transmit_counts[] = {
typedef struct {
b32 loop;
- b32 cuda;
b32 once;
b32 dump;
b32 full_aperture;
@@ -77,11 +76,10 @@ os_make_directory(char *name)
function void
usage(char *argv0)
{
- die("%s [--loop] [--once] [--full-aperture] [--cuda] [--warmup n] [--dump dir]\n"
+ die("%s [--loop] [--once] [--full-aperture] [--warmup n] [--dump dir]\n"
" --loop: reupload data forever\n"
" --once: only run a single frame\n"
" --full-aperture: recieve on full 256 channel aperture\n"
- " --cuda: use cuda for decoding\n"
" --warmup: warmup with n runs\n"
" --dump: dump output stats files to dir\n",
argv0);
@@ -103,8 +101,6 @@ parse_argv(i32 argc, char *argv[])
result.loop = 1;
} else if (s8_equal(arg, s8("--full-aperture"))) {
result.full_aperture = 1;
- } else if (s8_equal(arg, s8("--cuda"))) {
- result.cuda = 1;
} else if (s8_equal(arg, s8("--dump"))) {
if (argc) {
result.outdir = *argv;
@@ -181,7 +177,6 @@ dump_stats(BeamformerComputeStatsTable *stats, Options *options, u32 transmit_co
char path_buffer[1024];
Stream sb = {.data = (u8 *)path_buffer, .cap = sizeof(path_buffer)};
stream_append_s8s(&sb, c_str_to_s8(options->outdir), s8(OS_PATH_SEPARATOR), s8("decode_"));
- if (options->cuda) stream_append_s8(&sb, s8("cuda_"));
stream_append_u64(&sb, transmit_count);
stream_append_s8(&sb, s8(".bin"));
stream_append_byte(&sb, 0);
@@ -223,10 +218,8 @@ send_parameters(Options *options, u32 transmit_count)
};
beamformer_push_channel_mapping(channel_mapping, countof(channel_mapping));
- i32 shader_stages[1];
- if (options->cuda) shader_stages[0] = BeamformerShaderKind_CudaDecode;
- else shader_stages[0] = BeamformerShaderKind_Decode;
- beamformer_push_pipeline(shader_stages, 1, BeamformerDataKind_Int16);
+ i32 shader_stages = BeamformerShaderKind_Decode;
+ beamformer_push_pipeline(&shader_stages, 1, BeamformerDataKind_Int16);
beamformer_set_global_timeout(1000);
}
diff --git a/tests/throughput.c b/tests/throughput.c
@@ -21,7 +21,6 @@ global f32 g_f_number = 0.5f;
typedef struct {
b32 loop;
- b32 cuda;
u32 frame_number;
char **remaining;
@@ -377,9 +376,8 @@ beamformer_simple_parameters_from_zbp_file(BeamformerSimpleParameters *bp, char
function void
usage(char *argv0)
{
- die("%s [--loop] [--cuda] [--frame n] parameters_file\n"
+ die("%s [--loop] [--frame n] parameters_file\n"
" --loop: reupload data forever\n"
- " --cuda: use cuda for decoding\n"
" --frame n: use frame n of the data for display\n",
argv0);
}
@@ -398,9 +396,6 @@ parse_argv(i32 argc, char *argv[])
if (s8_equal(arg, s8("--loop"))) {
shift(argv, argc);
result.loop = 1;
- } else if (s8_equal(arg, s8("--cuda"))) {
- shift(argv, argc);
- result.cuda = 1;
} else if (s8_equal(arg, s8("--frame"))) {
shift(argv, argc);
if (argc) {
@@ -459,8 +454,7 @@ execute_study(Arena arena, Stream path, Options *options)
{
bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Demodulate;
}
- if (options->cuda) bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_CudaDecode;
- else bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Decode;
+ bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_Decode;
bp.compute_stages[bp.compute_stages_count++] = BeamformerShaderKind_DAS;
{
diff --git a/ui.c b/ui.c
@@ -2928,9 +2928,9 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v
cell_rect.size.w = t->widths[column];
text_spec.limits.size.w = r.size.w - (cell_rect.pos.x - it->start_x);
- if (column == 0 && row_index < stages && vk_pipeline_valid(cp->vulkan_pipelines[row_index]) == 0 &&
- stats->table.shader_ids[row_index] != BeamformerShaderKind_CudaHilbert &&
- stats->table.shader_ids[row_index] != BeamformerShaderKind_CudaDecode)
+ if (column == 0 && row_index < stages &&
+ vk_pipeline_valid(cp->vulkan_pipelines[row_index]) == 0 &&
+ stats->table.shader_ids[row_index] != BeamformerShaderKind_Hilbert)
{
text_spec.colour = v4_lerp(FG_COLOUR, FOCUSED_COLOUR, ease_in_out_quartic(csv->blink.t));
} else {