ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: de3f2343c1bd2c4e0a90b0a64cb841ebcdf5ecab
Parent: 4f052280896bf6c43608992e9c14721ab2acbd97
Author: Randy Palamar
Date:   Thu, 30 Apr 2026 10:59:47 -0600

lib: add checks for gpu rf data size, hilbert support, max frame backlog query

see #49
see #50

Diffstat:
M beamformer.c                  | 12 +++++++++++-
M beamformer_shared_memory.c    | 12 ++++++++++--
M lib/ogl_beamformer_lib.c      | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
M lib/ogl_beamformer_lib_base.h | 13 +++++++++++--
4 files changed, 95 insertions(+), 21 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -258,7 +258,17 @@ beamformer_init(BeamformerInput *input) ctx->shared_memory->version = BEAMFORMER_SHARED_MEMORY_VERSION; ctx->shared_memory->reserved_parameter_blocks = 1; - ctx->shared_memory->max_beamformed_data_size = cs->backlog.buffer->size; + + ctx->shared_memory->beamformed_frame_buffer_size = cs->backlog.buffer->size; + + // TODO(rnp): dynamic rf data buffer slot usage + // NOTE(rnp): will be same as the max size we were able to get for the frame buffer + ctx->shared_memory->capabilities.max_rf_data_size = cs->backlog.buffer->size + / BeamformerMaxRawDataFramesInFlight; + + ctx->shared_memory->capabilities.cuda = cuda_init != cuda_init_stub; + // TODO(rnp): re-enable hilbert support, with and without cuda + ctx->shared_memory->capabilities.hilbert = 0; /* TODO(rnp): I'm not sure if its a good idea to pre-reserve a bunch of semaphores * on w32 but thats what we are doing for now */ diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c @@ -1,5 +1,5 @@ /* See LICENSE for license details. */ -#define BEAMFORMER_SHARED_MEMORY_VERSION (30UL) +#define BEAMFORMER_SHARED_MEMORY_VERSION (31UL) typedef enum { BeamformerWorkKind_Compute, @@ -153,7 +153,15 @@ typedef struct { /* TODO(rnp): this is really sucky. we need a better way to communicate this */ u64 rf_block_rf_size; - u64 max_beamformed_data_size; + // NOTE(rnp): currently this cannot be directly user readable. 
its interpretation + // requires beamformer implementation details + u64 beamformed_frame_buffer_size; + + struct { + u64 max_rf_data_size; + b8 cuda; + b8 hilbert; + } capabilities; BeamformerLiveImagingParameters live_imaging_parameters; BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags; diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c @@ -229,15 +229,6 @@ beamformer_get_last_error_string(void) return beamformer_error_string(beamformer_get_last_error()); } -u64 -beamformer_maximum_frame_size(void) -{ - u64 result = U64_MAX; - if (check_shared_memory()) - result = g_beamformer_library_context.bp->max_beamformed_data_size; - return result; -} - void beamformer_set_global_timeout(u32 timeout_ms) { @@ -267,6 +258,20 @@ validate_parameters(BeamformerParameters *bp) if (!lib_error_check(contrast_raw_sample_count <= bp->raw_data_dimensions.x, DataSizeMismatch)) return 0; + // NOTE(rnp): frame size checks + { + // TODO(rnp): this check is overly conservative, what if we are exporting something smaller than Float32Complex + u64 buffer_size = g_beamformer_library_context.bp->beamformed_frame_buffer_size; + u64 frame_size = Max(1, bp->output_points.x) * Max(1, bp->output_points.y) * Max(1, bp->output_points.z) + * beamformer_data_kind_byte_size[BeamformerDataKind_Float32Complex]; + u64 incoherent_size = frame_size / 2; + if (bp->coherency_weighting) + buffer_size -= incoherent_size; + + if (!lib_error_check(frame_size <= buffer_size, FrameSizeOverflow)) + return 0; + } + return 1; } @@ -285,6 +290,10 @@ validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind) if (!lib_error_check(stage_test, InvalidComputeStage)) return 0; + if (shaders[i] == BeamformerShaderKind_Hilbert && + !lib_error_check(g_beamformer_library_context.bp->capabilities.hilbert != 0, InvalidComputeStage)) + return 0; + if (shaders[i] == BeamformerShaderKind_Demodulate && !lib_error_check(!beamformer_data_kind_complex[data_kind], InvalidDemodulationDataKind)) { 
@@ -300,6 +309,42 @@ validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind) return 1; } +u64 +beamformer_maximum_rf_data_size(void) +{ + u64 result = U64_MAX; + if (check_shared_memory()) { + Arena sm = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp, + g_beamformer_library_context.shared_memory_size); + result = Min((u64)arena_capacity(&sm, u8), g_beamformer_library_context.bp->capabilities.max_rf_data_size); + } + return result; +} + +u64 +beamformer_maximum_frames_for_parameters(BeamformerParameters *bp) +{ + u64 result = U64_MAX; + if (check_shared_memory() && validate_parameters(bp)) { + // TODO(rnp): overly conservative frame size check + u64 buffer_size = g_beamformer_library_context.bp->beamformed_frame_buffer_size; + u64 frame_size = Max(1, bp->output_points.x) * Max(1, bp->output_points.y) * Max(1, bp->output_points.z) + * beamformer_data_kind_byte_size[BeamformerDataKind_Float32Complex]; + u64 incoherent_size = frame_size / 2; + if (bp->coherency_weighting) + buffer_size -= incoherent_size; + result = buffer_size / frame_size; + } + return result; +} + +u64 +beamformer_maximum_frames_for_simple_parameters(BeamformerSimpleParameters *bp) +{ + u64 result = beamformer_maximum_frames_for_parameters((BeamformerParameters *)bp); + return result; +} + function b32 parameter_block_region_upload(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id, u32 block_offset, i32 timeout_ms) @@ -453,11 +498,15 @@ beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block) BeamformerDataKind data_kind = b->pipeline.data_kind; BeamformerContrastMode contrast_mode = bp->contrast_mode; - u32 size = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind]; - u32 raw_size = bp->raw_data_dimensions.x * bp->raw_data_dimensions.y * beamformer_data_kind_byte_size[data_kind]; - if (lib_error_check(size <= arena_capacity(&scratch, u8), 
BufferOverflow) && - lib_error_check(size <= data_size && data_size == raw_size, DataSizeMismatch)) + u64 max_rf_size = g_beamformer_library_context.bp->capabilities.max_rf_data_size; + u32 rf_size = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind]; + u32 raw_size = bp->raw_data_dimensions.x * bp->raw_data_dimensions.y * beamformer_data_kind_byte_size[data_kind]; + + // TODO(rnp): support multi push upload so that max_rf_size is actual limit + if (lib_error_check(rf_size <= arena_capacity(&scratch, u8), BufferOverflow) && + lib_error_check(rf_size <= max_rf_size, RFDataSizeOverflow) && + lib_error_check(rf_size <= data_size && data_size == raw_size, DataSizeMismatch)) { if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) { if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) { @@ -510,7 +559,7 @@ beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block) lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace); /* TODO(rnp): need a better way to communicate this */ - u64 rf_block_rf_size = (u64)block << 32ULL | (u64)size; + u64 rf_block_rf_size = (u64)block << 32ULL | (u64)rf_size; atomic_store_u64(&g_beamformer_library_context.bp->rf_block_rf_size, rf_block_rf_size); result = 1; } @@ -660,8 +709,6 @@ beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t da u64 output_size = output_points.x * output_points.y * output_points.z * sizeof(f32); if (complex) output_size *= 2; - result = lib_error_check(output_size <= g_beamformer_library_context.bp->max_beamformed_data_size, FrameSizeOverflow); - Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp, g_beamformer_library_context.shared_memory_size); if (result && out_data) result &= lib_error_check((iz)output_size <= arena_capacity(&scratch, u8), ExportSpaceOverflow); diff --git a/lib/ogl_beamformer_lib_base.h b/lib/ogl_beamformer_lib_base.h @@ -28,6 
+28,7 @@ X(SharedMemory, 17, "failed to open shared memory region") \ X(SyncVariable, 18, "failed to acquire lock within timeout period") \ X(FrameSizeOverflow, 19, "maximum frame size exceeded") \ + X(RFDataSizeOverflow, 20, "raw rf size exceeds available GPU space") \ #define X(type, num, string) BeamformerLibErrorKind_##type = num, typedef enum {BEAMFORMER_LIB_ERRORS} BeamformerLibErrorKind; @@ -39,8 +40,16 @@ BEAMFORMER_LIB_EXPORT BeamformerLibErrorKind beamformer_get_last_error(void); BEAMFORMER_LIB_EXPORT const char *beamformer_get_last_error_string(void); BEAMFORMER_LIB_EXPORT const char *beamformer_error_string(BeamformerLibErrorKind kind); -// NOTE: returns U64_MAX if shared memory could not be opened -BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_frame_size(void); +// NOTE: returns the maximum number of frames which may be beamformed with the provided +// parameters before old frames are overwritten. +// +// returns U64_MAX on error. use beamformer_get_last_error() to determine why +BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_frames_for_parameters(BeamformerParameters *); +BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_frames_for_simple_parameters(BeamformerSimpleParameters *); + +// NOTE: returns the maximum single rf dataset size that can be uploaded to the beamformer +// returns U64_MAX on error. use beamformer_get_last_error() to determine why +BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_rf_data_size(void); /////////////////////////// // NOTE: Simple API