Commit: de3f2343c1bd2c4e0a90b0a64cb841ebcdf5ecab
Parent: 4f052280896bf6c43608992e9c14721ab2acbd97
Author: Randy Palamar
Date: Thu, 30 Apr 2026 10:59:47 -0600
lib: add checks for gpu rf data size, hilbert support, max frame backlog query
see #49
see #50
Diffstat:
4 files changed, 95 insertions(+), 21 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -258,7 +258,17 @@ beamformer_init(BeamformerInput *input)
ctx->shared_memory->version = BEAMFORMER_SHARED_MEMORY_VERSION;
ctx->shared_memory->reserved_parameter_blocks = 1;
- ctx->shared_memory->max_beamformed_data_size = cs->backlog.buffer->size;
+
+ ctx->shared_memory->beamformed_frame_buffer_size = cs->backlog.buffer->size;
+
+ // TODO(rnp): dynamic rf data buffer slot usage
+ // NOTE(rnp): will be same as the max size we were able to get for the frame buffer
+ ctx->shared_memory->capabilities.max_rf_data_size = cs->backlog.buffer->size
+ / BeamformerMaxRawDataFramesInFlight;
+
+ ctx->shared_memory->capabilities.cuda = cuda_init != cuda_init_stub;
+ // TODO(rnp): re-enable hilbert support, with and without cuda
+ ctx->shared_memory->capabilities.hilbert = 0;
/* TODO(rnp): I'm not sure if its a good idea to pre-reserve a bunch of semaphores
* on w32 but thats what we are doing for now */
diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c
@@ -1,5 +1,5 @@
/* See LICENSE for license details. */
-#define BEAMFORMER_SHARED_MEMORY_VERSION (30UL)
+#define BEAMFORMER_SHARED_MEMORY_VERSION (31UL)
typedef enum {
BeamformerWorkKind_Compute,
@@ -153,7 +153,15 @@ typedef struct {
/* TODO(rnp): this is really sucky. we need a better way to communicate this */
u64 rf_block_rf_size;
- u64 max_beamformed_data_size;
+ // NOTE(rnp): currently this cannot be directly user readable. its interpretation
+ // requires beamformer implementation details
+ u64 beamformed_frame_buffer_size;
+
+ struct {
+ u64 max_rf_data_size;
+ b8 cuda;
+ b8 hilbert;
+ } capabilities;
BeamformerLiveImagingParameters live_imaging_parameters;
BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags;
diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c
@@ -229,15 +229,6 @@ beamformer_get_last_error_string(void)
return beamformer_error_string(beamformer_get_last_error());
}
-u64
-beamformer_maximum_frame_size(void)
-{
- u64 result = U64_MAX;
- if (check_shared_memory())
- result = g_beamformer_library_context.bp->max_beamformed_data_size;
- return result;
-}
-
void
beamformer_set_global_timeout(u32 timeout_ms)
{
@@ -267,6 +258,20 @@ validate_parameters(BeamformerParameters *bp)
if (!lib_error_check(contrast_raw_sample_count <= bp->raw_data_dimensions.x, DataSizeMismatch))
return 0;
+	// NOTE(rnp): frame size checks
+	{
+		// TODO(rnp): this check is overly conservative, what if we are exporting something smaller than Float32Complex
+		u64 buffer_size = g_beamformer_library_context.bp->beamformed_frame_buffer_size;
+		// NOTE: widen every factor to u64 before multiplying so the product is not
+		// evaluated in a narrower type if the point counts are narrower than 64 bits
+		u64 frame_size  = (u64)Max(1, bp->output_points.x) * (u64)Max(1, bp->output_points.y)
+		                  * (u64)Max(1, bp->output_points.z)
+		                  * (u64)beamformer_data_kind_byte_size[BeamformerDataKind_Float32Complex];
+		u64 incoherent_size = frame_size / 2;
+		// NOTE: clamp at zero rather than subtracting directly. an unsigned wrap here
+		// would turn a too small buffer into a huge one and let the check below pass
+		if (bp->coherency_weighting)
+			buffer_size = buffer_size > incoherent_size ? buffer_size - incoherent_size : 0;
+
+		if (!lib_error_check(frame_size <= buffer_size, FrameSizeOverflow))
+			return 0;
+	}
+
return 1;
}
@@ -285,6 +290,10 @@ validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
if (!lib_error_check(stage_test, InvalidComputeStage))
return 0;
+ if (shaders[i] == BeamformerShaderKind_Hilbert &&
+ !lib_error_check(g_beamformer_library_context.bp->capabilities.hilbert != 0, InvalidComputeStage))
+ return 0;
+
if (shaders[i] == BeamformerShaderKind_Demodulate &&
!lib_error_check(!beamformer_data_kind_complex[data_kind], InvalidDemodulationDataKind))
{
@@ -300,6 +309,42 @@ validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind)
return 1;
}
+// NOTE: reports the smaller of the shared memory scratch arena capacity and the
+// capabilities.max_rf_data_size value stored in shared memory.
+// yields U64_MAX when check_shared_memory() fails
+u64
+beamformer_maximum_rf_data_size(void)
+{
+	if (!check_shared_memory())
+		return U64_MAX;
+
+	Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
+	                                                       g_beamformer_library_context.shared_memory_size);
+	u64 scratch_limit  = (u64)arena_capacity(&scratch, u8);
+	u64 advertised_max = g_beamformer_library_context.bp->capabilities.max_rf_data_size;
+	return Min(scratch_limit, advertised_max);
+}
+
+// NOTE: number of whole frames, sized for the provided parameters, that fit in the
+// beamformed frame buffer. yields U64_MAX when check_shared_memory() or
+// validate_parameters() fails
+u64
+beamformer_maximum_frames_for_parameters(BeamformerParameters *bp)
+{
+	u64 result = U64_MAX;
+	if (check_shared_memory() && validate_parameters(bp)) {
+		// TODO(rnp): overly conservative frame size check
+		u64 buffer_size = g_beamformer_library_context.bp->beamformed_frame_buffer_size;
+		// NOTE: widen every factor to u64 before multiplying so the product is not
+		// evaluated in a narrower type if the point counts are narrower than 64 bits
+		u64 frame_size  = (u64)Max(1, bp->output_points.x) * (u64)Max(1, bp->output_points.y)
+		                  * (u64)Max(1, bp->output_points.z)
+		                  * (u64)beamformer_data_kind_byte_size[BeamformerDataKind_Float32Complex];
+		u64 incoherent_size = frame_size / 2;
+		// NOTE: with coherency weighting half a frame is deducted from the buffer.
+		// clamp at zero so an unsigned wrap cannot report an enormous frame count
+		if (bp->coherency_weighting)
+			buffer_size = buffer_size > incoherent_size ? buffer_size - incoherent_size : 0;
+		// NOTE: frame_size is at least the element byte size because each dimension is
+		// clamped to a minimum of 1
+		result = buffer_size / frame_size;
+	}
+	return result;
+}
+
+// NOTE: simple API variant; the pointer is cast to BeamformerParameters and
+// forwarded to beamformer_maximum_frames_for_parameters() unchanged.
+// NOTE(review): presumably BeamformerSimpleParameters starts with the same fields as
+// BeamformerParameters - confirm against the struct definitions
+u64
+beamformer_maximum_frames_for_simple_parameters(BeamformerSimpleParameters *bp)
+{
+	return beamformer_maximum_frames_for_parameters((BeamformerParameters *)bp);
+}
+
function b32
parameter_block_region_upload(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id,
u32 block_offset, i32 timeout_ms)
@@ -453,11 +498,15 @@ beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block)
BeamformerDataKind data_kind = b->pipeline.data_kind;
BeamformerContrastMode contrast_mode = bp->contrast_mode;
- u32 size = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind];
- u32 raw_size = bp->raw_data_dimensions.x * bp->raw_data_dimensions.y * beamformer_data_kind_byte_size[data_kind];
- if (lib_error_check(size <= arena_capacity(&scratch, u8), BufferOverflow) &&
- lib_error_check(size <= data_size && data_size == raw_size, DataSizeMismatch))
+ u64 max_rf_size = g_beamformer_library_context.bp->capabilities.max_rf_data_size;
+ u32 rf_size = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind];
+ u32 raw_size = bp->raw_data_dimensions.x * bp->raw_data_dimensions.y * beamformer_data_kind_byte_size[data_kind];
+
+ // TODO(rnp): support multi push upload so that max_rf_size is actual limit
+ if (lib_error_check(rf_size <= arena_capacity(&scratch, u8), BufferOverflow) &&
+ lib_error_check(rf_size <= max_rf_size, RFDataSizeOverflow) &&
+ lib_error_check(rf_size <= data_size && data_size == raw_size, DataSizeMismatch))
{
if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) {
if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) {
@@ -510,7 +559,7 @@ beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block)
lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace);
/* TODO(rnp): need a better way to communicate this */
- u64 rf_block_rf_size = (u64)block << 32ULL | (u64)size;
+ u64 rf_block_rf_size = (u64)block << 32ULL | (u64)rf_size;
atomic_store_u64(&g_beamformer_library_context.bp->rf_block_rf_size, rf_block_rf_size);
result = 1;
}
@@ -660,8 +709,6 @@ beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t da
u64 output_size = output_points.x * output_points.y * output_points.z * sizeof(f32);
if (complex) output_size *= 2;
- result = lib_error_check(output_size <= g_beamformer_library_context.bp->max_beamformed_data_size, FrameSizeOverflow);
-
Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
g_beamformer_library_context.shared_memory_size);
if (result && out_data) result &= lib_error_check((iz)output_size <= arena_capacity(&scratch, u8), ExportSpaceOverflow);
diff --git a/lib/ogl_beamformer_lib_base.h b/lib/ogl_beamformer_lib_base.h
@@ -28,6 +28,7 @@
X(SharedMemory, 17, "failed to open shared memory region") \
X(SyncVariable, 18, "failed to acquire lock within timeout period") \
X(FrameSizeOverflow, 19, "maximum frame size exceeded") \
+ X(RFDataSizeOverflow, 20, "raw rf size exceeds available GPU space") \
#define X(type, num, string) BeamformerLibErrorKind_##type = num,
typedef enum {BEAMFORMER_LIB_ERRORS} BeamformerLibErrorKind;
@@ -39,8 +40,16 @@ BEAMFORMER_LIB_EXPORT BeamformerLibErrorKind beamformer_get_last_error(void);
BEAMFORMER_LIB_EXPORT const char *beamformer_get_last_error_string(void);
BEAMFORMER_LIB_EXPORT const char *beamformer_error_string(BeamformerLibErrorKind kind);
-// NOTE: returns U64_MAX if shared memory could not be opened
-BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_frame_size(void);
+// NOTE: returns the maximum number of frames which may be beamformed with the provided
+// parameters before old frames are overwritten.
+//
+// returns U64_MAX on error. use beamformer_get_last_error() to determine why
+BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_frames_for_parameters(BeamformerParameters *);
+BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_frames_for_simple_parameters(BeamformerSimpleParameters *);
+
+// NOTE: returns the maximum single rf dataset size that can be uploaded to the beamformer
+// returns U64_MAX on error. use beamformer_get_last_error() to determine why
+BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_rf_data_size(void);
///////////////////////////
// NOTE: Simple API