ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 98014fe9ac555ab15cba796e645ca63d5dd084ed
Parent: 4b076acf215453e2d87f7267abd7e792dac475eb
Author: Randy Palamar
Date:   Tue,  2 Jun 2026 11:39:55 -0600

util: improve SLL/DLL macros, add extra string helpers

not that important yet but I don't want to need a rebuild of
glslang when I switch between branches

Diffstat:
Mbeamformer_core.c | 44+++++++++++++++++++++++---------------------
Mui.c | 36+++++++++++++++++-------------------
Mutil.c | 163+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
Mutil.h | 41+++++++++++++++++++++++++----------------
Mvulkan.c | 2+-
5 files changed, 218 insertions(+), 68 deletions(-)

diff --git a/beamformer_core.c b/beamformer_core.c @@ -46,6 +46,12 @@ struct BeamformerComputeGraphNode { BeamformerComputeGraphNode *next; }; +typedef struct { + BeamformerComputeGraphNode *first; + BeamformerComputeGraphNode *last; + u64 count; +} BeamformerComputeGraph; + read_only global u32 beamformer_compute_array_parameter_sizes[] = { #define X(k, type, elements) sizeof(type) * elements, BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST @@ -282,10 +288,13 @@ compute_plan_push_shader(BeamformerComputePlan *p, BeamformerComputeGraphNode *n } function BeamformerComputeGraphNode * -push_compute_graph_node(BeamformerComputeGraphNode *root, BeamformerShaderKind kind, Arena *arena) +push_compute_graph_node(BeamformerComputeGraph *graph, BeamformerShaderKind kind, Arena *arena) { BeamformerComputeGraphNode *result = push_struct(arena, BeamformerComputeGraphNode); - DLLPushEnd(root, result); + if (graph) { + DLLInsertLast(0, graph->first, graph->last, result, next, prev); + graph->count++; + } result->kind = kind; result->user_pipeline_index = -1; // NOTE(rnp): initially don't care data kind @@ -352,8 +361,8 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A ////////////////////////////////////// // NOTE(rnp): First Pass: build initial graph and insert hard layout constraints - BeamformerComputeGraphNode *root_node = push_struct(&scratch, BeamformerComputeGraphNode); - root_node->kind = BeamformerShaderKind_Count; + BeamformerComputeGraph graph = {0}; + BeamformerComputeGraphNode *root_node = push_compute_graph_node(&graph, BeamformerShaderKind_Count, &scratch); root_node->input_data_kind = input_data_kind; root_node->input_stride.x = 1; // Sample Stride root_node->input_stride.y = pb->parameters.sample_count * acquisition_count; // Channel Stride @@ -362,7 +371,6 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A root_node->output_stride.x = 1; // Sample Stride root_node->output_stride.y = pb->parameters.sample_count * acquisition_count; // Channel Stride root_node->output_stride.z = pb->parameters.sample_count; // Receive Event Stride - root_node->next = root_node->prev = root_node; for EachIndex(pb->pipeline.shader_count, it) { // NOTE(rnp): skip unnecessary shaders @@ -384,8 +392,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A default:{}break; } - BeamformerComputeGraphNode *node = push_compute_graph_node(root_node, pb->pipeline.shaders[it], - &scratch); + BeamformerComputeGraphNode *node = push_compute_graph_node(&graph, pb->pipeline.shaders[it], &scratch); node->user_pipeline_index = (i32)it; switch (pb->pipeline.shaders[it]) { case BeamformerShaderKind_Decode:{ @@ -422,7 +429,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A // NOTE(rnp): insert implicit CoherencyWeighting node if (pb->parameters.coherency_weighting) - node = push_compute_graph_node(root_node, BeamformerShaderKind_CoherencyWeighting, &scratch); + node = push_compute_graph_node(&graph, BeamformerShaderKind_CoherencyWeighting, &scratch); }break; default:{}break; @@ -431,10 +438,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A ////////////////////////////////////// // NOTE(rnp): Second Pass: resolve layout constraints - for (BeamformerComputeGraphNode *node = root_node->next; - node != root_node; - node = node->next) - { + for (BeamformerComputeGraphNode *node = root_node->next; node; node = node->next) { b32 needs_reshape = 0; // NOTE(rnp): data strides @@ -473,13 +477,14 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A // NOTE(rnp): insert reshape if needed if (needs_reshape) { - BeamformerComputeGraphNode *new = push_compute_graph_node(node, BeamformerShaderKind_Reshape, - &scratch); + BeamformerComputeGraphNode *new = push_compute_graph_node(0, BeamformerShaderKind_Reshape, &scratch); + BeamformerComputeGraphNode *last = node->prev; + DLLInsertLast(0, node, last, new, next, prev); + graph.count++; new->input_data_kind = new->prev->output_data_kind; new->input_stride = new->prev->output_stride; - - new->output_data_kind = node->input_data_kind; - new->output_stride = node->input_stride; + new->output_data_kind = new->next->input_data_kind; + new->output_stride = new->next->input_stride; } } @@ -489,10 +494,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb, A cp->first_image_shader_index = 0; cp->pipeline.shader_count = 0; - for (BeamformerComputeGraphNode *node = root_node->next; - node != root_node; - node = node->next) - { + for (BeamformerComputeGraphNode *node = root_node->next; node; node = node->next) { assert(node->prev->output_data_kind == node->input_data_kind); assert(bv3_all(iv3_equal(node->prev->output_stride, node->input_stride))); diff --git a/ui.c b/ui.c @@ -423,7 +423,8 @@ struct BeamformerUI { Variable floating_widget_sentinal; - BeamformerFrameView *views; + BeamformerFrameView *view_first; + BeamformerFrameView *view_last; BeamformerFrameView *view_freelist; Interaction interaction; @@ -935,7 +936,7 @@ table_push_row(Table *t, Arena *a, TableRowKind kind) function TableRow * table_push_parameter_row(Table *t, Arena *a, s8 label, Variable *var, s8 suffix) { - ASSERT(t->columns >= 3); + assert(t->columns >= 3); TableRow *result = table_push_row(t, a, TRK_CELLS); TableCell *cells = result->data; @@ -1035,10 +1036,7 @@ ui_variable_free(BeamformerUI *ui, Variable *var) /* TODO(rnp): instead there should be a way of linking these up */ BeamformerFrameView *bv = var->generic; ui_beamformer_frame_view_release_subresources(ui, bv, bv->kind); - DLLRemove(bv); - /* TODO(rnp): hack; use a sentinal */ - if (bv == ui->views) - ui->views = bv->next; + DLLRemove(0, ui->view_first, ui->view_last, bv, next, prev); SLLPushFreelist(bv, ui->view_freelist); } @@ -1115,7 +1113,7 @@ add_variable_group(BeamformerUI *ui, Variable *group, Arena *arena, s8 name, Var function Variable * end_variable_group(Variable *group) { - ASSERT(group->type == VT_GROUP); + assert(group->type == VT_GROUP); return group->parent; } @@ -1432,7 +1430,7 @@ ui_beamformer_frame_view_new(BeamformerUI *ui, Arena *arena) if (!result) result = push_struct_no_zero(arena, typeof(*result)); zero_struct(result); result->export_handle.value[0] = OSInvalidHandleValue; - DLLPushDown(result, ui->views); + DLLInsertLast(0, ui->view_first, ui->view_last, result, next, prev); return result; } @@ -1827,7 +1825,7 @@ view_update(BeamformerUI *ui, BeamformerFrameView *view) function void update_frame_views(BeamformerUI *ui, Rect window) { - for (BeamformerFrameView *view = ui->views; view; view = view->next) { + for (BeamformerFrameView *view = ui->view_first; view; view = view->next) { if (view_update(ui, view)) { BeamformerRenderBeamformedPushConstants pc = { .bounding_box_colour = FRAME_VIEW_BB_COLOUR, @@ -3491,9 +3489,9 @@ update_text_input(InputState *is, Variable *var) b32 allow_key = !is->numeric || (BETWEEN(key, '0', '9') || (key == '.') || (key == '-' && is->cursor == 0)); if (allow_key) { - mem_move(is->buf + is->cursor + 1, - is->buf + is->cursor, - (uz)(is->count - is->cursor)); + memory_move(is->buf + is->cursor + 1, + is->buf + is->cursor, + (uz)(is->count - is->cursor)); is->buf[is->cursor++] = (u8)key; is->count++; } @@ -3505,17 +3503,17 @@ update_text_input(InputState *is, Variable *var) if ((IsKeyPressed(KEY_BACKSPACE) || IsKeyPressedRepeat(KEY_BACKSPACE)) && is->cursor > 0) { is->cursor--; if (is->cursor < countof(is->buf) - 1) { - mem_move(is->buf + is->cursor, - is->buf + is->cursor + 1, - (uz)(is->count - is->cursor - 1)); + memory_move(is->buf + is->cursor, + is->buf + is->cursor + 1, + (uz)(is->count - is->cursor - 1)); } is->count--; } if ((IsKeyPressed(KEY_DELETE) || IsKeyPressedRepeat(KEY_DELETE)) && is->cursor < is->count) { - mem_move(is->buf + is->cursor, - is->buf + is->cursor + 1, - (uz)(is->count - is->cursor - 1)); + memory_move(is->buf + is->cursor, + is->buf + is->cursor + 1, + (uz)(is->count - is->cursor - 1)); is->count--; } @@ -3801,7 +3799,7 @@ ui_extra_actions(BeamformerUI *ui, Variable *var) ui_beamformer_frame_view_release_subresources(ui, old, last_kind); ui_beamformer_frame_view_convert(ui, &ui->arena, view->child, view->menu, old->kind, old, log_scale); - DLLRemove(old); + DLLRemove(0, old->next, old->prev, old, next, prev); SLLPushFreelist(old, ui->view_freelist); }break; InvalidDefaultCase; diff --git a/util.c b/util.c @@ -15,7 +15,7 @@ memory_clear(void *restrict p_, u8 c, u64 size) } function b32 -memory_equal(void *restrict left, void *restrict right, uz n) +memory_equal(void *restrict left, void *restrict right, u64 n) { u8 *a = left, *b = right; b32 result = 1; @@ -26,7 +26,7 @@ memory_equal(void *restrict left, void *restrict right, uz n) #define mem_copy memory_copy function void -memory_copy(void *restrict dest, void *restrict src, uz n) +memory_copy(void *restrict dest, void *restrict src, u64 n) { u8 *s = src, *d = dest; #ifdef __AVX512BW__ @@ -43,7 +43,7 @@ memory_copy(void *restrict dest, void *restrict src, uz n) /* IMPORTANT: this function may fault if dest, src, and n are not multiples of 64 */ function void -memory_copy_non_temporal(void *restrict dest, void *restrict src, uz n) +memory_copy_non_temporal(void *restrict dest, void *restrict src, u64 n) { assume(((u64)dest & 63) == 0); assume(((u64)src & 63) == 0); @@ -76,14 +76,14 @@ memory_copy_non_temporal(void *restrict dest, void *restrict src, uz n) ); } #else - mem_copy(d, s, n); + memory_copy(d, s, n); #endif } function void -mem_move(u8 *dest, u8 *src, uz n) +memory_move(u8 *dest, u8 *src, u64 n) { - if (dest < src) mem_copy(dest, src, n); + if (dest < src) memory_copy(dest, src, n); else while (n) { n--; dest[n] = src[n]; } } @@ -364,6 +364,17 @@ stream_append(Stream *s, void *data, iz count) } } +// TODO(rnp): replace with handwritten version +#include <stdarg.h> +#include <stdio.h> +function void +stream_appendfv(Stream *s, const char *format, va_list args) +{ + i32 written = vsnprintf((char *)s->data + s->widx, s->cap - s->widx, format, args); + s->errors |= written > (s->cap - s->widx); + if (!s->errors) s->widx += written; +} + function void stream_append_byte(Stream *s, u8 b) { @@ -533,7 +544,7 @@ arena_stream(Arena a) function s8 arena_stream_commit(Arena *a, Stream *s) { - ASSERT(s->data == a->beg); + assert(s->data == a->beg); s8 result = stream_to_s8(s); arena_commit(a, result.len); return result; @@ -575,9 +586,16 @@ u128_hash_from_data(void *data, uz size) } function u64 +u64_hash_from_str8_seed(str8 string, u64 seed) +{ + u64 result = XXH3_64bits_withSeed(string.data, (uz)string.length, seed); + return result; +} + +function u64 u64_hash_from_s8(s8 v) { - u64 result = XXH3_64bits_withSeed(v.data, (uz)v.len, 4969); + u64 result = u64_hash_from_str8_seed(str8_from_s8(v), 4969); return result; } @@ -589,15 +607,41 @@ c_str_to_s8(char *cstr) return result; } +function str8 +str8_range(u8 *start, u8 *one_past_last) +{ + str8 result; + result.data = start; + result.length = one_past_last - start; + return result; +} + +function str8 +str8_skip(str8 s, i64 count) +{ + str8 result = s; + if (count > 0) { + result.data += count; + result.length -= count; + } + return result; +} + function b32 -s8_equal(s8 a, s8 b) +str8_equal(str8 a, str8 b) { - b32 result = a.len == b.len; - for (iz i = 0; result && i < a.len; i++) + b32 result = a.length == b.length; + for (i64 i = 0; result && i < a.length; i++) result = a.data[i] == b.data[i]; return result; } +function b32 +s8_equal(s8 a, s8 b) +{ + return str8_equal(str8_from_s8(a), str8_from_s8(b)); +} + /* NOTE(rnp): returns < 0 if byte is not found */ function iz s8_scan_backwards(s8 s, u8 byte) @@ -617,6 +661,58 @@ s8_cut_head(s8 s, iz cut) return result; } +function b32 +str8_match(str8 a, str8 b, StringMatchFlags flags) +{ + b32 result = 0; + if (flags == 0) { + result = str8_equal(a, b); + } else if (a.length == b.length || (flags & StringMatchFlag_SloppySize)) { + result = 1; + i64 length = Min(a.length, b.length); + for (i64 it = 0; it < length && result; it++) { + u8 ab = a.data[it], bb = b.data[it]; + if (flags & StringMatchFlag_CaseInsensitive) { + ab |= 0x20; + bb |= 0x20; + } + result &= ab == bb; + } + } + return result; +} + +function i64 +str8_find_needle(str8 string, str8 needle, StringMatchFlags flags) +{ + u8 *s = string.data; + u8 *se = string.data + Max(string.length + 1, needle.length) - needle.length; + if (needle.length > 0) { + flags |= StringMatchFlag_SloppySize; + + u8 nb = needle.data[0]; + if (flags & StringMatchFlag_CaseInsensitive) + nb |= 0x20; + + str8 needle_tail = str8_skip(needle, 1); + u8 *s_opl = string.data + string.length; + for (; s < se; s++) { + u8 sb = *s; + if (flags & StringMatchFlag_CaseInsensitive) + sb |= 0x20; + + if (sb == nb && str8_match(str8_range(s + 1, s_opl), needle_tail, flags)) + break; + } + } + + i64 result = string.length; + if (s < se) + result = s - string.data; + return result; +} + + function s8 s8_alloc(Arena *a, iz len) { @@ -665,6 +761,30 @@ s8_to_s16(Arena *a, s8 in) return result; } +#define push_str8_from_parts(a, j, ...) push_str8_from_parts_((a), (j), arg_list(str8, __VA_ARGS__)) +function str8 +push_str8_from_parts_(Arena *arena, str8 joiner, str8 *parts, i64 count) +{ + i64 length = joiner.length * (count - 1); + for (i64 i = 0; i < count; i++) + length += parts[i].length; + + str8 result = {.length = length, .data = arena_commit(arena, length + 1)}; + + i64 offset = 0; + for (i64 i = 0; i < count; i++) { + if (i != 0) { + memory_copy(result.data + offset, joiner.data, (uz)joiner.length); + offset += joiner.length; + } + memory_copy(result.data + offset, parts[i].data, (uz)parts[i].length); + offset += parts[i].length; + } + result.data[result.length] = 0; + + return result; +} + #define push_s8_from_parts(a, j, ...) push_s8_from_parts_((a), (j), arg_list(s8, __VA_ARGS__)) function s8 push_s8_from_parts_(Arena *arena, s8 joiner, s8 *parts, iz count) @@ -698,6 +818,27 @@ push_s8(Arena *a, s8 str) return result; } +// TODO(rnp): replace with handwritten version +function str8 +push_str8_fv(Arena *arena, const char *format, va_list args) +{ + Stream sb = arena_stream(*arena); + stream_appendfv(&sb, format, args); + s8 s = arena_stream_commit(arena, &sb); + str8 result = {.length = s.len, .data = s.data}; + return result; +} + +function str8 +push_f64_string(Arena *arena, f64 value, u64 precision) +{ + Stream sb = arena_stream(*arena); + stream_append_f64(&sb, value, precision); + s8 s = arena_stream_commit(arena, &sb); + str8 result = {.length = s.len, .data = s.data}; + return result; +} + /* NOTE(rnp): from Hacker's Delight */ function force_inline u64 round_down_power_of_two(u64 a) diff --git a/util.h b/util.h @@ -71,7 +71,6 @@ typedef u64 uptr; #define DEBUG_DECL(a) #define assert(c) (void)(c) #endif -#define ASSERT assert #if ASAN_ACTIVE void __asan_poison_memory_region(void *, i64); @@ -145,22 +144,26 @@ typedef u64 uptr; #define spin_wait(c) while ((c)) cpu_yield() +// NOTE(rnp): typically for enums, wtf is wrong with modern compilers +#define circular_add(v, add, max) (((u64)(v) + (u64)(max) + (i64)(add)) % (u64)(max)) + #define DA_STRUCT(kind, name) typedef struct { \ kind *data; \ da_count count; \ da_count capacity; \ } name ##List; -#define SLLStackPush(list, n) ((n)->next = (list), (list) = (n)) +#define SLLStackPush(list, n, next) ((n)->next = (list), (list) = (n)) // TODO(rnp): clean this up -#define SLLPush(v, list) SLLStackPush(list, v) +#define SLLPush(v, list) SLLStackPush(list, v, next) /* NOTE(rnp): no guarantees about actually getting an element */ -#define SLLPop(list) list; list = list ? list->next : 0 +#define SLLPop(l, next) (l); ((l) = (l) ? (l)->next : 0) +#define SLLStackPop(l, next) ((l) = (l)->next) #define SLLPopFreelist(list) list; do { \ asan_unpoison_region((list), sizeof(*(list))); \ - (void)SLLPop((list)); \ + (void)SLLPop((list), next); \ } while(0) #define SLLPushFreelist(v, list) do { \ @@ -168,19 +171,19 @@ typedef u64 uptr; asan_poison_region((v), sizeof(*(v))); \ } while(0) -#define DLLPushEnd(l, n) ((n)->prev = (l)->prev, ((l)->prev ? (l)->prev->next = (n) : (0)), (l)->prev = (n), (n)->next = (l)) +#define DLLInsert(nil, f, l, n, next, prev) (\ + ((f) == 0 || (f) == nil) ? ((f) = (l) = (n), (n)->next = (n)->prev = nil) :\ + ((n)->next = (f), (n)->prev = (f)->prev, (f)->prev = (n), (f) = (n)),\ + ((n)->prev ? ((n)->prev->next = (n)) : (0))) -// TODO(rnp): cleanup -#define DLLPushDown(v, list) do { \ - (v)->next = (list); \ - if ((v)->next) (v)->next->prev = (v); \ - (list) = (v); \ -} while (0) +#define DLLInsertFirst(nil, f, l, n, next, prev) DLLInsert(nil, f, l, n, next, prev) +#define DLLInsertLast(nil, f, l, n, next, prev) DLLInsert(nil, l, f, n, prev, next) -#define DLLRemove(v) do { \ - if ((v)->next) (v)->next->prev = (v)->prev; \ - if ((v)->prev) (v)->prev->next = (v)->next; \ -} while (0) +#define DLLRemove(nil, f, l, n, next, prev) (\ + ((n) == (f) ? (f) = (n)->next : (0)),\ + ((n) == (l) ? (l) = (l)->prev : (0)),\ + (((n)->prev != nil && (n)->prev != 0) ? (n)->prev->next = (n)->next : (0)),\ + (((n)->next != nil && (n)->next != 0) ? (n)->next->prev = (n)->prev : (0))) #define KB(a) ((u64)(a) << 10ULL) #define MB(a) ((u64)(a) << 20ULL) @@ -225,6 +228,11 @@ typedef struct { i64 length; u8 *data; } str8; typedef struct { i64 len; u16 *data; } s16; +typedef enum { + StringMatchFlag_CaseInsensitive = (1 << 0), + StringMatchFlag_SloppySize = (1 << 1), +} StringMatchFlags; + typedef struct { u32 cp, consumed; } UnicodeDecode; typedef enum { @@ -300,6 +308,7 @@ typedef union { struct { f32 w, h; }; f32 E[2]; } v2; +#define V2_INFINITY (v2){{-F32_INFINITY, F32_INFINITY}} typedef union { struct { f32 x, y, z; }; diff --git a/vulkan.c b/vulkan.c @@ -407,7 +407,7 @@ vk_entity_release(VulkanEntity *entity) { DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock)) { - SLLStackPush(vulkan_context->entity_freelist, entity); + SLLStackPush(vulkan_context->entity_freelist, entity, next); } }