ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

Commit: 0a72f22e3240087c5ebb2c650b79ca5e0b606450
Parent: 41bb2e1365cd15b605e9298f2d8ba7774a4097d1
Author: Randy Palamar
Date:   Sun, 11 Jan 2026 19:36:19 -0700

vulkan/opengl: load relevant GPU parameters from vulkan

Diffstat:
M beamformer.c | 81 ++++++++-----------------------------------------------------------------------
M beamformer_core.c | 4 ++--
M beamformer_internal.h | 20 ++++++++++++++++++--
M intrinsics.c | 35 ++++++++---------------------------
M lib/ogl_beamformer_lib.c | 4 ++--
M opengl.h | 29 -----------------------------
M ui.c | 2 +-
M util.c | 13 +++++++------
M vulkan.c | 33 +++++++++++++++++++--------------
9 files changed, 65 insertions(+), 156 deletions(-)

diff --git a/beamformer.c b/beamformer.c @@ -97,78 +97,13 @@ load_gl(Stream *err) OGLRequiredExtensionProcedureList #undef X - /* NOTE: Gather information about the GPU */ - { - char *vendor = (char *)glGetString(GL_VENDOR); - if (!vendor) { - stream_append_s8(err, s8("Failed to determine GL Vendor\n")); - fatal(stream_to_s8(err)); - } - /* TODO(rnp): str prefix of */ - switch (vendor[0]) { - case 'A': gl_parameters.vendor_id = GLVendor_AMD; break; - case 'I': gl_parameters.vendor_id = GLVendor_Intel; break; - case 'N': gl_parameters.vendor_id = GLVendor_NVIDIA; break; - /* NOTE(rnp): freedreno */ - case 'f': gl_parameters.vendor_id = GLVendor_ARM; break; - /* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */ - case 'M': gl_parameters.vendor_id = GLVendor_ARM; break; - default: - stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n")); - fatal(stream_to_s8(err)); - } - - #define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl_parameters.name); - GL_PARAMETERS - #undef X - } - -#ifdef _DEBUG - { - s8 vendor = s8("vendor:"); - i32 max_width = (i32)vendor.len; - #define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len; - GL_PARAMETERS - #undef X - max_width++; - - stream_append_s8s(err, s8("---- GL Parameters ----\n"), vendor); - stream_pad(err, ' ', max_width - (i32)vendor.len); - switch (gl_parameters.vendor_id) { - case GLVendor_AMD: stream_append_s8(err, s8("AMD")); break; - case GLVendor_ARM: stream_append_s8(err, s8("ARM")); break; - case GLVendor_Intel: stream_append_s8(err, s8("Intel")); break; - case GLVendor_NVIDIA: stream_append_s8(err, s8("nVidia")); break; - } - stream_append_byte(err, '\n'); - - #define X(glname, name, suffix) \ - stream_append_s8(err, s8(#name ":")); \ - stream_pad(err, ' ', max_width - (i32)s8(#name ":").len); \ - stream_append_i64(err, gl_parameters.name); \ - stream_append_s8(err, s8(suffix "\n")); - 
GL_PARAMETERS - #undef X - stream_append_s8(err, s8("-----------------------\n")); - os_console_log(err->data, err->widx); - } -#endif - - { - stream_reset(err, 0); - if (gl_parameters.max_ubo_size < (i32)sizeof(BeamformerParameters)) { - stream_append_s8(err, s8("GPU must support UBOs of at least ")); - stream_append_i64(err, sizeof(BeamformerParameters)); - stream_append_s8(err, s8(" bytes!\n")); - } - - #define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n")); - OGLProcedureList - OGLRequiredExtensionProcedureList - #undef X + stream_reset(err, 0); + #define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n")); + OGLProcedureList + OGLRequiredExtensionProcedureList + #undef X - if (err->widx) fatal(stream_to_s8(err)); - } + if (err->widx) fatal(stream_to_s8(err)); } function void @@ -177,7 +112,7 @@ beamformer_load_cuda_library(BeamformerCtx *ctx, OSLibrary cuda, Arena arena) /* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently * causing a major performance regression. for now we are disabling its use * altogether. it will be reenabled once the issue can be fixed */ - b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && ValidHandle(cuda); + b32 result = 0 && vk_gpu_info()->vendor == GPUVendor_NVIDIA && ValidHandle(cuda); if (result) { Stream err = arena_stream(arena); @@ -431,7 +366,7 @@ beamformer_init(BeamformerInput *input) LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer")); glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers); - i32 msaa_samples = gl_parameters.vendor_id == GLVendor_ARM? 
4 : 8; + u32 msaa_samples = vk_gpu_info()->max_msaa_samples; glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8, FRAME_VIEW_RENDER_TARGET_SIZE); glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24, diff --git a/beamformer_core.c b/beamformer_core.c @@ -221,7 +221,7 @@ alloc_beamform_frame(BeamformerFrame *out, iv3 out_dim, GLenum gl_kind, s8 name, /* NOTE: allocate storage for beamformed output data; * this is shared between compute and fragment shaders */ u32 max_dim = (u32)Max(out->dim.x, Max(out->dim.y, out->dim.z)); - out->mips = (i32)ctz_u32(round_up_power_of_2(max_dim)) + 1; + out->mips = (i32)ctz_u64(round_up_power_of_two(max_dim)) + 1; out->gl_kind = gl_kind; @@ -375,7 +375,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb) f32 time_offset = pb->parameters.time_offset; // TODO(rnp): subgroup size - u32 subgroup_size = gl_parameters.vendor_id == GLVendor_NVIDIA ? 32 : 64; + u32 subgroup_size = vk_gpu_info()->vendor == GPUVendor_NVIDIA ? 
32 : 64; cp->pipeline.shader_count = 0; for (u32 i = 0; i < pb->pipeline.shader_count; i++) { diff --git a/beamformer_internal.h b/beamformer_internal.h @@ -34,11 +34,27 @@ typedef struct { VulkanHandle buffer; } GPUBuffer; +typedef enum { + GPUVendor_AMD = 0x1002, + GPUVendor_NVIDIA = 0x10DE, + GPUVendor_Qualcomm = 0x5143, + GPUVendor_Intel = 0x8086, +} GPUVendor; + typedef struct { - u64 gpu_heap_size; - u64 gpu_heap_used; + s8 name; + GPUVendor vendor; f32 timestamp_period_ns; + + u32 max_compute_shared_memory_size; + u32 max_msaa_samples; + u32 max_image_dimension_2D; + // NOTE(rnp): vulkan compute will output to a buffer so this won't be relevant + u32 max_image_dimension_3D; + + u64 gpu_heap_size; + u64 gpu_heap_used; } GPUInfo; /////////////////////////// diff --git a/intrinsics.c b/intrinsics.c @@ -122,23 +122,12 @@ #if COMPILER_MSVC -function force_inline u32 -clz_u32(u32 a) -{ - u32 result = 32, index; - if (a) { - _BitScanReverse(&index, a); - result = index; - } - return result; -} - -function force_inline u32 -ctz_u32(u32 a) +function force_inline u64 +clz_u64(u64 a) { - u32 result = 32, index; + u64 result = 64, index; if (a) { - _BitScanForward(&index, a); + _BitScanReverse64(&index, a); result = index; } return result; @@ -157,19 +146,11 @@ ctz_u64(u64 a) #else /* !COMPILER_MSVC */ -function force_inline u32 -clz_u32(u32 a) -{ - u32 result = 32; - if (a) result = (u32)__builtin_clz(a); - return result; -} - -function force_inline u32 -ctz_u32(u32 a) +function force_inline u64 +clz_u64(u32 a) { - u32 result = 32; - if (a) result = (u32)__builtin_ctz(a); + u64 result = 64; + if (a) result = (u64)__builtin_clzll(a); return result; } diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c @@ -692,8 +692,8 @@ beamformer_live_parameters_get_dirty_flag(void) { i32 result = -1; if (check_shared_memory()) { - u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags); - if (flag != 32) { + u32 flag = 
ctz_u64(g_beamformer_library_context.bp->live_imaging_dirty_flags); + if (flag != 64) { atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag)); result = (i32)flag; } diff --git a/opengl.h b/opengl.h @@ -169,33 +169,4 @@ OGLProcedureList OGLRequiredExtensionProcedureList #undef X -typedef enum { - GLVendor_AMD, - GLVendor_ARM, - GLVendor_Intel, - GLVendor_NVIDIA, -} GLVendorID; - -#define GL_PARAMETERS \ - X(MAJOR_VERSION, version_major, "") \ - X(MINOR_VERSION, version_minor, "") \ - X(MIN_MAP_BUFFER_ALIGNMENT, min_map_buffer_alignment, "") \ - X(TEXTURE_BUFFER_OFFSET_ALIGNMENT, texture_buffer_offset_alignment, "") \ - X(MAX_TEXTURE_BUFFER_SIZE, max_texture_buffer_size, "") \ - X(MAX_TEXTURE_SIZE, max_2d_texture_dim, "") \ - X(MAX_3D_TEXTURE_SIZE, max_3d_texture_dim, "") \ - X(MAX_SHADER_STORAGE_BLOCK_SIZE, max_ssbo_size, "") \ - X(MAX_COMPUTE_SHARED_MEMORY_SIZE, max_shared_memory_size, "") \ - X(MAX_UNIFORM_BLOCK_SIZE, max_ubo_size, "") \ - X(MAX_SERVER_WAIT_TIMEOUT, max_server_wait_time, " [ns]") \ - -typedef struct { - GLVendorID vendor_id; - #define X(glname, name, suffix) i32 name; - GL_PARAMETERS - #undef X -} GLParameters; - -DEBUG_IMPORT GLParameters gl_parameters; - #endif /* _OPENGL_H_*/ diff --git a/ui.c b/ui.c @@ -960,7 +960,7 @@ resize_frame_view(BeamformerFrameView *view, iv2 dim) glCreateTextures(GL_TEXTURE_2D, 1, &view->texture); view->texture_dim = dim; - view->texture_mipmaps = (i32)ctz_u32((u32)Max(dim.x, dim.y)) + 1; + view->texture_mipmaps = (i32)ctz_u64((u64)Max(dim.x, dim.y)) + 1; glTextureStorage2D(view->texture, view->texture_mipmaps, GL_RGBA8, dim.x, dim.y); glGenerateTextureMipmap(view->texture); diff --git a/util.c b/util.c @@ -698,17 +698,18 @@ push_s8(Arena *a, s8 str) return result; } -function force_inline u32 -round_down_power_of_2(u32 a) +/* NOTE(rnp): from Hacker's Delight */ +function force_inline u64 +round_down_power_of_two(u64 a) { - u32 result = 0x80000000UL >> clz_u32(a); + u64 result = 
0x8000000000000000ULL >> clz_u64(a); return result; } -function force_inline u32 -round_up_power_of_2(u32 a) +function force_inline u64 +round_up_power_of_two(u64 a) { - u32 result = 0x80000000UL >> (clz_u32(a - 1) - 1); + u64 result = 0x8000000000000000ULL >> (clz_u64(a - 1) - 1); return result; } diff --git a/vulkan.c b/vulkan.c @@ -205,15 +205,13 @@ vk_load_physical_device(Arena arena, Stream *err) if (!vk->physical_device) fatal(vulkan_info("failed to find a suitable GPU\n")); - VkPhysicalDeviceProperties2 *dp = push_struct(&arena, typeof(*dp)); - dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + VkPhysicalDeviceProperties2 dp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; + VkPhysicalDeviceMaintenance3Properties dm3p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES}; + dp.pNext = &dm3p; - VkPhysicalDeviceMaintenance3Properties *dm3p = dp->pNext = push_struct(&arena, typeof(*dp)); - dm3p->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; + vkGetPhysicalDeviceProperties2(vk->physical_device, &dp); - vkGetPhysicalDeviceProperties2(vk->physical_device, dp); - - stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp->properties.deviceName), s8("\n")); + stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n")); { Arena scratch = arena; @@ -258,11 +256,10 @@ vk_load_physical_device(Arena arena, Stream *err) } } - VkPhysicalDeviceMemoryProperties2 *mp = push_struct(&arena, typeof(*mp)); - mp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2; - vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, mp); + VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2}; + vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp); - VkPhysicalDeviceMemoryProperties *bmp = &mp->memoryProperties; + VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties; // NOTE(rnp): vulkan 
spec says that highest performance memory types must // come first. just take the first one found. @@ -316,10 +313,18 @@ vk_load_physical_device(Arena arena, Stream *err) vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; } - vk->memory_info.max_allocation_size = dm3p->maxMemoryAllocationSize; - vk->memory_info.non_coherent_atom_size = dp->properties.limits.nonCoherentAtomSize; + vk->memory_info.max_allocation_size = dm3p.maxMemoryAllocationSize; + vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize; + vk->gpu_info.vendor = dp.properties.vendorID; vk->gpu_info.gpu_heap_size = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size; - vk->gpu_info.timestamp_period_ns = dp->properties.limits.timestampPeriod; + vk->gpu_info.timestamp_period_ns = dp.properties.limits.timestampPeriod; + vk->gpu_info.max_image_dimension_2D = dp.properties.limits.maxImageDimension2D; + vk->gpu_info.max_image_dimension_3D = dp.properties.limits.maxImageDimension3D; + vk->gpu_info.max_msaa_samples = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts); + vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize; + + // IMPORTANT(rnp): memory must only be pushed at the end of the function + vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName)); } function void