vulkan/opengl: load relevant GPU parameters from vulkan - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

Commit: 0a72f22e3240087c5ebb2c650b79ca5e0b606450
Parent: 41bb2e1365cd15b605e9298f2d8ba7774a4097d1
Author: Randy Palamar
Date:   Sun, 11 Jan 2026 19:36:19 -0700

vulkan/opengl: load relevant GPU parameters from vulkan

Diffstat:
M beamformer.c  | 81 ++++++++-----------------------------------------------------------------------
M beamformer_core.c  | 4 ++--
M beamformer_internal.h  | 20 ++++++++++++++++++--
M intrinsics.c  | 35 ++++++++---------------------------
M lib/ogl_beamformer_lib.c  | 4 ++--
M opengl.h  | 29 -----------------------------
M ui.c  | 2 +-
M util.c  | 13 +++++++------
M vulkan.c  | 33 +++++++++++++++++++--------------

9 files changed, 65 insertions(+), 156 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -97,78 +97,13 @@ load_gl(Stream *err)
 	OGLRequiredExtensionProcedureList
 	#undef X
 
-	/* NOTE: Gather information about the GPU */
-	{
-		char *vendor = (char *)glGetString(GL_VENDOR);
-		if (!vendor) {
-			stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
-			fatal(stream_to_s8(err));
-		}
-		/* TODO(rnp): str prefix of */
-		switch (vendor[0]) {
-		case 'A': gl_parameters.vendor_id = GLVendor_AMD;    break;
-		case 'I': gl_parameters.vendor_id = GLVendor_Intel;  break;
-		case 'N': gl_parameters.vendor_id = GLVendor_NVIDIA; break;
-		/* NOTE(rnp): freedreno */
-		case 'f': gl_parameters.vendor_id = GLVendor_ARM;    break;
-		/* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
-		case 'M': gl_parameters.vendor_id = GLVendor_ARM;    break;
-		default:
-			stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
-			fatal(stream_to_s8(err));
-		}
-
-		#define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl_parameters.name);
-		GL_PARAMETERS
-		#undef X
-	}
-
-#ifdef _DEBUG
-	{
-		s8 vendor = s8("vendor:");
-		i32 max_width = (i32)vendor.len;
-		#define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
-		GL_PARAMETERS
-		#undef X
-		max_width++;
-
-		stream_append_s8s(err, s8("---- GL Parameters ----\n"), vendor);
-		stream_pad(err, ' ', max_width - (i32)vendor.len);
-		switch (gl_parameters.vendor_id) {
-		case GLVendor_AMD:    stream_append_s8(err, s8("AMD"));    break;
-		case GLVendor_ARM:    stream_append_s8(err, s8("ARM"));    break;
-		case GLVendor_Intel:  stream_append_s8(err, s8("Intel"));  break;
-		case GLVendor_NVIDIA: stream_append_s8(err, s8("nVidia")); break;
-		}
-		stream_append_byte(err, '\n');
-
-		#define X(glname, name, suffix) \
-			stream_append_s8(err, s8(#name ":"));                     \
-			stream_pad(err, ' ', max_width - (i32)s8(#name ":").len); \
-			stream_append_i64(err, gl_parameters.name);               \
-			stream_append_s8(err, s8(suffix "\n"));
-		GL_PARAMETERS
-		#undef X
-		stream_append_s8(err, s8("-----------------------\n"));
-		os_console_log(err->data, err->widx);
-	}
-#endif
-
-	{
-		stream_reset(err, 0);
-		if (gl_parameters.max_ubo_size < (i32)sizeof(BeamformerParameters)) {
-			stream_append_s8(err, s8("GPU must support UBOs of at least "));
-			stream_append_i64(err, sizeof(BeamformerParameters));
-			stream_append_s8(err, s8(" bytes!\n"));
-		}
-
-		#define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n"));
-		OGLProcedureList
-		OGLRequiredExtensionProcedureList
-		#undef X
+	stream_reset(err, 0);
+	#define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n"));
+	OGLProcedureList
+	OGLRequiredExtensionProcedureList
+	#undef X
 
-		if (err->widx) fatal(stream_to_s8(err));
-	}
+	if (err->widx) fatal(stream_to_s8(err));
 }
 
 function void
@@ -177,7 +112,7 @@ beamformer_load_cuda_library(BeamformerCtx *ctx, OSLibrary cuda, Arena arena)
 	/* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently
 	 * causing a major performance regression. for now we are disabling its use
 	 * altogether. it will be reenabled once the issue can be fixed */
-	b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && ValidHandle(cuda);
+	b32 result = 0 && vk_gpu_info()->vendor == GPUVendor_NVIDIA && ValidHandle(cuda);
 	if (result) {
 		Stream err = arena_stream(arena);
 
@@ -431,7 +366,7 @@ beamformer_init(BeamformerInput *input)
 	LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));
 
 	glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
-	i32 msaa_samples = gl_parameters.vendor_id == GLVendor_ARM? 4 : 8;
+	u32 msaa_samples = vk_gpu_info()->max_msaa_samples;
 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
 	                                      FRAME_VIEW_RENDER_TARGET_SIZE);
 	glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
diff --git a/beamformer_core.c b/beamformer_core.c
@@ -221,7 +221,7 @@ alloc_beamform_frame(BeamformerFrame *out, iv3 out_dim, GLenum gl_kind, s8 name,
 	/* NOTE: allocate storage for beamformed output data;
 	 * this is shared between compute and fragment shaders */
 	u32 max_dim = (u32)Max(out->dim.x, Max(out->dim.y, out->dim.z));
-	out->mips   = (i32)ctz_u32(round_up_power_of_2(max_dim)) + 1;
+	out->mips   = (i32)ctz_u64(round_up_power_of_two(max_dim)) + 1;
 
 	out->gl_kind = gl_kind;
 
@@ -375,7 +375,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
 	f32 time_offset = pb->parameters.time_offset;
 
 	// TODO(rnp): subgroup size
-	u32 subgroup_size = gl_parameters.vendor_id == GLVendor_NVIDIA ? 32 : 64;
+	u32 subgroup_size = vk_gpu_info()->vendor == GPUVendor_NVIDIA ? 32 : 64;
 
 	cp->pipeline.shader_count = 0;
 	for (u32 i = 0; i < pb->pipeline.shader_count; i++) {
diff --git a/beamformer_internal.h b/beamformer_internal.h
@@ -34,11 +34,27 @@ typedef struct {
 	VulkanHandle buffer;
 } GPUBuffer;
 
+typedef enum {
+	GPUVendor_AMD      = 0x1002,
+	GPUVendor_NVIDIA   = 0x10DE,
+	GPUVendor_Qualcomm = 0x5143,
+	GPUVendor_Intel    = 0x8086,
+} GPUVendor;
+
 typedef struct {
-	u64 gpu_heap_size;
-	u64 gpu_heap_used;
+	s8        name;
+	GPUVendor vendor;
 
 	f32 timestamp_period_ns;
+
+	u32 max_compute_shared_memory_size;
+	u32 max_msaa_samples;
+	u32 max_image_dimension_2D;
+	// NOTE(rnp): vulkan compute will output to a buffer so this won't be relevant
+	u32 max_image_dimension_3D;
+
+	u64 gpu_heap_size;
+	u64 gpu_heap_used;
 } GPUInfo;
 
 ///////////////////////////
diff --git a/intrinsics.c b/intrinsics.c
@@ -122,23 +122,12 @@
 
 #if COMPILER_MSVC
 
-function force_inline u32
-clz_u32(u32 a)
-{
-	u32 result = 32, index;
-	if (a) {
-		_BitScanReverse(&index, a);
-		result = index;
-	}
-	return result;
-}
-
-function force_inline u32
-ctz_u32(u32 a)
+function force_inline u64
+clz_u64(u64 a)
 {
-	u32 result = 32, index;
+	u64 result = 64, index;
 	if (a) {
-		_BitScanForward(&index, a);
+		_BitScanReverse64(&index, a);
 		result = index;
 	}
 	return result;
@@ -157,19 +146,11 @@ ctz_u64(u64 a)
 
 #else /* !COMPILER_MSVC */
 
-function force_inline u32
-clz_u32(u32 a)
-{
-	u32 result = 32;
-	if (a) result = (u32)__builtin_clz(a);
-	return result;
-}
-
-function force_inline u32
-ctz_u32(u32 a)
+function force_inline u64
+clz_u64(u32 a)
 {
-	u32 result = 32;
-	if (a) result = (u32)__builtin_ctz(a);
+	u64 result = 64;
+	if (a) result = (u64)__builtin_clzll(a);
 	return result;
 }
 
diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c
@@ -692,8 +692,8 @@ beamformer_live_parameters_get_dirty_flag(void)
 {
 	i32 result = -1;
 	if (check_shared_memory()) {
-		u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags);
-		if (flag != 32) {
+		u32 flag = ctz_u64(g_beamformer_library_context.bp->live_imaging_dirty_flags);
+		if (flag != 64) {
 			atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag));
 			result = (i32)flag;
 		}
diff --git a/opengl.h b/opengl.h
@@ -169,33 +169,4 @@ OGLProcedureList
 OGLRequiredExtensionProcedureList
 #undef X
 
-typedef enum {
-	GLVendor_AMD,
-	GLVendor_ARM,
-	GLVendor_Intel,
-	GLVendor_NVIDIA,
-} GLVendorID;
-
-#define GL_PARAMETERS \
-	X(MAJOR_VERSION,                   version_major,                   "") \
-	X(MINOR_VERSION,                   version_minor,                   "") \
-	X(MIN_MAP_BUFFER_ALIGNMENT,        min_map_buffer_alignment,        "") \
-	X(TEXTURE_BUFFER_OFFSET_ALIGNMENT, texture_buffer_offset_alignment, "") \
-	X(MAX_TEXTURE_BUFFER_SIZE,         max_texture_buffer_size,         "") \
-	X(MAX_TEXTURE_SIZE,                max_2d_texture_dim,              "") \
-	X(MAX_3D_TEXTURE_SIZE,             max_3d_texture_dim,              "") \
-	X(MAX_SHADER_STORAGE_BLOCK_SIZE,   max_ssbo_size,                   "") \
-	X(MAX_COMPUTE_SHARED_MEMORY_SIZE,  max_shared_memory_size,          "") \
-	X(MAX_UNIFORM_BLOCK_SIZE,          max_ubo_size,                    "") \
-	X(MAX_SERVER_WAIT_TIMEOUT,         max_server_wait_time,            " [ns]") \
-
-typedef struct {
-	GLVendorID vendor_id;
-	#define X(glname, name, suffix) i32 name;
-	GL_PARAMETERS
-	#undef X
-} GLParameters;
-
-DEBUG_IMPORT GLParameters gl_parameters;
-
 #endif /* _OPENGL_H_*/
diff --git a/ui.c b/ui.c
@@ -960,7 +960,7 @@ resize_frame_view(BeamformerFrameView *view, iv2 dim)
 	glCreateTextures(GL_TEXTURE_2D, 1, &view->texture);
 
 	view->texture_dim     = dim;
-	view->texture_mipmaps = (i32)ctz_u32((u32)Max(dim.x, dim.y)) + 1;
+	view->texture_mipmaps = (i32)ctz_u64((u64)Max(dim.x, dim.y)) + 1;
 	glTextureStorage2D(view->texture, view->texture_mipmaps, GL_RGBA8, dim.x, dim.y);
 
 	glGenerateTextureMipmap(view->texture);
diff --git a/util.c b/util.c
@@ -698,17 +698,18 @@ push_s8(Arena *a, s8 str)
 	return result;
 }
 
-function force_inline u32
-round_down_power_of_2(u32 a)
+/* NOTE(rnp): from Hacker's Delight */
+function force_inline u64
+round_down_power_of_two(u64 a)
 {
-	u32 result = 0x80000000UL >> clz_u32(a);
+	u64 result = 0x8000000000000000ULL >> clz_u64(a);
 	return result;
 }
 
-function force_inline u32
-round_up_power_of_2(u32 a)
+function force_inline u64
+round_up_power_of_two(u64 a)
 {
-	u32 result = 0x80000000UL >> (clz_u32(a - 1) - 1);
+	u64 result = 0x8000000000000000ULL >> (clz_u64(a - 1) - 1);
 	return result;
 }
 
diff --git a/vulkan.c b/vulkan.c
@@ -205,15 +205,13 @@ vk_load_physical_device(Arena arena, Stream *err)
 	if (!vk->physical_device)
 		fatal(vulkan_info("failed to find a suitable GPU\n"));
 
-	VkPhysicalDeviceProperties2 *dp = push_struct(&arena, typeof(*dp));
-	dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+	VkPhysicalDeviceProperties2            dp   = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
+	VkPhysicalDeviceMaintenance3Properties dm3p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES};
+	dp.pNext = &dm3p;
 
-	VkPhysicalDeviceMaintenance3Properties *dm3p = dp->pNext = push_struct(&arena, typeof(*dp));
-	dm3p->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES;
+	vkGetPhysicalDeviceProperties2(vk->physical_device, &dp);
 
-	vkGetPhysicalDeviceProperties2(vk->physical_device, dp);
-
-	stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp->properties.deviceName), s8("\n"));
+	stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n"));
 
 	{
 		Arena scratch = arena;
@@ -258,11 +256,10 @@ vk_load_physical_device(Arena arena, Stream *err)
 		}
 	}
 
-	VkPhysicalDeviceMemoryProperties2 *mp = push_struct(&arena, typeof(*mp));
-	mp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
-	vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, mp);
+	VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2};
+	vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp);
 
-	VkPhysicalDeviceMemoryProperties *bmp = &mp->memoryProperties;
+	VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties;
 
 	// NOTE(rnp): vulkan spec says that highest performance memory types must
 	// come first. just take the first one found.
@@ -316,10 +313,18 @@ vk_load_physical_device(Arena arena, Stream *err)
 		vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
 	}
 
-	vk->memory_info.max_allocation_size    = dm3p->maxMemoryAllocationSize;
-	vk->memory_info.non_coherent_atom_size = dp->properties.limits.nonCoherentAtomSize;
+	vk->memory_info.max_allocation_size    = dm3p.maxMemoryAllocationSize;
+	vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize;
+	vk->gpu_info.vendor                    = dp.properties.vendorID;
 	vk->gpu_info.gpu_heap_size             = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size;
-	vk->gpu_info.timestamp_period_ns       = dp->properties.limits.timestampPeriod;
+	vk->gpu_info.timestamp_period_ns       = dp.properties.limits.timestampPeriod;
+	vk->gpu_info.max_image_dimension_2D    = dp.properties.limits.maxImageDimension2D;
+	vk->gpu_info.max_image_dimension_3D    = dp.properties.limits.maxImageDimension3D;
+	vk->gpu_info.max_msaa_samples          = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts);
+	vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize;
+
+	// IMPORTANT(rnp): memory must only be pushed at the end of the function
+	vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName));
 }
 
 function void

M beamformer.c  | 81 ++++++++-----------------------------------------------------------------------
M beamformer_core.c  | 4 ++--
M beamformer_internal.h  | 20 ++++++++++++++++++--
M intrinsics.c  | 35 ++++++++---------------------------
M lib/ogl_beamformer_lib.c  | 4 ++--
M opengl.h  | 29 -----------------------------
M ui.c  | 2 +-
M util.c  | 13 +++++++------
M vulkan.c  | 33 +++++++++++++++++++--------------