Commit: 0a72f22e3240087c5ebb2c650b79ca5e0b606450
Parent: 41bb2e1365cd15b605e9298f2d8ba7774a4097d1
Author: Randy Palamar
Date: Sun, 11 Jan 2026 19:36:19 -0700
vulkan/opengl: load relevant GPU parameters from vulkan
Diffstat:
9 files changed, 65 insertions(+), 156 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -97,78 +97,13 @@ load_gl(Stream *err)
OGLRequiredExtensionProcedureList
#undef X
- /* NOTE: Gather information about the GPU */
- {
- char *vendor = (char *)glGetString(GL_VENDOR);
- if (!vendor) {
- stream_append_s8(err, s8("Failed to determine GL Vendor\n"));
- fatal(stream_to_s8(err));
- }
- /* TODO(rnp): str prefix of */
- switch (vendor[0]) {
- case 'A': gl_parameters.vendor_id = GLVendor_AMD; break;
- case 'I': gl_parameters.vendor_id = GLVendor_Intel; break;
- case 'N': gl_parameters.vendor_id = GLVendor_NVIDIA; break;
- /* NOTE(rnp): freedreno */
- case 'f': gl_parameters.vendor_id = GLVendor_ARM; break;
- /* NOTE(rnp): Microsoft Corporation - weird win32 thing (microsoft is just using mesa for the driver) */
- case 'M': gl_parameters.vendor_id = GLVendor_ARM; break;
- default:
- stream_append_s8s(err, s8("Unknown GL Vendor: "), c_str_to_s8(vendor), s8("\n"));
- fatal(stream_to_s8(err));
- }
-
- #define X(glname, name, suffix) glGetIntegerv(GL_##glname, &gl_parameters.name);
- GL_PARAMETERS
- #undef X
- }
-
-#ifdef _DEBUG
- {
- s8 vendor = s8("vendor:");
- i32 max_width = (i32)vendor.len;
- #define X(glname, name, suffix) if (s8(#name).len > max_width) max_width = (i32)s8(#name ":").len;
- GL_PARAMETERS
- #undef X
- max_width++;
-
- stream_append_s8s(err, s8("---- GL Parameters ----\n"), vendor);
- stream_pad(err, ' ', max_width - (i32)vendor.len);
- switch (gl_parameters.vendor_id) {
- case GLVendor_AMD: stream_append_s8(err, s8("AMD")); break;
- case GLVendor_ARM: stream_append_s8(err, s8("ARM")); break;
- case GLVendor_Intel: stream_append_s8(err, s8("Intel")); break;
- case GLVendor_NVIDIA: stream_append_s8(err, s8("nVidia")); break;
- }
- stream_append_byte(err, '\n');
-
- #define X(glname, name, suffix) \
- stream_append_s8(err, s8(#name ":")); \
- stream_pad(err, ' ', max_width - (i32)s8(#name ":").len); \
- stream_append_i64(err, gl_parameters.name); \
- stream_append_s8(err, s8(suffix "\n"));
- GL_PARAMETERS
- #undef X
- stream_append_s8(err, s8("-----------------------\n"));
- os_console_log(err->data, err->widx);
- }
-#endif
-
- {
- stream_reset(err, 0);
- if (gl_parameters.max_ubo_size < (i32)sizeof(BeamformerParameters)) {
- stream_append_s8(err, s8("GPU must support UBOs of at least "));
- stream_append_i64(err, sizeof(BeamformerParameters));
- stream_append_s8(err, s8(" bytes!\n"));
- }
-
- #define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n"));
- OGLProcedureList
- OGLRequiredExtensionProcedureList
- #undef X
+ stream_reset(err, 0);
+ #define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n"));
+ OGLProcedureList
+ OGLRequiredExtensionProcedureList
+ #undef X
- if (err->widx) fatal(stream_to_s8(err));
- }
+ if (err->widx) fatal(stream_to_s8(err));
}
function void
@@ -177,7 +112,7 @@ beamformer_load_cuda_library(BeamformerCtx *ctx, OSLibrary cuda, Arena arena)
/* TODO(rnp): (25.10.30) registering the rf buffer with CUDA is currently
* causing a major performance regression. for now we are disabling its use
* altogether. it will be reenabled once the issue can be fixed */
- b32 result = 0 && gl_parameters.vendor_id == GLVendor_NVIDIA && ValidHandle(cuda);
+ b32 result = 0 && vk_gpu_info()->vendor == GPUVendor_NVIDIA && ValidHandle(cuda);
if (result) {
Stream err = arena_stream(arena);
@@ -431,7 +366,7 @@ beamformer_init(BeamformerInput *input)
LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));
glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
- i32 msaa_samples = gl_parameters.vendor_id == GLVendor_ARM? 4 : 8;
+ u32 msaa_samples = vk_gpu_info()->max_msaa_samples;
glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
FRAME_VIEW_RENDER_TARGET_SIZE);
glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
diff --git a/beamformer_core.c b/beamformer_core.c
@@ -221,7 +221,7 @@ alloc_beamform_frame(BeamformerFrame *out, iv3 out_dim, GLenum gl_kind, s8 name,
/* NOTE: allocate storage for beamformed output data;
* this is shared between compute and fragment shaders */
u32 max_dim = (u32)Max(out->dim.x, Max(out->dim.y, out->dim.z));
- out->mips = (i32)ctz_u32(round_up_power_of_2(max_dim)) + 1;
+ out->mips = (i32)ctz_u64(round_up_power_of_two(max_dim)) + 1;
out->gl_kind = gl_kind;
@@ -375,7 +375,7 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
f32 time_offset = pb->parameters.time_offset;
// TODO(rnp): subgroup size
- u32 subgroup_size = gl_parameters.vendor_id == GLVendor_NVIDIA ? 32 : 64;
+ u32 subgroup_size = vk_gpu_info()->vendor == GPUVendor_NVIDIA ? 32 : 64;
cp->pipeline.shader_count = 0;
for (u32 i = 0; i < pb->pipeline.shader_count; i++) {
diff --git a/beamformer_internal.h b/beamformer_internal.h
@@ -34,11 +34,27 @@ typedef struct {
VulkanHandle buffer;
} GPUBuffer;
+typedef enum { /* NOTE: vendor identifiers; compared against the vendorID reported by the Vulkan device properties */
+ GPUVendor_AMD = 0x1002,
+ GPUVendor_NVIDIA = 0x10DE,
+ GPUVendor_Qualcomm = 0x5143,
+ GPUVendor_Intel = 0x8086,
+} GPUVendor;
+
typedef struct {
- u64 gpu_heap_size;
- u64 gpu_heap_used;
+ s8 name; /* device name copied out of the Vulkan device properties */
+ GPUVendor vendor; /* vendorID of the selected physical device */
f32 timestamp_period_ns;
+
+ u32 max_compute_shared_memory_size; /* limits.maxComputeSharedMemorySize */
+ u32 max_msaa_samples; /* highest bit set in limits.framebufferColorSampleCounts */
+ u32 max_image_dimension_2D; /* limits.maxImageDimension2D */
+ // NOTE(rnp): vulkan compute will output to a buffer so this won't be relevant
+ u32 max_image_dimension_3D; /* limits.maxImageDimension3D */
+
+ u64 gpu_heap_size; /* size of the memory heap selected as the GPU heap */
+ u64 gpu_heap_used;
} GPUInfo;
///////////////////////////
diff --git a/intrinsics.c b/intrinsics.c
@@ -122,23 +122,12 @@
#if COMPILER_MSVC
-function force_inline u32
-clz_u32(u32 a)
-{
- u32 result = 32, index;
- if (a) {
- _BitScanReverse(&index, a);
- result = index;
- }
- return result;
-}
-
-function force_inline u32
-ctz_u32(u32 a)
+function force_inline u64
+clz_u64(u64 a) /* NOTE: number of leading zero bits in a; 64 when a == 0 */
{
- u32 result = 32, index;
+ u64 result = 64; unsigned long index; /* _BitScanReverse64 writes through an unsigned long * */
if (a) {
- _BitScanForward(&index, a);
- result = index;
+ _BitScanReverse64(&index, a);
+ result = 63 - index; /* the intrinsic yields the MSB position, not the leading zero count */
}
return result;
}
@@ -157,19 +146,11 @@ ctz_u64(u64 a)
#else /* !COMPILER_MSVC */
-function force_inline u32
-clz_u32(u32 a)
-{
- u32 result = 32;
- if (a) result = (u32)__builtin_clz(a);
- return result;
-}
-
-function force_inline u32
-ctz_u32(u32 a)
+function force_inline u64
+clz_u64(u64 a) /* NOTE: number of leading zero bits in a; 64 when a == 0 */
{
- u32 result = 32;
- if (a) result = (u32)__builtin_ctz(a);
+ u64 result = 64;
+ if (a) result = (u64)__builtin_clzll(a);
return result;
}
diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c
@@ -692,8 +692,8 @@ beamformer_live_parameters_get_dirty_flag(void)
{
i32 result = -1;
if (check_shared_memory()) {
- u32 flag = ctz_u32(g_beamformer_library_context.bp->live_imaging_dirty_flags);
- if (flag != 32) {
+ u32 flag = ctz_u64(g_beamformer_library_context.bp->live_imaging_dirty_flags);
+ if (flag != 64) {
atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag));
result = (i32)flag;
}
diff --git a/opengl.h b/opengl.h
@@ -169,33 +169,4 @@ OGLProcedureList
OGLRequiredExtensionProcedureList
#undef X
-typedef enum {
- GLVendor_AMD,
- GLVendor_ARM,
- GLVendor_Intel,
- GLVendor_NVIDIA,
-} GLVendorID;
-
-#define GL_PARAMETERS \
- X(MAJOR_VERSION, version_major, "") \
- X(MINOR_VERSION, version_minor, "") \
- X(MIN_MAP_BUFFER_ALIGNMENT, min_map_buffer_alignment, "") \
- X(TEXTURE_BUFFER_OFFSET_ALIGNMENT, texture_buffer_offset_alignment, "") \
- X(MAX_TEXTURE_BUFFER_SIZE, max_texture_buffer_size, "") \
- X(MAX_TEXTURE_SIZE, max_2d_texture_dim, "") \
- X(MAX_3D_TEXTURE_SIZE, max_3d_texture_dim, "") \
- X(MAX_SHADER_STORAGE_BLOCK_SIZE, max_ssbo_size, "") \
- X(MAX_COMPUTE_SHARED_MEMORY_SIZE, max_shared_memory_size, "") \
- X(MAX_UNIFORM_BLOCK_SIZE, max_ubo_size, "") \
- X(MAX_SERVER_WAIT_TIMEOUT, max_server_wait_time, " [ns]") \
-
-typedef struct {
- GLVendorID vendor_id;
- #define X(glname, name, suffix) i32 name;
- GL_PARAMETERS
- #undef X
-} GLParameters;
-
-DEBUG_IMPORT GLParameters gl_parameters;
-
#endif /* _OPENGL_H_*/
diff --git a/ui.c b/ui.c
@@ -960,7 +960,7 @@ resize_frame_view(BeamformerFrameView *view, iv2 dim)
glCreateTextures(GL_TEXTURE_2D, 1, &view->texture);
view->texture_dim = dim;
- view->texture_mipmaps = (i32)ctz_u32((u32)Max(dim.x, dim.y)) + 1;
+ view->texture_mipmaps = (i32)ctz_u64((u64)Max(dim.x, dim.y)) + 1;
glTextureStorage2D(view->texture, view->texture_mipmaps, GL_RGBA8, dim.x, dim.y);
glGenerateTextureMipmap(view->texture);
diff --git a/util.c b/util.c
@@ -698,17 +698,18 @@ push_s8(Arena *a, s8 str)
return result;
}
-function force_inline u32
-round_down_power_of_2(u32 a)
+/* NOTE(rnp): from Hacker's Delight; largest power of two <= a, or 0 when a == 0 */
+function force_inline u64
+round_down_power_of_two(u64 a)
{
- u32 result = 0x80000000UL >> clz_u32(a);
+ u64 result = a ? 0x8000000000000000ULL >> clz_u64(a) : 0; /* clz_u64(0) is 64 and shifting a u64 by 64 is undefined */
return result;
}
-function force_inline u32
-round_up_power_of_2(u32 a)
+function force_inline u64
+round_up_power_of_two(u64 a) /* NOTE: smallest power of two >= a (1 for a <= 1); a must not exceed 2^63 */
{
- u32 result = 0x80000000UL >> (clz_u32(a - 1) - 1);
+ u64 result = a > 1 ? 0x8000000000000000ULL >> (clz_u64(a - 1) - 1) : 1; /* a == 0 would wrap a - 1 and produce an out-of-range shift */
return result;
}
diff --git a/vulkan.c b/vulkan.c
@@ -205,15 +205,13 @@ vk_load_physical_device(Arena arena, Stream *err)
if (!vk->physical_device)
fatal(vulkan_info("failed to find a suitable GPU\n"));
- VkPhysicalDeviceProperties2 *dp = push_struct(&arena, typeof(*dp));
- dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+ VkPhysicalDeviceProperties2 dp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
+ VkPhysicalDeviceMaintenance3Properties dm3p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES};
+ dp.pNext = &dm3p;
- VkPhysicalDeviceMaintenance3Properties *dm3p = dp->pNext = push_struct(&arena, typeof(*dp));
- dm3p->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES;
+ vkGetPhysicalDeviceProperties2(vk->physical_device, &dp);
- vkGetPhysicalDeviceProperties2(vk->physical_device, dp);
-
- stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp->properties.deviceName), s8("\n"));
+ stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n"));
{
Arena scratch = arena;
@@ -258,11 +256,10 @@ vk_load_physical_device(Arena arena, Stream *err)
}
}
- VkPhysicalDeviceMemoryProperties2 *mp = push_struct(&arena, typeof(*mp));
- mp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
- vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, mp);
+ VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2};
+ vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp);
- VkPhysicalDeviceMemoryProperties *bmp = &mp->memoryProperties;
+ VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties;
// NOTE(rnp): vulkan spec says that highest performance memory types must
// come first. just take the first one found.
@@ -316,10 +313,18 @@ vk_load_physical_device(Arena arena, Stream *err)
vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
}
- vk->memory_info.max_allocation_size = dm3p->maxMemoryAllocationSize;
- vk->memory_info.non_coherent_atom_size = dp->properties.limits.nonCoherentAtomSize;
+ vk->memory_info.max_allocation_size = dm3p.maxMemoryAllocationSize;
+ vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize;
+ vk->gpu_info.vendor = dp.properties.vendorID;
vk->gpu_info.gpu_heap_size = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size;
- vk->gpu_info.timestamp_period_ns = dp->properties.limits.timestampPeriod;
+ vk->gpu_info.timestamp_period_ns = dp.properties.limits.timestampPeriod;
+ vk->gpu_info.max_image_dimension_2D = dp.properties.limits.maxImageDimension2D;
+ vk->gpu_info.max_image_dimension_3D = dp.properties.limits.maxImageDimension3D;
+ vk->gpu_info.max_msaa_samples = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts);
+ vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize;
+
+ // IMPORTANT(rnp): memory must only be pushed at the end of the function
+ vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName));
}
function void