Commit: f33f8f7270186a95011e8cf201acb3b50733cd4f
Parent: 3e4bea29377e32bee5ef97cc5efef02310587b1b
Author: Randy Palamar
Date: Thu, 7 May 2026 15:28:11 -0600
core: migrate to vulkan compute
Most things are working here. The only relevant thing that is
missing currently is the frame averaging which no one really uses.
This has a minor performance regression which I saw before when I
tried switching the images in OpenGL to a large ssbo. It can be
solved by not doing the "DAS Fast" thing (running a single channel
at a time) but this may cause issues on lower end devices. The
next commit will implement a new optimization which should solve
this universally.
Diffstat:
31 files changed, 4942 insertions(+), 2341 deletions(-)
diff --git a/beamformer.c b/beamformer.c
@@ -56,30 +56,10 @@ fatal(s8 message)
#include "vulkan.c"
-// TODO(rnp): none of this belongs here, but will be removed
+// TODO(rnp): this doesn't belong here, but will be removed
// once vulkan migration is complete
-#define GLFW_VISIBLE 0x00020004
-void glfwWindowHint(i32, i32);
-iptr glfwCreateWindow(i32, i32, char *, iptr, iptr);
-void glfwMakeContextCurrent(iptr);
-iptr glfwGetGLXContext(iptr);
-iptr glfwGetWGLContext(iptr);
void * glfwGetProcAddress(char *);
-#if OS_WINDOWS
-function iptr
-os_get_native_gl_context(iptr window)
-{
- return glfwGetWGLContext(window);
-}
-#else
-function iptr
-os_get_native_gl_context(iptr window)
-{
- return glfwGetGLXContext(window);
-}
-#endif
-
function void
gl_debug_logger(u32 src, u32 type, u32 id, u32 lvl, i32 len, const char *msg, const void *userctx)
{
@@ -100,7 +80,12 @@ load_gl(Stream *err)
stream_reset(err, 0);
#define X(name, ret, params) if (!name) stream_append_s8(err, s8("missing required GL function: " #name "\n"));
OGLProcedureList
- OGLRequiredExtensionProcedureList
+ OGLRequiredExtensionProcedureListBase
+ #if OS_WINDOWS
+ OGLRequiredExtensionProcedureListW32
+ #else
+ OGLRequiredExtensionProcedureListLinux
+ #endif
#undef X
if (err->widx) fatal(stream_to_s8(err));
@@ -129,41 +114,6 @@ beamformer_load_cuda_library(BeamformerCtx *ctx, OSLibrary cuda, Arena arena)
#undef X
}
-function BeamformerRenderModel
-render_model_from_arrays(f32 *vertices, f32 *normals, i32 vertices_size, u16 *indices, i32 index_count)
-{
- BeamformerRenderModel result = {0};
-
- i32 buffer_size = vertices_size * 2 + index_count * (i32)sizeof(u16);
- i32 indices_offset = vertices_size * 2;
- i32 indices_size = index_count * (i32)sizeof(u16);
-
- result.elements = index_count;
- result.elements_offset = indices_offset;
-
- glCreateBuffers(1, &result.buffer);
- glNamedBufferStorage(result.buffer, buffer_size, 0, GL_DYNAMIC_STORAGE_BIT);
- glNamedBufferSubData(result.buffer, 0, vertices_size, vertices);
- glNamedBufferSubData(result.buffer, vertices_size, vertices_size, normals);
- glNamedBufferSubData(result.buffer, indices_offset, indices_size, indices);
-
- glCreateVertexArrays(1, &result.vao);
- glVertexArrayVertexBuffer(result.vao, 0, result.buffer, 0, 3 * sizeof(f32));
- glVertexArrayVertexBuffer(result.vao, 1, result.buffer, vertices_size, 3 * sizeof(f32));
- glVertexArrayElementBuffer(result.vao, result.buffer);
-
- glEnableVertexArrayAttrib(result.vao, 0);
- glEnableVertexArrayAttrib(result.vao, 1);
-
- glVertexArrayAttribFormat(result.vao, 0, 3, GL_FLOAT, 0, 0);
- glVertexArrayAttribFormat(result.vao, 1, 3, GL_FLOAT, 0, (u32)vertices_size);
-
- glVertexArrayAttribBinding(result.vao, 0, 0);
- glVertexArrayAttribBinding(result.vao, 1, 0);
-
- return result;
-}
-
function void
worker_thread_sleep(GLWorkerThreadContext *ctx, BeamformerSharedMemory *sm)
{
@@ -186,17 +136,12 @@ function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
{
GLWorkerThreadContext *ctx = user_context;
- glfwMakeContextCurrent(ctx->window_handle);
- ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
-
BeamformerCtx *beamformer = (BeamformerCtx *)ctx->user_context;
- glCreateQueries(GL_TIME_ELAPSED, countof(beamformer->compute_context.shader_timer_ids),
- beamformer->compute_context.shader_timer_ids);
for (;;) {
worker_thread_sleep(ctx, beamformer->shared_memory);
asan_poison_region(ctx->arena.beg, ctx->arena.end - ctx->arena.beg);
- beamformer_complete_compute(ctx->user_context, &ctx->arena, ctx->gl_context);
+ beamformer_complete_compute(beamformer, &ctx->arena);
}
unreachable();
@@ -206,31 +151,8 @@ function OS_THREAD_ENTRY_POINT_FN(compute_worker_thread_entry_point)
function OS_THREAD_ENTRY_POINT_FN(beamformer_upload_entry_point)
{
- GLWorkerThreadContext *ctx = user_context;
- glfwMakeContextCurrent(ctx->window_handle);
- ctx->gl_context = os_get_native_gl_context(ctx->window_handle);
-
- BeamformerUploadThreadContext *up = (typeof(up))ctx->user_context;
- BeamformerRFBuffer *rf = up->rf_buffer;
- glCreateQueries(GL_TIMESTAMP, 1, &rf->data_timestamp_query);
- /* NOTE(rnp): start this here so we don't have to worry about it being started or not */
- glQueryCounter(rf->data_timestamp_query, GL_TIMESTAMP);
-
- glGenSemaphoresEXT(countof(rf->gl_upload_semaphores), rf->gl_upload_semaphores);
- for EachElement(rf->vk_upload_semaphores, it) {
- OSHandle export = {0};
- rf->vk_upload_semaphores[it] = vk_semaphore_create(rf->upload_semaphores_handles + it);
-
- if (OS_WINDOWS) {
- glImportSemaphoreWin32HandleEXT(rf->gl_upload_semaphores[it], GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
- (void *)export.value[0]);
- // NOTE(rnp): w32 does not transfer ownership from handle back to driver
- rf->upload_semaphores_handles[it] = export;
- } else {
- glImportSemaphoreFdEXT(rf->gl_upload_semaphores[it], GL_HANDLE_TYPE_OPAQUE_FD_EXT, export.value[0]);
- rf->upload_semaphores_handles[it].value[0] = OSInvalidHandleValue;
- }
- }
+ GLWorkerThreadContext *ctx = user_context;
+ BeamformerUploadThreadContext *up = (typeof(up))ctx->user_context;
for (;;) {
worker_thread_sleep(ctx, up->shared_memory);
@@ -264,6 +186,45 @@ beamformer_init(BeamformerInput *input)
vk_load(input->vulkan_library_handle, &memory, &ctx->error_stream);
+ BeamformerComputeContext *cs = &ctx->compute_context;
+
+ // NOTE(rnp): allocate beamformed image ring buffer
+ {
+ u64 gpu_heap_size = vk_gpu_info()->gpu_heap_size;
+ u64 trial_sizes[] = {
+ GB(4),
+ GB(2),
+ GB(1) + MB(512),
+ GB(1),
+ };
+
+ u32 base_index = 0;
+ for EachElement(trial_sizes, it) {
+ if (gpu_heap_size >= 2 * trial_sizes[it])
+ break;
+ base_index++;
+ }
+
+ for (u32 i = base_index; i < countof(trial_sizes); i++) {
+ // TODO(rnp): it may be better to download data from this using the transfer queue
+ VulkanTimeline timelines[] = {VulkanTimeline_Compute, VulkanTimeline_Graphics};
+ GPUBufferAllocateInfo allocate_info = {
+ .size = trial_sizes[i],
+ .flags = VulkanUsageFlag_TransferSource|VulkanUsageFlag_HostReadWrite,
+ .timeline_count = countof(timelines),
+ .timelines_used = timelines,
+ .label = s8("BeamformedData"),
+ };
+ vk_buffer_allocate(cs->backlog.buffer, &allocate_info);
+ if (cs->backlog.buffer->size > 0)
+ break;
+ }
+ if (cs->backlog.buffer->size == 0) {
+ // NOTE(rnp): if this becomes an issue we may be able to get by in some other way
+ fatal(s8("Failed to allocate space for beamformed data\n"));
+ }
+ }
+
beamformer_load_cuda_library(ctx, input->cuda_library_handle, memory);
SetConfigFlags(FLAG_VSYNC_HINT|FLAG_WINDOW_ALWAYS_RUN);
@@ -272,15 +233,8 @@ beamformer_init(BeamformerInput *input)
SetWindowState(FLAG_WINDOW_RESIZABLE);
SetWindowMinSize(840, ctx->window_size.h);
- glfwWindowHint(GLFW_VISIBLE, 0);
- iptr raylib_window_handle = (iptr)GetPlatformWindowHandle();
-
load_gl(&ctx->error_stream);
- ctx->beamform_work_queue = push_struct(&memory, BeamformWorkQueue);
- ctx->compute_shader_stats = push_struct(&memory, ComputeShaderStats);
- ctx->compute_timing_table = push_struct(&memory, ComputeTimingTable);
-
ctx->shared_memory = input->shared_memory;
ctx->shared_memory_size = input->shared_memory_size;
if (ctx->shared_memory_size < (i64)sizeof(*ctx->shared_memory))
@@ -289,6 +243,7 @@ beamformer_init(BeamformerInput *input)
ctx->shared_memory->version = BEAMFORMER_SHARED_MEMORY_VERSION;
ctx->shared_memory->reserved_parameter_blocks = 1;
+ ctx->shared_memory->max_beamformed_data_size = cs->backlog.buffer->size;
/* TODO(rnp): I'm not sure if its a good idea to pre-reserve a bunch of semaphores
* on w32 but thats what we are doing for now */
@@ -316,14 +271,10 @@ beamformer_init(BeamformerInput *input)
}
#endif
- BeamformerComputeContext *cs = &ctx->compute_context;
- cs->rf_buffer.export_handle = (OSHandle){OSInvalidHandleValue};
-
GLWorkerThreadContext *worker = &ctx->compute_worker;
/* TODO(rnp): we should lock this down after we have something working */
- worker->user_context = (iptr)ctx;
- worker->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
- worker->handle = os_create_thread("[compute]", worker, compute_worker_thread_entry_point);
+ worker->user_context = (iptr)ctx;
+ worker->handle = os_create_thread("[compute]", worker, compute_worker_thread_entry_point);
GLWorkerThreadContext *upload = &ctx->upload_worker;
BeamformerUploadThreadContext *upctx = push_struct(&memory, typeof(*upctx));
@@ -333,10 +284,7 @@ beamformer_init(BeamformerInput *input)
upctx->shared_memory_size = ctx->shared_memory_size;
upctx->compute_timing_table = ctx->compute_timing_table;
upctx->compute_worker_sync = &ctx->compute_worker.sync_variable;
- upload->window_handle = glfwCreateWindow(1, 1, "", 0, raylib_window_handle);
- upload->handle = os_create_thread("[upload]", upload, beamformer_upload_entry_point);
-
- glfwMakeContextCurrent(raylib_window_handle);
+ upload->handle = os_create_thread("[upload]", upload, beamformer_upload_entry_point);
/* NOTE: set up OpenGL debug logging */
Stream *gl_error_stream = push_struct(&memory, Stream);
@@ -352,171 +300,37 @@ beamformer_init(BeamformerInput *input)
i32 index = beamformer_reloadable_compute_shader_info_indices[it];
Arena temp = scratch;
s8 file = push_s8_from_parts(&temp, os_path_separator(), s8("shaders"),
- beamformer_reloadable_shader_files[index]);
+ beamformer_reloadable_shader_files[index][0]);
BeamformerFileReloadContext *frc = push_struct(&memory, typeof(*frc));
- frc->kind = BeamformerFileReloadKind_ComputeShader;
- frc->compute_shader_kind = beamformer_reloadable_shader_kinds[index];
+ frc->kind = BeamformerFileReloadKind_ComputeShader;
+ frc->shader_reload.shader = beamformer_reloadable_shader_kinds[index];
os_add_file_watch((char *)file.data, file.len, frc);
}
- }
- FrameViewRenderContext *fvr = &ctx->frame_view_render_context;
- glCreateFramebuffers(countof(fvr->framebuffers), fvr->framebuffers);
- LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[0], s8("Frame View Framebuffer"));
- LABEL_GL_OBJECT(GL_FRAMEBUFFER, fvr->framebuffers[1], s8("Frame View Resolving Framebuffer"));
-
- glCreateRenderbuffers(countof(fvr->renderbuffers), fvr->renderbuffers);
- u32 msaa_samples = vk_gpu_info()->max_msaa_samples;
- glNamedRenderbufferStorageMultisample(fvr->renderbuffers[0], msaa_samples, GL_RGBA8,
- FRAME_VIEW_RENDER_TARGET_SIZE);
- glNamedRenderbufferStorageMultisample(fvr->renderbuffers[1], msaa_samples, GL_DEPTH_COMPONENT24,
- FRAME_VIEW_RENDER_TARGET_SIZE);
-
- static_assert(countof(beamformer_reloadable_render_shader_info_indices) == 1,
- "only a single render shader is currently handled");
- i32 render_rsi_index = beamformer_reloadable_render_shader_info_indices[0];
-
- // TODO(rnp): leaks when BakeShaders is true
- Arena *arena = &memory;
- BeamformerShaderReloadContext *render_3d = push_struct(arena, typeof(*render_3d));
- render_3d->reloadable_info_index = render_rsi_index;
- render_3d->gl_type = GL_FRAGMENT_SHADER;
- render_3d->header = s8(""
- "layout(location = 0) in vec3 normal;\n"
- "layout(location = 1) in vec3 texture_coordinate;\n\n"
- "layout(location = 2) in vec3 test_texture_coordinate;\n\n"
- "layout(location = 0) out vec4 out_colour;\n\n"
- "layout(location = " str(FRAME_VIEW_DYNAMIC_RANGE_LOC) ") uniform float u_db_cutoff = 60;\n"
- "layout(location = " str(FRAME_VIEW_THRESHOLD_LOC) ") uniform float u_threshold = 40;\n"
- "layout(location = " str(FRAME_VIEW_GAMMA_LOC) ") uniform float u_gamma = 1;\n"
- "layout(location = " str(FRAME_VIEW_LOG_SCALE_LOC) ") uniform bool u_log_scale;\n"
- "layout(location = " str(FRAME_VIEW_BB_COLOUR_LOC) ") uniform vec4 u_bb_colour = vec4(" str(FRAME_VIEW_BB_COLOUR) ");\n"
- "layout(location = " str(FRAME_VIEW_BB_FRACTION_LOC) ") uniform float u_bb_fraction = " str(FRAME_VIEW_BB_FRACTION) ";\n"
- "layout(location = " str(FRAME_VIEW_SOLID_BB_LOC) ") uniform bool u_solid_bb;\n"
- "\n"
- "layout(binding = 0) uniform sampler3D u_texture;\n");
-
- render_3d->link = push_struct(arena, typeof(*render_3d));
- render_3d->link->reloadable_info_index = -1;
- render_3d->link->gl_type = GL_VERTEX_SHADER;
- render_3d->link->link = render_3d;
- render_3d->link->header = s8(""
- "layout(location = 0) in vec3 v_position;\n"
- "layout(location = 1) in vec3 v_normal;\n"
- "\n"
- "layout(location = 0) out vec3 f_normal;\n"
- "layout(location = 1) out vec3 f_texture_coordinate;\n"
- "layout(location = 2) out vec3 f_orig_texture_coordinate;\n"
- "\n"
- "layout(location = " str(FRAME_VIEW_MODEL_MATRIX_LOC) ") uniform mat4 u_model;\n"
- "layout(location = " str(FRAME_VIEW_VIEW_MATRIX_LOC) ") uniform mat4 u_view;\n"
- "layout(location = " str(FRAME_VIEW_PROJ_MATRIX_LOC) ") uniform mat4 u_projection;\n"
- "\n"
- "\n"
- "void main()\n"
- "{\n"
- "\tvec3 pos = v_position;\n"
- "\tf_orig_texture_coordinate = (2 * v_position + 1) / 2;\n"
- //"\tif (v_position.y == -1) pos.x = clamp(v_position.x, -u_clip_fraction, u_clip_fraction);\n"
- "\tvec3 tex_coord = (2 * pos + 1) / 2;\n"
- "\tf_texture_coordinate = tex_coord;\n"
- //"\tf_texture_coordinate = u_swizzle? tex_coord.xzy : tex_coord;\n"
- //"\tf_normal = normalize(mat3(u_model) * v_normal);\n"
- "\tf_normal = v_normal;\n"
- "\tgl_Position = u_projection * u_view * u_model * vec4(pos, 1);\n"
- "}\n");
-
- // TODO(rnp): this is probably not expected by the platform, refactor so that all
- // needed context (eg. headers) are available outside of here and push initial load
- // into ui_init
- {
- BeamformerFileReloadContext *frc = push_struct(&memory, typeof(*frc));
- frc->kind = BeamformerFileReloadKind_Shader;
- frc->shader_reload_context = render_3d;
- input->event_queue[input->event_count++] = (BeamformerInputEvent){
- .kind = BeamformerInputEventKind_FileEvent,
- .file_watch_user_context = frc,
- };
+ for EachElement(beamformer_reloadable_compute_helpers_shader_info_indices, it) {
+ i32 index = beamformer_reloadable_compute_helpers_shader_info_indices[it];
+ Arena temp = scratch;
+ s8 file = push_s8_from_parts(&temp, os_path_separator(), s8("shaders"),
+ beamformer_reloadable_shader_files[index][0]);
+ BeamformerFileReloadContext *frc = push_struct(&memory, typeof(*frc));
+ frc->kind = BeamformerFileReloadKind_ComputeShader;
+ frc->shader_reload.shader = beamformer_reloadable_shader_kinds[index];
+ os_add_file_watch((char *)file.data, file.len, frc);
+ }
- s8 render_file = {0};
- if (!BakeShaders) {
- render_file = push_s8_from_parts(&scratch, os_path_separator(), s8("shaders"),
- beamformer_reloadable_shader_files[render_rsi_index]);
- os_add_file_watch((char *)render_file.data, render_file.len, frc);
+ for EachElement(beamformer_reloadable_compute_internal_shader_info_indices, it) {
+ i32 index = beamformer_reloadable_compute_internal_shader_info_indices[it];
+ Arena temp = scratch;
+ s8 file = push_s8_from_parts(&temp, os_path_separator(), s8("shaders"),
+ beamformer_reloadable_shader_files[index][0]);
+ BeamformerFileReloadContext *frc = push_struct(&memory, typeof(*frc));
+ frc->kind = BeamformerFileReloadKind_ComputeInternalShader;
+ frc->shader_reload.shader = beamformer_reloadable_shader_kinds[index];
+ frc->shader_reload.pipeline = cs->compute_internal_pipelines + it;
+ os_add_file_watch((char *)file.data, file.len, frc);
}
}
- f32 unit_cube_vertices[] = {
- 0.5f, 0.5f, -0.5f,
- 0.5f, 0.5f, -0.5f,
- 0.5f, 0.5f, -0.5f,
- 0.5f, -0.5f, -0.5f,
- 0.5f, -0.5f, -0.5f,
- 0.5f, -0.5f, -0.5f,
- 0.5f, 0.5f, 0.5f,
- 0.5f, 0.5f, 0.5f,
- 0.5f, 0.5f, 0.5f,
- 0.5f, -0.5f, 0.5f,
- 0.5f, -0.5f, 0.5f,
- 0.5f, -0.5f, 0.5f,
- -0.5f, 0.5f, -0.5f,
- -0.5f, 0.5f, -0.5f,
- -0.5f, 0.5f, -0.5f,
- -0.5f, -0.5f, -0.5f,
- -0.5f, -0.5f, -0.5f,
- -0.5f, -0.5f, -0.5f,
- -0.5f, 0.5f, 0.5f,
- -0.5f, 0.5f, 0.5f,
- -0.5f, 0.5f, 0.5f,
- -0.5f, -0.5f, 0.5f,
- -0.5f, -0.5f, 0.5f,
- -0.5f, -0.5f, 0.5f
- };
- f32 unit_cube_normals[] = {
- 0.0f, 0.0f, -1.0f,
- 0.0f, 1.0f, 0.0f,
- 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, -1.0f,
- 0.0f, -1.0f, 0.0f,
- 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 1.0f,
- 0.0f, 1.0f, 0.0f,
- 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 1.0f,
- 0.0f, -1.0f, 0.0f,
- 1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, -1.0f,
- 0.0f, 1.0f, 0.0f,
- -1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, -1.0f,
- 0.0f, -1.0f, 0.0f,
- -1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 1.0f,
- 0.0f, 1.0f, 0.0f,
- -1.0f, 0.0f, 0.0f,
- 0.0f, 0.0f, 1.0f,
- 0.0f, -1.0f, 0.0f,
- -1.0f, 0.0f, 0.0f
- };
- u16 unit_cube_indices[] = {
- 1, 13, 19,
- 1, 19, 7,
- 9, 6, 18,
- 9, 18, 21,
- 23, 20, 14,
- 23, 14, 17,
- 16, 4, 10,
- 16, 10, 22,
- 5, 2, 8,
- 5, 8, 11,
- 15, 12, 0,
- 15, 0, 3
- };
-
- cs->unit_cube_model = render_model_from_arrays(unit_cube_vertices, unit_cube_normals,
- sizeof(unit_cube_vertices),
- unit_cube_indices, countof(unit_cube_indices));
-
memory.end = scratch.end;
ctx->arena = memory;
ctx->state = BeamformerState_Running;
diff --git a/beamformer.h b/beamformer.h
@@ -182,6 +182,7 @@ typedef struct {
#if BEAMFORMER_RENDERDOC_HOOKS
void *renderdoc_start_frame_capture;
void *renderdoc_end_frame_capture;
+ void *renderdoc_set_capture_file_path_template;
#endif
} BeamformerInput;
diff --git a/beamformer.meta b/beamformer.meta
@@ -1,5 +1,5 @@
@Constant(4) FilterSlots
-@Constant(16) MaxBacklogFrames
+@Constant(4096) MaxBacklogFrames
@Constant(256) MaxChannelCount
@Constant(256) MaxEmissionsCount
@Constant(16) MaxComputeShaderStages
@@ -210,6 +210,13 @@
@Library @Struct SimpleParameters
@MATLAB @Struct SimpleParameters
+@Struct DASArrayParameters
+{
+ [focal_vectors V2 MaxChannelCount]
+ [sparse_elements S16 MaxChannelCount]
+ [transmit_receive_orientations U16 MaxChannelCount]
+}
+
@Emit
{
`read_only global u8 beamformer_data_kind_element_size[] = {`
@@ -274,6 +281,15 @@
[ToProcess to_process U32]
[TransmitCount transmit_count U32]
}
+
+ @PushConstants
+ {
+ [hadamard_buffer U64]
+ [rf_buffer U64]
+ [output_buffer U64]
+ [output_rf_buffer U64]
+ [first_pass B32]
+ }
}
@Shader(filter.glsl) Filter
@@ -301,22 +317,32 @@
[DemodulationFrequency demodulation_frequency F32]
[SamplingFrequency sampling_frequency F32]
}
+
+ @PushConstants
+ {
+ [input_data U64]
+ [output_data U64]
+ [filter_coefficients U64]
+ }
}
@Shader(das.glsl) DAS
{
+ @Constant MaxChannelCount
+
@Enumeration AcquisitionKind
@Enumeration DataKind
@Enumeration InterpolationMode
@Enumeration RCAOrientation
+ @Struct DASArrayParameters
+
@Bake
{
[DataKind data_kind U32]
[CoherencyWeighting coherency_weighting U32]
[SingleFocus single_focus U32]
[SingleOrientation single_orientation U32]
- [Fast fast U32]
[Sparse sparse U32]
[AcquisitionCount acquisition_count U32]
[AcquisitionKind acquisition_kind U32]
@@ -336,17 +362,101 @@
@PushConstants
{
- [xdc_transform M4]
- [voxel_transform M4]
- [xdc_element_pitch V2]
+ [xdc_transform M4]
+ [voxel_transform M4]
+ [xdc_element_pitch V2]
+ [rf_data U64]
+ [output_data U64]
+ [incoherent_output U64]
+ [array_parameters U64]
+ [output_size_x U32]
+ [output_size_y U32]
+ [output_size_z U32]
+ [cycle_t U32]
+ [channel_t S32]
}
}
- @Shader(min_max.glsl) MinMax
@Shader(sum.glsl) Sum
+ {
+ @Enumeration DataKind
+ @PushConstants
+ {
+ [output_data U64]
+ [input_data U64]
+ [image_elements U32]
+ [scale F32]
+ }
+ }
+
+ @Shader(min_max.glsl) MinMax
+}
+
+// NOTE: shaders which need to be baked into the beamforming pipeline
+// but should not be visible to the external interface
+@ShaderGroup ComputeHelpers
+{
+ @Shader(coherency_weighting.glsl) CoherencyWeighting
+ {
+ @Enumeration DataKind
+
+ @Bake
+ {
+ [DataKind data_kind U32]
+ }
+
+ @PushConstants
+ {
+ [left_side_buffer U64]
+ [right_side_buffer U64]
+ [elements U32]
+ [scale F32]
+ [output_size_x U32]
+ [output_size_y U32]
+ [output_size_z U32]
+ }
+ }
+}
+
+// NOTE: general compute shaders which do not need baking
+@ShaderGroup ComputeInternal
+{
+ @Shader(buffer_clear.glsl) BufferClear
+ {
+ @PushConstants
+ {
+ [data U64]
+ [clear_word U32]
+ [words U32]
+ }
+ }
}
@ShaderGroup Render
{
- @Shader(render_3d.frag.glsl) Render3D
+ @RenderShader RenderBeamformed
+ {
+ @Enumeration DataKind
+
+ @VertexShader(render_3d.vert.glsl)
+ @FragmentShader(render_3d.frag.glsl)
+
+ @PushConstants
+ {
+ [mvp_matrix M4]
+ [positions U64]
+ [normals U64]
+
+ [bounding_box_colour V4]
+ [bounding_box_fraction F32]
+ [db_cutoff F32]
+ [threshold F32]
+ [gamma F32]
+ [input_data U64]
+ [input_size_x U32]
+ [input_size_y U32]
+ [input_size_z U32]
+ [data_kind U32]
+ }
+ }
}
diff --git a/beamformer_core.c b/beamformer_core.c
@@ -11,14 +11,9 @@
* - the check for first pass reshaping is the last non constant check
* in the shader
* - this will also remove the need for the channel mapping in the decode shader
- * [X]: refactor: ui: reload only shader which is affected by the interaction
* [ ]: BeamformWorkQueue -> BeamformerWorkQueue
- * [ ]: need to keep track of gpu memory in some way
- * - want to be able to store more than 16 2D frames but limit 3D frames
- * - maybe keep track of how much gpu memory is committed for beamformed images
- * and use that to determine when to loop back over existing textures
- * - to do this maybe use a circular linked list instead of a flat array
- * - then have a way of querying how many frames are available for a specific point count
+ * [ ]: refactor: work queue needs a cleanup, we should only have a single one
+ * - that queue isn't really considered hot so a lock is probably fine
* [ ]: bug: reinit cuda on hot-reload
*/
@@ -32,33 +27,33 @@
global f32 dt_for_frame;
-#define DECODE_FIRST_PASS_UNIFORM_LOC 1
-
-#define DAS_CYCLE_T_UNIFORM_LOC 2
-#define DAS_FAST_CHANNEL_UNIFORM_LOC 3
-
-#define MIN_MAX_MIPS_LEVEL_UNIFORM_LOC 1
-#define SUM_PRESCALE_UNIFORM_LOC 1
-
#if !BEAMFORMER_RENDERDOC_HOOKS
#define start_renderdoc_capture(...)
#define end_renderdoc_capture(...)
#define renderdoc_attached(...) (0)
#else
-global renderdoc_start_frame_capture_fn *start_frame_capture;
-global renderdoc_end_frame_capture_fn *end_frame_capture;
-#define start_renderdoc_capture(gl) if (start_frame_capture) start_frame_capture(gl, 0)
-#define end_renderdoc_capture(gl) if (end_frame_capture) end_frame_capture(gl, 0)
+/* NOTE: optional RenderDoc entry points; every use below is guarded by a null check */
+global renderdoc_start_frame_capture_fn       *start_frame_capture;
+global renderdoc_set_capture_path_template_fn *set_capture_path_template;
+global renderdoc_end_frame_capture_fn         *end_frame_capture;
+/* NOTE: sets the capture path template (when that entry point is present) and then
+ * starts a frame capture using the handle from vk_renderdoc_instance_handle() */
+#define start_renderdoc_capture() do { \
+	if (set_capture_path_template) set_capture_path_template("captures/ogl.rdc"); \
+	if (start_frame_capture) start_frame_capture(vk_renderdoc_instance_handle(), 0); \
+} while(0)
+/* NOTE: wrapped in do/while(0), like start_renderdoc_capture(), so the expansion is a
+ * single statement and cannot swallow an `else` that follows the call site */
+#define end_renderdoc_capture() do { \
+	if (end_frame_capture) end_frame_capture(vk_renderdoc_instance_handle(), 0); \
+} while(0)
#define renderdoc_attached(...) (start_frame_capture != 0)
#endif
-typedef struct {
- BeamformerFrame *frames;
- u32 capacity;
- u32 offset;
- u32 cursor;
- u32 needed_frames;
-} ComputeFrameIterator;
+/* NOTE: one entry per BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST item, in list order:
+ * sizeof(type) * elements (presumably the byte size of member k - confirm against
+ * the list definition). `elements` is parenthesized so that a list entry written
+ * as an expression is multiplied as a whole. */
+read_only global u32 beamformer_compute_array_parameter_sizes[] = {
+	#define X(k, type, elements) sizeof(type) * (elements),
+	BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST
+	#undef X
+};
+
+/* NOTE: byte offset of member k within BeamformerComputeArrayParameters for each
+ * list item; indices match beamformer_compute_array_parameter_sizes */
+read_only global u32 beamformer_compute_array_parameter_offsets[] = {
+	#define X(k, ...) offsetof(BeamformerComputeArrayParameters, k),
+	BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST
+	#undef X
+};
function void
beamformer_compute_plan_release(BeamformerComputeContext *cc, u32 block)
@@ -66,10 +61,9 @@ beamformer_compute_plan_release(BeamformerComputeContext *cc, u32 block)
assert(block < countof(cc->compute_plans));
BeamformerComputePlan *cp = cc->compute_plans[block];
if (cp) {
- glDeleteBuffers(countof(cp->ubos), cp->ubos);
- glDeleteTextures(countof(cp->textures), cp->textures);
+ vk_buffer_release(&cp->array_parameters);
for (u32 i = 0; i < countof(cp->filters); i++)
- glDeleteBuffers(1, &cp->filters[i].ssbo);
+ vk_buffer_release(&cp->filters[i].buffer);
cc->compute_plans[block] = 0;
SLLPushFreelist(cp, cc->compute_plan_freelist);
}
@@ -88,39 +82,19 @@ beamformer_compute_plan_for_block(BeamformerComputeContext *cc, u32 block, Arena
result->ui_voxel_transform = m4_identity();
- glCreateBuffers(countof(result->ubos), result->ubos);
-
Stream label = arena_stream(*arena);
- #define X(k, t, ...) \
- glNamedBufferStorage(result->ubos[BeamformerComputeUBOKind_##k], sizeof(t), \
- 0, GL_DYNAMIC_STORAGE_BIT); \
- stream_append_s8(&label, s8(#t "[")); \
- stream_append_u64(&label, block); \
- stream_append_byte(&label, ']'); \
- glObjectLabel(GL_BUFFER, result->ubos[BeamformerComputeUBOKind_##k], \
- label.widx, (c8 *)label.data); \
- label.widx = 0;
- BEAMFORMER_COMPUTE_UBO_LIST
- #undef X
-
- #define X(_k, t, ...) t,
- GLenum gl_kind[] = {BEAMFORMER_COMPUTE_TEXTURE_LIST_FULL};
- #undef X
- read_only local_persist s8 tex_prefix[] = {
- #define X(k, ...) s8_comp(#k "["),
- BEAMFORMER_COMPUTE_TEXTURE_LIST_FULL
- #undef X
+ stream_append_s8(&label, s8("ComputeParameterArray["));
+ stream_append_u64(&label, block);
+ stream_append_s8(&label, s8("]"));
+ stream_append_byte(&label, 0);
+
+ GPUBufferAllocateInfo allocate_info = {
+ .size = sizeof(BeamformerComputeArrayParameters),
+ .flags = VulkanUsageFlag_HostReadWrite,
+ .label = stream_to_s8(&label),
};
- glCreateTextures(GL_TEXTURE_1D, BeamformerComputeTextureKind_Count - 1, result->textures);
- for (u32 i = 0; i < BeamformerComputeTextureKind_Count - 1; i++) {
- /* TODO(rnp): this could be predicated on channel count for this compute plan */
- glTextureStorage1D(result->textures[i], 1, gl_kind[i], BeamformerMaxChannelCount);
- stream_append_s8(&label, tex_prefix[i]);
- stream_append_u64(&label, block);
- stream_append_byte(&label, ']');
- glObjectLabel(GL_TEXTURE, result->textures[i], label.widx, (c8 *)label.data);
- label.widx = 0;
- }
+ vk_buffer_allocate(&result->array_parameters, &allocate_info);
+ assert((result->array_parameters.gpu_pointer & 63) == 0);
}
return result;
}
@@ -165,42 +139,16 @@ beamformer_filter_update(BeamformerFilter *f, BeamformerFilterParameters fp, u32
f->parameters = fp;
- glDeleteBuffers(1, &f->ssbo);
- glCreateBuffers(1, &f->ssbo);
- glNamedBufferStorage(f->ssbo, f->length * (i32)sizeof(f32) * (fp.complex? 2 : 1), filter, 0);
- glObjectLabel(GL_BUFFER, f->ssbo, (i32)label.len, (c8 *)label.data);
-}
-
-function ComputeFrameIterator
-compute_frame_iterator(BeamformerCtx *ctx, u32 start_index, u32 needed_frames)
-{
- start_index = start_index % countof(ctx->beamform_frames);
-
- ComputeFrameIterator result;
- result.frames = ctx->beamform_frames;
- result.offset = start_index;
- result.capacity = countof(ctx->beamform_frames);
- result.cursor = 0;
- result.needed_frames = needed_frames;
- return result;
-}
-
-function BeamformerFrame *
-frame_next(ComputeFrameIterator *bfi)
-{
- BeamformerFrame *result = 0;
- if (bfi->cursor != bfi->needed_frames) {
- u32 index = (bfi->offset + bfi->cursor++) % bfi->capacity;
- result = bfi->frames + index;
+ u32 byte_size = f->length * (i32)sizeof(f32) * (fp.complex? 2 : 1);
+ if (f->buffer.size < byte_size) {
+ GPUBufferAllocateInfo allocate_info = {
+ .size = byte_size,
+ .flags = VulkanUsageFlag_HostReadWrite,
+ .label = label,
+ };
+ vk_buffer_allocate(&f->buffer, &allocate_info);
}
- return result;
-}
-
-function b32
-beamformer_frame_compatible(BeamformerFrame *f, iv3 dim, GLenum gl_kind)
-{
- b32 result = gl_kind == f->gl_kind && iv3_equal(dim, f->dim);
- return result;
+ vk_buffer_range_upload(&f->buffer, filter, 0, byte_size, 0);
}
function iv3
@@ -214,83 +162,51 @@ das_valid_points(iv3 points)
}
function void
-alloc_beamform_frame(BeamformerFrame *out, iv3 out_dim, GLenum gl_kind, s8 name, Arena arena)
+update_hadamard(BeamformerComputePlan *cp, i32 order, b32 row_major, Arena arena)
{
- out->dim = das_valid_points(out_dim);
+ f16 *hadamard = make_hadamard_transpose(&arena, order, row_major);
+ if (hadamard) {
+ u64 offset = offsetof(BeamformerComputeArrayParameters, Hadamard);
+ u64 size = sizeof(*((BeamformerComputeArrayParameters *)0)->Hadamard) * order * order;
+ vk_buffer_range_upload(&cp->array_parameters, hadamard, offset, size, 0);
+ cp->hadamard_order = order;
+ }
+}
- /* NOTE: allocate storage for beamformed output data;
- * this is shared between compute and fragment shaders */
- u32 max_dim = (u32)Max(out->dim.x, Max(out->dim.y, out->dim.z));
- out->mips = (i32)ctz_u64(round_up_power_of_two(max_dim)) + 1;
+/* NOTE: byte size of a frame holding points.x * points.y * points.z elements of the
+ * given data kind, passed through round_up_to(..., 64) before returning.
+ * Each factor is widened to u64 before multiplying; the previous form evaluated the
+ * product in 32 bit arithmetic and could overflow for large volumes before the
+ * result was ever stored in a u64. */
+function u64
+beamformer_frame_byte_size(iv3 points, BeamformerDataKind kind)
+{
+	u64 result = (u64)points.x * (u64)points.y * (u64)points.z;
+	result *= (u64)beamformer_data_kind_byte_size[kind];
+	result  = round_up_to(result, 64);
+	return result;
+}
- out->gl_kind = gl_kind;
+function BeamformerFrame *
+beamformer_frame_next(BeamformerComputeContext *cc, iv3 output_points, b32 complex, u64 reserved_size)
+{
+ BeamformerFrameBacklog *bl = &cc->backlog;
- Stream label = arena_stream(arena);
- stream_append_s8(&label, name);
- stream_append_byte(&label, '[');
- stream_append_hex_u64(&label, out->id);
- stream_append_byte(&label, ']');
+ BeamformerDataKind kind = complex ? BeamformerDataKind_Float32Complex : BeamformerDataKind_Float32;
+ u64 frame_size = beamformer_frame_byte_size(output_points, kind);
- glDeleteTextures(1, &out->texture);
- glCreateTextures(GL_TEXTURE_3D, 1, &out->texture);
- glTextureStorage3D(out->texture, out->mips, gl_kind, out->dim.x, out->dim.y, out->dim.z);
+ // TODO(rnp): handle this somewhat gracefully (even if it produces garbled output)
+ assert(frame_size + reserved_size <= (u64)bl->buffer->size);
- glTextureParameteri(out->texture, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- glTextureParameteri(out->texture, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ if (bl->next_offset > (u64)bl->buffer->size - frame_size - reserved_size)
+ bl->next_offset = 0;
- LABEL_GL_OBJECT(GL_TEXTURE, out->texture, stream_to_s8(&label));
-}
+ u64 id = bl->counter++;
-function void
-update_hadamard_texture(BeamformerComputePlan *cp, i32 order, b32 row_major, Arena arena)
-{
- f16 *hadamard = make_hadamard_transpose(&arena, order, row_major);
- if (hadamard) {
- cp->hadamard_order = order;
- u32 *texture = cp->textures + BeamformerComputeTextureKind_Hadamard;
- glDeleteTextures(1, texture);
- glCreateTextures(GL_TEXTURE_2D, 1, texture);
- glTextureStorage2D(*texture, 1, GL_R16F, order, order);
- glTextureSubImage2D(*texture, 0, 0, 0, order, order, GL_RED, GL_SHORT, hadamard);
-
- Stream label = arena_stream(arena);
- stream_append_s8(&label, s8("Hadamard"));
- stream_append_i64(&label, order);
- LABEL_GL_OBJECT(GL_TEXTURE, *texture, stream_to_s8(&label));
- }
-}
+ BeamformerFrame *result = bl->frames + (id % countof(bl->frames));
+ atomic_store_u64(&result->timeline_valid_value, -1ULL);
+ result->id = id & U32_MAX;
+ result->buffer_offset = bl->next_offset;
+ result->points = output_points;
+ result->data_kind = kind;
-function void
-alloc_shader_storage(BeamformerCtx *ctx, u32 decoded_data_size, Arena arena)
-{
- BeamformerComputeContext *cc = &ctx->compute_context;
- glDeleteBuffers(countof(cc->ping_pong_ssbos), cc->ping_pong_ssbos);
- glCreateBuffers(countof(cc->ping_pong_ssbos), cc->ping_pong_ssbos);
-
- cc->ping_pong_ssbo_size = decoded_data_size;
-
- Stream label = arena_stream(arena);
- stream_append_s8(&label, s8("PingPongSSBO["));
- i32 s_widx = label.widx;
- for (i32 i = 0; i < countof(cc->ping_pong_ssbos); i++) {
- glNamedBufferStorage(cc->ping_pong_ssbos[i], (iz)decoded_data_size, 0, 0);
- stream_append_i64(&label, i);
- stream_append_byte(&label, ']');
- LABEL_GL_OBJECT(GL_BUFFER, cc->ping_pong_ssbos[i], stream_to_s8(&label));
- stream_reset(&label, s_widx);
- }
+ bl->next_offset += frame_size;
- /* TODO(rnp): (25.08.04) cuda lib is heavily broken atm. First there are multiple RF
- * buffers and cuda decode shouldn't assume that the data is coming from the rf_buffer
- * ssbo. Second each parameter block may need a different hadamard matrix so ideally
- * decode should just take the texture as a parameter. Third, none of these dimensions
- * need to be pre-known by the library unless its allocating GPU memory which it shouldn't
- * need to do. For now grab out of parameter block 0 but it is not correct */
- BeamformerParameterBlock *pb = beamformer_parameter_block(ctx->shared_memory, 0);
- /* NOTE(rnp): these are stubs when CUDA isn't supported */
- cuda_register_buffers(cc->ping_pong_ssbos, countof(cc->ping_pong_ssbos), cc->rf_buffer.ssbo);
- u32 decoded_data_dimension[3] = {pb->parameters.sample_count, pb->parameters.channel_count, pb->parameters.acquisition_count};
- cuda_init(pb->parameters.raw_data_dimensions.E, decoded_data_dimension);
+ return result;
}
function void
@@ -306,35 +222,69 @@ fill_frame_compute_work(BeamformerCtx *ctx, BeamformWork *work, BeamformerViewPl
{
b32 result = work != 0;
if (result) {
- u32 frame_id = atomic_add_u32(&ctx->next_render_frame_index, 1);
- u32 frame_index = frame_id % countof(ctx->beamform_frames);
- work->kind = indirect? BeamformerWorkKind_ComputeIndirect : BeamformerWorkKind_Compute;
- work->lock = BeamformerSharedMemoryLockKind_DispatchCompute;
+ work->kind = indirect? BeamformerWorkKind_ComputeIndirect : BeamformerWorkKind_Compute;
+ work->lock = BeamformerSharedMemoryLockKind_DispatchCompute;
work->compute_context.parameter_block = parameter_block;
- work->compute_context.frame = ctx->beamform_frames + frame_index;
- work->compute_context.frame->ready_to_present = 0;
- work->compute_context.frame->view_plane_tag = plane;
- work->compute_context.frame->id = frame_id;
}
return result;
}
-function void
-do_sum_shader(BeamformerComputeContext *cc, u32 *in_textures, u32 in_texture_count,
- u32 out_texture, iv3 out_data_dim)
+function uv3 /* Chooses the workgroup size (x/y/z) for a shader dispatched over a grid of
+ * `points` output points. An axis with <= 1 points keeps a size of 1. For 1D output the
+ * x*y*z product is always vk_gpu_info()->subgroup_size; for 2D (8xN) and 3D (4x4xN) it
+ * equals subgroup_size whenever subgroup_size is a multiple of 8 or 16 respectively. */
+layout_for_output(iv3 points)
{
- /* NOTE: zero output before summing */
- glClearTexImage(out_texture, 0, GL_RED, GL_FLOAT, 0);
- glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
-
- glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
- for (u32 i = 0; i < in_texture_count; i++) {
- glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
- glDispatchCompute(ORONE((u32)out_data_dim.x / 32u),
- ORONE((u32)out_data_dim.y),
- ORONE((u32)out_data_dim.z / 32u));
- glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
+ uv3 result = {{1, 1, 1}}; /* default: one invocation along every axis */
+
+ b32 has_x = points.x > 1;
+ b32 has_y = points.y > 1;
+ b32 has_z = points.z > 1;
+
+ u32 subgroup_size = vk_gpu_info()->subgroup_size;
+ u32 grid_3d_z_size = Max(1, subgroup_size / (4 * 4)); /* z extent of the 4x4xN tile; never below 1 */
+ u32 grid_2d_y_size = Max(1, subgroup_size / 8); /* second extent of the 8xN tile; never below 1 */
+
+ switch (iv3_dimension(points)) { /* NOTE(review): presumably the number of axes with > 1 points - confirm */
+ case 1:{ /* whichever axis has > 1 points gets subgroup_size invocations */
+ if (has_x) result.x = subgroup_size;
+ if (has_y) result.y = subgroup_size;
+ if (has_z) result.z = subgroup_size;
+ }break;
+
+ case 2:{ /* 8 along the first axis of the pair (x, y, z order), grid_2d_y_size along the second */
+ if (has_x && has_y) {result.x = 8; result.y = grid_2d_y_size;}
+ if (has_x && has_z) {result.x = 8; result.z = grid_2d_y_size;}
+ if (has_y && has_z) {result.y = 8; result.z = grid_2d_y_size;}
+ }break;
+
+ case 3:{result = (uv3){{4, 4, grid_3d_z_size}};}break;
+
+ InvalidDefaultCase;
}
+
+ return result;
+}
+
+function uv3
+dispatch_for_output(uv3 layout, iv3 points)
+{
+	/* NOTE: workgroup count per axis is points / layout rounded up, so a partially
+	 * filled trailing workgroup is still dispatched */
+	f32 groups_x = ceil_f32((f32)points.x / layout.x);
+	f32 groups_y = ceil_f32((f32)points.y / layout.y);
+	f32 groups_z = ceil_f32((f32)points.z / layout.z);
+
+	uv3 result;
+	result.x = (u32)groups_x;
+	result.y = (u32)groups_y;
+	result.z = (u32)groups_z;
+	return result;
+}
+
+function b32
+compute_plan_push_shader(BeamformerComputePlan *p, BeamformerShaderKind shader, BeamformerShaderParameters *sp)
+{
+	/* NOTE: appends `shader` (with a copy of *sp) to the plan's pipeline and clears the
+	 * shader descriptor for the new slot. Returns 1 when the shader was added and 0,
+	 * leaving the plan untouched, when the pipeline is already full. */
+	b32 has_room = p->pipeline.shader_count < countof(p->pipeline.shaders);
+	if (has_room) {
+		u32 slot = p->pipeline.shader_count;
+		zero_struct(p->shader_descriptors + slot);
+		p->pipeline.parameters[slot] = *sp;
+		p->pipeline.shaders[slot]    = shader;
+		p->pipeline.shader_count     = slot + 1;
+	}
+	return has_room;
}
function void
@@ -374,355 +324,372 @@ plan_compute_pipeline(BeamformerComputePlan *cp, BeamformerParameterBlock *pb)
f32 time_offset = pb->parameters.time_offset;
- // TODO(rnp): subgroup size
- u32 subgroup_size = vk_gpu_info()->vendor == GPUVendor_NVIDIA ? 32 : 64;
+ u32 subgroup_size = vk_gpu_info()->subgroup_size;
cp->pipeline.shader_count = 0;
for (u32 i = 0; i < pb->pipeline.shader_count; i++) {
BeamformerShaderParameters *sp = pb->pipeline.parameters + i;
u32 slot = cp->pipeline.shader_count;
u32 shader = pb->pipeline.shaders[i];
- b32 commit = 0;
BeamformerShaderDescriptor *ld = cp->shader_descriptors + slot - 1;
BeamformerShaderDescriptor *sd = cp->shader_descriptors + slot;
- zero_struct(sd);
switch (shader) {
- case BeamformerShaderKind_CudaHilbert:{ commit = run_cuda_hilbert; }break;
+
+ case BeamformerShaderKind_CudaHilbert:{
+ if (run_cuda_hilbert)
+ compute_plan_push_shader(cp, shader, sp);
+ }break;
+
case BeamformerShaderKind_Decode:{
/* TODO(rnp): rework decode first and demodulate after */
b32 first = slot == 0;
- BeamformerDecodeBakeParameters *db = &sd->bake.Decode;
- db->data_kind = data_kind;
- if (!first) {
- if (data_kind == BeamformerDataKind_Int16) {
- db->data_kind = BeamformerDataKind_Int16Complex;
- } else {
- db->data_kind = BeamformerDataKind_Float32Complex;
- }
- }
-
BeamformerShaderKind *last_shader = cp->pipeline.shaders + slot - 1;
assert(first || ((*last_shader == BeamformerShaderKind_Demodulate ||
*last_shader == BeamformerShaderKind_Filter)));
- db->decode_mode = pb->parameters.decode_mode;
- db->transmit_count = pb->parameters.acquisition_count;
+ if ((first || pb->parameters.decode_mode != BeamformerDecodeMode_None) &&
+ compute_plan_push_shader(cp, shader, sp))
+ {
+ BeamformerDecodeBakeParameters *db = &sd->bake.Decode;
- u32 channel_stride = pb->parameters.acquisition_count * pb->parameters.sample_count;
- db->input_sample_stride = first? 1 : ld->bake.Filter.output_sample_stride;
- db->input_channel_stride = first? channel_stride : ld->bake.Filter.output_channel_stride;
- db->input_transmit_stride = first? pb->parameters.sample_count : 1;
+ db->data_kind = data_kind;
+ if (!first) {
+ if (data_kind == BeamformerDataKind_Int16) {
+ db->data_kind = BeamformerDataKind_Int16Complex;
+ } else {
+ db->data_kind = BeamformerDataKind_Float32Complex;
+ }
+ }
- db->output_sample_stride = das_sample_stride;
- db->output_channel_stride = das_channel_stride;
- db->output_transmit_stride = das_transmit_stride;
- if (first) {
- db->output_channel_stride *= decimation_rate;
- db->output_transmit_stride *= decimation_rate;
- }
+ db->decode_mode = pb->parameters.decode_mode;
+ db->transmit_count = pb->parameters.acquisition_count;
- db->dilate_output = run_cuda_hilbert;
+ u32 channel_stride = pb->parameters.acquisition_count * pb->parameters.sample_count;
+ db->input_sample_stride = first? 1 : ld->bake.Filter.output_sample_stride;
+ db->input_channel_stride = first? channel_stride : ld->bake.Filter.output_channel_stride;
+ db->input_transmit_stride = first? pb->parameters.sample_count : 1;
- if (db->decode_mode == BeamformerDecodeMode_None) {
- sd->layout = (uv3){{subgroup_size, 1, 1}};
+ db->output_sample_stride = das_sample_stride;
+ db->output_channel_stride = das_channel_stride;
+ db->output_transmit_stride = das_transmit_stride;
+ if (first) {
+ db->output_channel_stride *= decimation_rate;
+ db->output_transmit_stride *= decimation_rate;
+ }
- sd->dispatch.x = (u32)ceil_f32((f32)sample_count / (f32)sd->layout.x);
- sd->dispatch.y = (u32)ceil_f32((f32)pb->parameters.channel_count / (f32)sd->layout.y);
- sd->dispatch.z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / (f32)sd->layout.z);
- } else if (db->transmit_count > 40) {
- db->use_shared_memory = 1;
- db->to_process = 2;
+ db->dilate_output = run_cuda_hilbert;
- if (db->transmit_count == 48)
- db->to_process = db->transmit_count / 16;
+ if (db->decode_mode == BeamformerDecodeMode_None) {
+ sd->layout = (uv3){{subgroup_size, 1, 1}};
- b32 use_16z = db->transmit_count == 48 || db->transmit_count == 80 ||
- db->transmit_count == 96 || db->transmit_count == 160;
- sd->layout = (uv3){{4, 1, use_16z? 16 : 32}};
+ sd->dispatch.x = (u32)ceil_f32((f32)sample_count / (f32)sd->layout.x);
+ sd->dispatch.y = (u32)ceil_f32((f32)pb->parameters.channel_count / (f32)sd->layout.y);
+ sd->dispatch.z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / (f32)sd->layout.z);
+ } else if (db->transmit_count > 40) {
+ db->use_shared_memory = 1;
+ db->to_process = 2;
- sd->dispatch.x = (u32)ceil_f32((f32)sample_count / (f32)sd->layout.x);
- sd->dispatch.y = (u32)ceil_f32((f32)pb->parameters.channel_count / (f32)sd->layout.y);
- sd->dispatch.z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / (f32)sd->layout.z / (f32)db->to_process);
- } else {
- db->to_process = 1;
+ if (db->transmit_count == 48)
+ db->to_process = db->transmit_count / 16;
- /* NOTE(rnp): register caching. using more threads will cause the compiler to do
- * contortions to avoid spilling registers. using less gives higher performance */
- sd->layout = (uv3){{subgroup_size / 2, 1, 1}};
+ b32 use_16z = db->transmit_count == 48 || db->transmit_count == 80 ||
+ db->transmit_count == 96 || db->transmit_count == 160;
+ sd->layout = (uv3){{4, 1, use_16z? 16 : 32}};
- sd->dispatch.x = (u32)ceil_f32((f32)sample_count / (f32)sd->layout.x);
- sd->dispatch.y = (u32)ceil_f32((f32)pb->parameters.channel_count / (f32)sd->layout.y);
- sd->dispatch.z = 1;
- }
+ sd->dispatch.x = (u32)ceil_f32((f32)sample_count / (f32)sd->layout.x);
+ sd->dispatch.y = (u32)ceil_f32((f32)pb->parameters.channel_count / (f32)sd->layout.y);
+ sd->dispatch.z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / (f32)sd->layout.z / (f32)db->to_process);
+ } else {
+ db->to_process = 1;
+
+ /* NOTE(rnp): register caching. using more threads will cause the compiler to do
+ * contortions to avoid spilling registers. using less gives higher performance */
+ /* TODO(rnp): may need to be adjusted to 16 on NVIDIA */
+ sd->layout = (uv3){{subgroup_size / 2, 1, 1}};
- if (first) sd->dispatch.x *= decimation_rate;
+ sd->dispatch.x = (u32)ceil_f32((f32)sample_count / (f32)sd->layout.x);
+ sd->dispatch.y = (u32)ceil_f32((f32)pb->parameters.channel_count / (f32)sd->layout.y);
+ sd->dispatch.z = 1;
+ }
- /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */
- if (first && data_kind == BeamformerDataKind_Int16)
- sd->dispatch.x = (u32)ceil_f32((f32)sd->dispatch.x / 2);
+ if (first) sd->dispatch.x *= decimation_rate;
- commit = first || db->decode_mode != BeamformerDecodeMode_None;
+ /* NOTE(rnp): decode 2 samples per dispatch when data is i16 */
+ if (first && data_kind == BeamformerDataKind_Int16)
+ sd->dispatch.x = (u32)ceil_f32((f32)sd->dispatch.x / 2);
+ }
}break;
+
case BeamformerShaderKind_Demodulate:
case BeamformerShaderKind_Filter:
{
- b32 first = slot == 0;
- b32 demod = shader == BeamformerShaderKind_Demodulate;
- BeamformerFilter *f = cp->filters + sp->filter_slot;
-
- time_offset += f->time_delay;
-
- BeamformerFilterBakeParameters *fb = &sd->bake.Filter;
- fb->filter_length = (u32)f->length;
- fb->demodulate = demod;
- fb->complex_filter = f->parameters.complex;
-
- fb->data_kind = data_kind;
- if (!first) fb->data_kind = BeamformerDataKind_Float32;
-
- /* NOTE(rnp): when we are demodulating we pretend that the sampler was alternating
- * between sampling the I portion and the Q portion of an IQ signal. Therefore there
- * is an implicit decimation factor of 2 which must always be included. All code here
- * assumes that the signal was sampled in such a way that supports this operation.
- * To recover IQ[n] from the sampled data (RF[n]) we do the following:
- * I[n] = RF[n]
- * Q[n] = RF[n + 1]
- * IQ[n] = I[n] - j*Q[n]
- */
- if (demod) {
- fb->demodulation_frequency = pb->parameters.demodulation_frequency;
- fb->sampling_frequency = pb->parameters.sampling_frequency / 2;
- fb->decimation_rate = decimation_rate;
- fb->sample_count = pb->parameters.sample_count;
-
- fb->output_channel_stride = das_channel_stride;
- fb->output_sample_stride = das_sample_stride;
- fb->output_transmit_stride = das_transmit_stride;
-
- if (first) {
- fb->input_channel_stride = pb->parameters.sample_count * pb->parameters.acquisition_count / 2;
- fb->input_sample_stride = 1;
- fb->input_transmit_stride = pb->parameters.sample_count / 2;
-
- if (pb->parameters.decode_mode == BeamformerDecodeMode_None) {
- fb->output_floats = 1;
+ if (compute_plan_push_shader(cp, shader, sp)) {
+ b32 first = slot == 0;
+ b32 demod = shader == BeamformerShaderKind_Demodulate;
+ BeamformerFilter *f = cp->filters + sp->filter_slot;
+
+ time_offset += f->time_delay;
+
+ BeamformerFilterBakeParameters *fb = &sd->bake.Filter;
+ fb->filter_length = (u32)f->length;
+ fb->demodulate = demod;
+ fb->complex_filter = f->parameters.complex;
+
+ fb->data_kind = data_kind;
+ if (!first) fb->data_kind = BeamformerDataKind_Float32;
+
+ /* NOTE(rnp): when we are demodulating we pretend that the sampler was alternating
+ * between sampling the I portion and the Q portion of an IQ signal. Therefore there
+ * is an implicit decimation factor of 2 which must always be included. All code here
+ * assumes that the signal was sampled in such a way that supports this operation.
+ * To recover IQ[n] from the sampled data (RF[n]) we do the following:
+ * I[n] = RF[n]
+ * Q[n] = RF[n + 1]
+ * IQ[n] = I[n] - j*Q[n]
+ */
+ if (demod) {
+ fb->demodulation_frequency = pb->parameters.demodulation_frequency;
+ fb->sampling_frequency = pb->parameters.sampling_frequency / 2;
+ fb->decimation_rate = decimation_rate;
+ fb->sample_count = pb->parameters.sample_count;
+
+ fb->output_channel_stride = das_channel_stride;
+ fb->output_sample_stride = das_sample_stride;
+ fb->output_transmit_stride = das_transmit_stride;
+
+ if (first) {
+ fb->input_channel_stride = pb->parameters.sample_count * pb->parameters.acquisition_count / 2;
+ fb->input_sample_stride = 1;
+ fb->input_transmit_stride = pb->parameters.sample_count / 2;
+
+ if (pb->parameters.decode_mode == BeamformerDecodeMode_None) {
+ fb->output_floats = 1;
+ } else {
+ /* NOTE(rnp): output optimized layout for decoding */
+ fb->output_channel_stride = das_channel_stride;
+ fb->output_sample_stride = pb->parameters.acquisition_count;
+ fb->output_transmit_stride = 1;
+ }
} else {
- /* NOTE(rnp): output optimized layout for decoding */
- fb->output_channel_stride = das_channel_stride;
- fb->output_sample_stride = pb->parameters.acquisition_count;
- fb->output_transmit_stride = 1;
+ assert(cp->pipeline.shaders[slot - 1] == BeamformerShaderKind_Decode);
+ fb->input_channel_stride = ld->bake.Decode.output_channel_stride;
+ fb->input_sample_stride = ld->bake.Decode.output_sample_stride;
+ fb->input_transmit_stride = ld->bake.Decode.output_transmit_stride;
}
} else {
- assert(cp->pipeline.shaders[slot - 1] == BeamformerShaderKind_Decode);
- fb->input_channel_stride = ld->bake.Decode.output_channel_stride;
- fb->input_sample_stride = ld->bake.Decode.output_sample_stride;
- fb->input_transmit_stride = ld->bake.Decode.output_transmit_stride;
+ fb->decimation_rate = 1;
+ fb->output_channel_stride = sample_count * pb->parameters.acquisition_count;
+ fb->output_sample_stride = 1;
+ fb->output_transmit_stride = sample_count;
+ fb->input_channel_stride = sample_count * pb->parameters.acquisition_count;
+ fb->input_sample_stride = 1;
+ fb->input_transmit_stride = sample_count;
+ fb->sample_count = sample_count;
}
- } else {
- fb->decimation_rate = 1;
- fb->output_channel_stride = sample_count * pb->parameters.acquisition_count;
- fb->output_sample_stride = 1;
- fb->output_transmit_stride = sample_count;
- fb->input_channel_stride = sample_count * pb->parameters.acquisition_count;
- fb->input_sample_stride = 1;
- fb->input_transmit_stride = sample_count;
- fb->sample_count = sample_count;
- }
-
- /* TODO(rnp): filter may need a different dispatch layout */
- sd->layout = (uv3){{128, 1, 1}};
- sd->dispatch.x = (u32)ceil_f32((f32)sample_count / (f32)sd->layout.x);
- sd->dispatch.y = (u32)ceil_f32((f32)pb->parameters.channel_count / (f32)sd->layout.y);
- sd->dispatch.z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / (f32)sd->layout.z);
- commit = 1;
+ /* TODO(rnp): filter may need a different dispatch layout */
+ sd->layout = (uv3){{128, 1, 1}};
+ sd->dispatch.x = (u32)ceil_f32((f32)sample_count / (f32)sd->layout.x);
+ sd->dispatch.y = (u32)ceil_f32((f32)pb->parameters.channel_count / (f32)sd->layout.y);
+ sd->dispatch.z = (u32)ceil_f32((f32)pb->parameters.acquisition_count / (f32)sd->layout.z);
+ }
}break;
- case BeamformerShaderKind_DAS:{
- BeamformerDASBakeParameters *db = &sd->bake.DAS;
-
- db->data_kind = BeamformerDataKind_Float32;
- if (cp->iq_pipeline) db->data_kind = BeamformerDataKind_Float32Complex;
-
- BeamformerDASPushConstants *du = &cp->das_ubo_data;
- du->xdc_element_pitch = pb->parameters.xdc_element_pitch;
- db->sampling_frequency = sampling_frequency;
- db->demodulation_frequency = pb->parameters.demodulation_frequency;
- db->speed_of_sound = pb->parameters.speed_of_sound;
- db->time_offset = time_offset;
- db->f_number = pb->parameters.f_number;
- db->acquisition_kind = pb->parameters.acquisition_kind;
- db->sample_count = sample_count;
- db->channel_count = pb->parameters.channel_count;
- db->acquisition_count = pb->parameters.acquisition_count;
- db->interpolation_mode = pb->parameters.interpolation_mode;
- db->transmit_angle = pb->parameters.focal_vector.E[0];
- db->focus_depth = pb->parameters.focal_vector.E[1];
- db->transmit_receive_orientation = pb->parameters.transmit_receive_orientation;
-
- // NOTE(rnp): old gcc will miscompile an assignment
- mem_copy(du->voxel_transform.E, pb->parameters.das_voxel_transform.E, sizeof(du->voxel_transform));
- mem_copy(du->xdc_transform.E, pb->parameters.xdc_transform.E, sizeof(du->xdc_transform));
-
- du->voxel_transform = m4_mul(cp->ui_voxel_transform, du->voxel_transform);
-
- u32 id = pb->parameters.acquisition_kind;
-
- if (id == BeamformerAcquisitionKind_UFORCES || id == BeamformerAcquisitionKind_FORCES)
- du->voxel_transform = m4_mul(du->xdc_transform, du->voxel_transform);
-
- db->sparse = id == BeamformerAcquisitionKind_UFORCES ||
- id == BeamformerAcquisitionKind_UHERCULES;
-
- db->single_focus = pb->parameters.single_focus;
- db->single_orientation = pb->parameters.single_orientation;
- db->coherency_weighting = pb->parameters.coherency_weighting;
- db->fast = !pb->parameters.coherency_weighting;
-
- sd->layout = (uv3){{1, 1, 1}};
-
- b32 has_x = cp->output_points.x > 1;
- b32 has_y = cp->output_points.y > 1;
- b32 has_z = cp->output_points.z > 1;
-
- u32 grid_3d_z_size = Max(1, subgroup_size / (4 * 4));
- u32 grid_2d_y_size = Max(1, subgroup_size / 8);
-
- switch (iv3_dimension(cp->output_points)) {
-
- case 1:{
- if (has_x) sd->layout.x = subgroup_size;
- if (has_y) sd->layout.y = subgroup_size;
- if (has_z) sd->layout.z = subgroup_size;
- }break;
-
- case 2:{
- if (has_x && has_y) {sd->layout.x = 8; sd->layout.y = grid_2d_y_size;}
- if (has_x && has_z) {sd->layout.x = 8; sd->layout.z = grid_2d_y_size;}
- if (has_y && has_z) {sd->layout.y = 8; sd->layout.z = grid_2d_y_size;}
- }break;
- case 3:{sd->layout = (uv3){{4, 4, grid_3d_z_size}};}break;
-
- InvalidDefaultCase;
+ case BeamformerShaderKind_DAS:{
+ if (compute_plan_push_shader(cp, shader, sp)) {
+ BeamformerDASBakeParameters *db = &sd->bake.DAS;
+ db->data_kind = BeamformerDataKind_Float32;
+ if (cp->iq_pipeline) db->data_kind = BeamformerDataKind_Float32Complex;
+
+ cp->voxel_transform = m4_mul(cp->ui_voxel_transform, pb->parameters.das_voxel_transform);
+ cp->xdc_element_pitch = pb->parameters.xdc_element_pitch;
+
+ db->sampling_frequency = sampling_frequency;
+ db->demodulation_frequency = pb->parameters.demodulation_frequency;
+ db->speed_of_sound = pb->parameters.speed_of_sound;
+ db->time_offset = time_offset;
+ db->f_number = pb->parameters.f_number;
+ db->acquisition_kind = pb->parameters.acquisition_kind;
+ db->sample_count = sample_count;
+ db->channel_count = pb->parameters.channel_count;
+ db->acquisition_count = pb->parameters.acquisition_count;
+ db->interpolation_mode = pb->parameters.interpolation_mode;
+ db->transmit_angle = pb->parameters.focal_vector.E[0];
+ db->focus_depth = pb->parameters.focal_vector.E[1];
+ db->transmit_receive_orientation = pb->parameters.transmit_receive_orientation;
+
+ // NOTE(rnp): old gcc will miscompile an assignment
+ mem_copy(cp->xdc_transform.E, pb->parameters.xdc_transform.E, sizeof(cp->xdc_transform));
+
+ u32 id = pb->parameters.acquisition_kind;
+ if (id == BeamformerAcquisitionKind_UFORCES || id == BeamformerAcquisitionKind_FORCES)
+ cp->voxel_transform = m4_mul(cp->xdc_transform, cp->voxel_transform);
+
+ db->sparse = id == BeamformerAcquisitionKind_UFORCES || id == BeamformerAcquisitionKind_UHERCULES;
+ db->single_focus = pb->parameters.single_focus;
+ db->single_orientation = pb->parameters.single_orientation;
+ db->coherency_weighting = pb->parameters.coherency_weighting;
+
+ sd->layout = layout_for_output(cp->output_points);
+ sd->dispatch = dispatch_for_output(sd->layout, cp->output_points);
+
+ if (pb->parameters.coherency_weighting &&
+ compute_plan_push_shader(cp, BeamformerShaderKind_CoherencyWeighting, sp))
+ {
+ BeamformerShaderDescriptor *shader_descriptor = cp->shader_descriptors + cp->pipeline.shader_count - 1;
+ shader_descriptor->layout = sd->layout;
+ shader_descriptor->dispatch = sd->dispatch;
+ shader_descriptor->bake.CoherencyWeighting.data_kind = db->data_kind;
+ }
}
+ }break;
- sd->dispatch.x = (u32)ceil_f32((f32)cp->output_points.x / sd->layout.x);
- sd->dispatch.y = (u32)ceil_f32((f32)cp->output_points.y / sd->layout.y);
- sd->dispatch.z = (u32)ceil_f32((f32)cp->output_points.z / sd->layout.z);
+ #if 0
+ case BeamformerShaderKind_Sum:{
+ sd->bake.data_kind = BeamformerDataKind_Float32;
+ if (cp->iq_pipeline)
+ sd->bake.data_kind = BeamformerDataKind_Float32Complex;
+
+ sd->layout = layout_for_output(cp->output_points);
+ sd->dispatch = dispatch_for_output(sd->layout, cp->output_points);
commit = 1;
}break;
- default:{ commit = 1; }break;
- }
+ #endif
- if (commit) {
- u32 index = cp->pipeline.shader_count++;
- cp->pipeline.shaders[index] = shader;
- cp->pipeline.parameters[index] = *sp;
+ default:{}break;
}
}
cp->pipeline.data_kind = data_kind;
}
function void
-stream_push_shader_header(Stream *s, BeamformerShaderKind shader_kind, s8 header)
+stream_append_shader_header(Stream *s, i32 reloadable_index, BeamformerShaderDescriptor *sd, uv3 layout) /*
+ * Appends the GLSL preamble for one shader to `s`, in this order: the #version and
+ * #extension lines, the generated header strings listed for `reloadable_index`, an
+ * optional local_size declaration built from `layout`, one #define per bake parameter
+ * when `sd` is non-NULL (wrapped in uintBitsToFloat when its bit is set in the float
+ * mask), and finally a "#line 1" reset unless renderdoc is attached. */
{
- stream_append_s8s(s, s8("#version 460 core\n\n"), header);
-
- switch (shader_kind) {
- case BeamformerShaderKind_DAS:{
- stream_append_s8(s, s8(""
- "layout(location = " str(DAS_CYCLE_T_UNIFORM_LOC) ") uniform uint u_cycle_t;\n"
- "layout(location = " str(DAS_FAST_CHANNEL_UNIFORM_LOC) ") uniform int u_channel;\n\n"
- ));
- }break;
- case BeamformerShaderKind_Decode:{
- stream_append_s8s(s, s8(""
- "layout(location = " str(DECODE_FIRST_PASS_UNIFORM_LOC) ") uniform bool u_first_pass;\n\n"
- ));
- }break;
- case BeamformerShaderKind_MinMax:{
- stream_append_s8(s, s8("layout(location = " str(MIN_MAX_MIPS_LEVEL_UNIFORM_LOC)
- ") uniform int u_mip_map;\n\n"));
- }break;
- case BeamformerShaderKind_Sum:{
- stream_append_s8(s, s8("layout(location = " str(SUM_PRESCALE_UNIFORM_LOC)
- ") uniform float u_sum_prescale = 1.0;\n\n"));
- }break;
- default:{}break;
+ stream_append_s8s(s, s8("#version 460 core\n\n"
+ "#extension GL_EXT_buffer_reference : require\n"
+ "#extension GL_EXT_shader_16bit_storage : require\n"
+ "#extension GL_EXT_shader_explicit_arithmetic_types : require\n\n"));
+
+ i32 header_vector_length = beamformer_shader_header_vector_lengths[reloadable_index];
+ i32 *header_vector = beamformer_shader_header_vectors[reloadable_index];
+ for (i32 index = 0; index < header_vector_length; index++)
+ stream_append_s8(s, beamformer_shader_global_header_strings[header_vector[index]]);
+
+ if (layout.x != 0) { /* a zero layout.x means: emit no local_size declaration */
+ stream_append_s8(s, s8("layout(local_size_x = "));
+ stream_append_u64(s, layout.x);
+ stream_append_s8(s, s8(", local_size_y = "));
+ stream_append_u64(s, layout.y);
+ stream_append_s8(s, s8(", local_size_z = "));
+ stream_append_u64(s, layout.z);
+ stream_append_s8(s, s8(") in;\n\n"));
}
-}
-
-function void
-load_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, u32 shader_slot, Arena arena)
-{
- BeamformerShaderKind shader = cp->pipeline.shaders[shader_slot];
-
- u32 program = 0;
- i32 reloadable_index = beamformer_shader_reloadable_index_by_shader[shader];
- if (reloadable_index != -1) {
- BeamformerShaderKind base_shader = beamformer_reloadable_shader_kinds[reloadable_index];
- s8 path;
- if (!BakeShaders)
- path = push_s8_from_parts(&arena, os_path_separator(), s8("shaders"),
- beamformer_reloadable_shader_files[reloadable_index]);
-
- Stream shader_stream = arena_stream(arena);
- stream_push_shader_header(&shader_stream, base_shader, s8(""));
-
- i32 header_vector_length = beamformer_shader_header_vector_lengths[reloadable_index];
- i32 *header_vector = beamformer_shader_header_vectors[reloadable_index];
- for (i32 index = 0; index < header_vector_length; index++)
- stream_append_s8(&shader_stream, beamformer_shader_global_header_strings[header_vector[index]]);
-
- BeamformerShaderDescriptor *sd = cp->shader_descriptors + shader_slot;
-
- if (sd->layout.x != 0) {
- stream_append_s8(&shader_stream, s8("layout(local_size_x = "));
- stream_append_u64(&shader_stream, sd->layout.x);
- stream_append_s8(&shader_stream, s8(", local_size_y = "));
- stream_append_u64(&shader_stream, sd->layout.y);
- stream_append_s8(&shader_stream, s8(", local_size_z = "));
- stream_append_u64(&shader_stream, sd->layout.z);
- stream_append_s8(&shader_stream, s8(") in;\n\n"));
- }
+ if (sd) { /* bake parameters are read as consecutive u32 words starting at sd->bake */
u32 *parameters = (u32 *)&sd->bake;
s8 *names = beamformer_shader_bake_parameter_names[reloadable_index];
u32 float_bits = beamformer_shader_bake_parameter_float_bits[reloadable_index];
i32 count = beamformer_shader_bake_parameter_counts[reloadable_index];
for (i32 index = 0; index < count; index++) {
- stream_append_s8s(&shader_stream, s8("#define "), names[index],
+ stream_append_s8s(s, s8("#define "), names[index],
(float_bits & (1 << index))? s8(" uintBitsToFloat") : s8(" "), s8("(0x"));
- stream_append_hex_u64(&shader_stream, parameters[index]);
- stream_append_s8(&shader_stream, s8(")\n"));
+ stream_append_hex_u64(s, parameters[index]);
+ stream_append_s8(s, s8(")\n"));
}
+ }
- if (!renderdoc_attached())
- stream_append_s8(&shader_stream, s8("\n#line 1\n"));
+ if (!renderdoc_attached()) /* NOTE(review): presumably so line numbers in a capture match the full generated text - confirm */
+ stream_append_s8(s, s8("\n\n#line 1\n"));
+}
+
+function void
+beamformer_reload_pipeline(VulkanHandle *pipeline, BeamformerShaderReloadInfo *sris, u32 count, Arena arena)
+{
+	/* NOTE: rebuilds *pipeline from `count` (<= 2) shader stages. Each stage's text is the
+	 * generated header (stream_append_shader_header) followed by the shader body: the
+	 * baked text in sris[i].filename_or_data when BakeShaders is set, otherwise the
+	 * contents of the file "shaders" <separator> filename_or_data. The previous pipeline
+	 * is released before the new one is created. `arena` is a by-value copy, so nothing
+	 * allocated here is reflected in the caller's arena. */
+	s8 paths[2];
+	VulkanPipelineCreateInfo infos[2];
+	assume(count <= countof(infos));
+
+	/* NOTE: the GL loader this replaces treated a reloadable index of -1 as "no
+	 * reloadable source". Bail out, leaving *pipeline untouched, before such an index
+	 * is used to subscript the generated tables below. */
+	for (u32 i = 0; i < count; i++) {
+		if (beamformer_shader_reloadable_index_by_shader[sris[i].shader] < 0)
+			return;
+	}
+
+	/* NOTE: only kind/text/name are assigned below; keep anything else zeroed rather
+	 * than handing indeterminate values to vk_pipeline() */
+	for (u32 i = 0; i < countof(infos); i++)
+		zero_struct(infos + i);
+
+	if (!BakeShaders) {
+		for (u32 i = 0; i < count; i++)
+			paths[i] = push_s8_from_parts(&arena, os_path_separator(), s8("shaders"), sris[i].filename_or_data);
+	}
+
+	u32 push_constants_size = 0;
+	for (u32 i = 0; i < count; i++) {
+		Stream shader_stream = arena_stream(arena);
+		i32 reloadable_index = beamformer_shader_reloadable_index_by_shader[sris[i].shader];
+		/* NOTE: the size comes from the first stage; later stages must agree with it */
+		if (i == 0) push_constants_size = beamformer_shader_push_constant_sizes[reloadable_index];
+		else        assert(push_constants_size == beamformer_shader_push_constant_sizes[reloadable_index]);
+
+		stream_append_shader_header(&shader_stream, reloadable_index, sris[i].shader_descriptor, sris[i].layout);
- s8 shader_text;
if (BakeShaders) {
- stream_append_s8(&shader_stream, beamformer_shader_data[reloadable_index]);
- shader_text = arena_stream_commit(&arena, &shader_stream);
+			stream_append_s8(&shader_stream, sris[i].filename_or_data);
} else {
- shader_text = arena_stream_commit(&arena, &shader_stream);
- i64 length = os_read_entire_file((c8 *)path.data, arena.beg, arena_capacity(&arena, u8));
- shader_text.len += length;
- arena_commit(&arena, length);
+			/* NOTE: read the file straight into the unused tail of the stream */
+			shader_stream.widx += os_read_entire_file((c8 *)paths[i].data,
+			                                          shader_stream.data + shader_stream.widx,
+			                                          shader_stream.cap - shader_stream.widx);
}
- /* TODO(rnp): instance name */
- s8 shader_name = beamformer_shader_names[shader];
- program = load_shader(arena, &shader_text, (u32 []){GL_COMPUTE_SHADER}, 1, shader_name);
+		infos[i].kind = sris[i].shader_kind;
+		infos[i].text = arena_stream_commit_zero(&arena, &shader_stream);
+		infos[i].name = beamformer_shader_names[sris[i].shader];
+
+		//s8 line = s8("---------------\n");
+		//s8 nl = s8("\n");
+		//os_console_log(line.data, line.len);
+		//os_console_log(infos[i].name.data, infos[i].name.len);
+		//os_console_log(nl.data, nl.len);
+		//os_console_log(line.data, line.len);
+		//os_console_log(infos[i].text.data, infos[i].text.len);
+		//os_console_log(line.data, line.len);
}
- glDeleteProgram(cp->programs[shader_slot]);
- cp->programs[shader_slot] = program;
+	vk_pipeline_release(*pipeline);
+	*pipeline = vk_pipeline(infos, count, push_constants_size);
+}
+
+function void
+beamformer_reload_render_pipeline(VulkanHandle *pipeline, BeamformerShaderKind shader, Arena arena)
+{
+	/* NOTE: rebuilds a two stage render pipeline for `shader`. Stage 0 is a vertex or a
+	 * mesh shader (per beamformer_shader_primitive_is_vertex) and uses source entry 0;
+	 * stage 1 is the fragment shader and uses source entry 1. shader_descriptor and
+	 * layout are left zeroed, so the generated header carries neither a local_size
+	 * declaration nor bake parameter defines. */
+	i32 index = beamformer_shader_reloadable_index_by_shader[shader];
+
+	/* NOTE: the GL loader this replaces treated -1 as "no reloadable source"; do nothing
+	 * for such a shader instead of subscripting the tables below with a negative index */
+	if (index >= 0) {
+		BeamformerShaderReloadInfo infos[2] = {
+			{
+				.shader           = shader,
+				.shader_kind      = beamformer_shader_primitive_is_vertex[index] ? VulkanShaderKind_Vertex : VulkanShaderKind_Mesh,
+				.filename_or_data = BakeShaders ? beamformer_shader_data[index][0]
+				                                : beamformer_reloadable_shader_files[index][0],
+			},
+			{
+				.shader           = shader,
+				.shader_kind      = VulkanShaderKind_Fragment,
+				.filename_or_data = BakeShaders ? beamformer_shader_data[index][1]
+				                                : beamformer_reloadable_shader_files[index][1],
+			},
+		};
+		beamformer_reload_pipeline(pipeline, infos, countof(infos), arena);
+	}
+}
+
+function void
+beamformer_reload_compute_pipeline(VulkanHandle *pipeline, BeamformerShaderKind shader,
+                                   BeamformerShaderDescriptor *shader_descriptor, Arena arena)
+{
+	/* NOTE: rebuilds a single stage compute pipeline for `shader` from source entry 0.
+	 * With a descriptor its layout and bake parameters are used; without one
+	 * (shader_descriptor == 0) the workgroup is {subgroup_size, 1, 1} and no bake
+	 * parameter defines are generated. */
+	i32 index = beamformer_shader_reloadable_index_by_shader[shader];
+
+	/* NOTE: the GL loader this replaces treated -1 as "no reloadable source"; do nothing
+	 * for such a shader instead of subscripting the tables below with a negative index */
+	if (index >= 0) {
+		uv3 layout = shader_descriptor ? shader_descriptor->layout : (uv3){{vk_gpu_info()->subgroup_size, 1, 1}};
+		BeamformerShaderReloadInfo info = {
+			.shader            = shader,
+			.shader_kind       = VulkanShaderKind_Compute,
+			.shader_descriptor = shader_descriptor,
+			.filename_or_data  = BakeShaders ? beamformer_shader_data[index][0]
+			                                 : beamformer_reloadable_shader_files[index][0],
+			.layout            = layout,
+		};
+		beamformer_reload_pipeline(pipeline, &info, 1, arena);
+	}
}
function void
@@ -755,52 +722,58 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp,
cp->shader_hashes[shader_slot] = hash;
}
- #define X(k, t, v) glNamedBufferSubData(cp->ubos[BeamformerComputeUBOKind_##k], \
- 0, sizeof(t), &cp->v ## _ubo_data);
- BEAMFORMER_COMPUTE_UBO_LIST
- #undef X
-
cp->acquisition_count = pb->parameters.acquisition_count;
cp->acquisition_kind = pb->parameters.acquisition_kind;
- u32 decoded_data_size = cp->rf_size;
- if (ctx->compute_context.ping_pong_ssbo_size < decoded_data_size)
- alloc_shader_storage(ctx, decoded_data_size, arena);
+ // NOTE(rnp): buffer size / 2 should be a multiple of 64
+ i64 buffer_size = round_up_to(2 * cp->rf_size, 128);
+ if (ctx->compute_context.ping_pong_buffer.size < buffer_size) {
+ GPUBufferAllocateInfo allocate_info = {.size = buffer_size, .label = s8("PingPongBuffer")};
+ vk_buffer_allocate(&ctx->compute_context.ping_pong_buffer, &allocate_info);
+ // TODO(rnp): figure out how to share with CUDA
+ }
if (cp->hadamard_order != (i32)cp->acquisition_count)
- update_hadamard_texture(cp, (i32)cp->acquisition_count, 0, arena);
-
- mem_copy(cp->voxel_transform.E, pb->parameters.das_voxel_transform.E, sizeof(cp->voxel_transform));
-
- GLenum gl_kind = cp->iq_pipeline ? GL_RG32F : GL_R32F;
- if (cp->average_frames > 1 && !beamformer_frame_compatible(ctx->averaged_frames + 0, cp->output_points, gl_kind)) {
- alloc_beamform_frame(ctx->averaged_frames + 0, cp->output_points, gl_kind, s8("Averaged Frame"), arena);
- alloc_beamform_frame(ctx->averaged_frames + 1, cp->output_points, gl_kind, s8("Averaged Frame"), arena);
- }
+ update_hadamard(cp, (i32)cp->acquisition_count, 0, arena);
}break;
+
case BeamformerParameterBlockRegion_ChannelMapping:{
cuda_set_channel_mapping(pb->channel_mapping);
}break;
+ case BeamformerParameterRegionFlag_TransmitReceiveOrientations:{
+ GPUBuffer *b = &cp->array_parameters;
+ u32 kind = BeamformerComputeArrayParameterKind_TransmitReceiveOrientations;
+ u64 offset = beamformer_compute_array_parameter_offsets[kind];
+ u64 size = beamformer_compute_array_parameter_sizes[kind];
+ {
+ Arena scratch = arena;
+ u16 *u16s = push_array(&scratch, u16, countof(pb->transmit_receive_orientations));
+ for (u32 i = 0; i < countof(pb->transmit_receive_orientations); i++)
+ u16s[i] = pb->transmit_receive_orientations[i];
+
+ vk_buffer_range_upload(b, u16s, offset, size, 0);
+ }
+ }break;
case BeamformerParameterRegionFlag_FocalVectors:
case BeamformerParameterRegionFlag_SparseElements:
- case BeamformerParameterRegionFlag_TransmitReceiveOrientations:
{
- BeamformerComputeTextureKind texture_kind = 0;
- u32 pixel_type = 0, texture_format = 0;
+ u32 kind = BeamformerComputeArrayParameterKind_Count;
switch (region) {
- #define X(kind, _gl, tf, pt, ...) \
- case BeamformerParameterRegionFlag_##kind:{ \
- texture_kind = BeamformerComputeTextureKind_## kind; \
- texture_format = tf; \
- pixel_type = pt; \
+ case BeamformerParameterBlockRegion_FocalVectors:{
+ kind = BeamformerComputeArrayParameterKind_FocalVectors;
+ }break;
+ case BeamformerParameterBlockRegion_SparseElements:{
+ kind = BeamformerComputeArrayParameterKind_SparseElements;
}break;
- BEAMFORMER_COMPUTE_TEXTURE_LIST
- #undef X
InvalidDefaultCase;
}
- glTextureSubImage1D(cp->textures[texture_kind], 0, 0, BeamformerMaxChannelCount,
- texture_format, pixel_type,
- (u8 *)pb + BeamformerParameterBlockRegionOffsets[region]);
+
+ if (kind != BeamformerComputeArrayParameterKind_Count) {
+ GPUBuffer *b = &cp->array_parameters;
+ u64 offset = beamformer_compute_array_parameter_offsets[kind];
+ u64 size = beamformer_compute_array_parameter_sizes[kind];
+ vk_buffer_range_upload(b, (u8 *)pb + BeamformerParameterBlockRegionOffsets[region], offset, size, 0);
+ }
}break;
}
}
@@ -808,127 +781,205 @@ beamformer_commit_parameter_block(BeamformerCtx *ctx, BeamformerComputePlan *cp,
}
function void
-do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame *frame,
- BeamformerShaderKind shader, u32 shader_slot, BeamformerShaderParameters *sp, Arena arena)
+do_compute_shader(BeamformerCtx *ctx, VulkanHandle cmd, BeamformerComputePlan *cp, BeamformerFrame *frame,
+ u32 shader_slot, Arena arena, u64 rf_pointer)
{
BeamformerComputeContext *cc = &ctx->compute_context;
- u32 program = cp->programs[shader_slot];
- glUseProgram(program);
+ u32 output_index = !cc->ping_pong_input_index;
+ u32 input_index = cc->ping_pong_input_index;
- u32 output_ssbo_idx = !cc->last_output_ssbo_index;
- u32 input_ssbo_idx = cc->last_output_ssbo_index;
+ u64 pp_size = cc->ping_pong_buffer.size / 2;
+ u64 pp_input_pointer = cc->ping_pong_buffer.gpu_pointer + input_index * pp_size;
+ u64 pp_output_pointer = cc->ping_pong_buffer.gpu_pointer + output_index * pp_size;
uv3 dispatch = cp->shader_descriptors[shader_slot].dispatch;
- switch (shader) {
- case BeamformerShaderKind_Decode:{
- glBindImageTexture(0, cp->textures[BeamformerComputeTextureKind_Hadamard], 0, 0, 0, GL_READ_ONLY, GL_R16F);
+ vk_command_bind_pipeline(cmd, cp->vulkan_pipelines[shader_slot]);
+
+ switch (cp->pipeline.shaders[shader_slot]) {
+
+ case BeamformerShaderKind_Decode:{
BeamformerDecodeMode mode = cp->shader_descriptors[shader_slot].bake.Decode.decode_mode;
- if (shader_slot == 0) {
- if (mode != BeamformerDecodeMode_None) {
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[input_ssbo_idx]);
- glProgramUniform1ui(program, DECODE_FIRST_PASS_UNIFORM_LOC, 1);
+ BeamformerDecodePushConstants pc = {
+ .hadamard_buffer = cp->array_parameters.gpu_pointer + offsetof(BeamformerComputeArrayParameters, Hadamard),
+ .output_buffer = pp_output_pointer,
+ };
- glDispatchCompute(dispatch.x, dispatch.y, dispatch.z);
- glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
- }
- }
+ if (shader_slot == 0 && mode != BeamformerDecodeMode_None) {
+ pc.output_rf_buffer = pp_input_pointer;
+ pc.rf_buffer = rf_pointer;
+ pc.first_pass = 1;
- if (mode != BeamformerDecodeMode_None)
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx]);
+ GPUMemoryBarrierInfo barrier = {
+ .gpu_buffer = &cc->ping_pong_buffer,
+ .offset = pp_input_pointer - cc->ping_pong_buffer.gpu_pointer,
+ .size = pp_size,
+ };
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cc->ping_pong_ssbos[output_ssbo_idx]);
+ vk_command_push_constants(cmd, 0, sizeof(pc), &pc);
+ vk_command_dispatch_compute(cmd, dispatch);
+ vk_command_buffer_memory_barriers(cmd, &barrier, 1);
- glProgramUniform1ui(program, DECODE_FIRST_PASS_UNIFORM_LOC, 0);
+ pc.output_rf_buffer = 0;
+ }
+
+ pc.rf_buffer = pp_input_pointer;
+ pc.first_pass = 0;
- glDispatchCompute(dispatch.x, dispatch.y, dispatch.z);
- glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
+ GPUMemoryBarrierInfo barrier = {
+ .gpu_buffer = &cc->ping_pong_buffer,
+ .offset = pp_output_pointer - cc->ping_pong_buffer.gpu_pointer,
+ .size = pp_size,
+ };
- cc->last_output_ssbo_index = !cc->last_output_ssbo_index;
+ vk_command_push_constants(cmd, 0, sizeof(pc), &pc);
+ vk_command_dispatch_compute(cmd, dispatch);
+ vk_command_buffer_memory_barriers(cmd, &barrier, 1);
+
+ cc->ping_pong_input_index = !cc->ping_pong_input_index;
}break;
+
case BeamformerShaderKind_CudaDecode:{
- cuda_decode(0, output_ssbo_idx, 0);
- cc->last_output_ssbo_index = !cc->last_output_ssbo_index;
+ cuda_decode(0, output_index, 0);
+ cc->ping_pong_input_index = !cc->ping_pong_input_index;
}break;
case BeamformerShaderKind_CudaHilbert:{
- cuda_hilbert(input_ssbo_idx, output_ssbo_idx);
- cc->last_output_ssbo_index = !cc->last_output_ssbo_index;
+ cuda_hilbert(input_index, output_index);
+ cc->ping_pong_input_index = !cc->ping_pong_input_index;
}break;
+
case BeamformerShaderKind_Filter:
case BeamformerShaderKind_Demodulate:
{
- if (shader_slot != 0)
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx]);
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, cc->ping_pong_ssbos[output_ssbo_idx]);
- glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cp->filters[sp->filter_slot].ssbo);
+ u32 filter_slot = cp->pipeline.parameters[shader_slot].filter_slot;
+ BeamformerFilterPushConstants pc = {
+ .filter_coefficients = cp->filters[filter_slot].buffer.gpu_pointer,
+ .output_data = pp_output_pointer,
+ .input_data = shader_slot == 0 ? rf_pointer : pp_input_pointer,
+ };
- glDispatchCompute(dispatch.x, dispatch.y, dispatch.z);
- glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
+ GPUMemoryBarrierInfo barrier = {
+ .gpu_buffer = &cc->ping_pong_buffer,
+ .offset = pp_output_pointer - cc->ping_pong_buffer.gpu_pointer,
+ .size = pp_size,
+ };
- cc->last_output_ssbo_index = !cc->last_output_ssbo_index;
- }break;
- case BeamformerShaderKind_MinMax:{
- for (i32 i = 1; i < frame->mips; i++) {
- glBindImageTexture(0, frame->texture, i - 1, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
- glBindImageTexture(1, frame->texture, i - 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
- glProgramUniform1i(program, MIN_MAX_MIPS_LEVEL_UNIFORM_LOC, i);
+ vk_command_push_constants(cmd, 0, sizeof(pc), &pc);
+ vk_command_dispatch_compute(cmd, dispatch);
+ vk_command_buffer_memory_barriers(cmd, &barrier, 1);
- u32 width = (u32)frame->dim.x >> i;
- u32 height = (u32)frame->dim.y >> i;
- u32 depth = (u32)frame->dim.z >> i;
- glDispatchCompute(ORONE(width / 32), ORONE(height), ORONE(depth / 32));
- glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
- }
+ cc->ping_pong_input_index = !cc->ping_pong_input_index;
}break;
+
case BeamformerShaderKind_DAS:{
local_persist u32 das_cycle_t = 0;
- BeamformerDASBakeParameters *db = &cp->shader_descriptors[shader_slot].bake.DAS;
- if (db->fast) {
- glClearTexImage(frame->texture, 0, GL_RED, GL_FLOAT, 0);
- glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
- glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_READ_WRITE, cp->iq_pipeline ? GL_RG32F : GL_R32F);
+ GPUBuffer *b = cc->backlog.buffer;
+
+ u64 frame_size = beamformer_frame_byte_size(frame->points, frame->data_kind);
+ u64 incoherent_size = frame_size / beamformer_data_kind_element_count[frame->data_kind];
+
+ BeamformerDASPushConstants pc = {
+ .xdc_element_pitch = cp->xdc_element_pitch,
+ .rf_data = pp_input_pointer,
+ .output_data = b->gpu_pointer + frame->buffer_offset,
+ .incoherent_output = b->gpu_pointer + b->size - incoherent_size,
+ .array_parameters = cp->array_parameters.gpu_pointer + offsetof(BeamformerDASArrayParameters, focal_vectors),
+ .output_size_x = cp->output_points.x,
+ .output_size_y = cp->output_points.y,
+ .output_size_z = cp->output_points.z,
+ .cycle_t = das_cycle_t++,
+ };
+ mem_copy(pc.voxel_transform.E, cp->voxel_transform.E, sizeof(pc.voxel_transform));
+ mem_copy(pc.xdc_transform.E, cp->xdc_transform.E, sizeof(pc.xdc_transform));
+
+ b32 coherent = cp->shader_descriptors[shader_slot].bake.DAS.coherency_weighting;
+
+ i32 loop_end;
+ if (cp->acquisition_kind == BeamformerAcquisitionKind_RCA_VLS ||
+ cp->acquisition_kind == BeamformerAcquisitionKind_RCA_TPW)
+ {
+ /* NOTE(rnp): to avoid repeatedly reading the whole focal vectors
+ * array we loop over transmits for VLS/TPW */
+ loop_end = (i32)cp->acquisition_count;
} else {
- glBindImageTexture(0, frame->texture, 0, GL_TRUE, 0, GL_WRITE_ONLY, cp->iq_pipeline ? GL_RG32F : GL_R32F);
+ loop_end = (i32)cp->shader_descriptors[shader_slot].bake.DAS.channel_count;
}
- u32 sparse_texture = cp->textures[BeamformerComputeTextureKind_SparseElements];
- if (!db->sparse) sparse_texture = 0;
-
- glBindBufferBase(GL_UNIFORM_BUFFER, 0, cp->ubos[BeamformerComputeUBOKind_DAS]);
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, cc->ping_pong_ssbos[input_ssbo_idx], 0, cp->rf_size);
- glBindImageTexture(1, sparse_texture, 0, 0, 0, GL_READ_ONLY, GL_R16I);
- glBindImageTexture(2, cp->textures[BeamformerComputeTextureKind_FocalVectors], 0, 0, 0, GL_READ_ONLY, GL_RG32F);
- glBindImageTexture(3, cp->textures[BeamformerComputeTextureKind_TransmitReceiveOrientations], 0, 0, 0, GL_READ_ONLY, GL_R8I);
+ GPUMemoryBarrierInfo memory_barriers[2] = {
+ {
+ .gpu_buffer = b,
+ .offset = frame->buffer_offset,
+ .size = frame_size,
+ },
+ {
+ .gpu_buffer = b,
+ .offset = pc.incoherent_output - b->gpu_pointer,
+ .size = incoherent_size,
+ },
+ };
- glProgramUniform1ui(program, DAS_CYCLE_T_UNIFORM_LOC, das_cycle_t++);
+ // NOTE(rnp): barrier to wait for clear pipeline to complete
+ vk_command_buffer_memory_barriers(cmd, memory_barriers, 1 + coherent);
- if (db->fast) {
- i32 loop_end;
- if (db->acquisition_kind == BeamformerAcquisitionKind_RCA_VLS ||
- db->acquisition_kind == BeamformerAcquisitionKind_RCA_TPW)
- {
- /* NOTE(rnp): to avoid repeatedly sampling the whole focal vectors
- * texture we loop over transmits for VLS/TPW */
- loop_end = (i32)db->acquisition_count;
- } else {
- loop_end = (i32)db->channel_count;
- }
- f32 percent_per_step = 1.0f / (f32)loop_end;
- cc->processing_progress = -percent_per_step;
- for (i32 index = 0; index < loop_end; index++) {
- cc->processing_progress += percent_per_step;
- /* IMPORTANT(rnp): prevents OS from coalescing and killing our shader */
- glFinish();
- glProgramUniform1i(program, DAS_FAST_CHANNEL_UNIFORM_LOC, index);
- glDispatchCompute(dispatch.x, dispatch.y, dispatch.z);
- glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
+ vk_command_push_constants(cmd, 0, sizeof(pc), &pc);
+ for (i32 index = 0; index < loop_end; index++) {
+ if (index != 0) {
+ pc.channel_t = index;
+ vk_command_push_constants(cmd, offsetof(BeamformerDASPushConstants, channel_t),
+ sizeof(pc.channel_t), &pc.channel_t);
}
- } else {
- glDispatchCompute(dispatch.x, dispatch.y, dispatch.z);
+ vk_command_dispatch_compute(cmd, dispatch);
+ vk_command_buffer_memory_barriers(cmd, memory_barriers, 1 + coherent);
+ }
+ }break;
+
+ case BeamformerShaderKind_CoherencyWeighting:{
+ GPUBuffer *b = cc->backlog.buffer;
+
+ u64 frame_size = beamformer_frame_byte_size(frame->points, frame->data_kind);
+ u64 incoherent_size = frame_size / beamformer_data_kind_element_count[frame->data_kind];
+
+ GPUMemoryBarrierInfo memory_barrier = {
+ .gpu_buffer = b,
+ .offset = frame->buffer_offset,
+ .size = frame_size,
+ };
+
+ BeamformerCoherencyWeightingPushConstants cwpc = {
+ .left_side_buffer = b->gpu_pointer + frame->buffer_offset,
+ .right_side_buffer = b->gpu_pointer + b->size - incoherent_size,
+ .elements = incoherent_size / beamformer_data_kind_element_size[frame->data_kind],
+ .scale = 1.0f,
+ .output_size_x = cp->output_points.x,
+ .output_size_y = cp->output_points.y,
+ .output_size_z = cp->output_points.z,
+ };
+
+ vk_command_push_constants(cmd, 0, sizeof(cwpc), &cwpc);
+ vk_command_dispatch_compute(cmd, dispatch);
+ vk_command_buffer_memory_barriers(cmd, &memory_barrier, 1);
+ }break;
+
+ // NOTE(rnp): invalid stages should be filtered in planning phase
+ InvalidDefaultCase;
+ }
+
+ #if 0
+ switch (shader) {
+ case BeamformerShaderKind_MinMax:{
+ for (u32 i = 1; i < frame->image.mip_map_levels; i++) {
+ glBindImageTexture(0, frame->texture, i - 1, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
+ glBindImageTexture(1, frame->texture, i - 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_RG32F);
+ glProgramUniform1i(program, MIN_MAX_MIPS_LEVEL_UNIFORM_LOC, i);
+
+ u32 width = (u32)frame->dim.x >> i;
+ u32 height = (u32)frame->dim.y >> i;
+ u32 depth = (u32)frame->dim.z >> i;
+ glDispatchCompute(ORONE(width / 32), ORONE(height), ORONE(depth / 32));
+ glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
- glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}break;
case BeamformerShaderKind_Sum:{
u32 aframe_index = ctx->averaged_frame_index % countof(ctx->averaged_frames);
@@ -950,77 +1001,27 @@ do_compute_shader(BeamformerCtx *ctx, BeamformerComputePlan *cp, BeamformerFrame
assert(to_average == frame_count);
glProgramUniform1f(program, SUM_PRESCALE_UNIFORM_LOC, 1 / (f32)frame_count);
- do_sum_shader(cc, in_textures, frame_count, aframe->texture, aframe->dim);
+ /* NOTE: zero output before summing */
+ glClearTexImage(aframe->texture, 0, GL_RED, GL_FLOAT, 0);
+ glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
+
+ glBindImageTexture(0, out_texture, 0, GL_TRUE, 0, GL_READ_WRITE, GL_RG32F);
+ for (u32 i = 0; i < in_texture_count; i++) {
+ glBindImageTexture(1, in_textures[i], 0, GL_TRUE, 0, GL_READ_ONLY, GL_RG32F);
+ glDispatchCompute(dispatch.x, dispatch.y, dispatch.z);
+ glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
+ }
+
mem_copy(aframe->voxel_transform.E, frame->voxel_transform.E, sizeof(frame->voxel_transform));
aframe->compound_count = frame->compound_count;
aframe->acquisition_kind = frame->acquisition_kind;
}break;
- InvalidDefaultCase;
}
-}
-
-function s8
-shader_text_with_header(s8 header, s8 filepath, b32 has_file, BeamformerShaderKind shader_kind, Arena *arena)
-{
- Stream sb = arena_stream(*arena);
- stream_push_shader_header(&sb, shader_kind, header);
- stream_append_s8(&sb, s8("\n#line 1\n"));
-
- s8 result;
- if (BakeShaders) {
- /* TODO(rnp): better handling of shaders with no backing file */
- if (has_file) {
- i32 reloadable_index = beamformer_shader_reloadable_index_by_shader[shader_kind];
- stream_append_s8(&sb, beamformer_shader_data[reloadable_index]);
- }
- result = arena_stream_commit(arena, &sb);
- } else {
- result = arena_stream_commit(arena, &sb);
- if (has_file) {
- i64 length = os_read_entire_file((c8 *)filepath.data, arena->beg, arena_capacity(arena, u8));
- result.len += length;
- arena_commit(arena, length);
- }
- }
-
- return result;
-}
-
-/* NOTE(rnp): currently this function is only handling rendering shaders.
- * look at load_compute_shader for compute shaders */
-function void
-beamformer_reload_shader(BeamformerCtx *ctx, BeamformerShaderReloadContext *src, Arena arena, s8 shader_name)
-{
- BeamformerShaderKind kind = beamformer_reloadable_shader_kinds[src->reloadable_info_index];
- assert(kind == BeamformerShaderKind_Render3D);
-
- s8 path = push_s8_from_parts(&arena, os_path_separator(), s8("shaders"),
- beamformer_reloadable_shader_files[src->reloadable_info_index]);
-
- i32 shader_count = 1;
- BeamformerShaderReloadContext *link = src->link;
- while (link != src) { shader_count++; link = link->link; }
-
- s8 *shader_texts = push_array(&arena, s8, shader_count);
- u32 *shader_types = push_array(&arena, u32, shader_count);
-
- i32 index = 0;
- do {
- b32 has_file = link->reloadable_info_index >= 0;
- shader_texts[index] = shader_text_with_header(link->header, path, has_file, kind, &arena);
- shader_types[index] = link->gl_type;
- index++;
- link = link->link;
- } while (link != src);
-
- u32 *shader = &ctx->frame_view_render_context.shader;
- glDeleteProgram(*shader);
- *shader = load_shader(arena, shader_texts, shader_types, shader_count, shader_name);
- ctx->frame_view_render_context.updated = 1;
+ #endif
}
function void
-complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_context)
+complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena)
{
BeamformerComputeContext * cs = &ctx->compute_context;
BeamformerSharedMemory * sm = ctx->shared_memory;
@@ -1029,6 +1030,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
while (work) {
b32 can_commit = 1;
switch (work->kind) {
+
case BeamformerWorkKind_ExportBuffer:{
/* TODO(rnp): better way of handling DispatchCompute barrier */
post_sync_barrier(ctx->shared_memory, BeamformerSharedMemoryLockKind_DispatchCompute);
@@ -1036,15 +1038,15 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
BeamformerExportContext *ec = &work->export_context;
switch (ec->kind) {
case BeamformerExportKind_BeamformedData:{
- BeamformerFrame *frame = ctx->latest_frame;
- if (frame) {
- assert(frame->ready_to_present);
- u32 texture = frame->texture;
- iv3 dim = frame->dim;
- u32 out_size = (u32)dim.x * (u32)dim.y * (u32)dim.z * 2 * sizeof(f32);
- if (out_size <= ec->size) {
- glGetTextureImage(texture, 0, GL_RG, GL_FLOAT, (i32)out_size,
- beamformer_shared_memory_scratch_arena(sm, ctx->shared_memory_size).beg);
+ BeamformerFrame *f = ctx->latest_frame;
+ if (f) {
+ u64 frame_size = beamformer_frame_byte_size(f->points, f->data_kind);
+ assert((frame_size & 63) == 0);
+ if (frame_size <= ec->size) {
+ vk_host_wait_timeline(VulkanTimeline_Compute, f->timeline_valid_value, -1ULL);
+ vk_buffer_range_download(beamformer_shared_memory_scratch_arena(sm, ctx->shared_memory_size).beg,
+ ctx->compute_context.backlog.buffer, f->buffer_offset,
+ frame_size, 1);
}
}
}break;
@@ -1062,6 +1064,7 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
beamformer_shared_memory_release_lock(ctx->shared_memory, work->lock);
post_sync_barrier(ctx->shared_memory, BeamformerSharedMemoryLockKind_ExportSync);
}break;
+
case BeamformerWorkKind_CreateFilter:{
/* TODO(rnp): this should probably get deleted and moved to lazy loading */
BeamformerCreateFilterContext *fctx = &work->create_filter_context;
@@ -1070,20 +1073,18 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
BeamformerComputePlan *cp = beamformer_compute_plan_for_block(cs, block, arena);
beamformer_filter_update(cp->filters + slot, fctx->parameters, block, slot, *arena);
}break;
+
case BeamformerWorkKind_ComputeIndirect:{
fill_frame_compute_work(ctx, work, work->compute_indirect_context.view_plane,
work->compute_indirect_context.parameter_block, 1);
} /* FALLTHROUGH */
- case BeamformerWorkKind_Compute:{
- DEBUG_DECL(glClearNamedBufferData(cs->ping_pong_ssbos[0], GL_RG32F, GL_RG, GL_FLOAT, 0);)
- DEBUG_DECL(glClearNamedBufferData(cs->ping_pong_ssbos[1], GL_RG32F, GL_RG, GL_FLOAT, 0);)
- DEBUG_DECL(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);)
+ case BeamformerWorkKind_Compute:{
push_compute_timing_info(ctx->compute_timing_table,
(ComputeTimingInfo){.kind = ComputeTimingInfoKind_ComputeFrameBegin});
BeamformerComputePlan *cp = beamformer_compute_plan_for_block(cs, work->compute_context.parameter_block, arena);
- if (beamformer_parameter_block_dirty(sm, work->compute_context.parameter_block)) {
+ if unlikely(beamformer_parameter_block_dirty(sm, work->compute_context.parameter_block)) {
u32 block = work->compute_context.parameter_block;
beamformer_commit_parameter_block(ctx, cp, block, *arena);
}
@@ -1094,91 +1095,134 @@ complete_queue(BeamformerCtx *ctx, BeamformWorkQueue *q, Arena *arena, iptr gl_c
static_assert(ISPOWEROF2(BeamformerMaxComputeShaderStages),
"max compute shader stages must be power of 2");
assert((dirty_programs & ~((u32)BeamformerMaxComputeShaderStages - 1)) == 0);
- for EachBit(dirty_programs, slot)
- load_compute_shader(ctx, cp, (u32)slot, *arena);
+ for EachBit(dirty_programs, slot) {
+ beamformer_reload_compute_pipeline(cp->vulkan_pipelines + slot, cp->pipeline.shaders[slot],
+ cp->shader_descriptors + slot, *arena);
+ }
atomic_store_u32(&cs->processing_compute, 1);
- start_renderdoc_capture(gl_context);
- BeamformerFrame *frame = work->compute_context.frame;
+ start_renderdoc_capture();
- GLenum gl_kind = cp->iq_pipeline ? GL_RG32F : GL_R32F;
- if (!beamformer_frame_compatible(frame, cp->output_points, gl_kind))
- alloc_beamform_frame(frame, cp->output_points, gl_kind, s8("Beamformed_Data"), *arena);
+ i32 das_index = -1;
+ b32 has_sum = 0;
+ for (u32 i = 0; i < cp->pipeline.shader_count; i++) {
+ has_sum |= cp->pipeline.shaders[i] == BeamformerShaderKind_Sum;
+ if (cp->pipeline.shaders[i] == BeamformerShaderKind_DAS)
+ das_index = (i32)i;
+ }
- m4 voxel_transform = m4_mul(cp->ui_voxel_transform, cp->voxel_transform);
- mem_copy(frame->voxel_transform.E, voxel_transform.E, sizeof(voxel_transform));
+ b32 das_coherent = das_index >= 0 && cp->shader_descriptors[das_index].bake.DAS.coherency_weighting;
+ u64 reserved_frame_size = 0;
+
+ if (has_sum)
+ reserved_frame_size += beamformer_frame_byte_size(cp->output_points, cp->iq_pipeline ?
+ BeamformerDataKind_Float32Complex :
+ BeamformerDataKind_Float32);
+
+ // TODO(rnp): incoherent sum for different data kinds
+ if (das_coherent)
+ reserved_frame_size += beamformer_frame_byte_size(cp->output_points, BeamformerDataKind_Float32);
+
+ BeamformerFrame *frame = beamformer_frame_next(cs, cp->output_points, cp->iq_pipeline, reserved_frame_size);
frame->acquisition_kind = cp->acquisition_kind;
frame->compound_count = cp->acquisition_count;
-
- BeamformerComputeContext *cc = &ctx->compute_context;
- BeamformerComputePipeline *pipeline = &cp->pipeline;
- /* NOTE(rnp): first stage requires access to raw data buffer directly so we break
- * it out into a separate step. This way data can get released as soon as possible */
- if (pipeline->shader_count > 0) {
- BeamformerRFBuffer *rf = &cs->rf_buffer;
- u32 compute_index = rf->compute_index;
- u32 slot = compute_index % countof(rf->compute_syncs);
-
- if (work->kind == BeamformerWorkKind_ComputeIndirect) {
- /* NOTE(rnp): compute indirect is used when uploading data. if compute thread
- * preempts upload it must wait for slot counter to reach a value it hasn't
- * processed yet. */
- spin_wait(atomic_load_u64(rf->uploaded_data_indices + slot) <= compute_index);
-
- /* NOTE(rnp): if the GPU supports BAR there may be no need to synchronize
- * other than the above spin */
- if (vk_buffer_needs_sync(&rf->buffer))
- glWaitSemaphoreEXT(rf->gl_upload_semaphores[slot], 0, 0, 0, 0, 0);
- } else {
- slot = (rf->compute_index - 1) % countof(rf->compute_syncs);
+ mem_copy(frame->voxel_transform.E, cp->voxel_transform.E, sizeof(cp->voxel_transform));
+
+ VulkanHandle cmd = vk_command_begin(VulkanTimeline_Compute);
+ vk_command_timestamp(cmd);
+
+ if (das_index >= 0) {
+ GPUBuffer *backlog = cs->backlog.buffer;
+ u32 subgroup_size = vk_gpu_info()->subgroup_size;
+ BeamformerBufferClearPushConstants pc = {
+ .data = backlog->gpu_pointer + frame->buffer_offset,
+ .clear_word = 0,
+ .words = beamformer_frame_byte_size(frame->points, frame->data_kind) / sizeof(u32),
+ };
+
+ u32 index = BeamformerShaderKind_BufferClear - BeamformerShaderKind_ComputeInternalFirst;
+ vk_command_bind_pipeline(cmd, cs->compute_internal_pipelines[index]);
+ vk_command_push_constants(cmd, 0, sizeof(pc), &pc);
+ vk_command_dispatch_compute(cmd, (uv3){{(u32)ceil_f32((f32)pc.words / subgroup_size), 1, 1}});
+
+ if (das_coherent) {
+ pc.words = pc.words / beamformer_data_kind_element_count[frame->data_kind];
+ pc.data = backlog->gpu_pointer + backlog->size - sizeof(u32) * pc.words;
+ vk_command_push_constants(cmd, 0, sizeof(pc), &pc);
+ vk_command_dispatch_compute(cmd, (uv3){{(u32)ceil_f32((f32)pc.words / subgroup_size), 1, 1}});
}
+ }
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, rf->ssbo, slot * rf->active_rf_size, rf->active_rf_size);
+ BeamformerRFBuffer *rf = &cs->rf_buffer;
+ u32 compute_index = rf->compute_index;
+ u32 slot = compute_index % countof(rf->upload_complete_values);
- glBeginQuery(GL_TIME_ELAPSED, cc->shader_timer_ids[0]);
- do_compute_shader(ctx, cp, frame, pipeline->shaders[0], 0, pipeline->parameters + 0, *arena);
- glEndQuery(GL_TIME_ELAPSED);
+ if (work->kind == BeamformerWorkKind_ComputeIndirect) {
+ // TODO(rnp): this shouldn't be necessary, there should be a way of communicating
+ // what the value will be so that only the command wait is needed.
+ spin_wait(atomic_load_u64(rf->upload_complete_values + slot) <= compute_index);
- if (work->kind == BeamformerWorkKind_ComputeIndirect) {
- atomic_store_u64(rf->compute_syncs + slot, glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0));
- atomic_add_u64(&rf->compute_index, 1);
- }
+ /* NOTE(rnp): if the GPU supports BAR there may be no need to synchronize
+ * other than the above spin */
+ if (vk_buffer_needs_sync(&rf->buffer))
+ vk_command_wait_timeline(cmd, VulkanTimeline_Transfer, rf->upload_complete_values[slot]);
+ } else {
+ slot = (rf->compute_index - 1) % countof(rf->upload_complete_values);
+ }
+
+ for (u32 i = 0; i < cp->pipeline.shader_count; i++) {
+ do_compute_shader(ctx, cmd, cp, frame, i, *arena,
+ rf->buffer.gpu_pointer + slot * rf->active_rf_size);
+ vk_command_timestamp(cmd);
}
- b32 did_sum_shader = 0;
- for (u32 i = 1; i < pipeline->shader_count; i++) {
- did_sum_shader |= pipeline->shaders[i] == BeamformerShaderKind_Sum;
- glBeginQuery(GL_TIME_ELAPSED, cc->shader_timer_ids[i]);
- do_compute_shader(ctx, cp, frame, pipeline->shaders[i], i, pipeline->parameters + i, *arena);
- glEndQuery(GL_TIME_ELAPSED);
+ u64 end_timeline_value = vk_command_end(cmd, (VulkanHandle){0}, (VulkanHandle){0});
+ if (work->kind == BeamformerWorkKind_ComputeIndirect) {
+ atomic_store_u64(rf->compute_complete_values + slot, end_timeline_value);
+ atomic_add_u64(&rf->compute_index, 1);
}
- /* NOTE(rnp): the first of these blocks until work completes */
- for (u32 i = 0; i < pipeline->shader_count; i++) {
- ComputeTimingInfo info = {0};
- info.kind = ComputeTimingInfoKind_Shader;
- info.shader = pipeline->shaders[i];
- glGetQueryObjectui64v(cc->shader_timer_ids[i], GL_QUERY_RESULT, &info.timer_count);
- push_compute_timing_info(ctx->compute_timing_table, info);
+ atomic_store_u64(&frame->timeline_valid_value, end_timeline_value);
+
+ {
+ Arena scratch = *arena;
+ /* NOTE(rnp): this blocks until work completes */
+ u64 * timestamps = vk_command_read_timestamps(VulkanTimeline_Compute, &scratch);
+
+ u64 last_time = timestamps[0] > 0 ? timestamps[1] : 0;
+ u32 shader_index = 0;
+ for (u64 i = 2; i < timestamps[0] + 1; i++) {
+ push_compute_timing_info(ctx->compute_timing_table, (ComputeTimingInfo){
+ .kind = ComputeTimingInfoKind_Shader,
+ .shader = cp->pipeline.shaders[shader_index],
+ .shader_slot = shader_index,
+ .timer_count = timestamps[i] - last_time,
+ });
+ last_time = timestamps[i];
+ shader_index++;
+ }
}
+
cs->processing_progress = 1;
- frame->ready_to_present = 1;
- if (did_sum_shader) {
+ if (has_sum) {
+ #if 0
u32 aframe_index = ((ctx->averaged_frame_index++) % countof(ctx->averaged_frames));
ctx->averaged_frames[aframe_index].view_plane_tag = frame->view_plane_tag;
ctx->averaged_frames[aframe_index].ready_to_present = 1;
atomic_store_u64((u64 *)&ctx->latest_frame, (u64)(ctx->averaged_frames + aframe_index));
+ #endif
} else {
atomic_store_u64((u64 *)&ctx->latest_frame, (u64)frame);
}
- cs->processing_compute = 0;
+
+ atomic_store_u32(&cs->processing_compute, 0);
push_compute_timing_info(ctx->compute_timing_table,
(ComputeTimingInfo){.kind = ComputeTimingInfoKind_ComputeFrameEnd});
- end_renderdoc_capture(gl_context);
+ end_renderdoc_capture();
}break;
InvalidDefaultCase;
}
@@ -1199,95 +1243,69 @@ coalesce_timing_table(ComputeTimingTable *t, ComputeShaderStats *stats)
u32 target = atomic_load_u32(&t->write_index);
u32 stats_index = (stats->latest_frame_index + 1) % countof(stats->table.times);
- static_assert(BeamformerShaderKind_Count + 1 <= 32, "timing coalescence bitfield test");
- u32 seen_info_test = 0;
+ b32 has_rf = 0;
+ f32 gpu_clocks_to_nano = 1.0e-9f * vk_gpu_info()->timestamp_period_ns;
+ // NOTE(rnp): compare with != rather than < (the index may wrap)
while (t->read_index != target) {
ComputeTimingInfo info = t->buffer[t->read_index % countof(t->buffer)];
switch (info.kind) {
+
case ComputeTimingInfoKind_ComputeFrameBegin:{
assert(t->compute_frame_active == 0);
t->compute_frame_active = 1;
/* NOTE(rnp): allow multiple instances of same shader to accumulate */
+ t->in_flight_shader_count = 0;
+ memory_clear(t->in_flight_shader_ids, 0, sizeof(t->in_flight_shader_ids));
memory_clear(stats->table.times[stats_index], 0, sizeof(stats->table.times[stats_index]));
}break;
+
case ComputeTimingInfoKind_ComputeFrameEnd:{
assert(t->compute_frame_active == 1);
t->compute_frame_active = 0;
stats->latest_frame_index = stats_index;
stats_index = (stats_index + 1) % countof(stats->table.times);
+ stats->table.shader_count = t->in_flight_shader_count;
+ mem_copy(stats->table.shader_ids, t->in_flight_shader_ids, sizeof(t->in_flight_shader_ids));
}break;
+
case ComputeTimingInfoKind_Shader:{
- stats->table.times[stats_index][info.shader] += (f32)info.timer_count / 1.0e9f;
- seen_info_test |= (1u << info.shader);
+ t->in_flight_shader_count = Max(t->in_flight_shader_count, info.shader_slot + 1u);
+ t->in_flight_shader_ids[info.shader_slot] = info.shader;
+ stats->table.times[stats_index][info.shader_slot] += info.timer_count * gpu_clocks_to_nano;
}break;
+
case ComputeTimingInfoKind_RF_Data:{
stats->latest_rf_index = (stats->latest_rf_index + 1) % countof(stats->table.rf_time_deltas);
- f32 delta = (f32)(info.timer_count - stats->last_rf_timer_count) / 1.0e9f;
+ f32 delta = info.timer_count / (f32)os_system_info()->timer_frequency;
stats->table.rf_time_deltas[stats->latest_rf_index] = delta;
- stats->last_rf_timer_count = info.timer_count;
- seen_info_test |= (1 << BeamformerShaderKind_Count);
+ has_rf = 1;
}break;
}
/* NOTE(rnp): do this at the end so that stats table is always in a consistent state */
- atomic_add_u32(&t->read_index, 1);
+ t->read_index++;
}
- if (seen_info_test) {
- for EachEnumValue(BeamformerShaderKind, shader) {
- if (seen_info_test & (1 << shader)) {
- f32 sum = 0;
- for EachElement(stats->table.times, i)
- sum += stats->table.times[i][shader];
- stats->average_times[shader] = sum / countof(stats->table.times);
- }
- }
+ for (u32 i = 0; i < stats->table.shader_count; i++) {
+ f32 sum = 0;
+ for EachElement(stats->table.times, it)
+ sum += stats->table.times[it][i];
+ stats->average_times[i] = sum / countof(stats->table.times);
+ }
- if (seen_info_test & (1 << BeamformerShaderKind_Count)) {
- f32 sum = 0;
- for EachElement(stats->table.rf_time_deltas, i)
- sum += stats->table.rf_time_deltas[i];
- stats->rf_time_delta_average = sum / countof(stats->table.rf_time_deltas);
- }
+ if (has_rf) {
+ f32 sum = 0;
+ for EachElement(stats->table.rf_time_deltas, i)
+ sum += stats->table.rf_time_deltas[i];
+ stats->rf_time_delta_average = sum / countof(stats->table.rf_time_deltas);
}
}
DEBUG_EXPORT BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute)
{
- BeamformerCtx *ctx = (BeamformerCtx *)user_context;
BeamformerSharedMemory *sm = ctx->shared_memory;
- complete_queue(ctx, &sm->external_work_queue, arena, gl_context);
- complete_queue(ctx, ctx->beamform_work_queue, arena, gl_context);
-}
-
-function void
-beamformer_rf_buffer_allocate(BeamformerRFBuffer *rf, u32 rf_size)
-{
- if ValidHandle(rf->export_handle)
- os_release_handle(rf->export_handle);
-
- OSHandle export = {0};
- vk_buffer_allocate(&rf->buffer, (iz)rf_size, GPUBufferCreateFlags_HostWritable|GPUBufferCreateFlags_MemoryOnly,
- &export, s8(""));
-
- glDeleteBuffers(1, &rf->ssbo);
- glCreateBuffers(1, &rf->ssbo);
-
- glDeleteMemoryObjectsEXT(1, &rf->memory_object);
- glCreateMemoryObjectsEXT(1, &rf->memory_object);
-
- if (OS_WINDOWS) {
- glImportMemoryWin32HandleEXT(rf->memory_object, rf->buffer.size, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
- (void *)export.value[0]);
- // NOTE(rnp): w32 does not transfer ownership from handle back to driver
- rf->export_handle = export;
- } else {
- glImportMemoryFdEXT(rf->memory_object, rf->buffer.size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, export.value[0]);
- }
-
- glNamedBufferStorageMemEXT(rf->ssbo, rf->buffer.size, rf->memory_object, 0);
-
- LABEL_GL_OBJECT(GL_BUFFER, rf->ssbo, s8("Raw_RF_SSBO"));
+ complete_queue(ctx, &sm->external_work_queue, arena);
+ complete_queue(ctx, ctx->beamform_work_queue, arena);
}
DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload)
@@ -1305,22 +1323,20 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload)
BeamformerRFBuffer *rf = ctx->rf_buffer;
rf->active_rf_size = vk_round_up_to_sync_size(rf_block_rf_size & 0xFFFFFFFFULL, 64);
- if unlikely(rf->buffer.size < countof(rf->compute_syncs) * rf->active_rf_size)
- beamformer_rf_buffer_allocate(rf, countof(rf->compute_syncs) * rf->active_rf_size);
+ if unlikely(rf->buffer.size < countof(rf->upload_complete_values) * rf->active_rf_size) {
+ GPUBufferAllocateInfo allocate_info = {
+ .size = countof(rf->upload_complete_values) * rf->active_rf_size,
+ .flags = VulkanUsageFlag_HostReadWrite,
+ .label = s8("RawRFBuffer"),
+ };
+ vk_buffer_allocate(&rf->buffer, &allocate_info);
+ }
- u32 slot = rf->insertion_index++ % countof(rf->compute_syncs);
+ u32 slot = rf->insertion_index % countof(rf->upload_complete_values);
/* NOTE(rnp): don't overwrite slot if the compute thread hasn't processed it */
- u64 current_slot_value = rf->uploaded_data_indices[slot];
- spin_wait(atomic_load_u64(&rf->compute_index) < current_slot_value);
-
- if (atomic_load_u64(rf->compute_syncs + slot)) {
- GLenum sync_result = glClientWaitSync(rf->compute_syncs[slot], 0, 1000000000);
- if (sync_result == GL_TIMEOUT_EXPIRED || sync_result == GL_WAIT_FAILED) {
- // TODO(rnp): what do?
- }
- glDeleteSync(rf->compute_syncs[slot]);
- }
+ spin_wait(atomic_load_u64(&rf->compute_index) < rf->upload_complete_values[slot]);
+ vk_host_wait_timeline(VulkanTimeline_Compute, rf->compute_complete_values[slot], -1ULL);
vk_buffer_range_upload(&rf->buffer, beamformer_shared_memory_scratch_arena(sm, ctx->shared_memory_size).beg,
slot * rf->active_rf_size, rf->active_rf_size, 1);
@@ -1329,19 +1345,17 @@ DEBUG_EXPORT BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload)
beamformer_shared_memory_release_lock(ctx->shared_memory, (i32)scratch_lock);
post_sync_barrier(ctx->shared_memory, upload_lock);
- if (vk_buffer_needs_sync(&rf->buffer)) {
- // TODO(rnp): vk_buffer_sync
- }
-
- atomic_store_u64(rf->uploaded_data_indices + slot, rf->insertion_index);
- atomic_store_u64(rf->compute_syncs + slot, 0);
+ rf->insertion_index++;
+ atomic_store_u64(rf->upload_complete_values + slot, vk_host_signal_timeline(VulkanTimeline_Transfer));
os_wake_all_waiters(ctx->compute_worker_sync);
- ComputeTimingInfo info = {.kind = ComputeTimingInfoKind_RF_Data};
- glGetQueryObjectui64v(rf->data_timestamp_query, GL_QUERY_RESULT, &info.timer_count);
- glQueryCounter(rf->data_timestamp_query, GL_TIMESTAMP);
- push_compute_timing_info(ctx->compute_timing_table, info);
+ u64 current_time = os_timer_count();
+ push_compute_timing_info(ctx->compute_timing_table, (ComputeTimingInfo){
+ .kind = ComputeTimingInfoKind_RF_Data,
+ .timer_count = current_time - rf->timestamp,
+ });
+ rf->timestamp = current_time;
}
}
@@ -1373,33 +1387,49 @@ beamformer_process_input_events(BeamformerCtx *ctx, BeamformerInput *input,
case BeamformerInputEventKind_ExecutableReload:{
ui_init(ctx, ctx->ui_backing_store);
+ if (!vk_pipeline_valid(ctx->compute_context.compute_internal_pipelines[0])) {
+ for EachElement(ctx->compute_context.compute_internal_pipelines, it) {
+ beamformer_reload_compute_pipeline(ctx->compute_context.compute_internal_pipelines + it,
+ BeamformerShaderKind_ComputeInternalFirst + it, 0,
+ ctx->arena);
+ }
+ }
+
#if BEAMFORMER_RENDERDOC_HOOKS
- start_frame_capture = input->renderdoc_start_frame_capture;
- end_frame_capture = input->renderdoc_end_frame_capture;
+ start_frame_capture = input->renderdoc_start_frame_capture;
+ end_frame_capture = input->renderdoc_end_frame_capture;
+ set_capture_path_template = input->renderdoc_set_capture_file_path_template;
#endif
}break;
case BeamformerInputEventKind_FileEvent:{
BeamformerFileReloadContext *frc = event->file_watch_user_context;
switch (frc->kind) {
- case BeamformerFileReloadKind_Shader:{
- BeamformerShaderReloadContext *src = frc->shader_reload_context;
- BeamformerShaderKind kind = beamformer_reloadable_shader_kinds[src->reloadable_info_index];
- beamformer_reload_shader(ctx, src, ctx->arena, beamformer_shader_names[kind]);
+ case BeamformerFileReloadKind_ComputeInternalShader:{
+ // TODO(rnp): this could stall, better to push it onto compute once queue is better
+ beamformer_reload_compute_pipeline(frc->shader_reload.pipeline, frc->shader_reload.shader, 0, ctx->arena);
}break;
+
case BeamformerFileReloadKind_ComputeShader:{
for EachElement(ctx->compute_context.compute_plans, block) {
BeamformerComputePlan *cp = ctx->compute_context.compute_plans[block];
for (u32 slot = 0; cp && slot < cp->pipeline.shader_count; slot++) {
i32 shader_index = beamformer_shader_reloadable_index_by_shader[cp->pipeline.shaders[slot]];
- if (beamformer_reloadable_shader_kinds[shader_index] == frc->compute_shader_kind)
+ if (beamformer_reloadable_shader_kinds[shader_index] == frc->shader_reload.shader)
atomic_or_u32(&cp->dirty_programs, 1 << slot);
}
}
+ // TODO(rnp): track latest parameter block
if (ctx->latest_frame)
- beamformer_queue_compute(ctx, ctx->latest_frame, ctx->latest_frame->parameter_block);
+ beamformer_queue_compute(ctx, ctx->latest_frame, 0);
+ }break;
+
+ case BeamformerFileReloadKind_RenderShader:{
+ beamformer_reload_render_pipeline(frc->shader_reload.pipeline, frc->shader_reload.shader, ctx->arena);
+ ctx->render_shader_updated = 1;
}break;
+
InvalidDefaultCase;
}
}break;
@@ -1437,5 +1467,5 @@ beamformer_frame_step(BeamformerInput *input)
BeamformerViewPlaneTag tag = frame? frame->view_plane_tag : 0;
draw_ui(ctx, input, frame, tag);
- ctx->frame_view_render_context.updated = 0;
+ ctx->render_shader_updated = 0;
}
diff --git a/beamformer_internal.h b/beamformer_internal.h
@@ -10,12 +10,8 @@
#include "generated/beamformer.meta.c"
#include "generated/beamformer_shaders.c"
-#include <raylib_extended.h>
-#include <rlgl.h>
-
-#include "threads.c"
-#include "util_gl.c"
-#include "util_os.c"
+#include "external/raylib/src/raylib.h"
+#include "external/raylib/src/rlgl.h"
#define beamformer_info(s) s8("[info] " s "\n")
@@ -24,16 +20,63 @@
typedef struct { u64 value[1]; } VulkanHandle;
typedef enum {
- GPUBufferCreateFlags_HostWritable = 1 << 0,
- GPUBufferCreateFlags_MemoryOnly = 1 << 1,
-} GPUBufferCreateFlags;
+ VulkanTimeline_Graphics,
+ VulkanTimeline_Compute,
+ VulkanTimeline_Transfer,
+ VulkanTimeline_Count,
+} VulkanTimeline;
+
+typedef enum {
+ VulkanShaderKind_Vertex,
+ VulkanShaderKind_Mesh,
+ VulkanShaderKind_Fragment,
+ VulkanShaderKind_Compute,
+ VulkanShaderKind_Count,
+} VulkanShaderKind;
+
+typedef enum {
+ VulkanImageUsage_None,
+ VulkanImageUsage_Colour,
+ VulkanImageUsage_DepthStencil,
+ VulkanImageUsage_Count,
+} VulkanImageUsage;
+
+typedef enum {
+ VulkanUsageFlag_ImageSampling = 1 << 0,
+ VulkanUsageFlag_HostReadWrite = 1 << 1, // NOTE: not valid on images
+ /* NOTE: uses:
+ * - image-image copy operations
+ * - buffer-buffer copy operations
+ */
+ VulkanUsageFlag_TransferSource = 1 << 2,
+ VulkanUsageFlag_TransferDestination = 1 << 3,
+} VulkanUsageFlags;
+
+typedef struct {
+ VulkanShaderKind kind;
+ s8 text;
+ s8 name;
+} VulkanPipelineCreateInfo;
typedef struct {
+ VulkanHandle buffer;
u64 gpu_pointer;
i64 size;
- VulkanHandle buffer;
+
+ // NOTE: only used for render models
+ u64 index_count;
} GPUBuffer;
+typedef struct {
+ VulkanHandle image;
+ u32 width;
+ u32 height;
+ u32 samples;
+ u32 mip_map_levels;
+ // TODO(rnp): this is only here for importing from OpenGL, move it back into handle later
+ u64 memory_size;
+} GPUImage;
+
typedef enum {
GPUVendor_AMD = 0x1002,
GPUVendor_NVIDIA = 0x10DE,
@@ -59,28 +102,94 @@ typedef struct {
u64 gpu_heap_used;
} GPUInfo;
+typedef struct {
+ i64 size;
+ VulkanUsageFlags flags;
+
+ // NOTE(rnp): only required if buffer will be used on multiple timelines
+ VulkanTimeline *timelines_used;
+ u32 timeline_count;
+
+ s8 label;
+} GPUBufferAllocateInfo;
+
+typedef struct {
+ GPUBuffer *gpu_buffer;
+ u64 offset;
+ u64 size;
+} GPUMemoryBarrierInfo;
+
+typedef struct {
+ GPUBuffer model;
+ u32 vertex_count;
+ u32 normals_offset;
+} RenderModel;
+
+#include "threads.c"
+#include "util_os.c"
+
///////////////////////////
// NOTE: vulkan layer API
DEBUG_IMPORT void vk_load(OSLibrary vulkan, Arena *memory, Stream *error);
DEBUG_IMPORT GPUInfo *vk_gpu_info(void);
-DEBUG_IMPORT void vk_buffer_allocate(GPUBuffer *, iz size, GPUBufferCreateFlags flags, OSHandle *export, s8 label);
+DEBUG_IMPORT void vk_buffer_allocate(GPUBuffer *, GPUBufferAllocateInfo *info);
DEBUG_IMPORT void vk_buffer_release(GPUBuffer *);
DEBUG_IMPORT void vk_buffer_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal);
+DEBUG_IMPORT void vk_buffer_range_download(void *output, GPUBuffer *, u64 source_offset, u64 size, b32 non_temporal);
DEBUG_IMPORT u64 vk_round_up_to_sync_size(u64, u64 min);
-/* NOTE: Compute shaders do not have bindings. Data should be passed using push constants.
+// NOTE: images are 2D only, any other use case should just use a buffer and index in the shader
+DEBUG_IMPORT void vk_image_allocate(GPUImage *, u32 width, u32 height, u32 mips, u32 samples, VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export);
+DEBUG_IMPORT void vk_image_release(GPUImage *);
+
+DEBUG_IMPORT void vk_render_model_allocate(GPUBuffer *, void *indices, u64 index_count, u64 model_size, s8 label);
+DEBUG_IMPORT void vk_render_model_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal);
+DEBUG_IMPORT void vk_render_model_release(GPUBuffer *);
+
+/* NOTE: Pipelines do not have bindings. Data should be passed using push constants.
* In particular the push constants should contain pointers to gpu memory using the
* BufferDeviceAddress extension. */
// TODO(rnp): change this to accept SPIR-V directly and accept BakeParameters as specialization data
-DEBUG_IMPORT VulkanHandle vk_compute_shader(s8 text, s8 name);
-DEBUG_IMPORT void vk_compute_shader_release(VulkanHandle);
+DEBUG_IMPORT VulkanHandle vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size);
+DEBUG_IMPORT b32 vk_pipeline_valid(VulkanHandle);
+DEBUG_IMPORT void vk_pipeline_release(VulkanHandle);
-// NOTE: temporary API
DEBUG_IMPORT b32 vk_buffer_needs_sync(GPUBuffer *);
-DEBUG_IMPORT VulkanHandle vk_semaphore_create(OSHandle *export);
+DEBUG_IMPORT VulkanHandle vk_create_semaphore(OSHandle *export);
+
+DEBUG_IMPORT b32 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns);
+DEBUG_IMPORT u64 vk_host_signal_timeline(VulkanTimeline timeline);
+
+DEBUG_IMPORT VulkanHandle vk_command_begin(VulkanTimeline timeline);
+DEBUG_IMPORT void vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline);
+DEBUG_IMPORT void vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count);
+DEBUG_IMPORT void vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch);
+DEBUG_IMPORT void vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values);
+DEBUG_IMPORT void vk_command_timestamp(VulkanHandle command);
+DEBUG_IMPORT void vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value);
+// NOTE: extra semaphores only exist for synchronization with OpenGL and will be removed in the future
+DEBUG_IMPORT u64 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore);
+
+DEBUG_IMPORT void vk_command_begin_rendering(VulkanHandle command, GPUImage *restrict colour, GPUImage *restrict depth, GPUImage *restrict resolve);
+DEBUG_IMPORT void vk_command_draw(VulkanHandle command, GPUBuffer *model);
+DEBUG_IMPORT void vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset);
+DEBUG_IMPORT void vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth);
+DEBUG_IMPORT void vk_command_end_rendering(VulkanHandle command);
+
+DEBUG_IMPORT void vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination, GPUBuffer *restrict source, u64 source_offset, i64 size);
+
+// NOTE: returns array of valid timestamps + 1, first element is the count.
+// Calling thread may stall until results available.
+DEBUG_IMPORT u64 * vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena);
+
+#if BEAMFORMER_RENDERDOC_HOOKS
+DEBUG_IMPORT void * vk_renderdoc_instance_handle(void);
+#else
+#define vk_renderdoc_instance_handle() ((void *)0)
+#endif
///////////////////////////////
// NOTE: CUDA Library Bindings
@@ -119,73 +228,34 @@ CUDALibraryProcedureList
/////////////////////////////////////
// NOTE: Core Beamformer Definitions
-/* TODO(rnp): this should be a UBO */
-#define FRAME_VIEW_MODEL_MATRIX_LOC 0
-#define FRAME_VIEW_VIEW_MATRIX_LOC 1
-#define FRAME_VIEW_PROJ_MATRIX_LOC 2
-#define FRAME_VIEW_DYNAMIC_RANGE_LOC 3
-#define FRAME_VIEW_THRESHOLD_LOC 4
-#define FRAME_VIEW_GAMMA_LOC 5
-#define FRAME_VIEW_LOG_SCALE_LOC 6
-#define FRAME_VIEW_BB_COLOUR_LOC 7
-#define FRAME_VIEW_BB_FRACTION_LOC 8
-#define FRAME_VIEW_SOLID_BB_LOC 10
-
-#define FRAME_VIEW_BB_COLOUR 0.92, 0.88, 0.78, 1.0
-#define FRAME_VIEW_BB_FRACTION 0.007f
-
-#define FRAME_VIEW_RENDER_TARGET_SIZE 1024, 1024
-
-typedef struct {
- u32 shader;
- u32 framebuffers[2]; /* [0] -> multisample target, [1] -> normal target for resolving */
- u32 renderbuffers[2]; /* only used for 3D views, size is fixed */
- b32 updated;
-} FrameViewRenderContext;
-
#include "beamformer_parameters.h"
#include "beamformer_shared_memory.c"
typedef struct {
- iptr elements_offset;
- i32 elements;
- u32 buffer;
- u32 vao;
-} BeamformerRenderModel;
-
-typedef struct {
BeamformerFilterParameters parameters;
- f32 time_delay;
- i32 length;
- u32 ssbo;
+ f32 time_delay;
+ i32 length;
+ GPUBuffer buffer;
} BeamformerFilter;
-/* TODO(rnp): need 1 UBO per filter slot */
-#define BEAMFORMER_COMPUTE_UBO_LIST \
- X(DAS, BeamformerDASPushConstants, das)
-
-#define X(k, ...) BeamformerComputeUBOKind_##k,
-typedef enum {BEAMFORMER_COMPUTE_UBO_LIST BeamformerComputeUBOKind_Count} BeamformerComputeUBOKind;
-#undef X
-
-// X(kind, gl_kind, texture_format, pixel_type)
-#define BEAMFORMER_COMPUTE_TEXTURE_LIST \
- X(FocalVectors, GL_RG32F, GL_RG, GL_FLOAT) \
- X(SparseElements, GL_R16I, GL_RED_INTEGER, GL_SHORT) \
- X(TransmitReceiveOrientations, GL_R8I, GL_RED_INTEGER, GL_BYTE)
-
-#define BEAMFORMER_COMPUTE_TEXTURE_LIST_FULL \
- BEAMFORMER_COMPUTE_TEXTURE_LIST \
- X(Hadamard, GL_R16F)
+// X(kind, format, elements)
+#define BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST \
+ X(Hadamard, f16, BeamformerMaxChannelCount * BeamformerMaxChannelCount) \
+ X(FocalVectors, v2, BeamformerMaxChannelCount) \
+ X(SparseElements, i16, BeamformerMaxChannelCount) \
+ X(TransmitReceiveOrientations, u16, BeamformerMaxChannelCount) \
typedef enum {
- #define X(k, ...) BeamformerComputeTextureKind_##k,
- BEAMFORMER_COMPUTE_TEXTURE_LIST_FULL
+ #define X(k, ...) BeamformerComputeArrayParameterKind_##k,
+ BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST
#undef X
- BeamformerComputeTextureKind_Count
-} BeamformerComputeTextureKind;
-static_assert((BeamformerComputeTextureKind_Count - 1) == BeamformerComputeTextureKind_Hadamard,
- "BeamformerComputeTextureKind_Hadamard must be end of TextureKinds");
+ BeamformerComputeArrayParameterKind_Count
+} BeamformerComputeArrayParameterKind;
+
+// NOTE(rnp): only used to calculate offsets, never used directly
+#define X(name, type, elements) alignas(64) type name[elements];
+typedef struct {BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST} BeamformerComputeArrayParameters;
+#undef X
typedef struct {
uv3 layout;
@@ -197,7 +267,7 @@ typedef struct BeamformerComputePlan BeamformerComputePlan;
struct BeamformerComputePlan {
BeamformerComputePipeline pipeline;
- u32 programs[BeamformerMaxComputeShaderStages];
+ VulkanHandle vulkan_pipelines[BeamformerMaxComputeShaderStages];
u32 dirty_programs;
@@ -214,14 +284,15 @@ struct BeamformerComputePlan {
iv3 output_points;
i32 average_frames;
- u32 textures[BeamformerComputeTextureKind_Count];
- u32 ubos[BeamformerComputeUBOKind_Count];
+ // TODO(rnp): specialization constants
+ v2 xdc_element_pitch;
+ m4 xdc_transform;
+ // TODO(rnp): probably just compute this every time
+ m4 das_voxel_transform;
- BeamformerFilter filters[BeamformerFilterSlots];
+ GPUBuffer array_parameters;
- #define X(k, type, name) type name ##_ubo_data;
- BEAMFORMER_COMPUTE_UBO_LIST
- #undef X
+ BeamformerFilter filters[BeamformerFilterSlots];
u128 shader_hashes[BeamformerMaxComputeShaderStages];
BeamformerShaderDescriptor shader_descriptors[BeamformerMaxComputeShaderStages];
@@ -230,50 +301,20 @@ struct BeamformerComputePlan {
};
typedef struct {
- // NOTE(rnp): w32 doesn't transfer ownership of these when they are imported
- // into the driver. For now just store them here, this code won't be around for long
- OSHandle upload_semaphores_handles[BeamformerMaxRawDataFramesInFlight];
- VulkanHandle vk_upload_semaphores[BeamformerMaxRawDataFramesInFlight];
- u32 gl_upload_semaphores[BeamformerMaxRawDataFramesInFlight];
-
- GLsync compute_syncs[BeamformerMaxRawDataFramesInFlight];
-
- u64 uploaded_data_indices[BeamformerMaxRawDataFramesInFlight];
+ u64 upload_complete_values[BeamformerMaxRawDataFramesInFlight];
+ u64 compute_complete_values[BeamformerMaxRawDataFramesInFlight];
GPUBuffer buffer;
- OSHandle export_handle;
-
- u32 ssbo, memory_object;
u32 active_rf_size;
- u32 data_timestamp_query;
+
+ u64 timestamp;
u64 insertion_index;
u64 compute_index;
} BeamformerRFBuffer;
typedef struct {
- BeamformerRFBuffer rf_buffer;
-
- BeamformerComputePlan *compute_plans[BeamformerMaxParameterBlocks];
- BeamformerComputePlan *compute_plan_freelist;
-
- /* NOTE(rnp): two interstage ssbos are allocated so that they may be used to
- * ping pong data between compute stages */
- u32 ping_pong_ssbos[2];
- u32 last_output_ssbo_index;
-
- u32 ping_pong_ssbo_size;
-
- f32 processing_progress;
- b32 processing_compute;
-
- u32 shader_timer_ids[BeamformerMaxComputeShaderStages];
-
- BeamformerRenderModel unit_cube_model;
-} BeamformerComputeContext;
-
-typedef struct {
BeamformerComputeStatsTable table;
f32 average_times[BeamformerShaderKind_Count];
@@ -296,7 +337,11 @@ typedef struct {
u64 timer_count;
ComputeTimingInfoKind kind;
union {
- BeamformerShaderKind shader;
+ struct {
+ static_assert(BeamformerShaderKind_Count <= U16_MAX, "");
+ u16 shader;
+ u16 shader_slot;
+ };
};
} ComputeTimingInfo;
@@ -304,6 +349,10 @@ typedef struct {
u32 write_index;
u32 read_index;
b32 compute_frame_active;
+
+ u32 in_flight_shader_count;
+ BeamformerShaderKind in_flight_shader_ids[BeamformerMaxComputeShaderStages];
+
ComputeTimingInfo buffer[4096];
} ComputeTimingTable;
@@ -315,34 +364,57 @@ typedef struct {
i32 *compute_worker_sync;
} BeamformerUploadThreadContext;
-struct BeamformerFrame {
- u32 texture;
- b32 ready_to_present;
-
- iv3 dim;
- i32 mips;
+typedef struct {
+ u64 buffer_offset;
+ u64 timeline_valid_value;
/* NOTE: for use when displaying either prebeamformed frames or on the current frame
* when we intend to recompute on the next frame */
m4 voxel_transform;
- // metadata
- GLenum gl_kind;
+ iv3 points;
+
u32 id;
u32 compound_count;
- u32 parameter_block;
+ BeamformerDataKind data_kind;
BeamformerAcquisitionKind acquisition_kind;
BeamformerViewPlaneTag view_plane_tag;
+} BeamformerFrame;
- BeamformerFrame *next;
-};
+/* NOTE(rnp): backing storage for beamformed frames. The number of backlog frames
+* is dependent on the currently requested output size. */
+typedef struct {
+ GPUBuffer buffer[1];
+
+ u64 next_offset;
+ u64 counter;
+
+ BeamformerFrame frames[BeamformerMaxBacklogFrames];
+} BeamformerFrameBacklog;
+
+typedef struct {
+ BeamformerRFBuffer rf_buffer;
+
+ BeamformerComputePlan *compute_plans[BeamformerMaxParameterBlocks];
+ BeamformerComputePlan *compute_plan_freelist;
+
+ VulkanHandle compute_internal_pipelines[BeamformerShaderKind_ComputeInternalCount];
+
+ /* NOTE(rnp): used to ping pong data between compute stages.
+ * Half the buffer will be used for reading and the other for writing. */
+ GPUBuffer ping_pong_buffer;
+ u32 ping_pong_input_index;
+
+ f32 processing_progress;
+ b32 processing_compute;
+
+ BeamformerFrameBacklog backlog;
+} BeamformerComputeContext;
typedef struct {
OSThread handle;
Arena arena;
- iptr window_handle;
- iptr gl_context;
iptr user_context;
i32 sync_variable;
b32 awake;
@@ -367,26 +439,15 @@ typedef struct {
u64 frame_timestamp;
- BeamformerComputeContext compute_context;
-
- /* TODO(rnp): ideally this would go in the UI but its hard to manage with the UI
- * destroying itself on hot-reload */
- FrameViewRenderContext frame_view_render_context;
-
Stream error_stream;
- BeamformWorkQueue *beamform_work_queue;
-
- ComputeShaderStats *compute_shader_stats;
- ComputeTimingTable *compute_timing_table;
-
BeamformerSharedMemory *shared_memory;
i64 shared_memory_size;
- BeamformerFrame beamform_frames[BeamformerMaxBacklogFrames];
BeamformerFrame *latest_frame;
- u32 next_render_frame_index;
- u32 display_frame_index;
+
+ // TODO(rnp): track elsewhere
+ b32 render_shader_updated;
/* NOTE: this will only be used when we are averaging */
u32 averaged_frame_index;
@@ -394,31 +455,47 @@ typedef struct {
GLWorkerThreadContext upload_worker;
GLWorkerThreadContext compute_worker;
+
+ BeamformerComputeContext compute_context;
+
+ ComputeShaderStats compute_shader_stats[1];
+ ComputeTimingTable compute_timing_table[1];
+
+ BeamformWorkQueue beamform_work_queue[1];
} BeamformerCtx;
#define BeamformerContextMemory(m) (BeamformerCtx *)align_pointer_up((m), alignof(BeamformerCtx));
typedef enum {
- BeamformerFileReloadKind_Shader,
+ BeamformerFileReloadKind_ComputeInternalShader,
BeamformerFileReloadKind_ComputeShader,
+ BeamformerFileReloadKind_RenderShader,
} BeamformerFileReloadKind;
-typedef struct BeamformerShaderReloadContext BeamformerShaderReloadContext;
-struct BeamformerShaderReloadContext {
- BeamformerShaderReloadContext * link;
- s8 header;
- GLenum gl_type;
- i32 reloadable_info_index;
-};
+typedef struct {
+ BeamformerShaderKind shader;
+ VulkanHandle * pipeline;
+} BeamformerShaderReloadData;
+
+typedef struct {
+ BeamformerShaderKind shader;
+ VulkanShaderKind shader_kind;
+
+ // NOTE(rnp): based on BakeShaders compile time value
+ s8 filename_or_data;
+
+ BeamformerShaderDescriptor *shader_descriptor;
+
+ uv3 layout;
+} BeamformerShaderReloadInfo;
typedef struct {
BeamformerFileReloadKind kind;
union {
- BeamformerShaderReloadContext * shader_reload_context;
- BeamformerShaderKind compute_shader_kind;
+ BeamformerShaderReloadData shader_reload;
};
} BeamformerFileReloadContext;
-#define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(iptr user_context, Arena *arena, iptr gl_context)
+#define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(BeamformerCtx *ctx, Arena *arena)
typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn);
#define BEAMFORMER_RF_UPLOAD_FN(name) void name(BeamformerUploadThreadContext *ctx)
diff --git a/beamformer_parameters.h b/beamformer_parameters.h
@@ -10,10 +10,12 @@
*/
typedef struct {
+ uint64_t shader_count;
+ uint32_t shader_ids[BeamformerMaxComputeShaderStages];
/* NOTE(rnp): this wants to be iterated on both dimensions. it depends entirely on which
* visualization method you want to use. the coalescing function wants both directions */
- float times[32][BeamformerMaxComputeShaderStages];
- float rf_time_deltas[32];
+ float times[32][BeamformerMaxComputeShaderStages];
+ float rf_time_deltas[32];
} BeamformerComputeStatsTable;
/* X(type, id, pretty name) */
diff --git a/beamformer_shared_memory.c b/beamformer_shared_memory.c
@@ -1,7 +1,5 @@
/* See LICENSE for license details. */
-#define BEAMFORMER_SHARED_MEMORY_VERSION (28UL)
-
-typedef struct BeamformerFrame BeamformerFrame;
+#define BEAMFORMER_SHARED_MEMORY_VERSION (29UL)
typedef enum {
BeamformerWorkKind_Compute,
@@ -39,8 +37,7 @@ typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Coun
#undef X
typedef struct {
- BeamformerFrame *frame;
- u32 parameter_block;
+ u32 parameter_block;
} BeamformerComputeWorkContext;
typedef struct {
@@ -161,6 +158,8 @@ typedef struct {
/* TODO(rnp): this is really sucky. we need a better way to communicate this */
u64 rf_block_rf_size;
+ u64 max_beamformed_data_size;
+
BeamformerLiveImagingParameters live_imaging_parameters;
BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags;
diff --git a/build.c b/build.c
@@ -741,9 +741,8 @@ build_raylib(Arena a)
{
b32 result = 1, shared = config.debug;
char *libraylib = shared ? OS_SHARED_LINK_LIB("raylib") : OUTPUT_LIB(OS_STATIC_LIB("raylib"));
- if (needs_rebuild(libraylib, "external/include/rlgl.h", "external/raylib")) {
+ if (needs_rebuild(libraylib, "external/raylib")) {
git_submodule_update(a, "external/raylib");
- os_copy_file("external/raylib/src/rlgl.h", "external/include/rlgl.h");
CommandList cc = {0};
cmd_base(&a, &cc, 0, config.debug);
@@ -752,16 +751,17 @@ build_raylib(Arena a)
if (!is_msvc) cmd_append(&a, &cc, "-Wno-unused-but-set-variable");
cmd_append(&a, &cc, "-Iexternal/include", "-Iexternal/raylib/src", "-Iexternal/raylib/src/external/glfw/include");
#define RAYLIB_SOURCES \
+ X(rcore) \
X(rglfw) \
X(rshapes) \
X(rtext) \
X(rtextures) \
X(utils)
#define X(name) "external/raylib/src/" #name ".c",
- char *srcs[] = {"external/rcore_extended.c", RAYLIB_SOURCES};
+ char *srcs[] = {RAYLIB_SOURCES};
#undef X
#define X(name) OUTPUT(OBJECT(#name)),
- char *outs[] = {OUTPUT(OBJECT("rcore_extended")), RAYLIB_SOURCES};
+ char *outs[] = {RAYLIB_SOURCES};
#undef X
if (shared) {
@@ -1045,9 +1045,11 @@ meta_end_and_write_matlab(MetaprogramContext *m, char *path)
X(EndScope) \
X(Enumeration) \
X(Expand) \
+ X(FragmentShader) \
X(Library) \
X(MATLAB) \
X(PushConstants) \
+ X(RenderShader) \
X(Shader) \
X(ShaderAlias) \
X(ShaderGroup) \
@@ -1055,6 +1057,7 @@ meta_end_and_write_matlab(MetaprogramContext *m, char *path)
X(Struct) \
X(Table) \
X(Union) \
+ X(VertexShader) \
typedef enum {
#define X(k, ...) MetaEntryKind_## k,
@@ -1080,14 +1083,14 @@ typedef enum {
} MetaEmitLang;
#define META_KIND_LIST \
- X(M4, m4, mat4, float, single, 64, 16) \
- X(V4, v4, vec4, float, single, 16, 4) \
- X(SV4, iv4, ivec4, int32_t, int32, 16, 4) \
- X(UV4, uv4, uvec4, uint32_t, uint32, 16, 4) \
- X(UV2, uv2, uvec2, uint32_t, uint32, 8, 2) \
- X(V3, v3, vec3, float, single, 12, 3) \
- X(V2, v2, vec2, float, single, 8, 2) \
- X(F32, f32, float, float, single, 4, 1) \
+ X(M4, m4, f32mat4, float, single, 64, 16) \
+ X(V4, v4, f32vec4, float, single, 16, 4) \
+ X(SV4, iv4, i32vec4, int32_t, int32, 16, 4) \
+ X(UV4, uv4, u32vec4, uint32_t, uint32, 16, 4) \
+ X(UV2, uv2, u32vec2, uint32_t, uint32, 8, 2) \
+ X(V3, v3, f32vec3, float, single, 12, 3) \
+ X(V2, v2, f32vec2, float, single, 8, 2) \
+ X(F32, f32, float32_t, float, single, 4, 1) \
X(S32, i32, int32_t, int32_t, int32, 4, 1) \
X(S16, i16, int16_t, int16_t, int16, 2, 1) \
X(S8, i8, int8_t, int8_t, int8, 1, 1) \
@@ -1749,14 +1752,28 @@ typedef struct {
typedef enum {
MetaShaderKind_Alias,
MetaShaderKind_Compute,
+ MetaShaderKind_Render,
MetaShaderKind_Count,
} MetaShaderKind;
+typedef enum {
+ MetaShaderPrimitiveKind_Mesh,
+ MetaShaderPrimitiveKind_Vertex,
+ MetaShaderPrimitiveKind_Count,
+} MetaShaderPrimitiveKind;
+
+typedef struct {
+ MetaShaderPrimitiveKind kind;
+} MetaRenderShader;
+
typedef struct {
MetaShaderKind kind;
MetaIDList entity_reference_ids;
- s8 file;
- MetaEntityID alias_parent_id;
+ s8 files[2];
+ union {
+ MetaEntityID alias_parent_id;
+ MetaRenderShader render;
+ };
} MetaShader;
#define META_STRUCT_FIELDS \
@@ -2320,6 +2337,44 @@ meta_pack_shader_common(MetaContext *ctx, MetaEntityID shader_id, MetaEntry *e,
}
function i64
+meta_pack_render_shader(MetaContext *ctx, MetaEntry *entries, i64 entry_count, MetaEntityID group_entity_id)
+{
+ assert(entries[0].kind == MetaEntryKind_RenderShader);
+
+ MetaEntityID entity_id = meta_intern_entity(ctx, entries->name, MetaEntityKind_Shader,
+ group_entity_id, entries->location, 0);
+ meta_entity(ctx, entity_id)->shader.kind = MetaShaderKind_Render;
+
+ meta_entry_argument_expected(entries);
+
+ MetaEntryScope scope = meta_entry_extract_scope(entries, entry_count);
+ if (scope.consumed > 1) {
+ for (MetaEntry *e = scope.start; e < scope.one_past_last; e++) {
+ switch (e->kind) {
+
+ case MetaEntryKind_VertexShader:{
+ if (meta_entity(ctx, entity_id)->shader.files[0].len)
+ meta_entry_error(e, "primitive shader file redefined\n");
+ meta_entity(ctx, entity_id)->shader.files[0] = meta_entry_argument_expect(e, 0, MetaEntryArgumentKind_String).string;
+ meta_entity(ctx, entity_id)->shader.render.kind = MetaShaderPrimitiveKind_Vertex;
+ }break;
+
+ case MetaEntryKind_FragmentShader:{
+ if (meta_entity(ctx, entity_id)->shader.files[1].len)
+ meta_entry_error(e, "fragment shader file redefined\n");
+ meta_entity(ctx, entity_id)->shader.files[1] = meta_entry_argument_expect(e, 0, MetaEntryArgumentKind_String).string;
+ }break;
+
+ default:{
+ e += meta_pack_shader_common(ctx, entity_id, e, scope.one_past_last - e, group_entity_id);
+ }break;
+ }
+ }
+ }
+ return scope.consumed;
+}
+
+function i64
meta_pack_compute_shader(MetaContext *ctx, MetaEntry *entries, i64 entry_count, MetaEntityID group_entity_id)
{
assert(entries[0].kind == MetaEntryKind_Shader);
@@ -2332,7 +2387,7 @@ meta_pack_compute_shader(MetaContext *ctx, MetaEntry *entries, i64 entry_count,
meta_entry_argument_expected(entries, s8("[file_name]"));
} else if (entries->argument_count == 1) {
s8 shader_file = meta_entry_argument_expect(entries, 0, MetaEntryArgumentKind_String).string;
- meta_entity(ctx, entity_id)->shader.file = shader_file;
+ meta_entity(ctx, entity_id)->shader.files[0] = shader_file;
}
MetaEntryScope scope = meta_entry_extract_scope(entries, entry_count);
@@ -2360,6 +2415,9 @@ meta_pack_shader_group(MetaContext *ctx, MetaEntry *entries, i64 entry_count)
if (scope.consumed > 1) {
for (MetaEntry *e = scope.start; e < scope.one_past_last; e++) {
switch (e->kind) {
+ case MetaEntryKind_RenderShader:{
+ e += meta_pack_render_shader(ctx, e, scope.one_past_last - e, entity_id);
+ }break;
case MetaEntryKind_Shader:{
e += meta_pack_compute_shader(ctx, e, scope.one_past_last - e, entity_id);
}break;
@@ -3480,12 +3538,15 @@ meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx)
}
} meta_end_scope(m, s8("};\n"));
- meta_begin_scope(m, s8("read_only global s8 " META_NAMESPACE_LOWER "_reloadable_shader_files[] = {"));
+ meta_begin_scope(m, s8("read_only global s8 *" META_NAMESPACE_LOWER "_reloadable_shader_files[] = {"));
{
for (da_count shader = 0; shader < ctx->base_shader_count; shader++) {
da_count id = ctx->base_shader_ids[shader];
MetaShader *s = &ctx->entities.data[id].shader;
- meta_push_line(m, s8("s8_comp(\""), s->file, s8("\"),"));
+ meta_begin_line(m, s8("(s8 []){s8_comp(\""), s->files[0], s8("\")"));
+ if (s->files[1].len)
+ meta_push(m, s8(", s8_comp(\""), s->files[1], s8("\")"));
+ meta_end_line(m, s8("},"));
}
} meta_end_scope(m, s8("};\n"));
@@ -3558,7 +3619,7 @@ meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx)
.element_count_style = MetaPushStructStyle_C,
.base_types = meta_kind_glsl_types,
.prefix = str8("\" "),
- .suffix = str8("\\n\""),
+ .suffix = str8(";\\n\""),
});
meta_push_line(m, s8("\"};\\n\""));
meta_push_line(m, s8("\"\\n\"),"));
@@ -3566,7 +3627,7 @@ meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx)
case MetaEntityKind_PushConstants:{
meta_push_line(m, s8("s8_comp(\"\""));
- meta_push_line(m, s8("\"layout(std140, binding = 0) uniform PushConstants {\\n\""));
+ meta_push_line(m, s8("\"layout(push_constant, std430) uniform PushConstants {\\n\""));
meta_push_struct_body(ctx, m, e, (MetaPushStructParameters){
.layout_style = MetaPushStructStyle_C,
.union_style = MetaPushStructStyle_C,
@@ -3593,6 +3654,21 @@ meta_push_shader_reload_info(MetaprogramContext *m, MetaContext *ctx)
m->scratch = ctx->scratch;
}
} meta_end_scope(m, s8("};\n"));
+
+ meta_begin_scope(m, s8("read_only global b8 " META_NAMESPACE_LOWER "_shader_has_primitive[] = {"));
+ for (da_count bs = 0; bs < ctx->base_shader_count; bs++) {
+ MetaShader *s = &ctx->entities.data[ctx->base_shader_ids[bs]].shader;
+ meta_push_line(m, s->kind == MetaShaderKind_Render ? s8("1,") : s8("0,"));
+ }
+ meta_end_scope(m, s8("};\n"));
+
+ meta_begin_scope(m, s8("read_only global b8 " META_NAMESPACE_LOWER "_shader_primitive_is_vertex[] = {"));
+ for (da_count bs = 0; bs < ctx->base_shader_count; bs++) {
+ MetaShader *s = &ctx->entities.data[ctx->base_shader_ids[bs]].shader;
+ b8 vertex = s->kind == MetaShaderKind_Render && s->render.kind == MetaShaderPrimitiveKind_Vertex;
+ meta_push_line(m, vertex ? s8("1,") : s8("0,"));
+ }
+ meta_end_scope(m, s8("};\n"));
}
function void
@@ -3603,30 +3679,67 @@ meta_push_shader_bake(MetaprogramContext *m, MetaContext *ctx)
s8 shader_name = ctx->entity_names.data[ctx->base_shader_ids[bs]];
- meta_begin_line(m, s8("read_only global u8 " META_NAMESPACE_LOWER "_shader_"));
- for (i64 i = 0; i < shader_name.len; i++)
- stream_append_byte(&m->stream, ToLower(shader_name.data[i]));
-
- meta_begin_scope(m, s8("_bytes[] = {")); {
- Arena scratch = m->scratch;
- s8 filename = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), s8("shaders"), s->file);
- s8 file = read_entire_file((c8 *)filename.data, &scratch);
- metagen_push_byte_array(m, file);
- } meta_end_scope(m, s8("};\n"));
+ for EachElement(s->files, it) {
+ if (s->files[it].len > 0) {
+ meta_begin_line(m, s8("read_only global u8 " META_NAMESPACE_LOWER "_shader_"));
+ for (i64 i = 0; i < shader_name.len; i++)
+ stream_append_byte(&m->stream, ToLower(shader_name.data[i]));
+
+ if (s->kind == MetaShaderKind_Render)
+ meta_push(m, it == 0 ? s8("_primitive") : s8("_fragment"));
+
+ meta_begin_scope(m, s8("_bytes[] = {")); {
+ Arena scratch = m->scratch;
+ s8 filename = push_s8_from_parts(&scratch, s8(OS_PATH_SEPARATOR), s8("shaders"), s->files[it]);
+ s8 file = read_entire_file((c8 *)filename.data, &scratch);
+ metagen_push_byte_array(m, file);
+ } meta_end_scope(m, s8("};\n"));
+ }
+ }
}
- meta_begin_scope(m, s8("read_only global s8 " META_NAMESPACE_LOWER "_shader_data[] = {")); {
+ meta_begin_scope(m, s8("read_only global s8 *" META_NAMESPACE_LOWER "_shader_data[] = {")); {
for (da_count bs = 0; bs < ctx->base_shader_count; bs++) {
+ MetaShader *s = &ctx->entities.data[ctx->base_shader_ids[bs]].shader;
+
s8 shader_name = ctx->entity_names.data[ctx->base_shader_ids[bs]];
- meta_begin_line(m, s8("{.data = " META_NAMESPACE_LOWER "_shader_"));
- for (iz i = 0; i < shader_name.len; i++)
+ if (s->kind == MetaShaderKind_Render) {
+ meta_begin_scope(m, s8("(s8 []){"));
+ meta_indent(m);
+ } else {
+ meta_begin_line(m, s8("(s8 []){"));
+ }
+
+ meta_push(m, s8("{.data = " META_NAMESPACE_LOWER "_shader_"));
+ for (i64 i = 0; i < shader_name.len; i++)
stream_append_byte(&m->stream, ToLower(shader_name.data[i]));
+ if (s->kind == MetaShaderKind_Render)
+ meta_push(m, s8("_primitive"));
+
meta_push(m, s8("_bytes, .len = countof(" META_NAMESPACE_LOWER "_shader_"));
- for (iz i = 0; i < shader_name.len; i++)
+ for (i64 i = 0; i < shader_name.len; i++)
stream_append_byte(&m->stream, ToLower(shader_name.data[i]));
- meta_end_line(m, s8("_bytes)},"));
+
+ if (s->kind == MetaShaderKind_Render)
+ meta_push(m, s8("_primitive"));
+ meta_push(m, s8("_bytes)}"));
+
+ if (s->kind == MetaShaderKind_Render) {
+ meta_end_line(m, s8(","));
+ meta_begin_line(m, s8("{.data = " META_NAMESPACE_LOWER "_shader_"));
+ for (i64 i = 0; i < shader_name.len; i++)
+ stream_append_byte(&m->stream, ToLower(shader_name.data[i]));
+
+ meta_push(m, s8("_fragment_bytes, .len = countof(" META_NAMESPACE_LOWER "_shader_"));
+ for (i64 i = 0; i < shader_name.len; i++)
+ stream_append_byte(&m->stream, ToLower(shader_name.data[i]));
+ meta_end_line(m, s8("_fragment_bytes)}"));
+ }
+
+ if (s->kind == MetaShaderKind_Render) meta_end_scope(m, s8("},"));
+ else meta_end_line(m, s8("},"));
}
} meta_end_scope(m, s8("};\n"));
}
@@ -3662,7 +3775,9 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena)
u32 dep_count = 0;
for (da_count bs = 0; bs < ctx->base_shader_count; bs++) {
MetaShader *s = &ctx->entities.data[ctx->base_shader_ids[bs]].shader;
- deps[dep_count++] = (c8 *)push_s8_from_parts(&m->scratch, s8(OS_PATH_SEPARATOR), s8("shaders"), s->file).data;
+ deps[dep_count++] = (c8 *)push_s8_from_parts(&m->scratch, s8(OS_PATH_SEPARATOR), s8("shaders"), s->files[0]).data;
+ if (s->files[1].len > 0)
+ deps[dep_count++] = (c8 *)push_s8_from_parts(&m->scratch, s8(OS_PATH_SEPARATOR), s8("shaders"), s->files[1]).data;
}
if (needs_rebuild_(out_shaders, deps, dep_count)) {
build_log_generate("Bake Shaders");
@@ -3939,6 +4054,19 @@ metagen_emit_c_code(MetaContext *ctx, Arena arena)
}
} meta_end_scope(m, s8("};\n"));
+ meta_begin_scope(m, s8("read_only global u8 " META_NAMESPACE_LOWER "_shader_push_constant_sizes[] = {"));
+ for (da_count bs = 0; bs < ctx->base_shader_count; bs++) {
+ da_count id = ctx->base_shader_ids[bs];
+ MetaEntity *e = ctx->entities.data + id;
+ MetaEntityID pc_id = meta_entity_first_child_of_kind(ctx, e, MetaEntityKind_PushConstants);
+ if (pc_id.value != 0) {
+ meta_push_line(m, s8("sizeof(" META_NAMESPACE_UPPER), ctx->entity_names.data[id], s8("PushConstants),"));
+ } else {
+ meta_push_line(m, s8("0,"));
+ }
+ }
+ meta_end_scope(m, s8("};\n"));
+
//fprintf(stderr, "%.*s\n", (i32)m.stream.widx, m.stream.data);
result = meta_write_and_reset(m, out_meta);
@@ -4734,7 +4862,7 @@ metagen_load_context(Arena *arena, char *filename)
{
for (da_count shader = 0; shader < ctx->entity_kind_counts[MetaEntityKind_Shader]; shader++) {
MetaEntity *e = ctx->entities.data + ctx->entity_kind_ids[MetaEntityKind_Shader][shader];
- if (e->shader.file.len > 0)
+ if (e->shader.files[0].len > 0)
ctx->base_shader_count++;
}
@@ -4744,14 +4872,14 @@ metagen_load_context(Arena *arena, char *filename)
da_count base_shader_ids_index = 0;
for (da_count shader = 0; shader < ctx->entity_kind_counts[MetaEntityKind_Shader]; shader++) {
da_count id = ctx->entity_kind_ids[MetaEntityKind_Shader][shader];
- if (ctx->entities.data[id].shader.file.len > 0)
+ if (ctx->entities.data[id].shader.files[0].len > 0)
ctx->base_shader_ids[base_shader_ids_index++] = id;
}
// NOTE(rnp): first pass to resolve real shaders
for (da_count shader = 0; shader < ctx->entity_kind_counts[MetaEntityKind_Shader]; shader++) {
da_count id = ctx->entity_kind_ids[MetaEntityKind_Shader][shader];
- if (ctx->entities.data[id].shader.file.len > 0) {
+ if (ctx->entities.data[id].shader.files[0].len > 0) {
ctx->base_shader_id_map[shader] = meta_lookup_id_slow(ctx->base_shader_ids,
ctx->base_shader_count,
id);
diff --git a/external/include/raylib_extended.h b/external/include/raylib_extended.h
@@ -1,2 +0,0 @@
-#include "../raylib/src/raylib.h"
-RLAPI void *GetPlatformWindowHandle(void);
diff --git a/external/rcore_extended.c b/external/rcore_extended.c
@@ -1,8 +0,0 @@
-/* NOTE(rnp): hacky stuff to work around broken raylib garbage */
-#include <raylib_extended.h>
-#include "raylib/src/rcore.c"
-
-void *GetPlatformWindowHandle(void)
-{
- return (void *)platform.handle;
-}
diff --git a/generated/beamformer.meta.c b/generated/beamformer.meta.c
@@ -4,7 +4,7 @@
// NOTE: Constants (Integer)
#define BeamformerFilterSlots (4)
-#define BeamformerMaxBacklogFrames (16)
+#define BeamformerMaxBacklogFrames (4096)
#define BeamformerMaxChannelCount (256)
#define BeamformerMaxEmissionsCount (256)
#define BeamformerMaxComputeShaderStages (16)
@@ -84,23 +84,31 @@ typedef enum {
} BeamformerAcquisitionKind;
typedef enum {
- BeamformerShaderKind_CudaDecode = 0,
- BeamformerShaderKind_CudaHilbert = 1,
- BeamformerShaderKind_Decode = 2,
- BeamformerShaderKind_Filter = 3,
- BeamformerShaderKind_Demodulate = 4,
- BeamformerShaderKind_DAS = 5,
- BeamformerShaderKind_MinMax = 6,
- BeamformerShaderKind_Sum = 7,
- BeamformerShaderKind_Render3D = 8,
+ BeamformerShaderKind_CudaDecode = 0,
+ BeamformerShaderKind_CudaHilbert = 1,
+ BeamformerShaderKind_Decode = 2,
+ BeamformerShaderKind_Filter = 3,
+ BeamformerShaderKind_Demodulate = 4,
+ BeamformerShaderKind_DAS = 5,
+ BeamformerShaderKind_Sum = 6,
+ BeamformerShaderKind_MinMax = 7,
+ BeamformerShaderKind_CoherencyWeighting = 8,
+ BeamformerShaderKind_BufferClear = 9,
+ BeamformerShaderKind_RenderBeamformed = 10,
BeamformerShaderKind_Count,
- BeamformerShaderKind_ComputeFirst = BeamformerShaderKind_CudaDecode,
- BeamformerShaderKind_ComputeLast = BeamformerShaderKind_Sum,
- BeamformerShaderKind_ComputeCount = 8,
- BeamformerShaderKind_RenderFirst = BeamformerShaderKind_Render3D,
- BeamformerShaderKind_RenderLast = BeamformerShaderKind_Render3D,
- BeamformerShaderKind_RenderCount = 1,
+ BeamformerShaderKind_ComputeFirst = BeamformerShaderKind_CudaDecode,
+ BeamformerShaderKind_ComputeLast = BeamformerShaderKind_MinMax,
+ BeamformerShaderKind_ComputeCount = 8,
+ BeamformerShaderKind_ComputeHelpersFirst = BeamformerShaderKind_CoherencyWeighting,
+ BeamformerShaderKind_ComputeHelpersLast = BeamformerShaderKind_CoherencyWeighting,
+ BeamformerShaderKind_ComputeHelpersCount = 1,
+ BeamformerShaderKind_ComputeInternalFirst = BeamformerShaderKind_BufferClear,
+ BeamformerShaderKind_ComputeInternalLast = BeamformerShaderKind_BufferClear,
+ BeamformerShaderKind_ComputeInternalCount = 1,
+ BeamformerShaderKind_RenderFirst = BeamformerShaderKind_RenderBeamformed,
+ BeamformerShaderKind_RenderLast = BeamformerShaderKind_RenderBeamformed,
+ BeamformerShaderKind_RenderCount = 1,
} BeamformerShaderKind;
typedef struct {
@@ -141,7 +149,6 @@ typedef struct {
u32 coherency_weighting;
u32 single_focus;
u32 single_orientation;
- u32 fast;
u32 sparse;
u32 acquisition_count;
u32 acquisition_kind;
@@ -159,12 +166,78 @@ typedef struct {
} BeamformerDASBakeParameters;
typedef struct {
- m4 xdc_transform;
- m4 voxel_transform;
- v2 xdc_element_pitch;
+ u32 data_kind;
+} BeamformerCoherencyWeightingBakeParameters;
+
+typedef struct {
+ u64 hadamard_buffer;
+ u64 rf_buffer;
+ u64 output_buffer;
+ u64 output_rf_buffer;
+ b32 first_pass;
+} BeamformerDecodePushConstants;
+
+typedef struct {
+ u64 input_data;
+ u64 output_data;
+ u64 filter_coefficients;
+} BeamformerFilterPushConstants;
+
+typedef struct {
+ m4 xdc_transform;
+ m4 voxel_transform;
+ v2 xdc_element_pitch;
+ u64 rf_data;
+ u64 output_data;
+ u64 incoherent_output;
+ u64 array_parameters;
+ u32 output_size_x;
+ u32 output_size_y;
+ u32 output_size_z;
+ u32 cycle_t;
+ i32 channel_t;
} BeamformerDASPushConstants;
typedef struct {
+ u64 output_data;
+ u64 input_data;
+ u32 image_elements;
+ f32 scale;
+} BeamformerSumPushConstants;
+
+typedef struct {
+ u64 left_side_buffer;
+ u64 right_side_buffer;
+ u32 elements;
+ f32 scale;
+ u32 output_size_x;
+ u32 output_size_y;
+ u32 output_size_z;
+} BeamformerCoherencyWeightingPushConstants;
+
+typedef struct {
+ u64 data;
+ u32 clear_word;
+ u32 words;
+} BeamformerBufferClearPushConstants;
+
+typedef struct {
+ m4 mvp_matrix;
+ u64 positions;
+ u64 normals;
+ v4 bounding_box_colour;
+ f32 bounding_box_fraction;
+ f32 db_cutoff;
+ f32 threshold;
+ f32 gamma;
+ u64 input_data;
+ u32 input_size_x;
+ u32 input_size_y;
+ u32 input_size_z;
+ u32 data_kind;
+} BeamformerRenderBeamformedPushConstants;
+
+typedef struct {
f32 cycles;
f32 frequency;
} BeamformerSineParameters;
@@ -304,10 +377,17 @@ typedef struct {
BeamformerDataKind data_kind;
} BeamformerSimpleParameters;
+typedef struct {
+ v2 focal_vectors[BeamformerMaxChannelCount];
+ i16 sparse_elements[BeamformerMaxChannelCount];
+ u16 transmit_receive_orientations[BeamformerMaxChannelCount];
+} BeamformerDASArrayParameters;
+
typedef union {
- BeamformerDecodeBakeParameters Decode;
- BeamformerFilterBakeParameters Filter;
- BeamformerDASBakeParameters DAS;
+ BeamformerDecodeBakeParameters Decode;
+ BeamformerFilterBakeParameters Filter;
+ BeamformerDASBakeParameters DAS;
+ BeamformerCoherencyWeightingBakeParameters CoherencyWeighting;
} BeamformerShaderBakeParameters;
read_only global u8 beamformer_data_kind_element_size[] = {
@@ -399,27 +479,33 @@ read_only global s8 beamformer_shader_names[] = {
s8_comp("Filter"),
s8_comp("Demodulate"),
s8_comp("DAS"),
- s8_comp("MinMax"),
s8_comp("Sum"),
- s8_comp("Render3D"),
+ s8_comp("MinMax"),
+ s8_comp("CoherencyWeighting"),
+ s8_comp("BufferClear"),
+ s8_comp("RenderBeamformed"),
};
read_only global BeamformerShaderKind beamformer_reloadable_shader_kinds[] = {
BeamformerShaderKind_Decode,
BeamformerShaderKind_Filter,
BeamformerShaderKind_DAS,
- BeamformerShaderKind_MinMax,
BeamformerShaderKind_Sum,
- BeamformerShaderKind_Render3D,
+ BeamformerShaderKind_MinMax,
+ BeamformerShaderKind_CoherencyWeighting,
+ BeamformerShaderKind_BufferClear,
+ BeamformerShaderKind_RenderBeamformed,
};
-read_only global s8 beamformer_reloadable_shader_files[] = {
- s8_comp("decode.glsl"),
- s8_comp("filter.glsl"),
- s8_comp("das.glsl"),
- s8_comp("min_max.glsl"),
- s8_comp("sum.glsl"),
- s8_comp("render_3d.frag.glsl"),
+read_only global s8 *beamformer_reloadable_shader_files[] = {
+ (s8 []){s8_comp("decode.glsl")},
+ (s8 []){s8_comp("filter.glsl")},
+ (s8 []){s8_comp("das.glsl")},
+ (s8 []){s8_comp("sum.glsl")},
+ (s8 []){s8_comp("min_max.glsl")},
+ (s8 []){s8_comp("coherency_weighting.glsl")},
+ (s8 []){s8_comp("buffer_clear.glsl")},
+ (s8 []){s8_comp("render_3d.vert.glsl"), s8_comp("render_3d.frag.glsl")},
};
read_only global i32 beamformer_shader_reloadable_index_by_shader[] = {
@@ -432,6 +518,8 @@ read_only global i32 beamformer_shader_reloadable_index_by_shader[] = {
3,
4,
5,
+ 6,
+ 7,
};
read_only global i32 beamformer_reloadable_compute_shader_info_indices[] = {
@@ -442,10 +530,18 @@ read_only global i32 beamformer_reloadable_compute_shader_info_indices[] = {
4,
};
-read_only global i32 beamformer_reloadable_render_shader_info_indices[] = {
+read_only global i32 beamformer_reloadable_compute_helpers_shader_info_indices[] = {
5,
};
+read_only global i32 beamformer_reloadable_compute_internal_shader_info_indices[] = {
+ 6,
+};
+
+read_only global i32 beamformer_reloadable_render_shader_info_indices[] = {
+ 7,
+};
+
read_only global s8 beamformer_shader_global_header_strings[] = {
s8_comp(""
"#define DataKind_Int16 0\n"
@@ -460,6 +556,23 @@ read_only global s8 beamformer_shader_global_header_strings[] = {
"#define DecodeMode_Hadamard 1\n"
"\n"),
s8_comp(""
+ "layout(push_constant, std430) uniform PushConstants {\n"
+ " uint64_t hadamard_buffer;\n"
+ " uint64_t rf_buffer;\n"
+ " uint64_t output_buffer;\n"
+ " uint64_t output_rf_buffer;\n"
+ " bool first_pass;\n"
+ "};\n"
+ "\n"),
+ s8_comp(""
+ "layout(push_constant, std430) uniform PushConstants {\n"
+ " uint64_t input_data;\n"
+ " uint64_t output_data;\n"
+ " uint64_t filter_coefficients;\n"
+ "};\n"
+ "\n"),
+ s8_comp("#define MaxChannelCount (256)\n\n"),
+ s8_comp(""
"#define AcquisitionKind_FORCES 0\n"
"#define AcquisitionKind_UFORCES 1\n"
"#define AcquisitionKind_HERCULES 2\n"
@@ -484,30 +597,115 @@ read_only global s8 beamformer_shader_global_header_strings[] = {
"#define RCAOrientation_Columns 2\n"
"\n"),
s8_comp(""
- "layout(std140, binding = 0) uniform PushConstants {\n"
- " mat4 xdc_transform;\n"
- " mat4 voxel_transform;\n"
- " vec2 xdc_element_pitch;\n"
+ "struct DASArrayParameters {\n"
+ " f32vec2 focal_vectors[MaxChannelCount];\n"
+ " int16_t sparse_elements[MaxChannelCount];\n"
+ " uint16_t transmit_receive_orientations[MaxChannelCount];\n"
+ "};\n"
+ "\n"),
+ s8_comp(""
+ "layout(push_constant, std430) uniform PushConstants {\n"
+ " f32mat4 xdc_transform;\n"
+ " f32mat4 voxel_transform;\n"
+ " f32vec2 xdc_element_pitch;\n"
+ " uint64_t rf_data;\n"
+ " uint64_t output_data;\n"
+ " uint64_t incoherent_output;\n"
+ " uint64_t array_parameters;\n"
+ " uint32_t output_size_x;\n"
+ " uint32_t output_size_y;\n"
+ " uint32_t output_size_z;\n"
+ " uint32_t cycle_t;\n"
+ " int32_t channel_t;\n"
+ "};\n"
+ "\n"),
+ s8_comp(""
+ "layout(push_constant, std430) uniform PushConstants {\n"
+ " uint64_t output_data;\n"
+ " uint64_t input_data;\n"
+ " uint32_t image_elements;\n"
+ " float32_t scale;\n"
+ "};\n"
+ "\n"),
+ s8_comp(""
+ "layout(push_constant, std430) uniform PushConstants {\n"
+ " uint64_t left_side_buffer;\n"
+ " uint64_t right_side_buffer;\n"
+ " uint32_t elements;\n"
+ " float32_t scale;\n"
+ " uint32_t output_size_x;\n"
+ " uint32_t output_size_y;\n"
+ " uint32_t output_size_z;\n"
+ "};\n"
+ "\n"),
+ s8_comp(""
+ "layout(push_constant, std430) uniform PushConstants {\n"
+ " uint64_t data;\n"
+ " uint32_t clear_word;\n"
+ " uint32_t words;\n"
+ "};\n"
+ "\n"),
+ s8_comp(""
+ "layout(push_constant, std430) uniform PushConstants {\n"
+ " f32mat4 mvp_matrix;\n"
+ " uint64_t positions;\n"
+ " uint64_t normals;\n"
+ " f32vec4 bounding_box_colour;\n"
+ " float32_t bounding_box_fraction;\n"
+ " float32_t db_cutoff;\n"
+ " float32_t threshold;\n"
+ " float32_t gamma;\n"
+ " uint64_t input_data;\n"
+ " uint32_t input_size_x;\n"
+ " uint32_t input_size_y;\n"
+ " uint32_t input_size_z;\n"
+ " uint32_t data_kind;\n"
"};\n"
"\n"),
};
-read_only global i32 *beamformer_shader_header_vectors[] = {
- (i32 []){0, 1},
- (i32 []){0},
- (i32 []){2, 0, 3, 4, 5},
+read_only global b8 beamformer_shader_has_primitive[] = {
+ 0,
0,
0,
0,
+ 0,
+ 0,
+ 0,
+ 1,
};
-read_only global i32 beamformer_shader_header_vector_lengths[] = {
- 2,
- 1,
- 5,
+read_only global b8 beamformer_shader_primitive_is_vertex[] = {
+ 0,
+ 0,
+ 0,
0,
0,
0,
+ 0,
+ 1,
+};
+
+read_only global i32 *beamformer_shader_header_vectors[] = {
+ (i32 []){0, 1, 2},
+ (i32 []){0, 3},
+ (i32 []){4, 5, 0, 6, 7, 8, 9},
+ (i32 []){0, 10},
+ 0,
+ (i32 []){0, 11},
+ (i32 []){12},
+ (i32 []){0, 13},
+};
+
+read_only global i32 beamformer_shader_header_vector_lengths[] = {
+ 3,
+ 2,
+ 7,
+ 2,
+ 0,
+ 2,
+ 1,
+ 2,
};
read_only global s8 *beamformer_shader_bake_parameter_names[] = {
@@ -547,7 +745,6 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = {
s8_comp("CoherencyWeighting"),
s8_comp("SingleFocus"),
s8_comp("SingleOrientation"),
- s8_comp("Fast"),
s8_comp("Sparse"),
s8_comp("AcquisitionCount"),
s8_comp("AcquisitionKind"),
@@ -565,13 +762,19 @@ read_only global s8 *beamformer_shader_bake_parameter_names[] = {
},
0,
0,
+ (s8 []){
+ s8_comp("DataKind"),
+ },
+ 0,
0,
};
read_only global u32 beamformer_shader_bake_parameter_float_bits[] = {
0x00000000UL,
0x00006000UL,
- 0x0007f000UL,
+ 0x0003f800UL,
+ 0x00000000UL,
+ 0x00000000UL,
0x00000000UL,
0x00000000UL,
0x00000000UL,
@@ -580,9 +783,22 @@ read_only global u32 beamformer_shader_bake_parameter_float_bits[] = {
read_only global u8 beamformer_shader_bake_parameter_counts[] = {
12,
15,
- 19,
+ 18,
0,
0,
+ 1,
+ 0,
+ 0,
+};
+
+read_only global u8 beamformer_shader_push_constant_sizes[] = {
+ sizeof(BeamformerDecodePushConstants),
+ sizeof(BeamformerFilterPushConstants),
+ sizeof(BeamformerDASPushConstants),
+ sizeof(BeamformerSumPushConstants),
0,
+ sizeof(BeamformerCoherencyWeightingPushConstants),
+ sizeof(BeamformerBufferClearPushConstants),
+ sizeof(BeamformerRenderBeamformedPushConstants),
};
diff --git a/lib/ogl_beamformer_lib.c b/lib/ogl_beamformer_lib.c
@@ -229,6 +229,15 @@ beamformer_get_last_error_string(void)
return beamformer_error_string(beamformer_get_last_error());
}
+/* NOTE: reports the `max_beamformed_data_size` stored in the library context's
+ * shared memory block (`bp`); when check_shared_memory() fails U64_MAX is
+ * returned instead so the caller can tell the value is not usable */
+u64
+beamformer_maximum_frame_size(void)
+{
+	if (!check_shared_memory())
+		return U64_MAX;
+	return g_beamformer_library_context.bp->max_beamformed_data_size;
+}
+
void
beamformer_set_global_timeout(u32 timeout_ms)
{
@@ -650,12 +659,14 @@ beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t da
complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_CudaHilbert;
}
- iz output_size = output_points.x * output_points.y * output_points.z * (i32)sizeof(f32);
+ u64 output_size = output_points.x * output_points.y * output_points.z * sizeof(f32);
if (complex) output_size *= 2;
+ result = lib_error_check(output_size <= g_beamformer_library_context.bp->max_beamformed_data_size, FrameSizeOverflow);
+
Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp,
g_beamformer_library_context.shared_memory_size);
- if (out_data) result &= lib_error_check(output_size <= arena_capacity(&scratch, u8), ExportSpaceOverflow);
+ if (result && out_data) result &= lib_error_check((iz)output_size <= arena_capacity(&scratch, u8), ExportSpaceOverflow);
if (result) {
result = beamformer_push_data_with_compute(data, data_size, 0, 0);
diff --git a/lib/ogl_beamformer_lib_base.h b/lib/ogl_beamformer_lib_base.h
@@ -27,6 +27,7 @@
X(ExportSpaceOverflow, 16, "not enough space for data export") \
X(SharedMemory, 17, "failed to open shared memory region") \
X(SyncVariable, 18, "failed to acquire lock within timeout period") \
+ X(FrameSizeOverflow, 19, "maximum frame size exceeded") \
#define X(type, num, string) BeamformerLibErrorKind_##type = num,
typedef enum {BEAMFORMER_LIB_ERRORS} BeamformerLibErrorKind;
@@ -38,6 +39,9 @@ BEAMFORMER_LIB_EXPORT BeamformerLibErrorKind beamformer_get_last_error(void);
BEAMFORMER_LIB_EXPORT const char *beamformer_get_last_error_string(void);
BEAMFORMER_LIB_EXPORT const char *beamformer_error_string(BeamformerLibErrorKind kind);
+// NOTE: returns the largest beamformed frame size (in bytes) the beamformer accepts;
+// beamformer_beamform_data() fails with FrameSizeOverflow when the requested output
+// is larger than this. returns U64_MAX if shared memory could not be opened
+BEAMFORMER_LIB_EXPORT uint64_t beamformer_maximum_frame_size(void);
+
///////////////////////////
// NOTE: Simple API
/* Usage:
diff --git a/main_linux.c b/main_linux.c
@@ -252,16 +252,7 @@ load_platform_libraries(BeamformerInput *input)
#if BEAMFORMER_RENDERDOC_HOOKS
local_persist OSLibrary renderdoc_handle = {OSInvalidHandleValue};
renderdoc_handle = load_library(OS_RENDERDOC_SONAME, 0, RTLD_NOW|RTLD_LOCAL|RTLD_NOLOAD);
- if ValidHandle(renderdoc_handle) {
- renderdoc_get_api_fn *get_api = os_lookup_symbol(renderdoc_handle, "RENDERDOC_GetAPI");
- if (get_api) {
- RenderDocAPI *api = 0;
- if (get_api(10600, (void **)&api)) {
- input->renderdoc_start_frame_capture = RENDERDOC_START_FRAME_CAPTURE(api);
- input->renderdoc_end_frame_capture = RENDERDOC_END_FRAME_CAPTURE(api);
- }
- }
- }
+ load_renderdoc_functions(input, renderdoc_handle);
#endif
}
diff --git a/main_w32.c b/main_w32.c
@@ -301,16 +301,7 @@ load_platform_libraries(BeamformerInput *input)
#if BEAMFORMER_RENDERDOC_HOOKS
local_persist OSLibrary renderdoc_handle = {OSInvalidHandleValue};
renderdoc_handle = get_module(OS_RENDERDOC_SONAME);
- if ValidHandle(renderdoc_handle) {
- renderdoc_get_api_fn *get_api = os_lookup_symbol(renderdoc_handle, "RENDERDOC_GetAPI");
- if (get_api) {
- RenderDocAPI *api = 0;
- if (get_api(10600, (void **)&api)) {
- input->renderdoc_start_frame_capture = RENDERDOC_START_FRAME_CAPTURE(api);
- input->renderdoc_end_frame_capture = RENDERDOC_END_FRAME_CAPTURE(api);
- }
- }
- }
+ load_renderdoc_functions(input, renderdoc_handle);
#endif
}
diff --git a/math.c b/math.c
@@ -153,20 +153,6 @@ subrange_n_from_n_m_count(u64 n, u64 n_count, u64 m)
return result;
}
-function b32
-iv2_equal(iv2 a, iv2 b)
-{
- b32 result = a.x == b.x && a.y == b.y;
- return result;
-}
-
-function b32
-iv3_equal(iv3 a, iv3 b)
-{
- b32 result = a.x == b.x && a.y == b.y && a.z == b.z;
- return result;
-}
-
function i32
iv3_dimension(iv3 points)
{
@@ -574,12 +560,12 @@ function m4
perspective_projection(f32 n, f32 f, f32 fov, f32 aspect)
{
+ /* NOTE: builds a perspective projection from the near distance `n`, far distance `f`,
+  * full field of view `fov` (applied to y) and `aspect` (x extent / y extent).
+  * with `a` and `b` below, z = -n maps to -1 and z = -f maps to +1 after the divide by w = -z.
+  * NOTE(review): `fov` is presumably in radians and `c[i]` presumably column i of the
+  * matrix - confirm against tan_f32() and the m4 definition */
m4 result;
- f32 t = tan_f32(fov / 2.0f);
+ f32 t = n * tan_f32(fov / 2.0f); /* half extent of the near plane in y */
f32 r = t * aspect;
f32 a = -(f + n) / (f - n);
f32 b = -2 * f * n / (f - n);
- result.c[0] = (v4){{1 / r, 0, 0, 0}};
- result.c[1] = (v4){{0, 1 / t, 0, 0}};
+ result.c[0] = (v4){{n / r, 0, 0, 0}}; /* x scale: n / r == 1 / (tan(fov / 2) * aspect) */
+ result.c[1] = (v4){{0, n / t, 0, 0}}; /* y scale: n / t == 1 / tan(fov / 2) */
result.c[2] = (v4){{0, 0, a, -1}};
result.c[3] = (v4){{0, 0, b, 0}};
return result;
}
diff --git a/opengl.h b/opengl.h
@@ -11,154 +11,63 @@
#include <GL/gl.h>
/* NOTE: do not add extra 0s to these, even at the start -> garbage compilers will complain */
-#define GL_DYNAMIC_STORAGE_BIT 0x0100
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
-#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000
-#define GL_UNSIGNED_INT_8_8_8_8 0x8035
-#define GL_TEXTURE_3D 0x806F
-#define GL_MAX_3D_TEXTURE_SIZE 0x8073
-#define GL_MULTISAMPLE 0x809D
+#define GL_NONE 0
+
#define GL_CLAMP_TO_BORDER 0x812D
-#define GL_CLAMP_TO_EDGE 0x812F
-#define GL_DEPTH_COMPONENT24 0x81A6
-#define GL_MAJOR_VERSION 0x821B
-#define GL_MINOR_VERSION 0x821C
-#define GL_RG 0x8227
-#define GL_R16F 0x822D
-#define GL_R32F 0x822E
#define GL_RG32F 0x8230
-#define GL_R8I 0x8231
-#define GL_R16I 0x8233
-#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
-#define GL_BUFFER 0x82E0
-#define GL_PROGRAM 0x82E2
-#define GL_MIRRORED_REPEAT 0x8370
-#define GL_QUERY_RESULT 0x8866
#define GL_READ_ONLY 0x88B8
#define GL_WRITE_ONLY 0x88B9
#define GL_READ_WRITE 0x88BA
-#define GL_TIME_ELAPSED 0x88BF
-#define GL_STATIC_DRAW 0x88E4
-#define GL_UNIFORM_BUFFER 0x8A11
-#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30
-#define GL_FRAGMENT_SHADER 0x8B30
-#define GL_VERTEX_SHADER 0x8B31
-#define GL_COMPILE_STATUS 0x8B81
-#define GL_LINK_STATUS 0x8B82
-#define GL_INFO_LOG_LENGTH 0x8B84
-#define GL_MAX_TEXTURE_BUFFER_SIZE 0x8C2B
-#define GL_COLOR_ATTACHMENT0 0x8CE0
-#define GL_DEPTH_ATTACHMENT 0x8D00
-#define GL_FRAMEBUFFER 0x8D40
-#define GL_RENDERBUFFER 0x8D41
-#define GL_RED_INTEGER 0x8D94
-#define GL_TIMESTAMP 0x8E28
-#define GL_MIN_MAP_BUFFER_ALIGNMENT 0x90BC
-#define GL_SHADER_STORAGE_BUFFER 0x90D2
-#define GL_MAX_SHADER_STORAGE_BLOCK_SIZE 0x90DE
-#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111
-#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
-#define GL_TIMEOUT_EXPIRED 0x911B
-#define GL_WAIT_FAILED 0x911D
-#define GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT 0x919F
-#define GL_COMPUTE_SHADER 0x91B9
#define GL_DEBUG_OUTPUT 0x92E0
+#define GL_DEDICATED_MEMORY_OBJECT_EXT 0x9581
#define GL_HANDLE_TYPE_OPAQUE_FD_EXT 0x9586
#define GL_HANDLE_TYPE_OPAQUE_WIN32_EXT 0x9587
+#define GL_LAYOUT_COLOR_ATTACHMENT_EXT 0x958E
+#define GL_LAYOUT_SHADER_READ_ONLY_EXT 0x9591
typedef char GLchar;
typedef i64 GLsizeiptr;
typedef i64 GLintptr;
typedef u64 GLuint64;
-typedef struct __GLsync *GLsync;
/* X(name, ret, params) */
#define OGLProcedureList \
- X(glAttachShader, void, (GLuint program, GLuint shader)) \
- X(glBeginQuery, void, (GLenum target, GLuint id)) \
- X(glBindBufferBase, void, (GLenum target, GLuint index, GLuint buffer)) \
- X(glBindBufferRange, void, (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size)) \
- X(glBindFramebuffer, void, (GLenum target, GLuint framebuffer)) \
X(glBindImageTexture, void, (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format)) \
- X(glBindTextureUnit, void, (GLuint unit, GLuint texture)) \
- X(glBindVertexArray, void, (GLuint array)) \
- X(glBlitNamedFramebuffer, void, (GLuint sfb, GLuint dfb, GLint sx0, GLint sy0, GLint sx1, GLint sy1, GLint dx0, GLint dy0, GLint dx1, GLint dy1, GLbitfield mask, GLenum filter)) \
- X(glClearNamedBufferData, void, (GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void *data)) \
X(glClearNamedFramebufferfv, void, (GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat *value)) \
X(glClearTexImage, void, (GLuint texture, GLint level, GLenum format, GLenum type, const void *data)) \
- X(glClientWaitSync, GLenum, (GLsync sync, GLbitfield flags, GLuint64 timeout)) \
- X(glCompileShader, void, (GLuint shader)) \
- X(glCopyImageSubData, void, (GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth)) \
- X(glCreateBuffers, void, (GLsizei n, GLuint *buffers)) \
- X(glCreateFramebuffers, void, (GLsizei n, GLuint *ids)) \
- X(glCreateProgram, GLuint, (void)) \
- X(glCreateQueries, void, (GLenum target, GLsizei n, GLuint *ids)) \
- X(glCreateRenderbuffers, void, (GLsizei n, GLuint *renderbuffers)) \
- X(glCreateShader, GLuint, (GLenum shaderType)) \
X(glCreateTextures, void, (GLenum target, GLsizei n, GLuint *textures)) \
- X(glCreateVertexArrays, void, (GLsizei n, GLuint *arrays)) \
X(glDebugMessageCallback, void, (void (*)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *message, const void *user), void *user)) \
- X(glDeleteBuffers, void, (GLsizei n, const GLuint *buffers)) \
- X(glDeleteProgram, void, (GLuint program)) \
- X(glDeleteShader, void, (GLuint shader)) \
- X(glDeleteSync, void, (GLsync sync)) \
X(glDispatchCompute, void, (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z)) \
- X(glEndQuery, void, (GLenum target)) \
- X(glEnableVertexArrayAttrib, void, (GLuint vao, GLuint index)) \
- X(glFenceSync, GLsync, (GLenum condition, GLbitfield flags)) \
- X(glGenerateTextureMipmap, void, (GLuint texture)) \
- X(glGetProgramInfoLog, void, (GLuint program, GLsizei maxLength, GLsizei *length, GLchar *infoLog)) \
- X(glGetProgramiv, void, (GLuint program, GLenum pname, GLint *params)) \
- X(glGetQueryObjectui64v, void, (GLuint id, GLenum pname, GLuint64 *params)) \
- X(glGetShaderInfoLog, void, (GLuint shader, GLsizei maxLength, GLsizei *length, GLchar *infoLog)) \
- X(glGetShaderiv, void, (GLuint shader, GLenum pname, GLint *params)) \
- X(glGetTextureImage, void, (GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void *pixels)) \
- X(glLinkProgram, void, (GLuint program)) \
X(glMemoryBarrier, void, (GLbitfield barriers)) \
- X(glNamedBufferData, void, (GLuint buffer, GLsizeiptr size, const void *data, GLenum usage)) \
- X(glNamedBufferStorage, void, (GLuint buffer, GLsizeiptr size, const void *data, GLbitfield flags)) \
- X(glNamedBufferSubData, void, (GLuint buffer, GLintptr offset, GLsizei size, const void *data)) \
- X(glNamedFramebufferRenderbuffer, void, (GLuint fb, GLenum attachment, GLenum renderbuffertarget, GLuint rb)) \
- X(glNamedFramebufferTexture, void, (GLuint fb, GLenum attachment, GLuint texture, GLint level)) \
- X(glNamedRenderbufferStorageMultisample, void, (GLuint rb, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height)) \
X(glObjectLabel, void, (GLenum identifier, GLuint name, GLsizei length, const char *label)) \
- X(glProgramUniform1f, void, (GLuint program, GLint location, GLfloat v0)) \
- X(glProgramUniform1i, void, (GLuint program, GLint location, GLint v0)) \
- X(glProgramUniform1ui, void, (GLuint program, GLint location, GLuint v0)) \
- X(glProgramUniform3iv, void, (GLuint program, GLint location, GLsizei count, const GLint *value)) \
- X(glProgramUniform4fv, void, (GLuint program, GLint location, GLsizei count, const GLfloat *value)) \
- X(glProgramUniformMatrix4fv, void, (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value)) \
- X(glQueryCounter, void, (GLuint id, GLenum target)) \
- X(glShaderSource, void, (GLuint shader, GLsizei count, const GLchar **strings, const GLint *lengths)) \
X(glTextureParameteri, void, (GLuint texture, GLenum pname, GLint param)) \
X(glTextureParameterfv, void, (GLuint texture, GLenum pname, const GLfloat *param)) \
- X(glTextureStorage1D, void, (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width)) \
- X(glTextureStorage2D, void, (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height)) \
- X(glTextureStorage3D, void, (GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth)) \
- X(glTextureSubImage1D, void, (GLuint texture, GLint level, GLint xoff, GLsizei width, GLenum format, GLenum type, const void *pix)) \
- X(glTextureSubImage2D, void, (GLuint texture, GLint level, GLint xoff, GLint yoff, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pix)) \
- X(glTextureSubImage3D, void, (GLuint texture, GLint level, GLint xoff, GLint yoff, GLint zoff, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pix)) \
- X(glUseProgram, void, (GLuint program)) \
- X(glVertexArrayAttribBinding, void, (GLuint vao, GLuint attribindex, GLuint bindingindex)) \
- X(glVertexArrayAttribFormat, void, (GLuint vao, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset)) \
- X(glVertexArrayElementBuffer, void, (GLuint vao, GLuint buffer)) \
- X(glVertexArrayVertexBuffer, void, (GLuint vao, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride)) \
-
-#define OGLRequiredExtensionProcedureList \
+#define OGLRequiredExtensionProcedureListBase \
X(glCreateMemoryObjectsEXT, void, (GLsizei n, GLuint *memoryObjects)) \
X(glDeleteMemoryObjectsEXT, void, (GLsizei n, GLuint *memoryObjects)) \
X(glGenSemaphoresEXT, void, (GLsizei n, GLuint *semaphores)) \
- X(glImportMemoryFdEXT, void, (GLuint memory, GLuint64 size, GLenum handleType, int fd)) \
+ X(glMemoryObjectParameterivEXT, void, (GLuint memoryObject, GLenum pname, const GLint *params)) \
+ X(glSignalSemaphoreEXT, void, (GLuint semaphore, GLuint numBufferBarriers, const GLuint *buffers, GLuint numTextureBarriers, const GLuint *textures, const GLenum *dstLayouts)) \
+ X(glTextureStorageMem2DEXT, void, (GLuint texture, GLsizei levels, GLenum internalFormat, GLsizei width, GLsizei height, GLuint memory, GLuint64 offset)) \
+ X(glWaitSemaphoreEXT, void, (GLuint semaphore, GLuint numBufferBarriers, const GLuint *buffers, GLuint numTextureBarriers, const GLuint *textures, const GLenum *srcLayouts)) \
+
+#define OGLRequiredExtensionProcedureListW32 \
X(glImportMemoryWin32HandleEXT, void, (GLuint memory, GLuint64 size, GLenum handleType, void *handle)) \
- X(glImportSemaphoreFdEXT, void, (GLuint semaphore, GLenum handleType, int fd)) \
X(glImportSemaphoreWin32HandleEXT, void, (GLuint semaphore, GLenum handleType, void *handle)) \
- X(glNamedBufferStorageMemEXT, void, (GLuint buffer, GLsizeiptr size, GLuint memory, GLuint64 offset)) \
- X(glWaitSemaphoreEXT, void, (GLuint semaphore, GLuint numBufferBarriers, const GLuint *buffers, GLuint numTextureBarriers, const GLuint *textures, const GLenum *srcLayouts)) \
+#define OGLRequiredExtensionProcedureListLinux \
+ X(glImportMemoryFdEXT, void, (GLuint memory, GLuint64 size, GLenum handleType, int fd)) \
+ X(glImportSemaphoreFdEXT, void, (GLuint semaphore, GLenum handleType, int fd)) \
+
+#define OGLRequiredExtensionProcedureList \
+ OGLRequiredExtensionProcedureListBase \
+ OGLRequiredExtensionProcedureListW32 \
+ OGLRequiredExtensionProcedureListLinux \
#define X(name, ret, params) typedef ret name##_fn params;
OGLProcedureList
diff --git a/shaders/buffer_clear.glsl b/shaders/buffer_clear.glsl
@@ -0,0 +1,16 @@
+/* See LICENSE for license details. */
+
+/* NOTE: view of the destination buffer as an array of 32 bit words */
+layout(std430, buffer_reference, buffer_reference_align = 8) restrict writeonly buffer Buffer {
+	uint32_t values[];
+};
+
+/* NOTE: each invocation stores `clear_word` into one word of `data`;
+ * invocations whose x index is at or beyond `words` store nothing */
+void main()
+{
+	uint32_t index = gl_GlobalInvocationID.x;
+	if (index >= words)
+		return;
+	Buffer(data).values[index] = clear_word;
+}
diff --git a/shaders/coherency_weighting.glsl b/shaders/coherency_weighting.glsl
@@ -0,0 +1,52 @@
+/* See LICENSE for license details. */
+layout(std430, buffer_reference, buffer_reference_align = 8) restrict buffer Int16 {
+	int16_t values[];
+};
+
+layout(std430, buffer_reference, buffer_reference_align = 8) restrict buffer Int16Complex {
+	i16vec2 values[];
+};
+
+layout(std430, buffer_reference, buffer_reference_align = 8) restrict buffer Float32 {
+	float values[];
+};
+
+layout(std430, buffer_reference, buffer_reference_align = 8) restrict buffer Float32Complex {
+	vec2 values[];
+};
+
+/* NOTE: COHERENT_SAMPLE is the element of left_side_buffer which is scaled in place;
+ * INCOHERENT_SAMPLE is the real valued element of right_side_buffer it is divided by */
+#if DataKind == DataKind_Float32
+	#define COHERENT_SAMPLE(index)   Float32(left_side_buffer).values[index]
+	#define INCOHERENT_SAMPLE(index) Float32(right_side_buffer).values[index]
+#elif DataKind == DataKind_Float32Complex
+	#define COHERENT_SAMPLE(index)   Float32Complex(left_side_buffer).values[index]
+	#define INCOHERENT_SAMPLE(index) Float32(right_side_buffer).values[index]
+#else
+	#error DataKind unsupported for CoherencyWeighting
+#endif
+
+/* NOTE: linear index of voxel (x, y, z); x varies fastest, then y, then z */
+uint32_t output_index(uint32_t x, uint32_t y, uint32_t z)
+{
+	uint32_t result = output_size_x * output_size_y * z + output_size_x * y + x;
+	return result;
+}
+
+/* NOTE: one invocation per output voxel; multiplies the coherent sample by the
+ * ratio (coherent / incoherent) in place. Invocations outside the output do nothing */
+void main()
+{
+	uvec3 out_voxel = gl_GlobalInvocationID;
+	if (!all(lessThan(out_voxel, uvec3(output_size_x, output_size_y, output_size_z))))
+		return;
+
+	uint32_t index = output_index(out_voxel.x, out_voxel.y, out_voxel.z);
+
+	/* NOTE: an incoherent value of exactly 0 would make the division store inf/NaN;
+	 * a 0 denominator is replaced with 1 so the stored value stays finite */
+	float incoherent  = INCOHERENT_SAMPLE(index);
+	float denominator = incoherent + float(incoherent == 0);
+	COHERENT_SAMPLE(index) *= COHERENT_SAMPLE(index) / denominator;
+}
diff --git a/shaders/das.glsl b/shaders/das.glsl
@@ -1,48 +1,54 @@
/* See LICENSE for license details. */
#if DataKind == DataKind_Float32
- #define SAMPLE_TYPE float
- #define TEXTURE_KIND r32f
- #define RESULT_TYPE_CAST(a) (a).x
- #define OUTPUT_TYPE_CAST(a) vec4((a).x, 0, 0, 0)
- #if !Fast
- #define RESULT_TYPE vec2
- #define RESULT_LAST_INDEX 1
+ #if CoherencyWeighting
+ #define RESULT_TYPE vec2
+ #define RESULT_COHERENT_CAST(a) (a).x
+ #define RESULT_INCOHERENT_CAST(a) (a).y
#endif
+ #define SAMPLE_TYPE float
#elif DataKind == DataKind_Float32Complex
- #define SAMPLE_TYPE vec2
- #define TEXTURE_KIND rg32f
- #define RESULT_TYPE_CAST(a) (a).xy
- #define OUTPUT_TYPE_CAST(a) vec4((a).xy, 0, 0)
- #if !Fast
- #define RESULT_TYPE vec3
- #define RESULT_LAST_INDEX 2
+ #if CoherencyWeighting
+ #define RESULT_TYPE vec3
+ #define RESULT_COHERENT_CAST(a) (a).xy
+ #define RESULT_INCOHERENT_CAST(a) (a).z
#endif
+ #define SAMPLE_TYPE vec2
#else
#error DataKind unsupported for DAS
#endif
-layout(std430, binding = 1) readonly restrict buffer buffer_1 {
- SAMPLE_TYPE rf_data[];
-};
-
#ifndef RESULT_TYPE
#define RESULT_TYPE SAMPLE_TYPE
#endif
-#if Fast
- #define RESULT_STORE(a, length_a) RESULT_TYPE(a)
- layout(TEXTURE_KIND, binding = 0) restrict uniform image3D u_out_data_tex;
+#ifndef RESULT_COHERENT_CAST
+ #define RESULT_COHERENT_CAST(a) (a)
+#endif
+
+#if CoherencyWeighting
+ #define RESULT_STORE(a) RESULT_TYPE(RESULT_COHERENT_CAST(a), length(a))
#else
- #define RESULT_STORE(a, length_a) RESULT_TYPE(a, length_a)
- layout(TEXTURE_KIND, binding = 0) writeonly restrict uniform image3D u_out_data_tex;
+ #define RESULT_STORE(a) (a)
#endif
-layout(r16i, binding = 1) readonly restrict uniform iimage1D sparse_elements;
-layout(rg32f, binding = 2) readonly restrict uniform image1D focal_vectors;
-layout(r8i, binding = 3) readonly restrict uniform iimage1D transmit_receive_orientations;
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict readonly buffer RF {
+ SAMPLE_TYPE values[];
+};
+
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict buffer Output {
+ SAMPLE_TYPE values[];
+};
-#define RX_ORIENTATION(tx_rx) (((tx_rx) >> 0) & 0x0F)
-#define TX_ORIENTATION(tx_rx) (((tx_rx) >> 4) & 0x0F)
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict buffer IncoherentOutput {
+ float values[];
+};
+
+layout(std430, buffer_reference) restrict readonly buffer ArrayParameters {
+ DASArrayParameters data;
+};
+
+#define RX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 0, 4)
+#define TX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 4, 4)
#define C_SPLINE 0.5
@@ -70,10 +76,10 @@ SAMPLE_TYPE cubic(const int base_index, const float t)
);
SAMPLE_TYPE samples[4] = {
- rf_data[base_index + 0],
- rf_data[base_index + 1],
- rf_data[base_index + 2],
- rf_data[base_index + 3],
+ RF(rf_data).values[base_index + 0],
+ RF(rf_data).values[base_index + 1],
+ RF(rf_data).values[base_index + 2],
+ RF(rf_data).values[base_index + 3],
};
vec4 S = vec4(t * t * t, t * t, t, 1);
@@ -98,13 +104,13 @@ SAMPLE_TYPE sample_rf(const int rf_offset, const float index)
switch (InterpolationMode) {
case InterpolationMode_Nearest:{
if (int(index) >= 0 && int(round(index)) < SampleCount)
- result = rotate_iq(rf_data[rf_offset + int(round(index))], index / SamplingFrequency);
+ result = rotate_iq(RF(rf_data).values[rf_offset + int(round(index))], index / SamplingFrequency);
}break;
case InterpolationMode_Linear:{
if (int(index) >= 0 && int(index) < SampleCount - 1) {
float tk, t = modf(index, tk);
int n = rf_offset + int(tk);
- result = (1 - t) * rf_data[n] + t * rf_data[n + 1];
+ result = (1 - t) * RF(rf_data).values[n] + t * RF(rf_data).values[n + 1];
result = rotate_iq(result, index / SamplingFrequency);
}
}break;
@@ -124,6 +130,12 @@ float sample_index(const float distance)
return time * SamplingFrequency;
}
+/* NOTE: linear output index of voxel (x, y, z); x varies fastest, then y, then z */
+uint32_t output_index(uint32_t x, uint32_t y, uint32_t z)
+{
+	return output_size_x * (output_size_y * z + y) + x;
+}
+
float apodize(const float arg)
{
/* IMPORTANT: do not move calculation of arg into this function. It will generate a
@@ -158,19 +170,22 @@ float cylindrical_wave_transmit_distance(const vec3 point, const float focal_dep
return distance(rca_plane_projection(point, tx_rows), f);
}
-int tx_rx_orientation_for_acquisition(const int acquisition)
+uint16_t tx_rx_orientation_for_acquisition(const int16_t acquisition)
{
- int result = bool(SingleOrientation) ? TransmitReceiveOrientation : imageLoad(transmit_receive_orientations, acquisition).x;
+ uint16_t result = uint16_t(TransmitReceiveOrientation);
+ if (!bool(SingleOrientation))
+ result = ArrayParameters(array_parameters).data.transmit_receive_orientations[acquisition];
return result;
}
-vec2 focal_vector_for_acquisition(const int acquisition)
+vec2 focal_vector_for_acquisition(const int16_t acquisition)
{
- vec2 result = bool(SingleFocus) ? vec2(TransmitAngle, FocusDepth) : imageLoad(focal_vectors, acquisition).xy;
+ vec2 result = bool(SingleFocus) ? vec2(TransmitAngle, FocusDepth)
+ : ArrayParameters(array_parameters).data.focal_vectors[acquisition];
return result;
}
-float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const int transmit_receive_orientation)
+float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const uint16_t transmit_receive_orientation)
{
float result = 0;
if (TX_ORIENTATION(transmit_receive_orientation) != RCAOrientation_None) {
@@ -189,13 +204,13 @@ float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, con
RESULT_TYPE RCA(const vec3 world_point)
{
- const int acquisition_start = bool(Fast)? u_channel : 0;
- const int acquisition_end = bool(Fast)? u_channel + 1 : AcquisitionCount;
+ const int16_t acquisition_start = int16_t(channel_t);
+ const int16_t acquisition_end = int16_t(channel_t + 1);
RESULT_TYPE result = RESULT_TYPE(0);
- for (int acquisition = acquisition_start; acquisition < acquisition_end; acquisition++) {
- const int tx_rx_orientation = tx_rx_orientation_for_acquisition(acquisition);
- const bool rx_rows = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Rows;
- const vec2 focal_vector = focal_vector_for_acquisition(acquisition);
+ for (int16_t acquisition = acquisition_start; acquisition < acquisition_end; acquisition++) {
+ const uint16_t tx_rx_orientation = tx_rx_orientation_for_acquisition(acquisition);
+ const bool rx_rows = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Rows;
+ const vec2 focal_vector = focal_vector_for_acquisition(acquisition);
vec2 xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows);
float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
@@ -209,7 +224,7 @@ RESULT_TYPE RCA(const vec3 world_point)
if (a_arg < 0.5f) {
float sidx = sample_index(transmit_distance + length(receive_vector));
SAMPLE_TYPE value = apodize(a_arg) * sample_rf(rf_offset, sidx);
- result += RESULT_STORE(value, length(value));
+ result += RESULT_STORE(value);
}
rf_offset += SampleCount * AcquisitionCount;
}
@@ -219,10 +234,10 @@ RESULT_TYPE RCA(const vec3 world_point)
RESULT_TYPE HERCULES(const vec3 world_point)
{
- const int tx_rx_orientation = tx_rx_orientation_for_acquisition(0);
- const bool rx_cols = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Columns;
- const vec2 focal_vector = focal_vector_for_acquisition(0);
- const vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
+ const uint16_t tx_rx_orientation = tx_rx_orientation_for_acquisition(int16_t(0));
+ const bool rx_cols = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Columns;
+ const vec2 focal_vector = focal_vector_for_acquisition(int16_t(0));
+ const vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz;
const float transmit_index = sample_index(rca_transmit_distance(world_point, focal_vector, tx_rx_orientation));
const float z_delta_squared = xdc_world_point.z * xdc_world_point.z;
@@ -231,11 +246,7 @@ RESULT_TYPE HERCULES(const vec3 world_point)
const float apodization_test = 0.25f / (f_number_over_z * f_number_over_z);
RESULT_TYPE result = RESULT_TYPE(0);
- #if Fast
- const int rx_channel = u_channel;
- #else
- for (int rx_channel = 0; rx_channel < ChannelCount; rx_channel++)
- #endif
+ const int rx_channel = channel_t;
{
int rf_offset = rx_channel * SampleCount * AcquisitionCount + Sparse * SampleCount;
rf_offset -= int(InterpolationMode == InterpolationMode_Cubic);
@@ -249,7 +260,8 @@ RESULT_TYPE HERCULES(const vec3 world_point)
else element_receive_delta_squared.y *= element_receive_delta_squared.y;
for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
- int tx_channel = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit;
+ int tx_channel = bool(Sparse) ? ArrayParameters(array_parameters).data.sparse_elements[transmit - Sparse]
+ : transmit;
if (rx_cols) element_receive_delta_squared.y = xy_world_point.y - tx_channel * xdc_element_pitch.y;
else element_receive_delta_squared.x = xy_world_point.x - tx_channel * xdc_element_pitch.x;
@@ -265,7 +277,7 @@ RESULT_TYPE HERCULES(const vec3 world_point)
float index = transmit_index + sqrt(z_delta_squared + element_delta_squared) * SamplingFrequency / SpeedOfSound;
SAMPLE_TYPE value = apodization * sample_rf(rf_offset, index);
- result += RESULT_STORE(value, length(value));
+ result += RESULT_STORE(value);
}
rf_offset += SampleCount;
@@ -276,8 +288,8 @@ RESULT_TYPE HERCULES(const vec3 world_point)
RESULT_TYPE FORCES(const vec3 xdc_world_point)
{
- const int rx_channel_start = bool(Fast)? u_channel : 0;
- const int rx_channel_end = bool(Fast)? u_channel + 1 : ChannelCount;
+ const int16_t rx_channel_start = int16_t(channel_t);
+ const int16_t rx_channel_end = int16_t(channel_t + 1);
RESULT_TYPE result = RESULT_TYPE(0);
@@ -285,7 +297,7 @@ RESULT_TYPE FORCES(const vec3 xdc_world_point)
float transmit_y_delta = xdc_world_point.y - xdc_element_pitch.y * ChannelCount / 2;
float transmit_yz_squared = transmit_y_delta * transmit_y_delta + z_delta_squared;
- for (int rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) {
+ for (int16_t rx_channel = rx_channel_start; rx_channel < rx_channel_end; rx_channel++) {
float receive_x_delta = xdc_world_point.x - rx_channel * xdc_element_pitch.x;
float a_arg = abs(FNumber * receive_x_delta / xdc_world_point.z);
@@ -296,12 +308,13 @@ RESULT_TYPE FORCES(const vec3 xdc_world_point)
float receive_index = sample_index(sqrt(receive_x_delta * receive_x_delta + z_delta_squared));
float apodization = apodize(a_arg);
for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
- int tx_channel = bool(Sparse) ? imageLoad(sparse_elements, transmit - Sparse).x : transmit;
+ int tx_channel = bool(Sparse) ? ArrayParameters(array_parameters).data.sparse_elements[transmit - Sparse]
+ : transmit;
float transmit_x_delta = xdc_world_point.x - xdc_element_pitch.x * tx_channel;
float transmit_index = sqrt(transmit_yz_squared + transmit_x_delta * transmit_x_delta) * SamplingFrequency / SpeedOfSound;
SAMPLE_TYPE value = apodization * sample_rf(rf_offset, receive_index + transmit_index);
- result += RESULT_STORE(value, length(value));
+ result += RESULT_STORE(value);
rf_offset += SampleCount;
}
}
@@ -311,15 +324,17 @@ RESULT_TYPE FORCES(const vec3 xdc_world_point)
void main()
{
- ivec3 out_voxel = ivec3(gl_GlobalInvocationID);
- vec3 image_points = vec3(imageSize(u_out_data_tex)) - 1.0f;
- if (!all(lessThan(out_voxel, imageSize(u_out_data_tex))))
+ uvec3 out_voxel = gl_GlobalInvocationID;
+ if (!all(lessThan(out_voxel, uvec3(output_size_x, output_size_y, output_size_z))))
return;
- vec3 point = vec3(out_voxel) / max(vec3(1.0f), image_points);
- vec3 world_point = (voxel_transform * vec4(point, 1)).xyz;
+ vec3 image_points = vec3(output_size_x, output_size_y, output_size_z) - 1.0f;
+ vec3 point = vec3(out_voxel) / max(vec3(1.0f), image_points);
+ vec3 world_point = (voxel_transform * vec4(point, 1)).xyz;
+
+ uint32_t out_index = output_index(out_voxel.x, out_voxel.y, out_voxel.z);
- RESULT_TYPE sum;
+ RESULT_TYPE sum = RESULT_TYPE(0);
switch (AcquisitionKind) {
case AcquisitionKind_FORCES:
case AcquisitionKind_UFORCES:
@@ -340,15 +355,9 @@ void main()
}break;
}
- #if Fast
- sum += RESULT_TYPE_CAST(imageLoad(u_out_data_tex, out_voxel));
- #endif
-
#if CoherencyWeighting
- /* TODO(rnp): scale such that brightness remains ~constant */
- float denominator = sum[RESULT_LAST_INDEX] + float(sum[RESULT_LAST_INDEX] == 0);
- RESULT_TYPE_CAST(sum) *= RESULT_TYPE_CAST(sum) / denominator;
+ IncoherentOutput(incoherent_output).values[out_index] += RESULT_INCOHERENT_CAST(sum);
#endif
- imageStore(u_out_data_tex, out_voxel, OUTPUT_TYPE_CAST(sum));
+ Output(output_data).values[out_index] += RESULT_COHERENT_CAST(sum);
}
diff --git a/shaders/decode.glsl b/shaders/decode.glsl
@@ -9,57 +9,42 @@
*/
#if DataKind == DataKind_Float32
- #define INPUT_DATA_TYPE float
- #define SAMPLE_DATA_TYPE float
- #define SAMPLE_TYPE_CAST(x) (x)
+ #define INPUT_DATA_TYPE float
+ #define SAMPLE_DATA_TYPE float
#elif DataKind == DataKind_Float32Complex
- #define INPUT_DATA_TYPE vec2
- #define SAMPLE_DATA_TYPE vec2
- #define SAMPLE_TYPE_CAST(x) (x)
+ #define INPUT_DATA_TYPE vec2
+ #define SAMPLE_DATA_TYPE vec2
#elif DataKind == DataKind_Int16Complex
- #define INPUT_DATA_TYPE int
- #define SAMPLE_DATA_TYPE vec2
- #define SAMPLE_TYPE_CAST(x) vec2(((x) << 16) >> 16, (x) >> 16)
+ #define INPUT_DATA_TYPE i16vec2
+ #define SAMPLE_DATA_TYPE vec2
#elif DataKind == DataKind_Int16
- #define INPUT_DATA_TYPE int
- #define RF_SAMPLES_PER_INDEX 2
- #if DilateOutput
- #define SAMPLE_DATA_TYPE vec4
- #define SAMPLE_TYPE_CAST(x) vec4(((x) << 16) >> 16, 0, (x) >> 16, 0)
- #else
- #define SAMPLE_DATA_TYPE vec2
- #define SAMPLE_TYPE_CAST(x) vec2(((x) << 16) >> 16, (x) >> 16)
- #define OUTPUT_SAMPLES_PER_INDEX 2
- #endif
+ #define INPUT_DATA_TYPE int16_t
+ #define SAMPLE_DATA_TYPE float
#else
- #error unsupported data kind for Decode
+ #error unsupported data kind for Decode
#endif
-#ifndef OUTPUT_SAMPLES_PER_INDEX
- #define OUTPUT_SAMPLES_PER_INDEX 1
-#endif
-
-#ifndef RF_SAMPLES_PER_INDEX
- #define RF_SAMPLES_PER_INDEX 1
-#endif
+// TODO(rnp): fix DilateOutput
-layout(std430, binding = 1) readonly restrict buffer buffer_1 {
- INPUT_DATA_TYPE rf_data[];
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict readonly buffer RF {
+ INPUT_DATA_TYPE values[];
};
-layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
- INPUT_DATA_TYPE out_rf_data[];
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict writeonly buffer OutputRF {
+ INPUT_DATA_TYPE values[];
};
-layout(std430, binding = 3) writeonly restrict buffer buffer_3 {
- SAMPLE_DATA_TYPE out_data[];
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict writeonly buffer Output {
+ SAMPLE_DATA_TYPE values[];
};
-layout(r16f, binding = 0) readonly restrict uniform image2D hadamard;
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict readonly buffer Hadamard {
+ float16_t values[];
+};
SAMPLE_DATA_TYPE sample_rf_data(uint index)
{
- SAMPLE_DATA_TYPE result = SAMPLE_TYPE_CAST(rf_data[index]);
+ SAMPLE_DATA_TYPE result = SAMPLE_DATA_TYPE(RF(rf_buffer).values[index]);
return result;
}
@@ -67,7 +52,7 @@ SAMPLE_DATA_TYPE sample_rf_data(uint index)
shared INPUT_DATA_TYPE rf[gl_WorkGroupSize.x * TransmitCount];
void run_decode_large(void)
{
- uint time_sample = gl_GlobalInvocationID.x * RF_SAMPLES_PER_INDEX;
+ uint time_sample = gl_GlobalInvocationID.x;
uint channel = gl_GlobalInvocationID.y;
uint transmit = gl_GlobalInvocationID.z * ToProcess;
@@ -78,12 +63,11 @@ void run_decode_large(void)
uint leftover_samples = rf.length() % thread_count;
uint samples_this_thread = samples_per_thread + uint(thread_index < leftover_samples);
- uint rf_offset = (InputChannelStride * channel / RF_SAMPLES_PER_INDEX +
- TransmitCount * gl_WorkGroupID.x * gl_WorkGroupSize.x);
+ uint rf_offset = InputChannelStride * channel + TransmitCount * gl_WorkGroupID.x * gl_WorkGroupSize.x;
for (uint i = 0; i < samples_this_thread; i++) {
uint index = i * thread_count + thread_index;
- rf[index] = rf_data[rf_offset + index];
+ rf[index] = RF(rf_buffer).values[rf_offset + index];
}
barrier();
@@ -94,9 +78,9 @@ void run_decode_large(void)
result[i] = SAMPLE_DATA_TYPE(0);
for (int j = 0; j < TransmitCount; j++) {
- SAMPLE_DATA_TYPE s = SAMPLE_TYPE_CAST(rf[gl_LocalInvocationID.x * TransmitCount + j]);
+ SAMPLE_DATA_TYPE s = SAMPLE_DATA_TYPE(rf[gl_LocalInvocationID.x * TransmitCount + j]);
for (uint i = 0; i < ToProcess; i++)
- result[i] += imageLoad(hadamard, ivec2(j, transmit + i)).x * s;
+ result[i] += s * Hadamard(hadamard_buffer).values[TransmitCount * j + (i + transmit)];
}
for (uint i = 0; i < ToProcess; i++)
@@ -112,30 +96,30 @@ void run_decode_large(void)
for (uint i = 0; i < ToProcess; i++, out_off += OutputTransmitStride)
if (TransmitCount % (gl_WorkGroupSize.z * ToProcess) == 0 || transmit + i < TransmitCount)
- out_data[out_off / OUTPUT_SAMPLES_PER_INDEX] = result[i];
+ Output(output_buffer).values[out_off] = result[i];
}
}
#endif
void run_decode_small(void)
{
- uint time_sample = gl_GlobalInvocationID.x * RF_SAMPLES_PER_INDEX;
+ uint time_sample = gl_GlobalInvocationID.x;
uint channel = gl_GlobalInvocationID.y;
- uint rf_offset = (InputChannelStride * channel + TransmitCount * time_sample) / RF_SAMPLES_PER_INDEX;
+ uint rf_offset = InputChannelStride * channel + TransmitCount * time_sample;
if (time_sample < OutputTransmitStride) {
INPUT_DATA_TYPE rf[TransmitCount];
for (int j = 0; j < TransmitCount; j++)
- rf[j] = rf_data[rf_offset + j];
+ rf[j] = RF(rf_buffer).values[rf_offset + j];
SAMPLE_DATA_TYPE result[TransmitCount];
for (int j = 0; j < TransmitCount; j++)
result[j] = SAMPLE_DATA_TYPE(0);
for (int i = 0; i < TransmitCount; i++) {
- SAMPLE_DATA_TYPE s = SAMPLE_TYPE_CAST(rf[i]);
+ SAMPLE_DATA_TYPE s = SAMPLE_DATA_TYPE(rf[i]);
for (int j = 0; j < TransmitCount; j++) {
- result[j] += imageLoad(hadamard, ivec2(i, j)).x * s;
+ result[j] += s * Hadamard(hadamard_buffer).values[TransmitCount * i + j];
}
}
@@ -145,7 +129,7 @@ void run_decode_small(void)
uint out_off = OutputChannelStride * channel +
OutputSampleStride * time_sample;
for (int i = 0; i < TransmitCount; i++, out_off += OutputTransmitStride)
- out_data[out_off / OUTPUT_SAMPLES_PER_INDEX] = result[i];
+ Output(output_buffer).values[out_off] = result[i];
}
}
@@ -153,40 +137,40 @@ void main()
{
switch (DecodeMode) {
case DecodeMode_None:{
- uint time_sample = gl_GlobalInvocationID.x * RF_SAMPLES_PER_INDEX;
+ uint time_sample = gl_GlobalInvocationID.x;
uint channel = gl_GlobalInvocationID.y;
uint transmit = gl_GlobalInvocationID.z;
if (time_sample < OutputTransmitStride) {
- uint in_off = (InputChannelStride * channel +
- InputTransmitStride * transmit +
- InputSampleStride * time_sample) / RF_SAMPLES_PER_INDEX;
+ uint in_off = InputChannelStride * channel +
+ InputTransmitStride * transmit +
+ InputSampleStride * time_sample;
- uint out_off = (OutputChannelStride * channel +
- OutputTransmitStride * transmit +
- OutputSampleStride * time_sample) / OUTPUT_SAMPLES_PER_INDEX;
+ uint out_off = OutputChannelStride * channel +
+ OutputTransmitStride * transmit +
+ OutputSampleStride * time_sample;
- out_data[out_off] = sample_rf_data(in_off);
+ Output(output_buffer).values[out_off] = sample_rf_data(in_off);
}
}break;
case DecodeMode_Hadamard:{
- if (u_first_pass) {
- uint time_sample = gl_GlobalInvocationID.x * RF_SAMPLES_PER_INDEX;
+ if (first_pass) {
+ uint time_sample = gl_GlobalInvocationID.x;
uint channel = gl_GlobalInvocationID.y;
uint transmit = gl_GlobalInvocationID.z * ToProcess;
if (time_sample < InputTransmitStride) {
- uint out_off = (InputChannelStride * channel + TransmitCount * time_sample) / RF_SAMPLES_PER_INDEX;
- uint in_off = (InputChannelStride * channel + InputSampleStride * time_sample);
+ uint out_off = InputChannelStride * channel + TransmitCount * time_sample;
+ uint in_off = InputChannelStride * channel + InputSampleStride * time_sample;
#if UseSharedMemory
in_off += InputTransmitStride * transmit;
out_off += transmit;
for (uint i = 0; i < ToProcess; i++, in_off += InputTransmitStride) {
if (transmit + i < TransmitCount)
- out_rf_data[out_off + i] = rf_data[in_off / RF_SAMPLES_PER_INDEX];
+ OutputRF(output_rf_buffer).values[out_off + i] = RF(rf_buffer).values[in_off];
}
#else
for (uint i = 0; i < TransmitCount; i++, in_off += InputTransmitStride)
- out_rf_data[out_off + i] = rf_data[in_off / RF_SAMPLES_PER_INDEX];
+ OutputRF(output_rf_buffer).values[out_off + i] = RF(rf_buffer).values[in_off];
#endif
}
} else {
diff --git a/shaders/filter.glsl b/shaders/filter.glsl
@@ -28,16 +28,16 @@
#define apply_filter(iq, h) ((iq) * (h))
#endif
-layout(std430, binding = 1) readonly restrict buffer buffer_1 {
- DATA_TYPE in_data[];
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict readonly buffer Input {
+ DATA_TYPE values[];
};
-layout(std430, binding = 2) writeonly restrict buffer buffer_2 {
- OUT_DATA_TYPE out_data[];
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict writeonly buffer Output {
+ OUT_DATA_TYPE values[];
};
-layout(std430, binding = 3) readonly restrict buffer buffer_3 {
- FILTER_TYPE filter_coefficients[FilterLength];
+layout(std430, buffer_reference, buffer_reference_align = 64) restrict readonly buffer Filter {
+ FILTER_TYPE values[FilterLength];
};
vec2 complex_mul(vec2 a, vec2 b)
@@ -58,7 +58,7 @@ vec2 rotate_iq(vec2 iq, uint index)
SAMPLE_TYPE sample_rf(uint index)
{
- SAMPLE_TYPE result = SAMPLE_TYPE_CAST(in_data[index]);
+ SAMPLE_TYPE result = SAMPLE_TYPE_CAST(Input(input_data).values[index]);
return result;
}
@@ -80,6 +80,8 @@ void main()
/////////////////////////
// NOTE: sample caching
{
+ bool offset_wraps = (DecimationRate * gl_WorkGroupID.x * gl_WorkGroupSize.x) < (FilterLength - 1);
+
in_offset += DecimationRate * gl_WorkGroupID.x * gl_WorkGroupSize.x - (FilterLength - 1);
uint total_samples = rf.length();
@@ -87,10 +89,10 @@ void main()
uint leftover_count = total_samples % thread_count;
uint samples_this_thread = samples_per_thread + uint(thread_index < leftover_count);
- const float scale = bool(ComplexFilter) ? 1 : sqrt(2);
+ const float scale = bool(ComplexFilter) ? 1 : sqrt(2.0f);
for (uint i = 0; i < samples_this_thread; i++) {
uint index = thread_count * i + thread_index;
- if (gl_WorkGroupID.x == 0 && index < FilterLength - 1) {
+ if (offset_wraps && index < FilterLength - 1) {
rf[index] = SAMPLE_TYPE(0);
} else {
#if Demodulate
@@ -107,7 +109,7 @@ void main()
SAMPLE_TYPE result = SAMPLE_TYPE(0);
uint offset = DecimationRate * thread_index;
for (uint j = 0; j < FilterLength; j++)
- result += apply_filter(rf[offset + j], filter_coefficients[j]);
- out_data[out_offset] = RESULT_TYPE_CAST(result);
+ result += apply_filter(rf[offset + j], Filter(filter_coefficients).values[j]);
+ Output(output_data).values[out_offset] = RESULT_TYPE_CAST(result);
}
}
diff --git a/shaders/render_3d.frag.glsl b/shaders/render_3d.frag.glsl
@@ -1,4 +1,15 @@
/* See LICENSE for license details. */
+layout(location = 0) in vec3 normal;
+layout(location = 1) in vec3 texture_coordinate;
+layout(location = 0) out vec4 out_colour;
+
+layout(std430, buffer_reference, buffer_reference_align = 64) readonly buffer InputVec2 {
+ vec2 values[];
+};
+
+layout(std430, buffer_reference, buffer_reference_align = 64) readonly buffer InputFloat {
+ float values[];
+};
/* input: h [0,360] | s,v [0, 1] *
* output: rgb [0,1] */
@@ -20,24 +31,41 @@ float sdf_wire_box_outside(vec3 p, vec3 b, float e)
return result;
}
-int texture_dimension(ivec3 points)
+uint32_t texture_dimension(uvec3 points)
{
- points = ivec3(greaterThan(points, ivec3(1)));
+ points = uvec3(greaterThan(points, uvec3(1)));
return points.x + points.y + points.z;
}
+uint32_t input_index(vec3 uv)
+{
+ uv *= vec3(input_size_x - 1, input_size_y - 1, input_size_z - 1);
+ uint32_t result = input_size_y * input_size_x * uint32_t(uv.z) +
+ input_size_x * uint32_t(uv.y) +
+ uint32_t(uv.x);
+ result = min(result, input_size_z * input_size_y * input_size_x - 1);
+ return result;
+}
float sample_value(vec3 p)
{
- float result = length(texture(u_texture, p).xy);
- float threshold_val = pow(10.0f, u_threshold / 20.0f);
+	float result = 0.0f; // NOTE: kept when input_data is 0 or data_kind matches no case below
+	if (input_data != 0) {
+		uint32_t index = input_index(p); // NOTE: sample at the requested point, not the global input
+		switch (data_kind) {
+		case DataKind_Float32:{        result = length(InputFloat(input_data).values[index]); }break;
+		case DataKind_Float32Complex:{ result = length(InputVec2(input_data).values[index]);  }break;
+		}
+	}
+
+ float threshold_val = pow(10.0f, threshold / 20.0f);
result = clamp(result, 0.0f, threshold_val);
result = result / threshold_val;
- result = pow(result, u_gamma);
+ result = pow(result, gamma);
- if (u_log_scale) {
+ if (db_cutoff > 0) {
result = 20 * log(result) / log(10);
- result = clamp(result, -u_db_cutoff, 0) / -u_db_cutoff;
+ result = clamp(result, -db_cutoff, 0) / -db_cutoff;
result = 1 - result;
}
@@ -54,40 +82,40 @@ float grad(float x)
void main(void)
{
- int dimension = texture_dimension(textureSize(u_texture, 0));
+ uint32_t dimension = texture_dimension(uvec3(input_size_x, input_size_y, input_size_z));
if (dimension == 3) {
// TODO(rnp): add slice offset passed in as a uniform
}
- float smp = sample_value(texture_coordinate);
+ float data = sample_value(texture_coordinate);
//float t = test_texture_coordinate.y;
//smp = smp * smoothstep(-0.4, 1.1, t) * u_gain;
- vec3 p = 2.0f * test_texture_coordinate - 1.0f;
+ vec3 p = 2.0f * texture_coordinate - 1.0f;
switch (dimension) {
case 1:{
- float df = mix(grad(texture_coordinate.x), dFdx(smp),
+ float df = mix(grad(texture_coordinate.x), dFdx(data),
smoothstep(0.0f, 0.55f, abs(texture_coordinate.x - 0.5f)));
- float de = abs(smp - texture_coordinate.y) / sqrt(1.0f + df * df);
+ float de = abs(data - texture_coordinate.y) / sqrt(1.0f + df * df);
float eps = length(fwidth(texture_coordinate.xy));
float thickness = 4.f;
float alpha = smoothstep((0.5f * thickness + 2.0f) * eps, (0.5f * thickness + 0.0f) * eps, de);
- out_colour = vec4(u_bb_colour.xyz, alpha);
+ out_colour = vec4(bounding_box_colour.xyz, alpha);
}break;
case 0: // NOTE(rnp): 0 is a special case for X-Plane Rendering
case 2:
case 3:
{
- float t = clamp(sdf_wire_box_outside(p, vec3(1.0f), u_bb_fraction) / u_bb_fraction, 0, 1);
+ float t = clamp(sdf_wire_box_outside(p, vec3(1.0f), bounding_box_fraction) / bounding_box_fraction, 0, 1);
- out_colour = vec4(t * vec3(smp) + (1 - t) * u_bb_colour.xyz, 1);
- if (u_solid_bb) out_colour = u_bb_colour;
+ out_colour = vec4(t * vec3(data) + (1 - t) * bounding_box_colour.xyz, 1);
+ //if (u_solid_bb) out_colour = u_bb_colour;
}break;
}
diff --git a/shaders/render_3d.vert.glsl b/shaders/render_3d.vert.glsl
@@ -0,0 +1,21 @@
+layout(location = 0) out vec3 f_normal;
+layout(location = 1) out vec3 f_texture_coordinate;
+
+/* NOTE: view of a buffer as an array of vec4; used for both `positions` and `normals` */
+layout(std430, buffer_reference, buffer_reference_align = 16) readonly buffer Vector4 {
+	vec4 values[];
+};
+
+void main()
+{
+	vec3 position = Vector4(positions).values[gl_VertexIndex].xyz;
+
+	/* NOTE: the normal is forwarded as stored; it is not transformed by mvp_matrix */
+	f_normal = Vector4(normals).values[gl_VertexIndex].xyz;
+	//f_normal = normalize(mat3(mvp_matrix) * Vector4(normals).values[gl_VertexIndex].xyz);
+
+	/* NOTE: equivalent to shifting every component of position up by 0.5 */
+	f_texture_coordinate = (2 * position + 1) / 2;
+
+	gl_Position = mvp_matrix * vec4(position, 1);
+}
diff --git a/ui.c b/ui.c
@@ -63,6 +63,10 @@
#define RULER_COLOUR (v4){{1.00f, 0.70f, 0.00f, 1.0f}}
#define BORDER_COLOUR v4_lerp(FG_COLOUR, BG_COLOUR, 0.85f)
+#define FRAME_VIEW_BB_COLOUR (v4){{0.92f, 0.88f, 0.78f, 1.0f}}
+#define FRAME_VIEW_BB_FRACTION 0.007f
+#define FRAME_VIEW_RENDER_TARGET_SIZE 1024, 1024
+
#define MENU_PLUS_COLOUR (v4){{0.33f, 0.42f, 1.00f, 1.00f}}
#define MENU_CLOSE_COLOUR FOCUSED_COLOUR
@@ -308,8 +312,7 @@ struct Variable {
#define BEAMFORMER_FRAME_VIEW_KIND_LIST \
X(Latest, "Latest") \
X(3DXPlane, "3D X-Plane") \
- X(Indexed, "Indexed") \
- X(Copy, "Copy")
+ X(Copy, "Copy") \
typedef enum {
#define X(kind, ...) BeamformerFrameViewKind_##kind,
@@ -322,12 +325,16 @@ typedef struct BeamformerFrameView BeamformerFrameView;
struct BeamformerFrameView {
BeamformerFrameViewKind kind;
b32 dirty;
- BeamformerFrame *frame;
BeamformerFrameView *prev, *next;
- u32 texture;
- i32 texture_mipmaps;
- iv2 texture_dim;
+ // NOTE(rnp): for FrameViewKindCopy
+ GPUBuffer copy_buffer;
+
+ GPUImage colour_image;
+ // NOTE(rnp): temporary, on w32 we must hold onto this when importing vulkan data to OpenGL
+ OSHandle export_handle;
+ u32 memory_object;
+ u32 texture;
/* NOTE(rnp): any pointers to variables are added to the menu and will
* be put onto the freelist if the view is closed. */
@@ -339,14 +346,13 @@ struct BeamformerFrameView {
Variable gamma;
union {
- /* BeamformerFrameViewKind_Latest/BeamformerFrameViewKind_Indexed */
+ /* BeamformerFrameViewKind_Latest/BeamformerFrameViewKind_Copy */
struct {
Variable lateral_scale_bar;
Variable axial_scale_bar;
Variable *lateral_scale_bar_active;
Variable *axial_scale_bar_active;
- /* NOTE(rnp): if kind is Latest selects which plane to use
- * if kind is Indexed selects the index */
+ /* NOTE(rnp): selects which plane to use */
Variable *cycler;
u32 cycler_state;
@@ -354,6 +360,8 @@ struct BeamformerFrameView {
v3 min_coordinate;
v3 max_coordinate;
+
+ BeamformerFrame frame;
};
/* BeamformerFrameViewKind_3DXPlane */
@@ -415,7 +423,6 @@ struct BeamformerUI {
BeamformerFrameView *views;
BeamformerFrameView *view_freelist;
- BeamformerFrame *frame_freelist;
Interaction interaction;
Interaction hot_interaction;
@@ -423,12 +430,20 @@ struct BeamformerUI {
InputState text_input_state;
- /* TODO(rnp): ideally this isn't copied all over the place */
- BeamformerRenderModel unit_cube_model;
+ VulkanHandle pipelines[BeamformerShaderKind_RenderCount];
+
+ OSHandle render_semaphores_export[2];
+ VulkanHandle render_semaphores[2];
+ u32 render_semaphores_gl[2];
+
+ GPUImage render_3d_image;
+ GPUImage render_3d_depth_image;
+ RenderModel unit_cube_model;
v2_sll *scale_bar_savepoint_freelist;
- BeamformerFrame *latest_plane[BeamformerViewPlaneTag_Count + 1];
+ BeamformerFrame latest_plane[BeamformerViewPlaneTag_Count + 1];
+ b32 latest_plane_valid[BeamformerViewPlaneTag_Count + 1];
BeamformerUIParameters params;
b32 flush_params;
@@ -439,8 +454,6 @@ struct BeamformerUI {
f32 off_axis_position;
f32 beamform_plane;
- FrameViewRenderContext *frame_view_render_context;
-
BeamformerSharedMemory * shared_memory;
BeamformerCtx * beamformer_context;
};
@@ -640,9 +653,9 @@ make_raylib_texture(BeamformerFrameView *v)
{
Texture result;
result.id = v->texture;
- result.width = v->texture_dim.w;
- result.height = v->texture_dim.h;
- result.mipmaps = v->texture_mipmaps;
+ result.width = v->colour_image.width;
+ result.height = v->colour_image.height;
+ result.mipmaps = v->colour_image.mip_map_levels;
result.format = PIXELFORMAT_UNCOMPRESSED_R8G8B8A8;
return result;
}
@@ -743,16 +756,11 @@ push_custom_view_title(Stream *s, Variable *var)
#undef X
stream_append_s8(s, labels[*bv->cycler->cycler.state % (BeamformerViewPlaneTag_Count + 1)]);
}break;
- case BeamformerFrameViewKind_Indexed:{
- stream_append_s8(s, s8(": Index {"));
- stream_append_u64(s, *bv->cycler->cycler.state % BeamformerMaxBacklogFrames);
- stream_append_s8(s, s8("} ["));
- }break;
case BeamformerFrameViewKind_3DXPlane:{ stream_append_s8(s, s8(": 3D X-Plane")); }break;
InvalidDefaultCase;
}
if (bv->kind != BeamformerFrameViewKind_3DXPlane) {
- stream_append_hex_u64(s, bv->frame? bv->frame->id : 0);
+ stream_append_hex_u64(s, bv->frame.id);
stream_append_byte(s, ']');
}
}break;
@@ -954,19 +962,37 @@ table_end_subtable(Table *table)
}
function void
-resize_frame_view(BeamformerFrameView *view, iv2 dim)
+resize_frame_view(BeamformerFrameView *view, uv2 dim)
{
+ if ValidHandle(view->export_handle) os_release_handle(view->export_handle);
+
+ glDeleteMemoryObjectsEXT(1, &view->memory_object);
+ glCreateMemoryObjectsEXT(1, &view->memory_object);
+
glDeleteTextures(1, &view->texture);
glCreateTextures(GL_TEXTURE_2D, 1, &view->texture);
- view->texture_dim = dim;
- view->texture_mipmaps = (i32)ctz_u64((u64)Max(dim.x, dim.y)) + 1;
- glTextureStorage2D(view->texture, view->texture_mipmaps, GL_RGBA8, dim.x, dim.y);
+ vk_image_allocate(&view->colour_image, dim.w, dim.h, 1, 1, VulkanImageUsage_Colour,
+ VulkanUsageFlag_ImageSampling, &view->export_handle);
- glGenerateTextureMipmap(view->texture);
+ glMemoryObjectParameterivEXT(view->memory_object, GL_DEDICATED_MEMORY_OBJECT_EXT, (GLint []){1});
+
+ if (OS_WINDOWS) {
+ glImportMemoryWin32HandleEXT(view->memory_object, view->colour_image.memory_size,
+ GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, (void *)view->export_handle.value[0]);
+ // NOTE(rnp): w32 does not transfer ownership from handle back to driver
+ } else {
+ glImportMemoryFdEXT(view->memory_object, view->colour_image.memory_size,
+ GL_HANDLE_TYPE_OPAQUE_FD_EXT, view->export_handle.value[0]);
+ view->export_handle.value[0] = OSInvalidHandleValue;
+ }
+
+ glTextureStorageMem2DEXT(view->texture, view->colour_image.mip_map_levels, GL_RGBA8,
+ view->colour_image.width, view->colour_image.height,
+ view->memory_object, 0);
/* NOTE(rnp): work around raylib's janky texture sampling */
- v4 border_colour = (v4){{0, 0, 0, 1}};
+ v4 border_colour = {{0, 0, 0, 1}};
if (view->kind != BeamformerFrameViewKind_Copy) border_colour = (v4){0};
glTextureParameteri(view->texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
glTextureParameteri(view->texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
@@ -983,11 +1009,8 @@ resize_frame_view(BeamformerFrameView *view, iv2 dim)
function void
ui_beamformer_frame_view_release_subresources(BeamformerUI *ui, BeamformerFrameView *bv, BeamformerFrameViewKind kind)
{
- if (kind == BeamformerFrameViewKind_Copy && bv->frame) {
- glDeleteTextures(1, &bv->frame->texture);
- bv->frame->texture = 0;
- SLLPushFreelist(bv->frame, ui->frame_freelist);
- }
+ if (kind == BeamformerFrameViewKind_Copy)
+ vk_buffer_release(&bv->copy_buffer);
if (kind != BeamformerFrameViewKind_3DXPlane) {
if (bv->axial_scale_bar.scale_bar.savepoint_stack)
@@ -1289,10 +1312,10 @@ ui_beamformer_frame_view_convert(BeamformerUI *ui, Arena *arena, Variable *view,
bv->threshold.real32 = old? old->threshold.real32 : 55.0f;
bv->gamma.scaled_real32.val = old? old->gamma.scaled_real32.val : 1.0f;
bv->gamma.scaled_real32.scale = old? old->gamma.scaled_real32.scale : 0.05f;
- bv->min_coordinate = (old && old->frame) ? m4_mul_v4(old->frame->voxel_transform, (v4){{0.0f, 0.0f, 0.0f, 1.0f}}).xyz
- : (v3){0};
- bv->max_coordinate = (old && old->frame) ? m4_mul_v4(old->frame->voxel_transform, (v4){{1.0f, 1.0f, 1.0f, 1.0f}}).xyz
- : (v3){0};
+ bv->min_coordinate = old ? m4_mul_v4(old->frame.voxel_transform, (v4){{0.0f, 0.0f, 0.0f, 1.0f}}).xyz
+ : (v3){0};
+ bv->max_coordinate = old ? m4_mul_v4(old->frame.voxel_transform, (v4){{1.0f, 1.0f, 1.0f, 1.0f}}).xyz
+ : (v3){0};
#define X(_t, pretty) s8_comp(pretty),
read_only local_persist s8 kind_labels[] = {BEAMFORMER_FRAME_VIEW_KIND_LIST};
@@ -1302,7 +1325,7 @@ ui_beamformer_frame_view_convert(BeamformerUI *ui, Arena *arena, Variable *view,
/* TODO(rnp): this is quite dumb. what we actually want is to render directly
* into the view region with the appropriate size for that region (scissor) */
- resize_frame_view(bv, (iv2){{FRAME_VIEW_RENDER_TARGET_SIZE}});
+ resize_frame_view(bv, (uv2){{FRAME_VIEW_RENDER_TARGET_SIZE}});
switch (kind) {
case BeamformerFrameViewKind_3DXPlane:{
@@ -1329,11 +1352,10 @@ ui_beamformer_frame_view_convert(BeamformerUI *ui, Arena *arena, Variable *view,
axial->zoom_starting_coord = F32_INFINITY;
b32 copy = kind == BeamformerFrameViewKind_Copy;
- v3 normal = (v3){.y = 1.0f};
- if (old && old->frame)
- normal = cross(old->frame->voxel_transform.c[0].xyz, old->frame->voxel_transform.c[1].xyz);
+ v3 N = (v3){.y = 1.0f};
+ if (old) N = cross(old->frame.voxel_transform.c[0].xyz, old->frame.voxel_transform.c[1].xyz);
- BeamformerViewPlaneTag plane = ui_plane_layout_from_normal(v3_normalize(normal));
+ BeamformerViewPlaneTag plane = ui_plane_layout_from_normal(v3_normalize(N));
switch (plane) {
case BeamformerViewPlaneTag_XY:{
lateral->min_value = copy ? &bv->min_coordinate.x : &ui->min_coordinate.x;
@@ -1395,10 +1417,6 @@ ui_beamformer_frame_view_convert(BeamformerUI *ui, Arena *arena, Variable *view,
&bv->cycler_state, labels, countof(labels));
bv->cycler_state = BeamformerViewPlaneTag_Count;
}break;
- case BeamformerFrameViewKind_Indexed:{
- bv->cycler = add_variable_cycler(ui, menu, arena, 0, ui->small_font, s8("Index:"),
- &bv->cycler_state, 0, BeamformerMaxBacklogFrames);
- }break;
default:{}break;
}
@@ -1411,6 +1429,7 @@ ui_beamformer_frame_view_new(BeamformerUI *ui, Arena *arena)
BeamformerFrameView *result = SLLPopFreelist(ui->view_freelist);
if (!result) result = push_struct_no_zero(arena, typeof(*result));
zero_struct(result);
+ result->export_handle.value[0] = OSInvalidHandleValue;
DLLPushDown(result, ui->views);
return result;
}
@@ -1534,19 +1553,29 @@ ui_add_live_frame_view(BeamformerUI *ui, Variable *view, RegionSplitDirection di
function void
ui_beamformer_frame_view_copy_frame(BeamformerUI *ui, BeamformerFrameView *new, BeamformerFrameView *old)
{
- assert(old->frame);
- new->frame = SLLPopFreelist(ui->frame_freelist);
- if (!new->frame) new->frame = push_struct(&ui->arena, typeof(*new->frame));
+ mem_copy(&new->frame, &old->frame, sizeof(old->frame));
+
+ iv3 points = new->frame.points;
+ i64 frame_size = points.x * points.y * points.z * beamformer_data_kind_byte_size[new->frame.data_kind];
- mem_copy(new->frame, old->frame, sizeof(*new->frame));
- new->frame->texture = 0;
- new->frame->next = 0;
- alloc_beamform_frame(new->frame, old->frame->dim, old->frame->gl_kind, s8("Frame Copy: "), ui->arena);
+ Stream sb = arena_stream(ui->arena);
+ stream_append_s8(&sb, s8("Frame Copy ["));
+ stream_append_hex_u64(&sb, new->frame.id);
+ stream_append_s8(&sb, s8("]"));
+ stream_append_byte(&sb, 0);
+
+ GPUBufferAllocateInfo allocate_info = {
+ .size = frame_size,
+ .flags = VulkanUsageFlag_TransferDestination,
+ .label = stream_to_s8(&sb),
+ };
+ vk_buffer_allocate(&new->copy_buffer, &allocate_info);
- glCopyImageSubData(old->frame->texture, GL_TEXTURE_3D, 0, 0, 0, 0,
- new->frame->texture, GL_TEXTURE_3D, 0, 0, 0, 0,
- new->frame->dim.x, new->frame->dim.y, new->frame->dim.z);
- glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
+ GPUBuffer *backlog = ui->beamformer_context->compute_context.backlog.buffer;
+ VulkanHandle cmd = vk_command_begin(VulkanTimeline_Compute);
+ vk_command_wait_timeline(cmd, VulkanTimeline_Compute, old->frame.timeline_valid_value);
+ vk_command_copy_buffer(cmd, &new->copy_buffer, backlog, old->frame.buffer_offset, frame_size);
+ new->frame.timeline_valid_value = vk_command_end(cmd, (VulkanHandle){0}, (VulkanHandle){0});
}
function void
@@ -1557,10 +1586,6 @@ ui_copy_frame(BeamformerUI *ui, Variable *view, RegionSplitDirection direction)
assert(view->type == VT_UI_VIEW);
BeamformerFrameView *old = view->view.child->generic;
- /* TODO(rnp): hack; it would be better if this was unreachable with a 0 old->frame */
- if (!old->frame)
- return;
-
Variable *new_region = ui_split_region(ui, region, view, direction);
new_region->region_split.right = add_beamformer_frame_view(ui, new_region, &ui->arena,
BeamformerFrameViewKind_Copy, 1, old);
@@ -1641,7 +1666,7 @@ function m4
projection_matrix_for_x_plane_view(BeamformerFrameView *view)
{
assert(view->kind == BeamformerFrameViewKind_3DXPlane);
- f32 aspect = (f32)view->texture_dim.w / (f32)view->texture_dim.h;
+ f32 aspect = (f32)view->colour_image.width / (f32)view->colour_image.height;
m4 result = perspective_projection(10e-3f, 500e-3f, 45.0f * PI / 180.0f, aspect);
return result;
}
@@ -1679,22 +1704,35 @@ view_plane_tag_from_x_plane_shift(BeamformerFrameView *view, Variable *x_plane_s
function void
render_single_xplane(BeamformerUI *ui, BeamformerFrameView *view, Variable *x_plane_shift,
- u32 program, f32 rotation_turns, v3 translate, BeamformerViewPlaneTag tag)
-{
- u32 texture = 0;
- if (ui->latest_plane[tag])
- texture = ui->latest_plane[tag]->texture;
+ f32 rotation_turns, v3 translate, BeamformerViewPlaneTag tag,
+ VulkanHandle command, BeamformerRenderBeamformedPushConstants *pc)
+{
+ GPUBuffer *beamformed_buffer = ui->beamformer_context->compute_context.backlog.buffer;
+ pc->input_data = 0;
+ if (ui->latest_plane_valid[tag]) {
+ BeamformerFrame *f = ui->latest_plane + tag;
+ pc->input_data = beamformed_buffer->gpu_pointer + f->buffer_offset;
+ pc->input_size_x = f->points.x;
+ pc->input_size_y = f->points.y;
+ pc->input_size_z = f->points.z;
+ pc->data_kind = f->data_kind;
+ vk_command_wait_timeline(command, VulkanTimeline_Compute, f->timeline_valid_value);
+ }
+
+ v3 camera = camera_for_x_plane_view(ui, view);
+ v3 scale = beamformer_frame_view_plane_size(ui, view);
- v3 scale = beamformer_frame_view_plane_size(ui, view);
m4 model_transform = y_aligned_volume_transform(scale, translate, rotation_turns);
+ m4 view_m = view_matrix_for_x_plane_view(ui, view, camera);
+ m4 projection = projection_matrix_for_x_plane_view(view);
+
+ //pc->mvp_matrix = m4_mul(m4_mul(model_transform, view_m), projection);
+ pc->mvp_matrix = m4_mul(projection, m4_mul(view_m, model_transform));
+ pc->bounding_box_colour = v4_lerp(FG_COLOUR, HOVERED_COLOUR, x_plane_shift->hover_t);
+ pc->bounding_box_fraction = FRAME_VIEW_BB_FRACTION;
- v4 colour = v4_lerp(FG_COLOUR, HOVERED_COLOUR, x_plane_shift->hover_t);
- glProgramUniformMatrix4fv(program, FRAME_VIEW_MODEL_MATRIX_LOC, 1, 0, model_transform.E);
- glProgramUniform4fv(program, FRAME_VIEW_BB_COLOUR_LOC, 1, colour.E);
- glProgramUniform1ui(program, FRAME_VIEW_SOLID_BB_LOC, 0);
- glBindTextureUnit(0, texture);
- glDrawElements(GL_TRIANGLES, ui->unit_cube_model.elements, GL_UNSIGNED_SHORT,
- (void *)ui->unit_cube_model.elements_offset);
+ vk_command_push_constants(command, 0, sizeof(*pc), pc);
+ vk_command_draw(command, &ui->unit_cube_model.model);
XPlaneShift *xp = &x_plane_shift->x_plane_shift;
v3 xp_delta = v3_sub(xp->end_point, xp->start_point);
@@ -1706,64 +1744,59 @@ render_single_xplane(BeamformerUI *ui, BeamformerFrameView *view, Variable *x_pl
/* TODO(rnp): there is no reason to compute the rotation matrix again */
model_transform = y_aligned_volume_transform(scale, v3_add(f, translate), rotation_turns);
- glProgramUniformMatrix4fv(program, FRAME_VIEW_MODEL_MATRIX_LOC, 1, 0, model_transform.E);
- glProgramUniform1ui(program, FRAME_VIEW_SOLID_BB_LOC, 1);
- glProgramUniform4fv(program, FRAME_VIEW_BB_COLOUR_LOC, 1, HOVERED_COLOUR.E);
- glDrawElements(GL_TRIANGLES, ui->unit_cube_model.elements, GL_UNSIGNED_SHORT,
- (void *)ui->unit_cube_model.elements_offset);
+ pc->mvp_matrix = m4_mul(projection, m4_mul(view_m, model_transform));
+ pc->bounding_box_colour = HOVERED_COLOUR;
+ pc->bounding_box_fraction = 1.0f;
+ pc->input_data = 0;
+
+ vk_command_push_constants(command, 0, sizeof(*pc), pc);
+ vk_command_draw(command, &ui->unit_cube_model.model);
}
}
function void
-render_3D_xplane(BeamformerUI *ui, BeamformerFrameView *view, u32 program)
+render_3D_xplane(BeamformerUI *ui, BeamformerFrameView *view, VulkanHandle command, BeamformerRenderBeamformedPushConstants *pc) /* draws the XZ and YZ planes of a 3D X-Plane view via render_single_xplane() */
{
if (view->demo->bool32) {
view->rotation += dt_for_frame * 0.125f;
if (view->rotation > 1.0f) view->rotation -= 1.0f;
}
- v3 camera = camera_for_x_plane_view(ui, view);
- m4 view_m = view_matrix_for_x_plane_view(ui, view, camera);
- m4 projection = projection_matrix_for_x_plane_view(view);
-
- glProgramUniformMatrix4fv(program, FRAME_VIEW_VIEW_MATRIX_LOC, 1, 0, view_m.E);
- glProgramUniformMatrix4fv(program, FRAME_VIEW_PROJ_MATRIX_LOC, 1, 0, projection.E);
- glProgramUniform1f(program, FRAME_VIEW_BB_FRACTION_LOC, FRAME_VIEW_BB_FRACTION);
-
v3 model_translate = offset_x_plane_position(ui, view, BeamformerViewPlaneTag_XZ);
- render_single_xplane(ui, view, view->x_plane_shifts + 0, program,
+ render_single_xplane(ui, view, view->x_plane_shifts + 0, /* XZ plane uses x_plane_shifts[0] */
x_plane_rotation_for_view_plane(view, BeamformerViewPlaneTag_XZ),
- model_translate, BeamformerViewPlaneTag_XZ);
+ model_translate, BeamformerViewPlaneTag_XZ, command, pc); /* camera/view/projection are now computed inside render_single_xplane() */
model_translate = offset_x_plane_position(ui, view, BeamformerViewPlaneTag_YZ);
model_translate.y -= 0.0001f;
- render_single_xplane(ui, view, view->x_plane_shifts + 1, program,
+ render_single_xplane(ui, view, view->x_plane_shifts + 1, /* YZ plane uses x_plane_shifts[1] */
x_plane_rotation_for_view_plane(view, BeamformerViewPlaneTag_YZ),
- model_translate, BeamformerViewPlaneTag_YZ);
+ model_translate, BeamformerViewPlaneTag_YZ, command, pc); /* the same push constant block is reused for both planes */
}
function void
-render_2D_plane(BeamformerUI *ui, BeamformerFrameView *view, u32 program)
+render_2D_plane(BeamformerUI *ui, BeamformerFrameView *view, VulkanHandle command, BeamformerRenderBeamformedPushConstants *pc)
{
m4 view_m = m4_identity();
m4 model = m4_scale((v3){{2.0f, 2.0f, 0.0f}});
m4 projection = orthographic_projection(0, 1, 1, 1);
- glProgramUniformMatrix4fv(program, FRAME_VIEW_MODEL_MATRIX_LOC, 1, 0, model.E);
- glProgramUniformMatrix4fv(program, FRAME_VIEW_VIEW_MATRIX_LOC, 1, 0, view_m.E);
- glProgramUniformMatrix4fv(program, FRAME_VIEW_PROJ_MATRIX_LOC, 1, 0, projection.E);
+ GPUBuffer *beamformed_buffer = ui->beamformer_context->compute_context.backlog.buffer;
+ pc->mvp_matrix = m4_mul(m4_mul(model, view_m), projection);
+ pc->input_data = beamformed_buffer->gpu_pointer + view->frame.buffer_offset,
+ pc->input_size_x = view->frame.points.x,
+ pc->input_size_y = view->frame.points.y,
+ pc->input_size_z = view->frame.points.z,
+ pc->data_kind = view->frame.data_kind,
- glProgramUniform1f(program, FRAME_VIEW_BB_FRACTION_LOC, 0);
- glBindTextureUnit(0, view->frame->texture);
- glDrawElements(GL_TRIANGLES, ui->unit_cube_model.elements, GL_UNSIGNED_SHORT,
- (void *)ui->unit_cube_model.elements_offset);
+ vk_command_wait_timeline(command, VulkanTimeline_Compute, view->frame.timeline_valid_value);
+ vk_command_push_constants(command, 0, sizeof(*pc), pc);
+ vk_command_draw(command, &ui->unit_cube_model.model);
}
function b32
frame_view_ready_to_present(BeamformerUI *ui, BeamformerFrameView *view)
{
- b32 result = !iv2_equal((iv2){0}, view->texture_dim) && view->frame;
- result |= view->kind == BeamformerFrameViewKind_3DXPlane &&
- ui->latest_plane[BeamformerViewPlaneTag_Count];
+ b32 result = view->colour_image.width > 0 || view->colour_image.height > 0; /* ready once either colour image dimension is nonzero; the frame and `ui` are no longer consulted */
return result;
}
@@ -1772,76 +1805,61 @@ view_update(BeamformerUI *ui, BeamformerFrameView *view)
{
if (view->kind == BeamformerFrameViewKind_Latest) {
u32 index = *view->cycler->cycler.state;
- view->dirty |= view->frame != ui->latest_plane[index];
- view->frame = ui->latest_plane[index];
- if (view->dirty && view->frame) {
- view->min_coordinate = m4_mul_v4(view->frame->voxel_transform, (v4){{0.0f, 0.0f, 0.0f, 1.0f}}).xyz;
- view->max_coordinate = m4_mul_v4(view->frame->voxel_transform, (v4){{1.0f, 1.0f, 1.0f, 1.0f}}).xyz;
+ view->dirty |= view->frame.timeline_valid_value != ui->latest_plane[index].timeline_valid_value;
+ mem_copy(&view->frame, ui->latest_plane + index, sizeof(view->frame));
+ if (view->dirty) {
+ view->min_coordinate = m4_mul_v4(view->frame.voxel_transform, (v4){{0.0f, 0.0f, 0.0f, 1.0f}}).xyz;
+ view->max_coordinate = m4_mul_v4(view->frame.voxel_transform, (v4){{1.0f, 1.0f, 1.0f, 1.0f}}).xyz;
}
}
/* TODO(rnp): x-z or y-z */
- view->dirty |= ui->frame_view_render_context->updated;
+ // TODO(rnp): how to track this now? use pipeline handle value?
+ view->dirty |= ui->beamformer_context->render_shader_updated;
view->dirty |= view->kind == BeamformerFrameViewKind_3DXPlane;
- b32 result = frame_view_ready_to_present(ui, view) && view->dirty;
+ b32 result = view->dirty;
return result;
}
function void
update_frame_views(BeamformerUI *ui, Rect window)
{
- FrameViewRenderContext *ctx = ui->frame_view_render_context;
- b32 fbo_bound = 0;
for (BeamformerFrameView *view = ui->views; view; view = view->next) {
if (view_update(ui, view)) {
- //start_renderdoc_capture(0);
-
- if (!fbo_bound) {
- fbo_bound = 1;
- glBindFramebuffer(GL_FRAMEBUFFER, ctx->framebuffers[0]);
- glUseProgram(ctx->shader);
- glBindVertexArray(ui->unit_cube_model.vao);
- glEnable(GL_DEPTH_TEST);
- }
+ BeamformerRenderBeamformedPushConstants pc = { /* per view shader parameters; the render helpers fill in the remaining fields */
+ .bounding_box_colour = FRAME_VIEW_BB_COLOUR,
+ .db_cutoff = view->log_scale->bool32 ? view->dynamic_range.real32 : 0, /* 0 when log scale is off */
+ .threshold = view->threshold.real32,
+ .gamma = view->gamma.scaled_real32.val,
+ .positions = ui->unit_cube_model.model.gpu_pointer,
+ .normals = ui->unit_cube_model.model.gpu_pointer + ui->unit_cube_model.normals_offset,
+ };
- u32 fb = ctx->framebuffers[0];
- u32 program = ctx->shader;
- glViewport(0, 0, view->texture_dim.w, view->texture_dim.h);
- glProgramUniform1f(program, FRAME_VIEW_THRESHOLD_LOC, view->threshold.real32);
- glProgramUniform1f(program, FRAME_VIEW_DYNAMIC_RANGE_LOC, view->dynamic_range.real32);
- glProgramUniform1f(program, FRAME_VIEW_GAMMA_LOC, view->gamma.scaled_real32.val);
- glProgramUniform1ui(program, FRAME_VIEW_LOG_SCALE_LOC, view->log_scale->bool32);
+ //start_renderdoc_capture();
- glNamedFramebufferRenderbuffer(fb, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, ctx->renderbuffers[0]);
- glNamedFramebufferRenderbuffer(fb, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, ctx->renderbuffers[1]);
- glClearNamedFramebufferfv(fb, GL_COLOR, 0, (f32 []){0, 0, 0, 0});
- glClearNamedFramebufferfv(fb, GL_DEPTH, 0, (f32 []){1});
+ glSignalSemaphoreEXT(ui->render_semaphores_gl[0], 0, 0, 1, &view->texture, (GLenum []){GL_NONE}); /* GL signals semaphore 0 for this view's texture before the Vulkan submission */
+ VulkanHandle cmd = vk_command_begin(VulkanTimeline_Graphics);
+ vk_command_bind_pipeline(cmd, ui->pipelines[BeamformerShaderKind_RenderBeamformed - BeamformerShaderKind_RenderFirst]);
+ vk_command_begin_rendering(cmd, &ui->render_3d_image, &ui->render_3d_depth_image, &view->colour_image); /* NOTE(review): presumably multisampled colour + depth targets resolved into view->colour_image - confirm */
+ vk_command_viewport(cmd, view->colour_image.width, view->colour_image.height, 0, 0, 0.0f, 1.0f);
+ vk_command_scissor(cmd, view->colour_image.width, view->colour_image.height, 0, 0);
if (view->kind == BeamformerFrameViewKind_3DXPlane) {
- render_3D_xplane(ui, view, program);
+ render_3D_xplane(ui, view, cmd, &pc);
} else {
- render_2D_plane(ui, view, program);
+ render_2D_plane(ui, view, cmd, &pc);
}
+ vk_command_end_rendering(cmd);
+ vk_command_end(cmd, ui->render_semaphores[0], ui->render_semaphores[1]); /* NOTE(review): presumably waits on semaphore 0 and signals semaphore 1 - confirm argument order */
- /* NOTE(rnp): resolve multisampled scene */
- glNamedFramebufferTexture(ctx->framebuffers[1], GL_COLOR_ATTACHMENT0, view->texture, 0);
- glBlitNamedFramebuffer(fb, ctx->framebuffers[1], 0, 0, FRAME_VIEW_RENDER_TARGET_SIZE,
- 0, 0, view->texture_dim.w, view->texture_dim.h, GL_COLOR_BUFFER_BIT, GL_NEAREST);
+ glWaitSemaphoreEXT(ui->render_semaphores_gl[1], 0, 0, 1, &view->texture, (GLenum[]){GL_LAYOUT_COLOR_ATTACHMENT_EXT}); /* subsequent GL commands wait on semaphore 1 before touching the texture */
- glGenerateTextureMipmap(view->texture);
- view->dirty = 0;
+ //end_renderdoc_capture();
- //end_renderdoc_capture(0);
+ view->dirty = 0;
}
}
- if (fbo_bound) {
- glBindFramebuffer(GL_FRAMEBUFFER, 0);
- glViewport((i32)window.pos.x, (i32)window.pos.y, (i32)window.size.w, (i32)window.size.h);
- /* NOTE(rnp): I don't trust raylib to not mess with us */
- glBindVertexArray(0);
- glDisable(GL_DEPTH_TEST);
- }
}
function Color
@@ -2420,9 +2438,9 @@ draw_view_ruler(BeamformerFrameView *view, Arena a, Rect view_rect, TextSpec ts)
{
v2 vr_max_p = v2_add(view_rect.pos, view_rect.size);
- v3 U = view->frame->voxel_transform.c[0].xyz;
- v3 V = view->frame->voxel_transform.c[1].xyz;
- v3 min = view->frame->voxel_transform.c[3].xyz;
+ v3 U = view->frame.voxel_transform.c[0].xyz;
+ v3 V = view->frame.voxel_transform.c[1].xyz;
+ v3 min = view->frame.voxel_transform.c[3].xyz;
v2 start_uv = plane_uv(v3_sub(view->ruler.start, min), U, V);
v2 end_uv = plane_uv(v3_sub(view->ruler.end, min), U, V);
@@ -2487,7 +2505,7 @@ draw_3D_xplane_frame_view(BeamformerUI *ui, Arena arena, Variable *var, Rect dis
assert(var->type == VT_BEAMFORMER_FRAME_VIEW);
BeamformerFrameView *view = var->generic;
- f32 aspect = (f32)view->texture_dim.w / (f32)view->texture_dim.h;
+ f32 aspect = (f32)view->colour_image.width / (f32)view->colour_image.height;
Rect vr = display_rect;
if (aspect > 1.0f) vr.size.w = vr.size.h;
else vr.size.h = vr.size.w;
@@ -2536,7 +2554,7 @@ draw_3D_xplane_frame_view(BeamformerUI *ui, Arena arena, Variable *var, Rect dis
it->hover_t = CLAMP01(it->hover_t);
}
- Rectangle tex_r = {0, 0, (f32)view->texture_dim.w, (f32)view->texture_dim.h};
+ Rectangle tex_r = {0, 0, (f32)view->colour_image.width, (f32)view->colour_image.height};
NPatchInfo tex_np = {tex_r, 0, 0, 0, 0, NPATCH_NINE_PATCH};
DrawTextureNPatch(make_raylib_texture(view), tex_np, rl_rect(vr), (Vector2){0}, 0, WHITE);
@@ -2548,9 +2566,9 @@ draw_beamformer_frame_view(BeamformerUI *ui, Arena a, Variable *var, Rect displa
{
assert(var->type == VT_BEAMFORMER_FRAME_VIEW);
BeamformerFrameView *view = var->generic;
- BeamformerFrame *frame = view->frame;
+ BeamformerFrame *frame = &view->frame;
- b32 is_1d = iv3_dimension(frame->dim) == 1;
+ b32 is_1d = iv3_dimension(frame->points) == 1;
f32 txt_w = measure_text(ui->small_font, s8("-288.8 mm")).w;
f32 scale_bar_size = 1.2f * txt_w + RULER_TICK_LENGTH;
@@ -2603,11 +2621,11 @@ draw_beamformer_frame_view(BeamformerUI *ui, Arena a, Variable *var, Rect displa
Rectangle tex_r;
if (is_1d) {
- tex_r = (Rectangle){0, 0, view->texture_dim.x, -view->texture_dim.y};
+ tex_r = (Rectangle){0, 0, view->colour_image.width, -view->colour_image.height};
} else {
v2 pixels_per_meter = {
- .w = (f32)view->texture_dim.w / output_dim.w,
- .h = (f32)view->texture_dim.h / output_dim.h,
+ .w = (f32)view->colour_image.width / output_dim.w,
+ .h = (f32)view->colour_image.height / output_dim.h,
};
/* NOTE(rnp): math to resize the texture without stretching when the view changes
@@ -2742,22 +2760,19 @@ push_compute_time(Arena *arena, s8 prefix, f32 time)
function v2
draw_compute_stats_bar_view(BeamformerUI *ui, Arena arena, ComputeShaderStats *stats,
- BeamformerShaderKind *stages, u32 stages_count, f32 compute_time_sum,
- TextSpec ts, Rect r, v2 mouse)
+ f32 compute_time_sum, TextSpec ts, Rect r, v2 mouse)
{
read_only local_persist s8 frame_labels[] = {s8_comp("0:"), s8_comp("-1:"), s8_comp("-2:"), s8_comp("-3:")};
f32 total_times[countof(frame_labels)] = {0};
+
+ u32 stages = stats->table.shader_count;
Table *table = table_new(&arena, countof(frame_labels), TextAlignment_Right, TextAlignment_Left);
for (u32 i = 0; i < countof(frame_labels); i++) {
TableCell *cells = table_push_row(table, &arena, TRK_CELLS)->data;
cells[0].text = frame_labels[i];
u32 frame_index = (stats->latest_frame_index - i) % countof(stats->table.times);
- u32 seen_shaders = 0;
- for (u32 j = 0; j < stages_count; j++) {
- if ((seen_shaders & (1u << stages[j])) == 0)
- total_times[i] += stats->table.times[frame_index][stages[j]];
- seen_shaders |= (1u << stages[j]);
- }
+ for (u32 j = 0; j < stages; j++)
+ total_times[i] += stats->table.times[frame_index][j];
}
v2 result = table_extent(table, arena, ts.font);
@@ -2784,14 +2799,14 @@ draw_compute_stats_bar_view(BeamformerUI *ui, Arena arena, ComputeShaderStats *s
Rect rect;
rect.pos = v2_add(cr.pos, (v2){{cr.size.w + table->cell_pad.w , cr.size.h * 0.15f}});
rect.size = (v2){.y = 0.7f * cr.size.h};
- for (u32 i = 0; i < stages_count; i++) {
- rect.size.w = total_width * stats->table.times[frame_index][stages[i]] / total_times[row_index];
+ for (u32 i = 0; i < stages; i++) {
+ rect.size.w = total_width * stats->table.times[frame_index][i] / total_times[row_index];
Color color = colour_from_normalized(g_colour_palette[i % countof(g_colour_palette)]);
DrawRectangleRec(rl_rect(rect), color);
if (point_in_rect(mouse, rect)) {
text_pos = v2_add(rect.pos, (v2){.x = table->cell_pad.w});
- s8 name = push_s8_from_parts(&arena, s8(""), beamformer_shader_names[stages[i]], s8(": "));
- mouse_text = push_compute_time(&arena, name, stats->table.times[frame_index][stages[i]]);
+ s8 name = push_s8_from_parts(&arena, s8(""), beamformer_shader_names[stats->table.shader_ids[i]], s8(": "));
+ mouse_text = push_compute_time(&arena, name, stats->table.times[frame_index][i]);
}
rect.pos.x += rect.size.w;
}
@@ -2865,19 +2880,13 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v
ComputeStatsView *csv = &view->compute_stats_view;
ComputeShaderStats *stats = csv->compute_shader_stats;
f32 compute_time_sum = 0;
- u32 stages = cp->pipeline.shader_count;
+ u32 stages = stats->table.shader_count;
TextSpec text_spec = {.font = &ui->font, .colour = FG_COLOUR, .flags = TF_LIMITED};
ui_blinker_update(&csv->blink, BLINK_SPEED);
- static_assert(BeamformerShaderKind_ComputeCount <= 32, "shader kind bitfield test");
- u32 seen_shaders = 0;
- for (u32 i = 0; i < stages; i++) {
- BeamformerShaderKind index = cp->pipeline.shaders[i];
- if ((seen_shaders & (1u << index)) == 0)
- compute_time_sum += stats->average_times[index];
- seen_shaders |= (1u << index);
- }
+ for (u32 index = 0; index < stages; index++)
+ compute_time_sum += stats->average_times[index];
v2 result = {0};
@@ -2886,13 +2895,12 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v
case ComputeStatsViewKind_Average:{
da_reserve(&arena, table, stages);
for (u32 i = 0; i < stages; i++) {
- push_table_time_row(table, &arena, beamformer_shader_names[cp->pipeline.shaders[i]],
- stats->average_times[cp->pipeline.shaders[i]]);
+ push_table_time_row(table, &arena, beamformer_shader_names[stats->table.shader_ids[i]],
+ stats->average_times[i]);
}
}break;
case ComputeStatsViewKind_Bar:{
- result = draw_compute_stats_bar_view(ui, arena, stats, cp->pipeline.shaders, stages,
- compute_time_sum, text_spec, r, mouse);
+ result = draw_compute_stats_bar_view(ui, arena, stats, compute_time_sum, text_spec, r, mouse);
r.pos = v2_add(r.pos, (v2){.y = result.y});
}break;
InvalidDefaultCase;
@@ -2920,9 +2928,9 @@ draw_compute_stats_view(BeamformerUI *ui, Arena arena, Variable *view, Rect r, v
cell_rect.size.w = t->widths[column];
text_spec.limits.size.w = r.size.w - (cell_rect.pos.x - it->start_x);
- if (column == 0 && row_index < stages && cp->programs[row_index] == 0 &&
- cp->pipeline.shaders[row_index] != BeamformerShaderKind_CudaHilbert &&
- cp->pipeline.shaders[row_index] != BeamformerShaderKind_CudaDecode)
+ if (column == 0 && row_index < stages && vk_pipeline_valid(cp->vulkan_pipelines[row_index]) == 0 &&
+ stats->table.shader_ids[row_index] != BeamformerShaderKind_CudaHilbert &&
+ stats->table.shader_ids[row_index] != BeamformerShaderKind_CudaDecode)
{
text_spec.colour = v4_lerp(FG_COLOUR, FOCUSED_COLOUR, ease_in_out_quartic(csv->blink.t));
} else {
@@ -3707,7 +3715,7 @@ ui_begin_interact(BeamformerUI *ui, v2 mouse, b32 scroll)
switch (++bv->ruler.state) {
case RulerState_Start:{
hot.kind = InteractionKind_Ruler;
- bv->ruler.start = world_point_from_plane_uv(bv->frame->voxel_transform,
+ bv->ruler.start = world_point_from_plane_uv(bv->frame.voxel_transform,
rect_uv(mouse, hot.rect));
}break;
case RulerState_Hold:{}break;
@@ -3791,7 +3799,7 @@ ui_extra_actions(BeamformerUI *ui, Variable *var)
ui_beamformer_frame_view_release_subresources(ui, old, last_kind);
ui_beamformer_frame_view_convert(ui, &ui->arena, view->child, view->menu, old->kind, old, log_scale);
- if (new->kind == BeamformerFrameViewKind_Copy && old->frame)
+ if (new->kind == BeamformerFrameViewKind_Copy)
ui_beamformer_frame_view_copy_frame(ui, new, old);
DLLRemove(old);
@@ -3947,7 +3955,7 @@ ui_interact(BeamformerUI *ui, BeamformerInput *input, Rect window_rect)
assert(it->var->type == VT_BEAMFORMER_FRAME_VIEW);
BeamformerFrameView *bv = it->var->generic;
v2 mouse = clamp_v2_rect(input_mouse, it->rect);
- bv->ruler.end = world_point_from_plane_uv(bv->frame->voxel_transform, rect_uv(mouse, it->rect));
+ bv->ruler.end = world_point_from_plane_uv(bv->frame.voxel_transform, rect_uv(mouse, it->rect));
}break;
case InteractionKind_Drag:{
if (!IsMouseButtonDown(MOUSE_BUTTON_LEFT) && !IsMouseButtonDown(MOUSE_BUTTON_RIGHT)) {
@@ -4037,8 +4045,6 @@ ui_init(BeamformerCtx *ctx, Arena store)
if (!ui) {
ui = ctx->ui = push_struct(&store, typeof(*ui));
ui->arena = store;
- ui->frame_view_render_context = &ctx->frame_view_render_context;
- ui->unit_cube_model = ctx->compute_context.unit_cube_model;
ui->shared_memory = ctx->shared_memory;
ui->beamformer_context = ctx;
@@ -4072,9 +4078,130 @@ ui_init(BeamformerCtx *ctx, Arena store)
split->region_split.left = add_compute_progress_bar(split, ctx);
split->region_split.right = add_compute_stats_view(ui, split, &ui->arena, ctx);
+ u32 samples = vk_gpu_info()->max_msaa_samples;
+ vk_image_allocate(&ui->render_3d_image, FRAME_VIEW_RENDER_TARGET_SIZE, 1, samples, VulkanImageUsage_Colour, 0, 0);
+ vk_image_allocate(&ui->render_3d_depth_image, FRAME_VIEW_RENDER_TARGET_SIZE, 1, samples, VulkanImageUsage_DepthStencil, 0, 0);
+
+ glGenSemaphoresEXT(countof(ui->render_semaphores_gl), ui->render_semaphores_gl);
+ for EachElement(ui->render_semaphores, it)
+ ui->render_semaphores[it] = vk_create_semaphore(ui->render_semaphores_export + it);
+
+ if (OS_WINDOWS) {
+ glImportSemaphoreWin32HandleEXT(ui->render_semaphores_gl[0], GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, (void *)ui->render_semaphores_export[0].value[0]);
+ glImportSemaphoreWin32HandleEXT(ui->render_semaphores_gl[1], GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, (void *)ui->render_semaphores_export[1].value[0]);
+ } else {
+ glImportSemaphoreFdEXT(ui->render_semaphores_gl[0], GL_HANDLE_TYPE_OPAQUE_FD_EXT, ui->render_semaphores_export[0].value[0]);
+ glImportSemaphoreFdEXT(ui->render_semaphores_gl[1], GL_HANDLE_TYPE_OPAQUE_FD_EXT, ui->render_semaphores_export[1].value[0]);
+ ui->render_semaphores_export[0].value[0] = OSInvalidHandleValue;
+ ui->render_semaphores_export[1].value[0] = OSInvalidHandleValue;
+ }
+
+ if (!BakeShaders)
+ {
+ for EachElement(beamformer_reloadable_render_shader_info_indices, it) {
+ i32 index = beamformer_reloadable_render_shader_info_indices[it];
+ for (u32 i = 0; i < 2; i++) {
+ BeamformerFileReloadContext *frc = push_struct(&ui->arena, typeof(*frc));
+ frc->kind = BeamformerFileReloadKind_RenderShader;
+ frc->shader_reload.shader = beamformer_reloadable_shader_kinds[index];
+ frc->shader_reload.pipeline = ui->pipelines + it;
+
+ Arena scratch = ui->arena;
+ s8 file = push_s8_from_parts(&scratch, os_path_separator(), s8("shaders"),
+ beamformer_reloadable_shader_files[index][i]);
+
+ os_add_file_watch((char *)file.data, file.len, frc);
+ }
+ }
+ }
+
+ f32 unit_cube_vertices[] = {
+ 0.5f, 0.5f, -0.5f, 0.0f,
+ 0.5f, 0.5f, -0.5f, 0.0f,
+ 0.5f, 0.5f, -0.5f, 0.0f,
+ 0.5f, -0.5f, -0.5f, 0.0f,
+ 0.5f, -0.5f, -0.5f, 0.0f,
+ 0.5f, -0.5f, -0.5f, 0.0f,
+ 0.5f, 0.5f, 0.5f, 0.0f,
+ 0.5f, 0.5f, 0.5f, 0.0f,
+ 0.5f, 0.5f, 0.5f, 0.0f,
+ 0.5f, -0.5f, 0.5f, 0.0f,
+ 0.5f, -0.5f, 0.5f, 0.0f,
+ 0.5f, -0.5f, 0.5f, 0.0f,
+ -0.5f, 0.5f, -0.5f, 0.0f,
+ -0.5f, 0.5f, -0.5f, 0.0f,
+ -0.5f, 0.5f, -0.5f, 0.0f,
+ -0.5f, -0.5f, -0.5f, 0.0f,
+ -0.5f, -0.5f, -0.5f, 0.0f,
+ -0.5f, -0.5f, -0.5f, 0.0f,
+ -0.5f, 0.5f, 0.5f, 0.0f,
+ -0.5f, 0.5f, 0.5f, 0.0f,
+ -0.5f, 0.5f, 0.5f, 0.0f,
+ -0.5f, -0.5f, 0.5f, 0.0f,
+ -0.5f, -0.5f, 0.5f, 0.0f,
+ -0.5f, -0.5f, 0.5f, 0.0f,
+ };
+ f32 unit_cube_normals[] = {
+ 0.0f, 0.0f, -1.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, -1.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ 1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 1.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ 1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 1.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ 1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, -1.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ -1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, -1.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ -1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 1.0f, 0.0f,
+ 0.0f, 1.0f, 0.0f, 0.0f,
+ -1.0f, 0.0f, 0.0f, 0.0f,
+ 0.0f, 0.0f, 1.0f, 0.0f,
+ 0.0f, -1.0f, 0.0f, 0.0f,
+ -1.0f, 0.0f, 0.0f, 0.0f,
+ };
+ u16 unit_cube_indices[] = {
+ 1, 13, 19,
+ 1, 19, 7,
+ 9, 6, 18,
+ 9, 18, 21,
+ 23, 20, 14,
+ 23, 14, 17,
+ 16, 4, 10,
+ 16, 10, 22,
+ 5, 2, 8,
+ 5, 8, 11,
+ 15, 12, 0,
+ 15, 0, 3
+ };
+
+ static_assert(countof(unit_cube_normals) == countof(unit_cube_vertices), "");
+
+ RenderModel *rm = &ui->unit_cube_model;
+ rm->vertex_count = countof(unit_cube_vertices) / 4;
+ rm->normals_offset = round_up_to(sizeof(unit_cube_vertices), 16);
+
+ u64 model_size = 2 * round_up_to(sizeof(unit_cube_vertices), 16);
+ vk_render_model_allocate(&rm->model, unit_cube_indices, countof(unit_cube_indices), model_size, s8("unit_cube_model"));
+ vk_render_model_range_upload(&rm->model, unit_cube_vertices, 0, sizeof(unit_cube_vertices), 0);
+ vk_render_model_range_upload(&rm->model, unit_cube_normals, rm->normals_offset, sizeof(unit_cube_normals), 0);
+
/* NOTE(rnp): shrink variable size once this fires */
assert((uz)(ui->arena.beg - (u8 *)ui) < KB(64));
}
+
+ for EachElement(beamformer_reloadable_render_shader_info_indices, it) {
+ i32 index = beamformer_reloadable_render_shader_info_indices[it];
+ BeamformerShaderKind shader = beamformer_reloadable_shader_kinds[index];
+ beamformer_reload_render_pipeline(ui->pipelines + it, shader, ui->arena);
+ }
}
function void
@@ -4091,8 +4218,15 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformerFrame *frame_to_dr
{
BeamformerUI *ui = ctx->ui;
- ui->latest_plane[BeamformerViewPlaneTag_Count] = frame_to_draw;
- ui->latest_plane[frame_plane] = frame_to_draw;
+ if (frame_to_draw) {
+ mem_copy(ui->latest_plane + BeamformerViewPlaneTag_Count, frame_to_draw, sizeof(*frame_to_draw));
+ mem_copy(ui->latest_plane + frame_plane, frame_to_draw, sizeof(*frame_to_draw));
+ ui->latest_plane_valid[BeamformerViewPlaneTag_Count] = 1;
+ ui->latest_plane_valid[frame_plane] = 1;
+ } else {
+ ui->latest_plane_valid[BeamformerViewPlaneTag_Count] = 0;
+ ui->latest_plane_valid[frame_plane] = 0;
+ }
asan_poison_region(ui->arena.beg, ui->arena.end - ui->arena.beg);
@@ -4151,7 +4285,7 @@ draw_ui(BeamformerCtx *ctx, BeamformerInput *input, BeamformerFrame *frame_to_dr
if (pb) {
ui->flush_params = 0;
- iv3 points = ctx->latest_frame->dim;
+ iv3 points = ctx->latest_frame->points;
i32 dimension = iv3_dimension(points);
// TODO(rnp): this is immediate mode code that should be in the ui building code
diff --git a/util.c b/util.c
@@ -607,14 +607,6 @@ s8_scan_backwards(s8 s, u8 byte)
}
function s8
-s8_trim_trailing(s8 s, u8 byte)
-{
- s8 result = s;
- while (result.len >= 1 && result.data[result.len - 1] == byte) result.len--;
- return result;
-}
-
-function s8
s8_cut_head(s8 s, iz cut)
{
s8 result = s;
diff --git a/util.h b/util.h
@@ -377,18 +377,20 @@ typedef OS_WRITE_NEW_FILE_FN(os_write_new_file_fn);
#define RENDERDOC_GET_API_FN(name) b32 name(u32 version, void **out_api)
typedef RENDERDOC_GET_API_FN(renderdoc_get_api_fn);
-#define RENDERDOC_START_FRAME_CAPTURE_FN(name) void name(iptr gl_context, iptr window_handle)
+#define RENDERDOC_START_FRAME_CAPTURE_FN(name) void name(void *instance_handle, iptr window_handle)
typedef RENDERDOC_START_FRAME_CAPTURE_FN(renderdoc_start_frame_capture_fn);
-#define RENDERDOC_END_FRAME_CAPTURE_FN(name) b32 name(iptr gl_context, iptr window_handle)
+#define RENDERDOC_END_FRAME_CAPTURE_FN(name) b32 name(void *instance_handle, iptr window_handle)
typedef RENDERDOC_END_FRAME_CAPTURE_FN(renderdoc_end_frame_capture_fn);
-typedef alignas(16) u8 RenderDocAPI[216];
-#define RENDERDOC_API_FN_ADDR(a, offset) (*(iptr *)((*a) + offset))
-#define RENDERDOC_START_FRAME_CAPTURE(a) (renderdoc_start_frame_capture_fn *)RENDERDOC_API_FN_ADDR(a, 152)
-#define RENDERDOC_END_FRAME_CAPTURE(a) (renderdoc_end_frame_capture_fn *) RENDERDOC_API_FN_ADDR(a, 168)
+#define RENDERDOC_SET_CAPTURE_PATH_TEMPLATE_FN(name) void name(const char *template)
+typedef RENDERDOC_SET_CAPTURE_PATH_TEMPLATE_FN(renderdoc_set_capture_path_template_fn);
-#define LABEL_GL_OBJECT(type, id, s) {s8 _s = (s); glObjectLabel(type, id, (i32)_s.len, (c8 *)_s.data);}
+/* NOTE: opaque stand-in for RENDERDOC_API_1_6_0: a table of 27 function pointers
+ * (216 bytes with 8 byte pointers). Entries are fetched by byte offset, i.e.
+ * offset = 8 * member index in the RenderDoc struct. */
+typedef alignas(16) u8 RenderDocAPI[216];
+#define RENDERDOC_API_FN_ADDR(a, offset) (*(iptr *)((*a) + offset))
+#define RENDERDOC_START_FRAME_CAPTURE(a) (renderdoc_start_frame_capture_fn *) RENDERDOC_API_FN_ADDR(a, 152)
+#define RENDERDOC_END_FRAME_CAPTURE(a) (renderdoc_end_frame_capture_fn *) RENDERDOC_API_FN_ADDR(a, 168)
+// NOTE: SetCaptureFilePathTemplate is member 11 of the table (offset 88).
+// offset 184 is SetCaptureFileComments(path, comments) which takes two arguments.
+#define RENDERDOC_SET_CAPTURE_PATH_TEMPLATE(a) (renderdoc_set_capture_path_template_fn *) RENDERDOC_API_FN_ADDR(a, 88)
#include "util.c"
#include "math.c"
diff --git a/util_gl.c b/util_gl.c
@@ -1,69 +0,0 @@
-/* See LICENSE for license details. */
-function u32
-compile_shader(Arena a, u32 type, s8 shader, s8 name)
-{
- u32 sid = glCreateShader(type);
- glShaderSource(sid, 1, (const char **)&shader.data, (int *)&shader.len);
- glCompileShader(sid);
-
- i32 res = 0;
- glGetShaderiv(sid, GL_COMPILE_STATUS, &res);
-
- if (res == GL_FALSE) {
- Stream buf = arena_stream(a);
- stream_append_s8s(&buf, s8("\n"), name, s8(": failed to compile\n"));
-
- i32 len = 0, out_len = 0;
- glGetShaderiv(sid, GL_INFO_LOG_LENGTH, &len);
- glGetShaderInfoLog(sid, len, &out_len, (char *)(buf.data + buf.widx));
- stream_commit(&buf, out_len);
- glDeleteShader(sid);
- os_console_log(buf.data, buf.widx);
-
- sid = 0;
- }
-
- return sid;
-}
-
-function u32
-link_program(Arena a, u32 *shader_ids, i32 shader_id_count)
-{
- i32 success = 0;
- u32 result = glCreateProgram();
- for (i32 i = 0; i < shader_id_count; i++)
- glAttachShader(result, shader_ids[i]);
- glLinkProgram(result);
- glGetProgramiv(result, GL_LINK_STATUS, &success);
- if (success == GL_FALSE) {
- i32 len = 0;
- Stream buf = arena_stream(a);
- stream_append_s8(&buf, s8("shader link error: "));
- glGetProgramInfoLog(result, buf.cap - buf.widx, &len, (c8 *)(buf.data + buf.widx));
- stream_reset(&buf, len);
- stream_append_byte(&buf, '\n');
- os_console_log(buf.data, buf.widx);
- glDeleteProgram(result);
- result = 0;
- }
- return result;
-}
-
-function u32
-load_shader(Arena arena, s8 *shader_texts, u32 *shader_types, i32 count, s8 name)
-{
- u32 result = 0;
- u32 *ids = push_array(&arena, u32, count);
- b32 valid = 1;
- for (i32 i = 0; i < count; i++) {
- ids[i] = compile_shader(arena, shader_types[i], shader_texts[i], name);
- valid &= ids[i] != 0;
- }
-
- if (valid) result = link_program(arena, ids, count);
- for (i32 i = 0; i < count; i++) glDeleteShader(ids[i]);
-
- if (result) glObjectLabel(GL_PROGRAM, result, (i32)name.len, (c8 *)name.data);
-
- return result;
-}
diff --git a/util_os.c b/util_os.c
@@ -24,3 +24,21 @@ release_lock(i32 *lock)
atomic_store_u32(lock, 0);
os_wake_all_waiters(lock);
}
+
+#if BEAMFORMER_RENDERDOC_HOOKS
+/* NOTE: fills in the renderdoc hooks of input from an already loaded renderdoc library.
+ * Each step is optional: when the library handle is invalid, RENDERDOC_GetAPI can not be
+ * found, or the requested API version is refused, input is left untouched. */
+function void
+load_renderdoc_functions(BeamformerInput *input, OSLibrary rdoc)
+{
+	renderdoc_get_api_fn *lookup = 0;
+	if (ValidHandle(rdoc))
+		lookup = os_lookup_symbol(rdoc, "RENDERDOC_GetAPI");
+
+	RenderDocAPI *table = 0;
+	// NOTE: 10600 is the RenderDoc version constant for API 1.6.0
+	if (lookup && lookup(10600, (void **)&table)) {
+		input->renderdoc_start_frame_capture            = RENDERDOC_START_FRAME_CAPTURE(table);
+		input->renderdoc_end_frame_capture              = RENDERDOC_END_FRAME_CAPTURE(table);
+		input->renderdoc_set_capture_file_path_template = RENDERDOC_SET_CAPTURE_PATH_TEMPLATE(table);
+	}
+}
+#endif
diff --git a/vulkan.c b/vulkan.c
@@ -1,3 +1,8 @@
+/* See LICENSE for license details. */
+// TODO(rnp)
+// [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays
+// [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float
+
#include "beamformer_internal.h"
#include "vulkan.h"
#include "external/glslang/glslang/Include/glslang_c_interface.h"
@@ -7,6 +12,9 @@
#define ValidVulkanHandle(h) ((h).value[0] != 0)
+#define MaxCommandBuffersInFlight BeamformerMaxRawDataFramesInFlight
+#define MaxCommandBufferTimestamps (64)
+
typedef enum {
VulkanQueueKind_Graphics,
VulkanQueueKind_Compute,
@@ -28,17 +36,44 @@ typedef struct {
void * host_pointer;
VulkanMemoryKind memory_kind;
+
+ // NOTE: only used when the buffer is backing a VulkanRenderModel.
+ VkIndexType index_type;
} VulkanBuffer;
typedef struct {
- VkPipeline pipeline;
- VkPipelineLayout layout;
-} VulkanShader;
+ VkDeviceMemory memory;
+ VkImage image;
+ VkImageView view;
+} VulkanImage;
+
+// NOTE: a pipeline together with the layout it was created with. stage_flags holds the
+// shader stage bits of the stages in the pipeline; the pipeline creation code also uses
+// them as the stageFlags of the push constant range.
+typedef struct {
+ VkPipeline pipeline;
+ VkPipelineLayout layout;
+ VkShaderStageFlags stage_flags;
+} VulkanPipeline;
+
+// NOTE: a semaphore handle paired with a 64 bit counter.
+// NOTE(review): value is presumably the timeline value tracked for the semaphore;
+// confirm against the submission code.
+typedef struct {
+ VkSemaphore semaphore;
+ u64 value;
+} VulkanSemaphore;
+
+// NOTE: entity side record of a command buffer. It does not store the VkCommandBuffer;
+// vk_command_buffer() resolves it through the queue selected by kind.
+typedef struct {
+ // NOTE: selects the queue in vulkan_context->queues
+ VulkanQueueKind kind;
+ // NOTE: slot in that queue's command_buffers array
+ u32 command_buffer_index;
+
+ // NOTE(rnp): since there may not be QueueKind_Count queues, when putting values into this
+ // array you must be careful to map through the queue_indices array in the vulkan_context.
+ u64 in_flight_wait_values[VulkanQueueKind_Count];
+} VulkanCommandBuffer;
typedef enum {
VulkanEntityKind_Buffer,
+ VulkanEntityKind_CommandBuffer,
+ VulkanEntityKind_Image,
+ VulkanEntityKind_Pipeline,
+ VulkanEntityKind_RenderModel,
VulkanEntityKind_Semaphore,
- VulkanEntityKind_Shader,
} VulkanEntityKind;
typedef struct VulkanEntity VulkanEntity;
@@ -46,9 +81,11 @@ struct VulkanEntity {
VulkanEntity * next;
VulkanEntityKind kind;
union {
- VulkanBuffer buffer;
- VkSemaphore semaphore;
- VulkanShader shader;
+ VulkanBuffer buffer;
+ VulkanCommandBuffer command_buffer;
+ VulkanImage image;
+ VulkanPipeline pipeline;
+ VulkanSemaphore semaphore;
} as;
};
@@ -59,10 +96,21 @@ typedef alignas(64) struct {
u16 queue_index;
VkQueue queue;
- u8 _pad[48];
+ VkQueryPool query_pool;
+ u32 query_pool_occupied[VulkanQueueKind_Count];
+
+ u32 next_command_buffer_index;
+ VkCommandPool command_pool;
+ VkCommandBuffer command_buffers[MaxCommandBuffersInFlight];
+ u64 command_buffer_submission_values[MaxCommandBuffersInFlight];
+
+ VulkanSemaphore timeline_semaphore;
+
+ VkPipelineStageFlags2 pipeline_stage_flags;
+
+ VulkanPipeline *bound_pipeline;
} VulkanQueue;
-static_assert(sizeof(VulkanQueue) == 64 && alignof(VulkanQueue) == 64,
- "VulkanQueue must be placed on its own cacheline");
+static_assert(alignof(VulkanQueue) == 64, "VulkanQueue must be placed on its own cacheline");
typedef struct {
Arena arena;
@@ -72,8 +120,9 @@ typedef struct {
VkDevice device;
VkPhysicalDevice physical_device;
- // NOTE(rnp): fallback for when a compute shader fails to compile
- VulkanShader default_compute_shader;
+ // NOTE(rnp): fallback for when a shader fails to compile
+ VulkanPipeline default_compute_pipeline;
+ VulkanPipeline default_graphics_pipeline;
GPUInfo gpu_info;
@@ -88,6 +137,14 @@ typedef struct {
} memory_info;
VulkanQueue * queues[VulkanQueueKind_Count];
+ // NOTE(rnp): there are a few places in the code where simply going through the queues map
+ // is not sufficient. those places need to know which of the unique queues
+ // is being referred to. that code uses this map instead.
+ u16 queue_indices[VulkanQueueKind_Count];
+ u16 unique_queues;
+
+ VkFormat swap_chain_image_format;
+ VkFormat depth_stencil_format;
VulkanEntity * entity_freelist;
Arena entity_arena;
@@ -110,8 +167,11 @@ read_only global const char *vk_required_instance_extensions[] = {
#endif
#define VK_REQUIRED_DEVICE_EXTENSIONS_LIST \
+ X("VK_KHR_16bit_storage") \
X("VK_KHR_external_memory") \
X("VK_KHR_external_semaphore") \
+ X("VK_KHR_storage_buffer_storage_class") \
+ X("VK_KHR_timeline_semaphore") \
VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST
#define X(str) str,
@@ -126,6 +186,42 @@ VK_REQUIRED_DEVICE_EXTENSIONS_LIST
};
#undef X
+// NOTE: X-macro lists of feature member names, one list per feature struct generation.
+// NOTE(review): presumably expanded against VkPhysicalDeviceFeatures and the
+// VkPhysicalDeviceVulkan11/12/13Features structs during device selection; the consumer
+// is not part of this section.
+// The last entry of every list ends in a line continuation, so the blank line following
+// each list is part of the macro and must be kept.
+#define VK_REQUIRED_PHYSICAL_FEATURES \
+ X(shaderInt16) \
+ X(shaderInt64) \
+
+#define VK_REQUIRED_PHYSICAL_11_FEATURES \
+ X(storageBuffer16BitAccess) \
+
+#define VK_REQUIRED_PHYSICAL_12_FEATURES \
+ X(bufferDeviceAddress) \
+ X(shaderFloat16) \
+ X(timelineSemaphore) \
+
+#define VK_REQUIRED_PHYSICAL_13_FEATURES \
+ X(dynamicRendering) \
+ X(synchronization2) \
+
+// NOTE: optional extensions that are only probed for, never required. Each entry is
+// (prefix, name); the list is expanded three times below so that the name strings, their
+// lengths, and the availability flags all share the same order.
+#define VK_DEBUG_EXTENSIONS \
+ X(VK_KHR, shader_non_semantic_info) \
+ X(VK_KHR, shader_relaxed_extended_instruction) \
+
+// NOTE: full extension names, e.g. "VK_KHR_shader_non_semantic_info"
+#define X(p, s, ...) #p "_" #s,
+read_only global const char *vk_debug_extensions[] = {VK_DEBUG_EXTENSIONS};
+#undef X
+// NOTE: string lengths of the names above without the terminating 0
+#define X(p, s, ...) sizeof(#p "_" #s) - 1,
+read_only global u32 vk_debug_extension_name_lengths[] = {VK_DEBUG_EXTENSIONS};
+#undef X
+
+// NOTE: one flag per debug extension, addressable by name (vulkan_debug.shader_non_semantic_info)
+// or by list position (vulkan_debug.E[i]).
+global union {
+ struct {
+ #define X(_, name, ...) b8 name;
+ VK_DEBUG_EXTENSIONS
+ #undef X
+ };
+ b8 E[countof(vk_debug_extensions)];
+} vulkan_debug;
+
global VulkanContext vulkan_context[1];
/* NOTE(rnp): the idea here is to set reasonable development constraints.
@@ -214,6 +310,15 @@ global glslang_resource_t glslc_resource_constraints[1] = {{
},
}};
+
+#if BEAMFORMER_RENDERDOC_HOOKS
+// NOTE: returns the first pointer sized word stored behind the instance handle. This is the
+// same expression as RenderDoc's RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE() helper.
+DEBUG_IMPORT void *
+vk_renderdoc_instance_handle(void)
+{
+	void **instance_words = (void **)vulkan_context->handle;
+	void  *result         = instance_words[0];
+	return result;
+}
+#endif
+
function VulkanEntity *
vk_entity_allocate(VulkanEntityKind kind)
{
@@ -246,6 +351,16 @@ vk_entity_data(VulkanHandle h, VulkanEntityKind kind)
return &e->as;
}
+// NOTE: resolves a command buffer entity handle to the VkCommandBuffer it refers to.
+// The entity only records a queue kind and a slot; the buffer itself is owned by the queue.
+function VkCommandBuffer
+vk_command_buffer(VulkanHandle h)
+{
+	VulkanCommandBuffer *entry = vk_entity_data(h, VulkanEntityKind_CommandBuffer);
+	return vulkan_context->queues[entry->kind]->command_buffers[entry->command_buffer_index];
+}
+}
+
#define glslang_log(a, ...) glslang_log_(a, arg_list(s8, __VA_ARGS__))
function void
glslang_log_(Arena arena, s8 *items, uz count)
@@ -253,8 +368,8 @@ glslang_log_(Arena arena, s8 *items, uz count)
Stream sb = arena_stream(arena);
stream_append_s8(&sb, glslang_info(""));
stream_append_s8s_(&sb, items, count);
- s8 log = s8_trim_trailing(stream_to_s8(&sb), '\n');
- os_console_log(log.data, log.len);
+ if (sb.data[sb.widx - 1] != '\n') stream_append_byte(&sb, '\n');
+ os_console_log(sb.data, sb.widx);
}
function s8
@@ -271,7 +386,7 @@ glsl_to_spirv(Arena *arena, u32 kind, s8 shader_text, s8 name)
.target_language = GLSLANG_TARGET_SPV,
.target_language_version = GLSLANG_TARGET_SPV_1_6,
.code = (c8 *)shader_text.data,
- .default_version = 100,
+ .default_version = 460,
.default_profile = GLSLANG_NO_PROFILE,
.force_default_version_and_profile = 0,
.forward_compatible = 0,
@@ -302,13 +417,13 @@ glsl_to_spirv(Arena *arena, u32 kind, s8 shader_text, s8 name)
glslang_program_add_shader(program, shader);
i32 messages = GLSLANG_MSG_DEBUG_INFO_BIT|GLSLANG_MSG_SPV_RULES_BIT|GLSLANG_MSG_VULKAN_RULES_BIT;
if (glslang_program_link(program, messages)) {
- glslang_spv_options_t options = {
- .validate = 1,
- .generate_debug_info = 1,
- .emit_nonsemantic_shader_debug_info = 1,
- .emit_nonsemantic_shader_debug_source = 1,
- //.disable_optimizer = 1,
- };
+ glslang_spv_options_t options = {.validate = 1,};
+
+ if (vulkan_debug.shader_non_semantic_info) {
+ options.generate_debug_info = 1;
+ options.emit_nonsemantic_shader_debug_info = 1;
+ options.emit_nonsemantic_shader_debug_source = 1;
+ }
glslang_program_add_source_text(program, kind, (c8 *)shader_text.data, shader_text.len);
glslang_program_SPIRV_generate_with_options(program, kind, &options);
@@ -342,7 +457,7 @@ vk_shader_kind_to_glslang_shader_kind(u32 kind)
function VkShaderModule
vk_compile_shader_module(Arena arena, u32 kind, s8 text, s8 name)
{
- VkShaderModule result = 0;
+ VkShaderModule result = {0};
s8 spirv = glsl_to_spirv(&arena, vk_shader_kind_to_glslang_shader_kind(kind), text, name);
VkShaderModuleCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@@ -350,19 +465,45 @@ vk_compile_shader_module(Arena arena, u32 kind, s8 text, s8 name)
.pCode = (u32 *)spirv.data,
};
if (spirv.len > 0) vkCreateShaderModule(vulkan_context->device, &create_info, 0, &result);
+
+ return result;
+}
+
+// NOTE: maps a VulkanShaderKind to the matching VK_SHADER_STAGE_* bit.
+// The table has one extra trailing entry (index VulkanShaderKind_Count) holding 0.
+// NOTE(review): assuming Clamp() bounds its first argument to [0, VulkanShaderKind_Count],
+// any out of range kind selects that trailing 0 entry.
+function VkShaderStageFlags
+vk_stage_flags_from_shader_kind(VulkanShaderKind kind)
+{
+ read_only local_persist VkShaderStageFlags map[VulkanShaderKind_Count + 1] = {
+ [VulkanShaderKind_Vertex] = VK_SHADER_STAGE_VERTEX_BIT,
+ [VulkanShaderKind_Mesh] = VK_SHADER_STAGE_MESH_BIT_EXT,
+ [VulkanShaderKind_Fragment] = VK_SHADER_STAGE_FRAGMENT_BIT,
+ [VulkanShaderKind_Compute] = VK_SHADER_STAGE_COMPUTE_BIT,
+ [VulkanShaderKind_Count] = 0,
+ };
+ VkShaderStageFlags result = map[Clamp((u32)kind, 0, VulkanShaderKind_Count)];
return result;
}
-function VulkanShader
-vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name)
+function VulkanPipeline
+vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_constants_size)
{
- VulkanShader result = {0};
+ VulkanPipeline result = {.stage_flags = VK_SHADER_STAGE_COMPUTE_BIT};
VkShaderModule module = vk_compile_shader_module(arena, VK_SHADER_STAGE_COMPUTE_BIT, text, name);
if (module) {
- VkPipelineLayoutCreateInfo pli = {.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
- vkCreatePipelineLayout(vulkan_context->device, &pli, 0, &result.layout);
+ VkPushConstantRange push_constant_range = {
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .offset = 0,
+ .size = push_constants_size,
+ };
+
+ VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pushConstantRangeCount = push_constants_size ? 1 : 0,
+ .pPushConstantRanges = push_constants_size ? &push_constant_range : 0,
+ };
+
+ vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_create_info, 0, &result.layout);
- VkComputePipelineCreateInfo pi = {
+ VkComputePipelineCreateInfo pipeline_create_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.layout = result.layout,
.stage = {
@@ -373,10 +514,376 @@ vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name)
},
};
- vkCreateComputePipelines(vulkan_context->device, 0, 1, &pi, 0, &result.pipeline);
+ vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline);
vkDestroyShaderModule(vulkan_context->device, module, 0);
}
+ if (result.pipeline == 0) result = vulkan_context->default_compute_pipeline;
+
+ return result;
+}
+
+/* NOTE: builds a graphics pipeline (dynamic rendering, no render pass object) from exactly
+ * two shader stages described by infos[0] and infos[1].
+ *
+ * arena:               scratch space for shader compilation (passed by value)
+ * infos/count:         shader stage descriptions; count must be 2
+ * push_constants_size: size of the single push constant range shared by both stages,
+ *                      0 for no push constants
+ *
+ * If either stage fails to compile, or no pipeline handle is produced, the context's
+ * default_graphics_pipeline is returned instead. */
+function VulkanPipeline
+vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size)
+{
+	assume(count == 2);
+
+	VulkanPipeline result = {0};
+	VkShaderModule modules[2];
+
+	modules[0] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[0].kind),
+	                                      infos[0].text, infos[0].name);
+	modules[1] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[1].kind),
+	                                      infos[1].text, infos[1].name);
+	if (modules[0] && modules[1]) {
+		result.stage_flags = vk_stage_flags_from_shader_kind(infos[0].kind)
+		                   | vk_stage_flags_from_shader_kind(infos[1].kind);
+
+		// NOTE: one range visible to both stages
+		VkPushConstantRange pcr = {
+			.stageFlags = result.stage_flags,
+			.offset     = 0,
+			.size       = push_constants_size,
+		};
+
+		VkPipelineLayoutCreateInfo pipeline_layout_info = {
+			.sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+			.pushConstantRangeCount = push_constants_size ? 1 : 0,
+			.pPushConstantRanges    = push_constants_size ? &pcr : 0,
+		};
+
+		vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_info, 0, &result.layout);
+
+		VkPipelineShaderStageCreateInfo shader_stage_create_infos[2] = {
+			{
+				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+				.stage  = vk_stage_flags_from_shader_kind(infos[0].kind),
+				.module = modules[0],
+				.pName  = "main",
+			},
+			{
+				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+				.stage  = vk_stage_flags_from_shader_kind(infos[1].kind),
+				.module = modules[1],
+				.pName  = "main",
+			},
+		};
+
+		// NOTE: no vertex bindings or attributes are declared in the pipeline
+		VkPipelineVertexInputStateCreateInfo vertex_input_info = {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+		};
+
+		VkPipelineInputAssemblyStateCreateInfo input_assembly_info = {
+			.sType    = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+		};
+
+		// NOTE: viewport and scissor are dynamic state; only the counts are fixed here
+		VkPipelineViewportStateCreateInfo viewport_info = {
+			.sType         = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+			.viewportCount = 1,
+			.scissorCount  = 1,
+		};
+
+		VkPipelineRasterizationStateCreateInfo rasterization_info = {
+			.sType       = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+			.polygonMode = VK_POLYGON_MODE_FILL,
+			.lineWidth   = 1.0f,
+			.cullMode    = VK_CULL_MODE_BACK_BIT,
+			.frontFace   = VK_FRONT_FACE_CLOCKWISE,
+		};
+
+		VkPipelineMultisampleStateCreateInfo multisampling_info = {
+			.sType                = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+			.rasterizationSamples = vulkan_context->gpu_info.max_msaa_samples,
+		};
+
+		// NOTE: the depth bounds test is left disabled. Enabling it is only valid when the
+		// depthBounds device feature is enabled, which is not one of the required features,
+		// and with bounds of [0, 1] it would not reject any fragment anyway.
+		VkPipelineDepthStencilStateCreateInfo depth_test_create_info = {
+			.sType                 = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+			.depthTestEnable       = 1,
+			.depthWriteEnable      = 1,
+			.depthCompareOp        = VK_COMPARE_OP_LESS,
+			.depthBoundsTestEnable = 0,
+			.stencilTestEnable     = 0,
+			.front                 = {0},
+			.back                  = {0},
+			.minDepthBounds        = 0.0f,
+			.maxDepthBounds        = 1.0f,
+		};
+
+		// NOTE: standard alpha blending for colour, source alpha replaces destination alpha
+		u32 colour_mask = VK_COLOR_COMPONENT_R_BIT|VK_COLOR_COMPONENT_G_BIT|VK_COLOR_COMPONENT_B_BIT|VK_COLOR_COMPONENT_A_BIT;
+		VkPipelineColorBlendAttachmentState blend_state = {
+			.colorWriteMask      = colour_mask,
+			.blendEnable         = 1,
+			.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
+			.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+			.colorBlendOp        = VK_BLEND_OP_ADD,
+			.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
+			.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+			.alphaBlendOp        = VK_BLEND_OP_ADD,
+		};
+
+		VkPipelineColorBlendStateCreateInfo colour_blend_state_create = {
+			.sType           = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+			.logicOpEnable   = 0,
+			.logicOp         = VK_LOGIC_OP_COPY,
+			.attachmentCount = 1,
+			.pAttachments    = &blend_state,
+		};
+
+		VkDynamicState dynamic_states[] = {
+			VK_DYNAMIC_STATE_VIEWPORT,
+			VK_DYNAMIC_STATE_SCISSOR,
+		};
+
+		VkPipelineDynamicStateCreateInfo dynamic_state_info = {
+			.sType             = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+			.dynamicStateCount = countof(dynamic_states),
+			.pDynamicStates    = dynamic_states,
+		};
+
+		// NOTE: attachment formats are declared here because dynamic rendering has no
+		// render pass to take them from
+		//VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_SRGB;
+		VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_UNORM;
+		VkPipelineRenderingCreateInfo rendering_create_info = {
+			.sType                   = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
+			.colorAttachmentCount    = 1,
+			.pColorAttachmentFormats = &colour_attachment_format,
+			.depthAttachmentFormat   = vulkan_context->depth_stencil_format,
+			.stencilAttachmentFormat = vulkan_context->depth_stencil_format,
+		};
+
+		VkGraphicsPipelineCreateInfo pci = {
+			.sType               = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+			.pNext               = &rendering_create_info,
+			.stageCount          = countof(shader_stage_create_infos),
+			.pStages             = shader_stage_create_infos,
+			.pVertexInputState   = &vertex_input_info,
+			.pInputAssemblyState = &input_assembly_info,
+			.pViewportState      = &viewport_info,
+			.pRasterizationState = &rasterization_info,
+			.pMultisampleState   = &multisampling_info,
+			.pDepthStencilState  = &depth_test_create_info,
+			.pColorBlendState    = &colour_blend_state_create,
+			.pDynamicState       = &dynamic_state_info,
+			.layout              = result.layout,
+		};
+
+		vkCreateGraphicsPipelines(vulkan_context->device, 0, 1, &pci,0, &result.pipeline);
+	}
+
+	// NOTE: the modules are only needed while the pipeline is being created
+	if (modules[0]) vkDestroyShaderModule(vulkan_context->device, modules[0], 0);
+	if (modules[1]) vkDestroyShaderModule(vulkan_context->device, modules[1], 0);
+
+	if (result.pipeline == 0) result = vulkan_context->default_graphics_pipeline;
+
+	return result;
+}
+
+/* NOTE: creates a semaphore.
+ *
+ * export == 0: a timeline semaphore is created.
+ * export != 0: no semaphore type is chained, instead the semaphore is created exportable as
+ *              an opaque win32 handle (OS_WINDOWS) or opaque fd (otherwise) and the exported
+ *              handle is written to export->value[0].
+ *
+ * The returned value field starts at 0. */
+function VulkanSemaphore
+vk_make_semaphore(OSHandle *export)
+{
+	VulkanContext *vk = vulkan_context;
+
+	VkSemaphoreCreateInfo sci = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
+	VkExportSemaphoreCreateInfo esci = {
+		.sType       = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
+		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
+		                          : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+	};
+	VkSemaphoreTypeCreateInfo stc = {
+		.sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+		.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+	};
+
+	if (export) sci.pNext = &esci;
+	else        sci.pNext = &stc;
+
+	VulkanSemaphore result = {0};
+
+	vkCreateSemaphore(vk->device, &sci, 0, &result.semaphore);
+
+	if (export) {
+		// NOTE: the handles start out invalid so that a failed query does not publish
+		// an indeterminate value to the caller
+		if (OS_WINDOWS) {
+			VkSemaphoreGetWin32HandleInfoKHR ghi = {
+				.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
+				.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+				.semaphore  = result.semaphore,
+			};
+			void *handle = 0;
+			vkGetSemaphoreWin32HandleKHR(vk->device, &ghi, &handle);
+			export->value[0] = (u64)handle;
+		} else {
+			VkSemaphoreGetFdInfoKHR ghi = {
+				.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+				.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+				.semaphore  = result.semaphore,
+			};
+			i32 handle = -1;
+			vkGetSemaphoreFdKHR(vk->device, &ghi, &handle);
+			export->value[0] = (u64)handle;
+		}
+	}
+
+	return result;
+}
+
+// NOTE: frees a device allocation and removes its size from the heap usage counter.
+// size is unsigned so -size wraps around; adding it subtracts size modulo 2^64.
+function void
+vk_release_memory(VkDeviceMemory memory, u64 size)
+{
+	vkFreeMemory(vulkan_context->device, memory, 0);
+	atomic_add_u64(&vulkan_context->gpu_info.gpu_heap_used, -size);
+}
+
+/* NOTE: allocates size bytes of device memory from the memory type selected by kind.
+ *
+ * memory:                  receives the allocation
+ * flags:                   forwarded through VkMemoryAllocateFlagsInfo
+ * dedicated_allocate_info: optional, appended to the end of the pNext chain
+ * export:                  optional; when set the allocation is created exportable and the
+ *                          opaque win32 handle (OS_WINDOWS) or fd is written to export->value[0]
+ *
+ * Returns non-zero on success. On success size is added to the heap usage counter. */
+function b32
+vk_allocate_memory(VkDeviceMemory *memory, u64 size, VulkanMemoryKind kind, VkMemoryAllocateFlags flags,
+                   VkMemoryDedicatedAllocateInfo *dedicated_allocate_info, OSHandle *export)
+{
+	VulkanContext *vk = vulkan_context;
+
+	VkExportMemoryAllocateInfo export_info = {
+		.sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
+		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+		                          : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+	};
+
+	// NOTE: chain without export: allocate -> flags -> dedicated
+	VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
+		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
+		.flags = flags,
+		.pNext = dedicated_allocate_info,
+	};
+
+	// NOTE: chain with export: allocate -> flags -> export -> dedicated
+	if (export) {
+		export_info.pNext                = dedicated_allocate_info;
+		memory_allocate_flags_info.pNext = &export_info;
+	}
+
+	VkMemoryAllocateInfo memory_allocate_info = {
+		.sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+		.allocationSize  = size,
+		.memoryTypeIndex = vk->memory_info.memory_type_indices[kind],
+		.pNext           = &memory_allocate_flags_info,
+	};
+
+	b32 result = vkAllocateMemory(vk->device, &memory_allocate_info, 0, memory) == VK_SUCCESS;
+	if (result) {
+		atomic_add_u64(&vk->gpu_info.gpu_heap_used, memory_allocate_info.allocationSize);
+
+		if (export) {
+			// NOTE: the handles start out invalid so that a failed query does not
+			// publish an indeterminate value to the caller
+			if (OS_WINDOWS) {
+				VkMemoryGetWin32HandleInfoKHR handle_info = {
+					.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
+					.memory     = *memory,
+					.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
+				};
+				void *handle = 0;
+				vkGetMemoryWin32HandleKHR(vk->device, &handle_info, &handle);
+				export->value[0] = (u64)handle;
+			} else {
+				VkMemoryGetFdInfoKHR fd_info = {
+					.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
+					.memory     = *memory,
+					.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+				};
+				i32 fd = -1;
+				vkGetMemoryFdKHR(vk->device, &fd_info, &fd);
+				export->value[0] = (u64)fd;
+			}
+		}
+	}
+	return result;
+}
+
+// NOTE: size in bytes of one index of the given type; 0 for any type that is not handled
+function u32
+vk_index_size(VkIndexType type)
+{
+	switch (type) {
+	case VK_INDEX_TYPE_UINT16: return 2;
+	case VK_INDEX_TYPE_UINT32: return 4;
+	InvalidDefaultCase;
+	}
+	return 0;
+}
+
+// NOTE: parameters for vk_buffer_allocate_common()
+typedef struct {
+ // NOTE: output; receives the final size and the device address on success
+ GPUBuffer *gpu_buffer;
+ // NOTE: requested size in bytes; may be clamped to the maximum allocation size
+ u64 size;
+ // NOTE: TransferSource/TransferDestination add transfer usage, HostReadWrite
+ // selects host visible (BAR) memory and maps it
+ VulkanUsageFlags flags;
+ // NOTE: forwarded to the buffer create info; more than one family selects concurrent sharing
+ u32 queue_family_count;
+ u32 queue_family_indices[VulkanTimeline_Count];
+ // NOTE: anything other than VK_INDEX_TYPE_NONE_KHR adds index buffer usage
+ VkIndexType index_type;
+ // NOTE(review): not read by vk_buffer_allocate_common(); presumably a debug name
+ // applied by the caller
+ s8 label;
+} VulkanBufferAllocateInfo;
+
+/* NOTE: creates the VkBuffer for vb, allocates dedicated memory for it, binds the two, and
+ * reports the resulting size and device address through ai->gpu_buffer.
+ *
+ * Returns non-zero when the memory allocation succeeded. On failure ai->gpu_buffer is not
+ * written. The results of vkCreateBuffer, vkMapMemory, and vkBindBufferMemory are not checked. */
+function b32
+vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai)
+{
+ VulkanContext *vk = vulkan_context;
+
+ // TODO(rnp): this probably should be handled, it's usually 4GB. likely
+ // need to chain multiple allocations and handle it in shader code
+ // NOTE: the request is silently clamped to the largest allocation size, with the low
+ // bits below non_coherent_atom_size masked off (assumes that size is a power of two)
+ u64 size = Min(ai->size, vk->memory_info.max_allocation_size & ~(vk->memory_info.non_coherent_atom_size - 1));
+
+ // NOTE: every buffer is addressable from shaders through its device address
+ VkBufferCreateInfo buffer_create_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ .size = size,
+ .sharingMode = ai->queue_family_count > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = ai->queue_family_count,
+ .pQueueFamilyIndices = ai->queue_family_indices,
+ };
+
+ if (ai->flags & VulkanUsageFlag_TransferSource)
+ buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+ if (ai->flags & VulkanUsageFlag_TransferDestination)
+ buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+ if (ai->index_type != VK_INDEX_TYPE_NONE_KHR)
+ buffer_create_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
+
+ vkCreateBuffer(vk->device, &buffer_create_info, 0, &vb->buffer);
+
+ VkMemoryRequirements memory_requirements;
+ vkGetBufferMemoryRequirements(vk->device, vb->buffer, &memory_requirements);
+
+ // NOTE: from here on size is the size the driver requires, not the requested size
+ assert((u64)size <= memory_requirements.size);
+ size = memory_requirements.size;
+
+ VkMemoryDedicatedAllocateInfo dedicated_allocate_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .buffer = vb->buffer,
+ };
+
+ /* NOTE(rnp): to create a CPU writable buffer:
+ * 1. try to allocate and map the entire buffer
+ * - this may fail if the buffer is bigger than the BAR size
+ * (unknowable from vulkan), or the memory space has become
+ * too fragmented (unlikely)
+ * 2. if allocation or mapping fails we must chain a host buffer
+ * for staging. If this happens in practice we should add
+ * the ability to import an existing external allocation
+ */
+ b32 host_read_write = (ai->flags & VulkanUsageFlag_HostReadWrite) != 0;
+ vb->memory_kind = host_read_write ? VulkanMemoryKind_BAR : VulkanMemoryKind_Device;
+
+ b32 result = 0;
+ // TODO(rnp): this may fail if the allocation is too big for the BAR size
+ // it needs to be handled properly
+ // NOTE(review): when the allocation fails vb->buffer created above is not destroyed
+ // in this function; confirm that the caller releases it
+ if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, 0)) {
+ result = 1;
+ ai->gpu_buffer->size = size;
+
+ vb->index_type = ai->index_type;
+ // NOTE: host writable buffers stay mapped; host_pointer covers the whole allocation
+ if (host_read_write)
+ vkMapMemory(vk->device, vb->memory, 0, size, 0, &vb->host_pointer);
+
+ vkBindBufferMemory(vk->device, vb->buffer, vb->memory, 0);
+ VkBufferDeviceAddressInfo buffer_device_address_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+ .buffer = vb->buffer,
+ };
+ ai->gpu_buffer->gpu_pointer = vkGetBufferDeviceAddress(vk->device, &buffer_device_address_info);
+ }
return result;
}
@@ -398,7 +905,9 @@ vk_load_instance(void)
/* TODO(rnp): debug only, and check for these before enabling */
const char *validation_layers[] = {
+ #if BEAMFORMER_DEBUG
"VK_LAYER_KHRONOS_validation",
+ #endif
};
VkInstanceCreateInfo instance_create_info = {
@@ -410,6 +919,23 @@ vk_load_instance(void)
.enabledLayerCount = countof(validation_layers),
};
+ #if 0 && BEAMFORMER_DEBUG
+ VkValidationFeatureEnableEXT validation_feature_enables[] = {
+ VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
+ VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
+ VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
+ VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
+ };
+
+ VkValidationFeaturesEXT validation_features = {
+ .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
+ .enabledValidationFeatureCount = countof(validation_feature_enables),
+ .pEnabledValidationFeatures = validation_feature_enables,
+ };
+
+ instance_create_info.pNext = &validation_features;
+ #endif
+
vkCreateInstance(&instance_create_info, 0, &vulkan_context->handle);
#define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(vulkan_context->handle, #name);
@@ -451,7 +977,7 @@ vk_load_physical_device(Arena arena, Stream *err)
VkPhysicalDeviceProperties2 dp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
VkPhysicalDeviceVulkan11Properties v11p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES};
- dp.pNext= &v11p;
+ dp.pNext = &v11p;
vkGetPhysicalDeviceProperties2(vk->physical_device, &dp);
@@ -498,6 +1024,89 @@ vk_load_physical_device(Arena arena, Stream *err)
}
fatal(stream_to_s8(err));
}
+
+ #if BEAMFORMER_DEBUG
+ for (u32 index = 0; index < extension_count; index++) {
+ for EachElement(vk_debug_extensions, it) {
+ s8 test = {
+ .data = (u8 *)vk_debug_extensions[it],
+ .len = vk_debug_extension_name_lengths[it],
+ };
+ vulkan_debug.E[it] |= s8_equal(test, ext_str8s[index]);
+ }
+ }
+ #endif
+ }
+
+ {
+ VkPhysicalDeviceFeatures2 df = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
+ VkPhysicalDeviceVulkan11Features v11f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES};
+ VkPhysicalDeviceVulkan12Features v12f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
+ VkPhysicalDeviceVulkan13Features v13f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES};
+ df.pNext = &v11f;
+ v11f.pNext = &v12f;
+ v12f.pNext = &v13f;
+ vkGetPhysicalDeviceFeatures2(vk->physical_device, &df);
+
+ {
+ b32 all_supported = 1;
+ #define X(name, ...) all_supported &= df.features.name;
+ VK_REQUIRED_PHYSICAL_FEATURES
+ #undef X
+
+ if (!all_supported) {
+ stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
+ #define X(name, ...) if (!df.features.name) stream_append_s8(err, s8(" " #name "\n"));
+ VK_REQUIRED_PHYSICAL_FEATURES
+ #undef X
+ fatal(stream_to_s8(err));
+ }
+ }
+
+ {
+ b32 all_supported = 1;
+ #define X(name, ...) all_supported &= v11f.name;
+ VK_REQUIRED_PHYSICAL_11_FEATURES
+ #undef X
+
+ if (!all_supported) {
+ stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
+ #define X(name, ...) if (!v11f.name) stream_append_s8(err, s8(" " #name "\n"));
+ VK_REQUIRED_PHYSICAL_11_FEATURES
+ #undef X
+ fatal(stream_to_s8(err));
+ }
+ }
+
+ {
+ b32 all_supported = 1;
+ #define X(name, ...) all_supported &= v12f.name;
+ VK_REQUIRED_PHYSICAL_12_FEATURES
+ #undef X
+
+ if (!all_supported) {
+ stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
+ #define X(name, ...) if (!v12f.name) stream_append_s8(err, s8(" " #name "\n"));
+ VK_REQUIRED_PHYSICAL_12_FEATURES
+ #undef X
+ fatal(stream_to_s8(err));
+ }
+ }
+
+ {
+ b32 all_supported = 1;
+ #define X(name, ...) all_supported &= v13f.name;
+ VK_REQUIRED_PHYSICAL_13_FEATURES
+ #undef X
+
+ if (!all_supported) {
+ stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
+ #define X(name, ...) if (!v13f.name) stream_append_s8(err, s8(" " #name "\n"));
+ VK_REQUIRED_PHYSICAL_13_FEATURES
+ #undef X
+ fatal(stream_to_s8(err));
+ }
+ }
}
VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2};
@@ -686,27 +1295,30 @@ vk_load_queues(Arena *memory, Stream *err)
assigned_subindices[VulkanQueueKind_Transfer] += 1;
}
- u32 unique_queues = 0;
for EachElement(assigned_subindices, it)
- unique_queues += assigned_subindices[it];
+ vk->unique_queues += assigned_subindices[it];
end_temp_arena(arena_save);
/////////////////////////////////////////////
// NOTE(rnp): fill in info and create device
-
- VulkanQueue *qs = push_array(memory, VulkanQueue, unique_queues);
for EachElement(vk->queues, it) {
u32 index = queue_subindices[it];
for (i32 i = 0; i < queue_indices[it]; i++)
index += assigned_subindices[i];
-
- vk->queues[it] = qs + index;
- qs[index].queue_family = queue_indices[it];
- qs[index].queue_index = queue_subindices[it];
+ vk->queue_indices[it] = index;
}
- VkDeviceQueueCreateInfo queue_create_infos[VulkanQueueKind_Count];
+ for EachElement(vk->queues, it) {
+ if (vk->queues[vk->queue_indices[it]] == 0) {
+ vk->queues[vk->queue_indices[it]] = push_struct(memory, VulkanQueue);
+ vk->queues[vk->queue_indices[it]]->queue_family = queue_indices[it];
+ vk->queues[vk->queue_indices[it]]->queue_index = queue_subindices[it];
+ }
+ vk->queues[it] = vk->queues[vk->queue_indices[it]];
+ }
+
+ VkDeviceQueueCreateInfo queue_create_infos[VulkanQueueKind_Count];
f32 queue_priorities[VulkanQueueKind_Count][VulkanQueueKind_Count];
for (u32 i = 0; i < VulkanQueueKind_Count; i++)
@@ -716,7 +1328,7 @@ vk_load_queues(Arena *memory, Stream *err)
u32 queue_create_index = 0;
b32 queue_info_filled[VulkanQueueKind_Count] = {0};
- for (u32 q = 0; q < unique_queues; q++) {
+ for (u32 q = 0; q < vk->unique_queues; q++) {
u32 base_q = queue_indices[q];
if (!queue_info_filled[base_q]) {
queue_create_infos[queue_create_index++] = (VkDeviceQueueCreateInfo){
@@ -729,14 +1341,63 @@ vk_load_queues(Arena *memory, Stream *err)
queue_info_filled[base_q] = 1;
}
- VkPhysicalDeviceFeatures device_features = {0};
+ VkPhysicalDeviceVulkan13Features v13f = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
+ #define X(name, ...) .name = 1,
+ VK_REQUIRED_PHYSICAL_13_FEATURES
+ #undef X
+ };
+
+ VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR pdsre = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR,
+ .shaderRelaxedExtendedInstruction = 1,
+ };
+ if (vulkan_debug.shader_relaxed_extended_instruction) v13f.pNext = &pdsre;
+
+ VkPhysicalDeviceVulkan12Features v12f = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
+ .pNext = &v13f,
+ #define X(name, ...) .name = 1,
+ VK_REQUIRED_PHYSICAL_12_FEATURES
+ #undef X
+ };
+
+ VkPhysicalDeviceVulkan11Features v11f = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
+ .pNext = &v12f,
+ #define X(name, ...) .name = 1,
+ VK_REQUIRED_PHYSICAL_11_FEATURES
+ #undef X
+ };
+ VkPhysicalDeviceFeatures2 device_features = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
+ .pNext = &v11f,
+ .features = {
+ #define X(name, ...) .name = 1,
+ VK_REQUIRED_PHYSICAL_FEATURES
+ #undef X
+ },
+ };
+
+ Arena arena = *memory;
+ u32 enabled_count = countof(vk_required_device_extensions) + countof(vk_debug_extensions);
+ const char **enabled_extensions = push_array(&arena, const char *, enabled_count);
+
+ enabled_count = 0;
+ for EachElement(vk_required_device_extensions, it)
+ enabled_extensions[enabled_count++] = vk_required_device_extensions[it];
+
+ for EachElement(vk_debug_extensions, it)
+ if (vulkan_debug.E[it])
+ enabled_extensions[enabled_count++] = vk_debug_extensions[it];
+
VkDeviceCreateInfo device_create_info = {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .pNext = &device_features,
.pQueueCreateInfos = queue_create_infos,
.queueCreateInfoCount = queue_create_index,
- .pEnabledFeatures = &device_features,
- .ppEnabledExtensionNames = vk_required_device_extensions,
- .enabledExtensionCount = countof(vk_required_device_extensions),
+ .ppEnabledExtensionNames = enabled_extensions,
+ .enabledExtensionCount = enabled_count,
};
vkCreateDevice(vk->physical_device, &device_create_info, 0, &vk->device);
@@ -744,9 +1405,69 @@ vk_load_queues(Arena *memory, Stream *err)
VkDeviceProcedureList
#undef X
- for (u32 q = 0; q < unique_queues; q++) {
+ for (u32 q = 0; q < vk->unique_queues; q++) {
VulkanQueue *qp = vk->queues[q];
vkGetDeviceQueue(vk->device, qp->queue_family, qp->queue_index, &qp->queue);
+
+ VkCommandPoolCreateInfo command_pool_create_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = qp->queue_family,
+ };
+ vkCreateCommandPool(vk->device, &command_pool_create_info, 0, &qp->command_pool);
+
+ VkCommandBufferAllocateInfo command_buffer_allocate_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = qp->command_pool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = countof(qp->command_buffers),
+ };
+ vkAllocateCommandBuffers(vk->device, &command_buffer_allocate_info, qp->command_buffers);
+
+ qp->timeline_semaphore = vk_make_semaphore(0);
+
+ VkQueryPoolCreateInfo query_pool_create_info = {
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .queryType = VK_QUERY_TYPE_TIMESTAMP,
+ .queryCount = countof(qp->command_buffers) * MaxCommandBufferTimestamps,
+ };
+ vkCreateQueryPool(vk->device, &query_pool_create_info, 0, &qp->query_pool);
+ }
+
+ vk->queues[VulkanQueueKind_Graphics]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
+ vk->queues[VulkanQueueKind_Compute]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
+}
+
+// Selects the formats used by the graphics side of the context. Currently only
+// vk->depth_stencil_format is chosen; the swap chain section is an empty placeholder.
+function void
+vk_load_graphics(void)
+{
+	VulkanContext *vk = vulkan_context;
+
+	// NOTE: swap chain image format (nothing is selected here yet)
+	{
+	}
+
+	// NOTE: depth/stencil format: the first candidate usable as an optimally tiled
+	// depth/stencil attachment wins; VK_FORMAT_UNDEFINED remains if none qualifies
+	{
+		VkFormat candidates[] = {
+			VK_FORMAT_D32_SFLOAT_S8_UINT,
+			VK_FORMAT_D24_UNORM_S8_UINT,
+			VK_FORMAT_D16_UNORM_S8_UINT,
+		};
+
+		vk->depth_stencil_format = VK_FORMAT_UNDEFINED;
+		for EachElement(candidates, it) {
+			VkFormat format = candidates[it];
+
+			VkFormatProperties3 properties3 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3};
+			VkFormatProperties2 properties2 = {
+				.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+				.pNext = &properties3,
+			};
+			vkGetPhysicalDeviceFormatProperties2(vk->physical_device, format, &properties2);
+
+			b32 usable = (properties3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT) != 0;
+			if (usable) {
+				vk->depth_stencil_format = format;
+				break;
+			}
+		}
}
}
@@ -772,17 +1493,43 @@ vk_load(OSLibrary vulkan_library_handle, Arena *memory, Stream *err)
vk_load_instance();
vk_load_physical_device(vulkan_context->arena, err);
vk_load_queues(&vulkan_context->arena, err);
+ vk_load_graphics();
- // TODO: setup compute pipeline
read_only local_persist s8 default_compute_shader = s8(""
"#version 430 core\n"
+ "layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
"void main() {}\n"
"\n");
+ vk->default_compute_pipeline = vk_compute_pipeline_from_shader_text(vk->arena, default_compute_shader,
+ s8("error_compute_shader"), 256);
+
+ read_only local_persist s8 default_vertex_shader = s8(""
+ "#version 430 core\n"
+ "layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
+ "void main() {gl_Position = vec4(0);}\n"
+ "\n");
+ read_only local_persist s8 default_fragment_shader = s8(""
+ "#version 430 core\n"
+ "layout(location = 0) out vec4 out_colour;"
+ "layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
+ "void main() {out_colour = vec4(0.5f, 0.0f, 0.5f, 1.0f);}\n"
+ "\n");
- vk->default_compute_shader = vk_compute_pipeline_from_shader_text(vk->arena, default_compute_shader,
- s8("error_compute_shader"));
+ VulkanPipelineCreateInfo pipeline_create_infos[2] = {
+ {
+ .kind = VulkanShaderKind_Vertex,
+ .text = default_vertex_shader,
+ .name = s8("error_vertex_shader"),
+ },
+ {
+ .kind = VulkanShaderKind_Fragment,
+ .text = default_fragment_shader,
+ .name = s8("error_fragment_shader"),
+ },
+ };
+ vk->default_graphics_pipeline = vk_graphics_pipeline_from_infos(vk->arena, pipeline_create_infos, 2, 256);
- // TODO: setup render pipeline
+ // TODO: setup ui render pipeline
if (err->widx > 0) {
os_console_log(err->data, err->widx);
@@ -796,110 +1543,63 @@ vk_gpu_info(void)
return &vulkan_context->gpu_info;
}
-DEBUG_IMPORT void
-vk_buffer_release(GPUBuffer *b)
+function void // destroys the Vulkan objects behind `vb`, releases its memory and then the entity that embeds it
+vk_vulkan_buffer_release(VulkanBuffer *vb, u64 size)
{
VulkanContext *vk = vulkan_context;
- if ValidVulkanHandle(b->buffer) {
- VulkanBuffer *vb = vk_entity_data(b->buffer, VulkanEntityKind_Buffer);
- // TODO(rnp): this happens implicitly, probably just delete this if block
- if (vb->host_pointer)
- vkUnmapMemory(vk->device, vb->memory);
+ VulkanEntity *e = (VulkanEntity *)((u8 *)vb - offsetof(VulkanEntity, as)); // assumes `vb` sits at the start of a VulkanEntity's `as` member
+ // TODO(rnp): this happens implicitly, probably just delete this if block
+ if (vb->host_pointer)
+ vkUnmapMemory(vk->device, vb->memory);
- if (vb->buffer)
- vkDestroyBuffer(vk->device, vb->buffer, 0);
+ if (vb->buffer)
+ vkDestroyBuffer(vk->device, vb->buffer, 0);
- vkFreeMemory(vk->device, vb->memory, 0);
- if (vb->memory_kind != VulkanMemoryKind_Host)
- vk->gpu_info.gpu_heap_used -= b->size;
+ vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? size : 0); // host-kind memory passes a size of 0 instead of `size`
+ vk_entity_release(e);
+}
- vk_entity_release((VulkanEntity *)b->buffer.value[0]);
- }
+// Releases the Vulkan buffer referenced by `b` (when its handle is valid) and
+// always leaves `b` zeroed.
+DEBUG_IMPORT void
+vk_buffer_release(GPUBuffer *b)
+{
+	if ValidVulkanHandle(b->buffer) {
+		VulkanBuffer *backing = vk_entity_data(b->buffer, VulkanEntityKind_Buffer);
+		vk_vulkan_buffer_release(backing, b->size);
+	}
zero_struct(b);
}
DEBUG_IMPORT void
-vk_buffer_allocate(GPUBuffer *b, iz size, GPUBufferCreateFlags flags, OSHandle *export, s8 label)
+vk_buffer_allocate(GPUBuffer *b, GPUBufferAllocateInfo *info) // releases whatever `b` holds, then allocates the buffer described by `info`
{
- vk_buffer_release(b);
VulkanContext *vk = vulkan_context;
- VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Buffer);
- VulkanBuffer *vb = &e->as.buffer;
-
- b->buffer.value[0] = (u64)e;
-
- assert(size > 0);
-
- // TODO(rnp): this probably should be handled, its usually 4GB. likely
- // need to chain multiple allocations and handle it in shader code
- assert((u64)size <= vk->memory_info.max_allocation_size);
- size = (iz)Min((u64)size, vk->memory_info.max_allocation_size);
-
- u64 remaining = vk->gpu_info.gpu_heap_size - vk->gpu_info.gpu_heap_used;
- VkExportMemoryAllocateInfo ei = {
- .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
- .handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
- : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
- };
+ vk_buffer_release(b); // also zeroes `b`, so a failed allocation below leaves it empty
- VkMemoryAllocateFlagsInfo mafi = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
- //.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
- .pNext = (export) ? & ei: 0,
- };
+ assert(info->size > 0);
- /* NOTE(rnp): to create a CPU writable buffer:
- * 1. try to allocate and map the entire buffer
- * - this may fail if the buffer is bigger than the BAR size
- * (unknowable from vulkan), or the memory space has become
- * too fragmented (unlikely)
- * 2. if allocation or mapping fails we must chain a host buffer
- * for staging. If this happens in practice we should add
- * the ability to import an existing external allocation
- */
- vb->memory_kind = flags & GPUBufferCreateFlags_HostWritable ? VulkanMemoryKind_BAR : VulkanMemoryKind_Device;
- VkMemoryAllocateInfo mai = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .allocationSize = Min((u64)size, remaining),
- .memoryTypeIndex = vk->memory_info.memory_type_indices[vb->memory_kind],
- .pNext = &mafi,
+ VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Buffer);
+ VulkanBufferAllocateInfo vulkan_buffer_allocate_info = {
+ .gpu_buffer = b,
+ .size = (u64)info->size,
+ .flags = info->flags,
+ .index_type = VK_INDEX_TYPE_NONE_KHR, // plain buffers carry no index data
+ .label = info->label,
};
- // TODO(rnp): this may fail if the allocation is too big for the BAR size
- // it needs to handled properly
- if (vkAllocateMemory(vk->device, &mai, 0, &vb->memory) == VK_SUCCESS) {
- vk->gpu_info.gpu_heap_used += mai.allocationSize;
- b->size = mai.allocationSize;
-
- if (flags & GPUBufferCreateFlags_HostWritable)
- vkMapMemory(vk->device, vb->memory, 0, b->size, 0, &vb->host_pointer);
+ u32 queue_index_hit_count[VulkanQueueKind_Count] = {0}; // per-slot tally, keyed by the value vk->queue_indices[] yields for each requested timeline
+ for (u32 it = 0; it < info->timeline_count; it++)
+ queue_index_hit_count[vk->queue_indices[info->timelines_used[it]]]++; // NOTE(review): timelines_used[] values are not range checked here - confirm callers validate them
- if (export) {
- if (OS_WINDOWS) {
- VkMemoryGetWin32HandleInfoKHR handle_info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
- .memory = vb->memory,
- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
- };
- void *handle;
- vkGetMemoryWin32HandleKHR(vk->device, &handle_info, &handle);
- export->value[0] = (u64)handle;
- } else {
- VkMemoryGetFdInfoKHR fd_info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
- .memory = vb->memory,
- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
- };
- i32 fd;
- vkGetMemoryFdKHR(vk->device, &fd_info, &fd);
- export->value[0] = (u64)fd;
- }
+ for EachElement(queue_index_hit_count, it) {
+ if (queue_index_hit_count[it] > 0) {
+ u32 index = vulkan_buffer_allocate_info.queue_family_count++;
+ vulkan_buffer_allocate_info.queue_family_indices[index] = vk->queues[vk->queue_indices[it]]->queue_family; // NOTE(review): `it` was produced by vk->queue_indices[] above and is mapped through it again here; distinct slots could also share a family - confirm both are intended
}
}
- if ((flags & GPUBufferCreateFlags_MemoryOnly) == 0) {
- // TODO(rnp): create and bind memory to buffer
+ if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) {
+ b->buffer.value[0] = (u64)e; // the handle is only published once allocation succeeded
+ } else {
+ vk_entity_release(e);
}
}
@@ -925,108 +1625,854 @@ vk_round_up_to_sync_size(u64 size, u64 min)
return result;
}
-DEBUG_IMPORT void
-vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal)
+// Copies `size` bytes between two buffers through their host pointers.
+// Implemented directions:
+//   Host -> BAR: copy, then flush the written range when the memory is not host coherent
+//   BAR -> Host (destination without VkDeviceMemory): invalidate the range when the
+//                memory is not host coherent, then copy
+// Every other combination reaches an Invalid* path.
+// `non_temporal` selects memory_copy_non_temporal over mem_copy.
+// NOTE: the flushed/invalidated range starts at the offset aligned down to the
+// non coherent atom size, so the length is extended by the same amount; otherwise the
+// tail of an unaligned transfer would fall outside the synchronized range.
+// This assumes vk_round_up_to_sync_size() rounds its first argument up to a multiple
+// of its second - TODO confirm against its definition.
+function force_inline void
+vk_buffer_buffer_copy(VulkanBuffer *destination, VulkanBuffer *source, u64 destination_offset, u64 source_offset, u64 size, b32 non_temporal)
{
VulkanContext *vk = vulkan_context;
- VulkanBuffer *vb = vk_entity_data(b->buffer, VulkanEntityKind_Buffer);
- switch (vb->memory_kind) {
- case VulkanMemoryKind_Host:
+	switch (source->memory_kind) {
case VulkanMemoryKind_BAR:
{
- assert(vb->host_pointer);
- void *dest = (u8 *)vb->host_pointer + offset;
- // NOTE(rnp): don't trash the CPU cache for large data stores
- if (non_temporal) memory_copy_non_temporal(dest, data, size);
- else mem_copy(dest, data, size);
-
- b32 coherent = vk->memory_info.memory_host_coherent[vb->memory_kind];
- if (!coherent) {
- u64 nca_size = vk->memory_info.non_coherent_atom_size;
- VkMappedMemoryRange mrs[1] = {{
- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = vb->memory,
- .offset = offset - (offset % nca_size),
- .size = vk_round_up_to_sync_size(size, nca_size),
- }};
- vkFlushMappedMemoryRanges(vk->device, countof(mrs), mrs);
+		switch (destination->memory_kind) {
+		case VulkanMemoryKind_Host:{
+			if (destination->memory) {
+				// TODO(rnp): there is likely a more efficient way of doing this in this case
+				InvalidCodePath;
+			} else {
+				assert(source->host_pointer);
+				b32 coherent = vk->memory_info.memory_host_coherent[source->memory_kind];
+				if (!coherent) {
+					u64 nca_size = vk->memory_info.non_coherent_atom_size;
+					// NOTE: bytes between the aligned start and source_offset
+					u64 head = source_offset % nca_size;
+					VkMappedMemoryRange mrs[1] = {{
+						.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+						.memory = source->memory,
+						.offset = source_offset - head,
+						.size   = vk_round_up_to_sync_size(size + head, nca_size),
+					}};
+					vkInvalidateMappedMemoryRanges(vk->device, countof(mrs), mrs);
+				}
+
+				void *dest = (u8 *)destination->host_pointer + destination_offset;
+				void *src  = (u8 *)source->host_pointer + source_offset;
+
+				// NOTE(rnp): don't trash the CPU cache for large data stores
+				if (non_temporal) memory_copy_non_temporal(dest, src, size);
+				else              mem_copy(dest, src, size);
+			}
+		}break;
+		InvalidDefaultCase;
+		}
+	}break;
+
+	case VulkanMemoryKind_Host:{
+		switch (destination->memory_kind) {
+		case VulkanMemoryKind_BAR:{
+			assert(destination->host_pointer);
+
+			void *dest = (u8 *)destination->host_pointer + destination_offset;
+			void *src  = (u8 *)source->host_pointer + source_offset;
+
+			// NOTE(rnp): don't trash the CPU cache for large data stores
+			if (non_temporal) memory_copy_non_temporal(dest, src, size);
+			else              mem_copy(dest, src, size);
+
+			b32 coherent = vk->memory_info.memory_host_coherent[destination->memory_kind];
+			if (!coherent) {
+				u64 nca_size = vk->memory_info.non_coherent_atom_size;
+				// NOTE: bytes between the aligned start and destination_offset
+				u64 head = destination_offset % nca_size;
+				VkMappedMemoryRange mrs[1] = {{
+					.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+					.memory = destination->memory,
+					.offset = destination_offset - head,
+					.size   = vk_round_up_to_sync_size(size + head, nca_size),
+				}};
+				vkFlushMappedMemoryRanges(vk->device, countof(mrs), mrs);
+			}
+		}break;
+		InvalidDefaultCase;
+
}
}break;
+
// TODO(rnp): use transfer queue when not mapped
InvalidDefaultCase;
}
}
-DEBUG_IMPORT VulkanHandle
-vk_semaphore_create(OSHandle *export)
+// Copies `size` bytes from `data` into the buffer behind `b`, starting `offset`
+// bytes into that buffer. `non_temporal` is forwarded to the copy routine.
+DEBUG_IMPORT void
+vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal)
{
+	// NOTE: a stack VulkanBuffer without device memory stands in for the caller's pointer
+	VulkanBuffer staging = {
+		.memory_kind  = VulkanMemoryKind_Host,
+		.host_pointer = data,
+	};
+	VulkanBuffer *target = vk_entity_data(b->buffer, VulkanEntityKind_Buffer);
+	vk_buffer_buffer_copy(target, &staging, offset, 0, size, non_temporal);
+}
+
+// Copies `size` bytes out of the buffer behind `source`, starting `offset` bytes
+// into it, to `destination`. `non_temporal` is forwarded to the copy routine.
+DEBUG_IMPORT void
+vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 size, b32 non_temporal)
+{
+	// NOTE: a stack VulkanBuffer without device memory stands in for the caller's pointer
+	VulkanBuffer host_view = {
+		.memory_kind  = VulkanMemoryKind_Host,
+		.host_pointer = destination,
+	};
+	VulkanBuffer *gpu_side = vk_entity_data(source->buffer, VulkanEntityKind_Buffer);
+	vk_buffer_buffer_copy(&host_view, gpu_side, 0, offset, size, non_temporal);
+}
+
+// Releases the buffer backing `model` (when its handle is valid) and always
+// leaves `model` zeroed.
+DEBUG_IMPORT void
+vk_render_model_release(GPUBuffer *model)
+{
+	if ValidVulkanHandle(model->buffer) {
+		VulkanBuffer *backing = vk_entity_data(model->buffer, VulkanEntityKind_RenderModel);
+		vk_vulkan_buffer_release(backing, model->size);
+	}
+	zero_struct(model);
+}
+
+// (Re)creates `model` as a single host read/write buffer laid out as
+// [indices, rounded up to 64 bytes | model data], with the total rounded up to 64 bytes.
+// `indices` supplies `index_count` indices which are copied to the start of the buffer;
+// 16 bit indices are selected when index_count <= U16_MAX, 32 bit otherwise.
+// On success model->gpu_pointer is advanced past the index region; on failure the
+// entity is released and `model` stays as vk_render_model_release() left it.
+DEBUG_IMPORT void
+vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 model_size, s8 label)
+{
+	vk_render_model_release(model);
+
+	VulkanEntity *entity = vk_entity_allocate(VulkanEntityKind_RenderModel);
+
+	assert(index_count <= U32_MAX);
+	VkIndexType index_type = (index_count <= U16_MAX) ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+
+	i64 index_region_size = round_up_to(vk_index_size(index_type) * index_count, 64);
+	i64 total_size        = round_up_to(model_size + index_region_size, 64);
+	assert(total_size > 0);
+
+	VulkanBufferAllocateInfo allocate_info = {
+		.gpu_buffer              = model,
+		.size                    = (u64)total_size,
+		.flags                   = VulkanUsageFlag_HostReadWrite,
+		.index_type              = index_type,
+		.label                   = label,
+		.queue_family_count      = 1,
+		.queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family,
+	};
+
+	if (!vk_buffer_allocate_common(&entity->as.buffer, &allocate_info)) {
+		vk_entity_release(entity);
+		return;
+	}
+
+	model->buffer.value[0] = (u64)entity;
+	model->index_count     = index_count;
+	model->gpu_pointer    += index_region_size;
+
+	// NOTE: the indices live at the very start of the allocation
+	VulkanBuffer index_source = {
+		.host_pointer = indices,
+		.memory_kind  = VulkanMemoryKind_Host,
+	};
+	vk_buffer_buffer_copy(&entity->as.buffer, &index_source, 0, 0, vk_index_size(index_type) * index_count, 0);
+}
+
+// Uploads `size` bytes from `data` into the model data region of `model`.
+// `offset` is relative to that region, which starts after the index data
+// (index size * index count, rounded up to 64 bytes).
+DEBUG_IMPORT void
+vk_render_model_range_upload(GPUBuffer *model, void *data, u64 offset, u64 size, b32 non_temporal)
+{
+	VulkanBuffer *target  = vk_entity_data(model->buffer, VulkanEntityKind_RenderModel);
+	VulkanBuffer  staging = {
+		.memory_kind  = VulkanMemoryKind_Host,
+		.host_pointer = data,
+	};
+
+	u64 index_region_size = round_up_to(vk_index_size(target->index_type) * model->index_count, 64);
+	vk_buffer_buffer_copy(target, &staging, offset + index_region_size, 0, size, non_temporal);
+}
+
+// Destroys the view, image and memory behind `image` (when its handle is valid),
+// releases the entity and always leaves `image` zeroed.
+DEBUG_IMPORT void
+vk_image_release(GPUImage *image)
+{
+	if ValidVulkanHandle(image->image) {
+		VulkanContext *vk      = vulkan_context;
+		VulkanImage   *backing = vk_entity_data(image->image, VulkanEntityKind_Image);
+
+		// NOTE: view first, then the image it refers to, then the memory behind both
+		vkDestroyImageView(vk->device, backing->view, 0);
+		vkDestroyImage(vk->device, backing->image, 0);
+		vk_release_memory(backing->memory, image->memory_size);
+
+		VulkanEntity *entity = (VulkanEntity *)image->image.value[0];
+		vk_entity_release(entity);
+	}
+	zero_struct(image);
+}
+
+DEBUG_IMPORT void // (re)creates `image` as a 2D, optimally tiled image plus a view; `image` ends up zeroed if the memory allocation fails
+vk_image_allocate(GPUImage *image, u32 width, u32 height, u32 mips, u32 samples,
+ VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export)
+{
+ assert(IsPowerOfTwo(samples));
+
+ vk_image_release(image); // drops any previous image and zeroes `image`
+
VulkanContext *vk = vulkan_context;
+ VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Image);
+ VulkanImage *vi = &e->as.image;
+
+ image->image.value[0] = (u64)e;
+ image->width = Min(width, vk->gpu_info.max_image_dimension_2D); // requested dimensions are clamped to the device maximum
+ image->height = Min(height, vk->gpu_info.max_image_dimension_2D);
+ image->mip_map_levels = Max(mips, 1); // at least one mip level
+ image->samples = Min(samples, vk->gpu_info.max_msaa_samples);
+
+ VkFormat usage_format_map[VulkanImageUsage_Count + 1] = { // not local_persist: the depth/stencil entry is read from the context at call time
+ [VulkanImageUsage_None] = VK_FORMAT_UNDEFINED,
+ //[VulkanImageUsage_Colour] = VK_FORMAT_R8G8B8A8_SRGB,
+ [VulkanImageUsage_Colour] = VK_FORMAT_R8G8B8A8_UNORM,
+ [VulkanImageUsage_DepthStencil] = vk->depth_stencil_format,
+ [VulkanImageUsage_Count] = VK_FORMAT_UNDEFINED,
+ };
- VkSemaphoreCreateInfo sci = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
- VkExportSemaphoreCreateInfo esci = {
- .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
- .handleTypes = OS_WINDOWS ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
- : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+ read_only local_persist VkImageUsageFlagBits usage_extra_bit_map[VulkanImageUsage_Count + 1] = {
+ [VulkanImageUsage_None] = 0,
+ [VulkanImageUsage_Colour] = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ [VulkanImageUsage_DepthStencil] = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ [VulkanImageUsage_Count] = 0,
};
- if (export) sci.pNext = &esci;
+ read_only local_persist VkImageAspectFlags usage_image_aspect_map[VulkanImageUsage_Count + 1] = {
+ [VulkanImageUsage_None] = 0,
+ [VulkanImageUsage_Colour] = VK_IMAGE_ASPECT_COLOR_BIT,
+ [VulkanImageUsage_DepthStencil] = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT,
+ [VulkanImageUsage_Count] = 0,
+ };
+
+ usage = Clamp((u32)usage, 0, VulkanImageUsage_Count); // out of range usages land on the _Count entries (undefined format, no extra bits)
+ VkImageUsageFlagBits usage_flags = usage_extra_bit_map[usage];
+
+ if (flags & VulkanUsageFlag_ImageSampling) usage_flags |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ if (flags & VulkanUsageFlag_TransferSource) usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ if (flags & VulkanUsageFlag_TransferDestination) usage_flags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+ u32 queue_family = vk->queues[VulkanQueueKind_Graphics]->queue_family;
+ VkImageCreateInfo image_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .flags = export ? VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT : 0,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = usage_format_map[usage],
+ .extent = {image->width, image->height, 1},
+ .mipLevels = image->mip_map_levels,
+ .arrayLayers = 1,
+ .samples = image->samples,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = usage_flags,
+ // NOTE(rnp): needed if multiple queue families are accessed
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .queueFamilyIndexCount = 1,
+ .pQueueFamilyIndices = &queue_family,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ };
+
+ VkExternalMemoryImageCreateInfo external_memory_image_create_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+ .handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
+ : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+ };
+
+ if (export) image_create_info.pNext = &external_memory_image_create_info; // only chained when the caller asked for an exportable image
+
+ vkCreateImage(vk->device, &image_create_info, 0, &vi->image); // NOTE(review): the VkResult is not checked here
+
+ VkMemoryRequirements memory_requirements;
+ vkGetImageMemoryRequirements(vk->device, vi->image, &memory_requirements);
+
+ VkMemoryDedicatedAllocateInfo dedicated_allocate_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .image = vi->image,
+ };
+
+ if (vk_allocate_memory(&vi->memory, memory_requirements.size, VulkanMemoryKind_Device, 0, &dedicated_allocate_info, export)) {
+ image->memory_size = memory_requirements.size;
+ vkBindImageMemory(vk->device, vi->image, vi->memory, 0);
+
+ VkImageViewCreateInfo image_view_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = vi->image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = usage_format_map[usage],
+ .subresourceRange = {
+ .aspectMask = usage_image_aspect_map[usage],
+ .baseMipLevel = 0,
+ .levelCount = 1, // NOTE(review): the view covers one mip level even when mip_map_levels > 1 - confirm this is intended
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+ vkCreateImageView(vk->device, &image_view_info, 0, &vi->view);
+ } else {
+ vkDestroyImage(vk->device, vi->image, 0); // undo the image creation before giving the entity back
+ vk_entity_release(e);
+ zero_struct(image);
+ }
+}
+
+DEBUG_IMPORT VulkanHandle // allocates a semaphore entity and returns a handle holding the entity pointer
+vk_create_semaphore(OSHandle *export)
+{
VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Semaphore);
+ e->as.semaphore = vk_make_semaphore(export); // `export` is forwarded unchanged to vk_make_semaphore
VulkanHandle result = {(u64)e};
+ return result;
+}
- vkCreateSemaphore(vk->device, &sci, 0, &e->as.semaphore);
-
- if (export) {
- if (OS_WINDOWS) {
- VkSemaphoreGetWin32HandleInfoKHR ghi = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
- .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
- .semaphore = e->as.semaphore,
- };
- void *handle;
- vkGetSemaphoreWin32HandleKHR(vk->device, &ghi, &handle);
- export->value[0] = (u64)handle;
- } else {
- VkSemaphoreGetFdInfoKHR ghi = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
- .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
- .semaphore = e->as.semaphore,
- };
- i32 handle;
- vkGetSemaphoreFdKHR(vk->device, &ghi, &handle);
- export->value[0] = (u64)handle;
- }
+// Waits on the host until the timeline semaphore of the queue selected by `timeline`
+// reaches `value`, giving up after `timeout_ns`. Returns 1 only when
+// vkWaitSemaphores reports VK_SUCCESS; an out of range `timeline` returns 0
+// without waiting.
+DEBUG_IMPORT b32
+vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns)
+{
+	b32 result = 0;
+	if Between(timeline, 0, VulkanTimeline_Count - 1) {
+		VulkanContext *vk    = vulkan_context;
+		VulkanQueue   *queue = vk->queues[timeline];
+
+		VkSemaphoreWaitInfo wait_info = {
+			.sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+			.semaphoreCount = 1,
+			.pSemaphores    = &queue->timeline_semaphore.semaphore,
+			.pValues        = &value,
+		};
+		result = vkWaitSemaphores(vk->device, &wait_info, timeout_ns) == VK_SUCCESS;
}
+	return result;
+}
+// Increments the stored value of the timeline semaphore belonging to the queue
+// selected by `timeline`, signals the semaphore to that value from the host and
+// returns it. An out of range `timeline` returns (u64)-1 and signals nothing.
+DEBUG_IMPORT u64
+vk_host_signal_timeline(VulkanTimeline timeline)
+{
+	u64 result = -1;
+	if Between(timeline, 0, VulkanTimeline_Count - 1) {
+		VulkanContext   *vk        = vulkan_context;
+		VulkanSemaphore *semaphore = &vk->queues[timeline]->timeline_semaphore;
+
+		result = ++semaphore->value;
+
+		VkSemaphoreSignalInfo signal_info = {
+			.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
+			.semaphore = semaphore->semaphore,
+			.value     = result,
+		};
+		vkSignalSemaphore(vk->device, &signal_info);
+	}
return result;
}
DEBUG_IMPORT VulkanHandle
-vk_compute_shader(s8 text, s8 name)
+vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size) // count == 2 builds a graphics pipeline from `infos`; otherwise a compute pipeline from infos[0]
{
+ assert(Between(count, 1, 2));
+ assert(count == 2 || infos[0].kind == VulkanShaderKind_Compute); // a single info must describe a compute shader
+
VulkanHandle result = {0};
DeferLoop(take_lock(&vulkan_context->arena_lock, -1), release_lock(&vulkan_context->arena_lock))
{
Arena arena = vulkan_context->arena;
- VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Shader);
+ VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Pipeline);
result = (VulkanHandle){(u64)e};
- e->as.shader = vk_compute_pipeline_from_shader_text(arena, text, name);
- if (e->as.shader.pipeline == 0) e->as.shader = vulkan_context->default_compute_shader;
+ if (count == 2) e->as.pipeline = vk_graphics_pipeline_from_infos(arena, infos, count, push_constants_size);
+ else e->as.pipeline = vk_compute_pipeline_from_shader_text(arena, infos[0].text, infos[0].name, push_constants_size); // NOTE(review): the explicit fallback to the default pipeline was removed here; presumably the helpers now handle build failure - confirm
}
return result;
}
-DEBUG_IMPORT void
-vk_compute_shader_release(VulkanHandle h)
+DEBUG_IMPORT b32 // 1 when `h` is a valid handle whose pipeline differs from the context's default pipeline of the same kind
+vk_pipeline_valid(VulkanHandle h)
{
+ b32 result = 0;
if ValidVulkanHandle(h) {
- VulkanShader *vs = vk_entity_data(h, VulkanEntityKind_Shader);
- if (vs->pipeline != vulkan_context->default_compute_shader.pipeline) {
- vkDestroyPipeline(vulkan_context->device, vs->pipeline, 0);
- vkDestroyPipelineLayout(vulkan_context->device, vs->layout, 0);
+ VulkanPipeline *vp = vk_entity_data(h, VulkanEntityKind_Pipeline);
+ if (vp->stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) // exactly the compute stage selects the compute default; anything else is compared to the graphics default
+ result = vp->pipeline != vulkan_context->default_compute_pipeline.pipeline;
+ else
+ result = vp->pipeline != vulkan_context->default_graphics_pipeline.pipeline;
+ }
+ return result;
+}
+
+/* Destroys the pipeline referenced by `h` and returns its entity.
+ *
+ * For a pipeline that is not one of the context defaults this takes the lock
+ * of the matching queue (compute or graphics), waits for the submission value
+ * of the most recently begun command buffer on that timeline, clears the
+ * queue's bound pipeline if it is this one and destroys the VkPipeline and
+ * VkPipelineLayout.
+ *
+ * An entity that holds one of the default pipelines owns no Vulkan objects of
+ * its own: the shared defaults are left untouched but the entity is still
+ * handed back to vk_entity_release() so that it does not leak.
+ * Invalid handles are ignored. */
+DEBUG_IMPORT void
+vk_pipeline_release(VulkanHandle h)
+{
+	if ValidVulkanHandle(h) {
+		VulkanEntity *e = (VulkanEntity *)h.value[0];
+		if (vk_pipeline_valid(h)) {
+			VulkanTimeline timeline;
+			if (e->as.pipeline.stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) timeline = VulkanTimeline_Compute;
+			else                                                           timeline = VulkanTimeline_Graphics;
+
+			VulkanQueue *vq = vulkan_context->queues[timeline];
+			DeferLoop(take_lock(&vq->lock, -1), release_lock(&vq->lock))
+			{
+				// NOTE: the pipeline may still be referenced by submitted work; wait for
+				// the newest submission on this timeline before destroying it
+				u32 index = (vq->next_command_buffer_index - 1) % countof(vq->command_buffers);
+				vk_host_wait_timeline(timeline, vq->command_buffer_submission_values[index], -1ULL);
+
+				if (&e->as.pipeline == vq->bound_pipeline)
+					vq->bound_pipeline = 0;
+
+				vkDestroyPipeline(vulkan_context->device, e->as.pipeline.pipeline, 0);
+				vkDestroyPipelineLayout(vulkan_context->device, e->as.pipeline.layout, 0);
+			}
+		}
+		vk_entity_release(e);
+	}
+}
+
+/* Begins recording into the next command buffer slot of `timeline`'s queue.
+ *
+ * The queue lock is taken here and is still held when this function returns;
+ * vk_command_end() is the one that releases it. Before the slot is reused the
+ * host waits for the value recorded at the slot's previous submission. The
+ * slot's timestamp counter is cleared and a query pool reset for the slot's
+ * range is recorded as the first command.
+ *
+ * Returns a handle to a new command buffer entity, or a zero handle when
+ * `timeline` is out of range (in which case no lock is taken). */
+DEBUG_IMPORT VulkanHandle
+vk_command_begin(VulkanTimeline timeline)
+{
+	VulkanHandle result = {0};
+	if (!Between(timeline, 0, VulkanTimeline_Count - 1))
+		return result;
+
+	VulkanQueue *queue = vulkan_context->queues[timeline];
+
+	take_lock(&queue->lock, -1);
+
+	VulkanEntity *entity = vk_entity_allocate(VulkanEntityKind_CommandBuffer);
+	u32 slot = queue->next_command_buffer_index++ % countof(queue->command_buffers);
+	entity->as.command_buffer.kind                 = (VulkanQueueKind)timeline;
+	entity->as.command_buffer.command_buffer_index = slot;
+
+	// TODO(rnp): probably not the best to have this here but it will likely not be hit
+	b32 wait_result = vk_host_wait_timeline(timeline, queue->command_buffer_submission_values[slot], -1ULL);
+	assert(wait_result);
+
+	queue->query_pool_occupied[slot] = 0;
+
+	VkCommandBufferBeginInfo begin_info = {0};
+	begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+	begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+
+	vkBeginCommandBuffer(queue->command_buffers[slot], &begin_info);
+	vkCmdResetQueryPool(queue->command_buffers[slot], queue->query_pool,
+	                    slot * MaxCommandBufferTimestamps, MaxCommandBufferTimestamps);
+
+	result = (VulkanHandle){(u64)entity};
+	return result;
+}
+
+/* Binds `pipeline` on `command` and remembers it as the queue's bound pipeline.
+ * When `pipeline` is not a valid handle the context's default compute or
+ * graphics pipeline is used instead, chosen by the command buffer's queue kind.
+ * The bind point is looked up from the queue kind; transfer has no bind point
+ * (asserted). Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline)
+{
+	read_only local_persist VkPipelineBindPoint kind_to_bind_point[VulkanQueueKind_Count] = {
+		[VulkanQueueKind_Graphics] = VK_PIPELINE_BIND_POINT_GRAPHICS,
+		[VulkanQueueKind_Compute]  = VK_PIPELINE_BIND_POINT_COMPUTE,
+		[VulkanQueueKind_Transfer] = -1,
+	};
+
+	if (!ValidVulkanHandle(command))
+		return;
+
+	VulkanContext       *vk    = vulkan_context;
+	VulkanCommandBuffer *vcb   = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
+	VulkanQueue         *queue = vk->queues[vcb->kind];
+
+	VulkanPipeline *selected = 0;
+	if ValidVulkanHandle(pipeline) {
+		selected = vk_entity_data(pipeline, VulkanEntityKind_Pipeline);
+	} else {
+		switch (vcb->kind) {
+		case VulkanQueueKind_Compute:  selected = &vk->default_compute_pipeline;  break;
+		case VulkanQueueKind_Graphics: selected = &vk->default_graphics_pipeline; break;
+		default: InvalidCodePath; break;
+		}
+	}
+
+	VkPipelineBindPoint bind_point = kind_to_bind_point[vcb->kind];
+	assert(bind_point != (VkPipelineBindPoint)-1);
+
+	vkCmdBindPipeline(queue->command_buffers[vcb->command_buffer_index], bind_point, selected->pipeline);
+	queue->bound_pipeline = selected;
+}
+
+/* Records one pipeline barrier on `command` covering every entry of `barriers`
+ * whose buffer handle is valid; entries with an invalid handle are skipped.
+ * Each barrier goes from MEMORY_WRITE to MEMORY_READ access, uses the queue's
+ * pipeline_stage_flags for both stage masks and the queue's own family for
+ * both queue family indices. The temporary barrier array is carved out of a
+ * by-value copy of the context arena while arena_lock is held.
+ * Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count)
+{
+	if (!ValidVulkanHandle(command))
+		return;
+
+	VulkanContext       *vk    = vulkan_context;
+	VulkanCommandBuffer *vcb   = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
+	VulkanQueue         *queue = vk->queues[vcb->kind];
+
+	DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock))
+	{
+		Arena arena = vk->arena;
+		u32   used  = 0;
+		VkBufferMemoryBarrier2 *list = push_array(&arena, VkBufferMemoryBarrier2, count);
+		for (u64 i = 0; i < count; i++) {
+			GPUMemoryBarrierInfo *info = barriers + i;
+			if (!ValidVulkanHandle(info->gpu_buffer->buffer))
+				continue;
+
+			VkBufferMemoryBarrier2 *mb = list + used++;
+			VulkanBuffer *vb = vk_entity_data(info->gpu_buffer->buffer, VulkanEntityKind_Buffer);
+			mb->sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2;
+			mb->srcStageMask        = queue->pipeline_stage_flags;
+			mb->srcAccessMask       = VK_ACCESS_2_MEMORY_WRITE_BIT;
+			mb->dstStageMask        = queue->pipeline_stage_flags;
+			mb->dstAccessMask       = VK_ACCESS_2_MEMORY_READ_BIT;
+			mb->srcQueueFamilyIndex = queue->queue_family;
+			mb->dstQueueFamilyIndex = queue->queue_family;
+			mb->buffer              = vb->buffer;
+			mb->offset              = info->offset;
+			mb->size                = info->size;
+		}
+
+		VkDependencyInfo dependency_info = {0};
+		dependency_info.sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO;
+		dependency_info.bufferMemoryBarrierCount = used;
+		dependency_info.pBufferMemoryBarriers    = list;
+
+		vkCmdPipelineBarrier2(queue->command_buffers[vcb->command_buffer_index], &dependency_info);
+	}
+}
+
+/* Records a compute dispatch of `dispatch` work groups on `command`.
+ * Every dimension is asserted to be at most U16_MAX.
+ * Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch)
+{
+	assert(dispatch.x <= U16_MAX);
+	assert(dispatch.y <= U16_MAX);
+	assert(dispatch.z <= U16_MAX);
+
+	if (!ValidVulkanHandle(command))
+		return;
+
+	vkCmdDispatch(vk_command_buffer(command), dispatch.x, dispatch.y, dispatch.z);
+}
+
+/* Records a push constant update of `size` bytes at `offset` taken from
+ * `values`. The layout and stage flags come from the pipeline currently
+ * recorded as bound on the command buffer's queue; a pipeline must have been
+ * bound first (asserted). Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values)
+{
+	if (!ValidVulkanHandle(command))
+		return;
+
+	VulkanCommandBuffer *vcb   = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
+	VulkanQueue         *queue = vulkan_context->queues[vcb->kind];
+	VulkanPipeline      *bound = queue->bound_pipeline;
+
+	assert(bound);
+
+	vkCmdPushConstants(queue->command_buffers[vcb->command_buffer_index], bound->layout,
+	                   bound->stage_flags, offset, size, values);
+}
+
+/* Records a timestamp write on `command` into the next free query of the
+ * command buffer's slice of the queue's query pool. The pipeline stage is
+ * chosen from the queue kind (transfer has none - asserted). Once
+ * MaxCommandBufferTimestamps timestamps have been recorded for this command
+ * buffer further calls are silently ignored.
+ * Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_timestamp(VulkanHandle command)
+{
+ if ValidVulkanHandle(command) {
+ VulkanContext *vk = vulkan_context;
+ VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
+ VulkanQueue *vq = vk->queues[vcb->kind];
+
+ // NOTE: -1 marks queue kinds that cannot write timestamps through this path
+ read_only local_persist VkPipelineStageFlags2 stage_lut[VulkanQueueKind_Count] = {
+ [VulkanQueueKind_Graphics] = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT,
+ [VulkanQueueKind_Compute] = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ [VulkanQueueKind_Transfer] = -1,
+ };
+
+ VkPipelineStageFlags2 stage = stage_lut[vcb->kind];
+ assert(stage != (VkPipelineStageFlags2)-1);
+
+ if (vq->query_pool_occupied[vcb->command_buffer_index] < MaxCommandBufferTimestamps) {
+ u32 query_index = vq->query_pool_occupied[vcb->command_buffer_index]++;
+ // NOTE: each command buffer slot owns MaxCommandBufferTimestamps consecutive queries
+ vkCmdWriteTimestamp2(vq->command_buffers[vcb->command_buffer_index], stage,
+ vq->query_pool,
+ vcb->command_buffer_index * MaxCommandBufferTimestamps + query_index);
}
- vk_entity_release((VulkanEntity *)h.value[0]);
}
}
+
+/* Registers that `command` must wait for `timeline` to reach `value` before it
+ * executes. Only the largest value requested per queue index is kept; the
+ * waits themselves are attached to the submission in vk_command_end().
+ * Does nothing for an invalid `command` or an out of range `timeline`. */
+DEBUG_IMPORT void
+vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value)
+{
+	if (!ValidVulkanHandle(command))
+		return;
+	if (!Between(timeline, 0, VulkanTimeline_Count - 1))
+		return;
+
+	VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
+
+	u64 *pending = vcb->in_flight_wait_values + vulkan_context->queue_indices[timeline];
+	*pending = Max(value, *pending);
+}
+
+/* Finishes recording `command`, submits it to its queue and releases the
+ * command buffer entity.
+ *
+ * Signals: the queue's timeline semaphore with a freshly incremented value
+ *          and, when valid, `finished_semaphore`.
+ * Waits:   every non-zero value stored in the command buffer's
+ *          in_flight_wait_values and, when valid, `wait_semaphore`.
+ *
+ * Also clears the queue's bound pipeline, records the signalled value as the
+ * slot's submission value and releases the queue lock (vq->lock).
+ *
+ * Returns the timeline value signalled by this submission, or (u64)-1 when
+ * `command` is invalid.
+ * NOTE(review): the results of vkEndCommandBuffer and vkQueueSubmit2 are not
+ * checked; the timeline counter is advanced regardless. */
+DEBUG_IMPORT u64
+vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore)
+{
+ u64 result = -1;
+ if ValidVulkanHandle(command) {
+ VulkanContext *vk = vulkan_context;
+ VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
+ VulkanQueue *vq = vk->queues[vcb->kind];
+ VulkanSemaphore *vs = &vq->timeline_semaphore;
+
+ vkEndCommandBuffer(vq->command_buffers[vcb->command_buffer_index]);
+
+ VkCommandBufferSubmitInfo command_buffer_submit_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+ .commandBuffer = vq->command_buffers[vcb->command_buffer_index],
+ };
+
+ // NOTE: value this submission will signal on the queue's timeline
+ result = ++vs->value;
+
+ // NOTE: slot 0 is always the queue timeline; slot 1 is the optional finished_semaphore
+ u32 signal_submit_info_count = 1;
+ VkSemaphoreSubmitInfo signal_submit_infos[2] = {{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = vs->semaphore,
+ .value = result,
+ .stageMask = vq->pipeline_stage_flags,
+ }};
+
+ if ValidVulkanHandle(finished_semaphore) {
+ VulkanSemaphore *fs = vk_entity_data(finished_semaphore, VulkanEntityKind_Semaphore);
+ signal_submit_infos[signal_submit_info_count++] = (VkSemaphoreSubmitInfo){
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = fs->semaphore,
+ .stageMask = vq->pipeline_stage_flags,
+ };
+ }
+
+ // NOTE: room for one timeline wait per queue kind plus the optional wait_semaphore
+ u32 wait_submit_info_count = 0;
+ VkSemaphoreSubmitInfo wait_submit_infos[VulkanQueueKind_Count + 1];
+ for (u32 i = 0; i < vk->unique_queues; i++) {
+ u32 queue_index = vk->queue_indices[i];
+ // NOTE: a stored value of 0 means no wait was requested on that queue
+ if (vcb->in_flight_wait_values[queue_index] > 0) {
+ VulkanQueue *q = vk->queues[queue_index];
+ VkSemaphoreSubmitInfo wait_ssi = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = q->timeline_semaphore.semaphore,
+ .value = vcb->in_flight_wait_values[queue_index],
+ .stageMask = q->pipeline_stage_flags,
+ };
+ wait_submit_infos[wait_submit_info_count++] = wait_ssi;
+ }
+ }
+
+ if ValidVulkanHandle(wait_semaphore) {
+ VulkanSemaphore *ws = vk_entity_data(wait_semaphore, VulkanEntityKind_Semaphore);
+ wait_submit_infos[wait_submit_info_count++] = (VkSemaphoreSubmitInfo){
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = ws->semaphore,
+ .stageMask = vq->pipeline_stage_flags,
+ };
+ }
+
+ VkSubmitInfo2 submit_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
+ .commandBufferInfoCount = 1,
+ .pCommandBufferInfos = &command_buffer_submit_info,
+ .waitSemaphoreInfoCount = wait_submit_info_count,
+ .pWaitSemaphoreInfos = wait_submit_infos,
+ .signalSemaphoreInfoCount = signal_submit_info_count,
+ .pSignalSemaphoreInfos = signal_submit_infos,
+ };
+
+ vkQueueSubmit2(vq->queue, 1, &submit_info, 0);
+
+ vq->bound_pipeline = 0;
+
+ // NOTE: remembered so the slot can be waited on before it is reused
+ atomic_store_u64(vq->command_buffer_submission_values + vcb->command_buffer_index, result);
+
+ // NOTE: pairs with the take_lock() in vk_command_begin()
+ release_lock(&vq->lock);
+
+ vk_entity_release((VulkanEntity *)command.value[0]);
+ }
+ return result;
+}
+
+/* Transitions the attachments and begins a dynamic rendering pass on `command`.
+ *
+ * colour:  colour attachment; cleared to (0, 0, 0, 0) on load and stored.
+ * depth:   depth/stencil attachment; cleared to depth 1.0 / stencil 0 on load
+ *          and stored. Must match `colour` in width and height (asserted).
+ * resolve: optional (may be 0). When given, the colour attachment is resolved
+ *          into it with VK_RESOLVE_MODE_AVERAGE_BIT.
+ *
+ * All images are transitioned from VK_IMAGE_LAYOUT_UNDEFINED, so their previous
+ * contents may be discarded. The render area covers the whole colour image.
+ * Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_begin_rendering(VulkanHandle command, GPUImage *colour, GPUImage *depth, GPUImage *resolve)
+{
+	if ValidVulkanHandle(command) {
+		VkCommandBuffer cmd = vk_command_buffer(command);
+
+		assert((colour->width == depth->width) && (colour->height == depth->height));
+
+		VulkanImage *ci = vk_entity_data(colour->image, VulkanEntityKind_Image);
+		VulkanImage *di = vk_entity_data(depth->image,  VulkanEntityKind_Image);
+		VulkanImage *ri = 0;
+		if (resolve) ri = vk_entity_data(resolve->image, VulkanEntityKind_Image);
+
+		// NOTE: Layout Transitions
+		{
+			u32 image_memory_barrier_count = 2;
+			VkImageMemoryBarrier2 image_memory_barriers[3] = {
+				{
+					.sType         = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+					.srcStageMask  = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+					.srcAccessMask = 0,
+					.dstStageMask  = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
+					.dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
+					.oldLayout     = VK_IMAGE_LAYOUT_UNDEFINED,
+					.newLayout     = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+					.image         = ci->image,
+					.subresourceRange = {
+						.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
+						.baseMipLevel   = 0,
+						.levelCount     = 1,
+						.baseArrayLayer = 0,
+						.layerCount     = 1,
+					},
+				},
+				{
+					.sType         = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+					.srcStageMask  = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
+					.srcAccessMask = 0,
+					.dstStageMask  = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
+					.dstAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+					.oldLayout     = VK_IMAGE_LAYOUT_UNDEFINED,
+					.newLayout     = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+					.image         = di->image,
+					.subresourceRange = {
+						.aspectMask     = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT,
+						.baseMipLevel   = 0,
+						.levelCount     = 1,
+						.baseArrayLayer = 0,
+						.layerCount     = 1,
+					},
+				},
+			};
+
+			if (resolve) image_memory_barriers[image_memory_barrier_count++] = (VkImageMemoryBarrier2){
+				.sType         = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+				.srcStageMask  = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+				.srcAccessMask = 0,
+				.dstStageMask  = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT|VK_PIPELINE_STAGE_2_RESOLVE_BIT,
+				.dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
+				.oldLayout     = VK_IMAGE_LAYOUT_UNDEFINED,
+				.newLayout     = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+				.image         = ri->image,
+				.subresourceRange = {
+					.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
+					.baseMipLevel   = 0,
+					.levelCount     = 1,
+					.baseArrayLayer = 0,
+					.layerCount     = 1,
+				},
+			};
+
+			VkDependencyInfo dependency_info = {
+				.sType                   = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+				.imageMemoryBarrierCount = image_memory_barrier_count,
+				.pImageMemoryBarriers    = image_memory_barriers,
+			};
+
+			vkCmdPipelineBarrier2(cmd, &dependency_info);
+		}
+
+		VkRenderingAttachmentInfo colour_attachment = {
+			.sType              = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+			.imageView          = ci->view,
+			.imageLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+			.resolveMode        = ri ? VK_RESOLVE_MODE_AVERAGE_BIT : 0,
+			.resolveImageView   = ri ? ri->view : 0,
+			// NOTE: must name the layout the resolve image is actually in, which is
+			// the one it was transitioned to by the barrier above
+			.resolveImageLayout = ri ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : 0,
+			.loadOp             = VK_ATTACHMENT_LOAD_OP_CLEAR,
+			.storeOp            = VK_ATTACHMENT_STORE_OP_STORE,
+			.clearValue         = {.color = {{0.0f, 0.0f, 0.0f, 0.0f}}},
+		};
+
+		// NOTE: the same attachment info is used for both depth and stencil
+		VkRenderingAttachmentInfo depth_stencil_attachment = {
+			.sType       = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+			.imageView   = di->view,
+			.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+			.loadOp      = VK_ATTACHMENT_LOAD_OP_CLEAR,
+			.storeOp     = VK_ATTACHMENT_STORE_OP_STORE,
+			.clearValue  = {.depthStencil = {1.0f, 0}},
+		};
+
+		VkRenderingInfo rendering_info = {
+			.sType                = VK_STRUCTURE_TYPE_RENDERING_INFO,
+			.renderArea           = {.offset = {0}, .extent = {colour->width, colour->height}},
+			.layerCount           = 1,
+			.colorAttachmentCount = 1,
+			.pColorAttachments    = &colour_attachment,
+			.pDepthAttachment     = &depth_stencil_attachment,
+			.pStencilAttachment   = &depth_stencil_attachment,
+		};
+
+		vkCmdBeginRendering(cmd, &rendering_info);
+	}
+}
+
+/* Records an indexed draw of `model` on `command`: binds the model's buffer as
+ * the index buffer (offset 0, sized for index_count indices of the buffer's
+ * index type) and draws index_count indices as a single instance.
+ * Does nothing when `command` or the model's buffer handle is invalid. */
+DEBUG_IMPORT void
+vk_command_draw(VulkanHandle command, GPUBuffer *model)
+{
+	if (!ValidVulkanHandle(command))       return;
+	if (!ValidVulkanHandle(model->buffer)) return;
+
+	VkCommandBuffer cmd   = vk_command_buffer(command);
+	VulkanBuffer   *mesh  = vk_entity_data(model->buffer, VulkanEntityKind_RenderModel);
+
+	vkCmdBindIndexBuffer2(cmd, mesh->buffer, 0,
+	                      vk_index_size(mesh->index_type) * model->index_count,
+	                      mesh->index_type);
+	vkCmdDrawIndexed(cmd, model->index_count, 1, 0, 0, 0);
+}
+
+/* Records scissor rectangle 0 on `command` with the given offset and extent.
+ * Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset)
+{
+	if (!ValidVulkanHandle(command))
+		return;
+
+	VkRect2D rect = {.offset = {x_offset, y_offset}, .extent = {width, height}};
+	vkCmdSetScissor(vk_command_buffer(command), 0, 1, &rect);
+}
+
+/* Records viewport 0 on `command` from the given origin, size and depth range.
+ * Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth)
+{
+	if (!ValidVulkanHandle(command))
+		return;
+
+	VkViewport vp = {x_offset, y_offset, width, height, min_depth, max_depth};
+	vkCmdSetViewport(vk_command_buffer(command), 0, 1, &vp);
+}
+
+/* Records the end of the current dynamic rendering pass on `command`.
+ * Does nothing for an invalid `command`. */
+DEBUG_IMPORT void
+vk_command_end_rendering(VulkanHandle command)
+{
+	if (!ValidVulkanHandle(command))
+		return;
+
+	VkCommandBuffer cmd = vk_command_buffer(command);
+	vkCmdEndRendering(cmd);
+}
+
+/* Records a copy of `size` bytes from `source` (starting at `source_offset`)
+ * to the start of `destination` on `command`.
+ * Does nothing when `command` or either buffer handle is invalid. */
+DEBUG_IMPORT void
+vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination,
+                       GPUBuffer *restrict source, u64 source_offset, i64 size)
+{
+	if (ValidVulkanHandle(command) && ValidVulkanHandle(destination->buffer) && ValidVulkanHandle(source->buffer)) {
+		VkCommandBuffer cmd = vk_command_buffer(command);
+		VulkanBuffer *db = vk_entity_data(destination->buffer, VulkanEntityKind_Buffer);
+		VulkanBuffer *sb = vk_entity_data(source->buffer,      VulkanEntityKind_Buffer);
+
+		VkBufferCopy2 buffer_copy = {
+			.sType     = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
+			.srcOffset = source_offset,
+			.dstOffset = 0,
+			.size      = size,
+		};
+
+		VkCopyBufferInfo2 copy_buffer_info = {
+			.sType       = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
+			.srcBuffer   = sb->buffer,
+			.dstBuffer   = db->buffer,
+			.regionCount = 1,
+			.pRegions    = &buffer_copy,
+		};
+
+		vkCmdCopyBuffer2(cmd, &copy_buffer_info);
+	}
+}
+
+/* Reads back the timestamps recorded by the most recently begun command buffer
+ * on `timeline`.
+ *
+ * Returns an array allocated from `arena`: element 0 holds the number of
+ * timestamps N and elements 1..N hold the raw 64-bit timestamp values.
+ * Returns 0 when that command buffer recorded no timestamps. For an out of
+ * range `timeline` a one element array is allocated and returned
+ * (NOTE(review): presumably zeroed by push_array, i.e. a count of 0 - confirm).
+ *
+ * The queue lock is held for the duration and the read blocks until the
+ * results are available (VK_QUERY_RESULT_WAIT_BIT). */
+DEBUG_IMPORT u64 *
+vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena)
+{
+	u64 *result = 0;
+	if Between(timeline, 0, VulkanTimeline_Count - 1) {
+		VulkanContext *vk = vulkan_context;
+		VulkanQueue   *vq = vk->queues[timeline];
+		DeferLoop(take_lock(&vq->lock, -1), release_lock(&vq->lock)) {
+			u32 index = (vq->next_command_buffer_index - 1) % countof(vq->command_buffers);
+			u32 count = vq->query_pool_occupied[index];
+			if (count > 0) {
+				result    = push_array(arena, u64, count + 1);
+				result[0] = count;
+
+				// NOTE: without VK_QUERY_RESULT_64_BIT the results are written as 32-bit
+				// values, which would leave the upper half of every u64 slot unwritten
+				vkGetQueryPoolResults(vk->device, vq->query_pool, index * MaxCommandBufferTimestamps, count,
+				                      count * sizeof(u64), result + 1, sizeof(u64),
+				                      VK_QUERY_RESULT_64_BIT|VK_QUERY_RESULT_WAIT_BIT);
+			}
+		}
+	} else {
+		result = push_array(arena, u64, 1);
+	}
+	return result;
+}
diff --git a/vulkan.h b/vulkan.h
@@ -24,7 +24,9 @@
typedef uint32_t VkBool32;
typedef uint32_t VkFlags;
+typedef uint64_t VkFlags64; // 64-bit flag word; base type of VkPipelineStageFlags2 and VkAccessFlags2
typedef uint32_t VkSampleMask;
+typedef uint64_t VkDeviceAddress; // 64-bit integer; presumably a GPU buffer address (see BUFFER_DEVICE_ADDRESS_INFO) - confirm
typedef uint64_t VkDeviceSize;
VK_HANDLE(VkBuffer);
VK_HANDLE(VkCommandBuffer);
@@ -41,6 +43,7 @@ VK_HANDLE(VkPhysicalDevice);
VK_HANDLE(VkPipeline);
VK_HANDLE(VkPipelineCache);
VK_HANDLE(VkPipelineLayout);
+VK_HANDLE(VkQueryPool); // handle type for query pools (timestamp queries use one per queue)
VK_HANDLE(VkQueue);
VK_HANDLE(VkRenderPass);
VK_HANDLE(VkSampler);
@@ -51,53 +54,85 @@ VK_HANDLE(VkSwapchainKHR);
typedef enum {
VK_SUCCESS = 0,
+ VK_TIMEOUT = 2, // Vulkan spec: a wait operation did not complete within the given time
VK_SUBOPTIMAL_KHR = 1000001003,
VK_ERROR_OUT_OF_DATE_KHR = -1000001004,
VK_RESULT_MAX_ENUM = 0x7FFFFFFF
} VkResult;
typedef enum {
- VK_STRUCTURE_TYPE_APPLICATION_INFO = 0,
- VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1,
- VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 2,
- VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 3,
- VK_STRUCTURE_TYPE_SUBMIT_INFO = 4,
- VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 5,
- VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 6,
- VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 8,
- VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 9,
- VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 15,
- VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 16,
- VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 18,
- VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 19,
- VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 20,
- VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 22,
- VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO = 23,
- VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 24,
- VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 26,
- VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 27,
- VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 28,
- VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 29,
- VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 30,
- VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 37,
- VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 38,
- VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 39,
- VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 40,
- VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 42,
- VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43,
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES = 50,
- VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR = 1000001000,
- VK_STRUCTURE_TYPE_PRESENT_INFO_KHR = 1000001001,
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 = 1000059001,
- VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2 = 1000059006,
- VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO = 1000060000,
- VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO = 1000072002,
- VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR = 1000073003,
- VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR = 1000074002,
- VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO = 1000077000,
- VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR = 1000078003,
- VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR = 1000079001,
- VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF,
+ VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, // NOTE: these tags are what gets written to the .sType member of the matching Vk* struct; presumably only the subset the project uses is listed - confirm
+ VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1,
+ VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 2,
+ VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 3,
+ VK_STRUCTURE_TYPE_SUBMIT_INFO = 4,
+ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 5,
+ VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 6,
+ VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 8,
+ VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 9,
+ VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 11,
+ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 12,
+ VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 14,
+ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 15,
+ VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 16,
+ VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 18,
+ VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 19,
+ VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 20,
+ VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 22,
+ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO = 23,
+ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 24,
+ VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 25,
+ VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 26,
+ VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 27,
+ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 28,
+ VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 29,
+ VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 30,
+ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 37,
+ VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 38,
+ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 39,
+ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 40,
+ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 42,
+ VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES = 49,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES = 50,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES = 51,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES = 52,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES = 53,
+ VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR = 1000001000,
+ VK_STRUCTURE_TYPE_PRESENT_INFO_KHR = 1000001001,
+ VK_STRUCTURE_TYPE_RENDERING_INFO = 1000044000,
+ VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO = 1000044001,
+ VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO = 1000044002,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 = 1000059000,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 = 1000059001,
+ VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2 = 1000059002,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2 = 1000059006,
+ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO = 1000060000,
+ VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO = 1000072001,
+ VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO = 1000072002,
+ VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR = 1000073003,
+ VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR = 1000074002,
+ VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO = 1000077000,
+ VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR = 1000078003,
+ VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR = 1000079001,
+ VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO = 1000127001,
+ VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO = 1000207002,
+ VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO = 1000207003,
+ VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO = 1000207004,
+ VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO = 1000207005,
+ VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO = 1000244001,
+ VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT = 1000247000,
+ VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2 = 1000314001,
+ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2 = 1000314002,
+ VK_STRUCTURE_TYPE_DEPENDENCY_INFO = 1000314003,
+ VK_STRUCTURE_TYPE_SUBMIT_INFO_2 = 1000314004,
+ VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO = 1000314005,
+ VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO = 1000314006,
+ VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2 = 1000337000,
+ VK_STRUCTURE_TYPE_BUFFER_COPY_2 = 1000337006,
+ VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3 = 1000360000,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR = 1000558000,
+ VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF,
} VkStructureType;
typedef enum {
@@ -110,6 +145,27 @@ typedef enum {
} VkPhysicalDeviceType;
typedef enum {
+ VK_QUERY_TYPE_OCCLUSION = 0,
+ VK_QUERY_TYPE_PIPELINE_STATISTICS = 1,
+ VK_QUERY_TYPE_TIMESTAMP = 2, // presumably the type of the per-queue pool written by vkCmdWriteTimestamp2 - confirm at pool creation
+ VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR = 1000023000,
+ VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT = 1000028004,
+ VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR = 1000116000,
+ VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR = 1000150000,
+ VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR = 1000150001,
+ VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_NV = 1000165000,
+ VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL = 1000210000,
+ VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR = 1000299000,
+ VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT = 1000328000,
+ VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT = 1000382000,
+ VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR = 1000386000,
+ VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR = 1000386001,
+ VK_QUERY_TYPE_MICROMAP_SERIALIZATION_SIZE_EXT = 1000396000,
+ VK_QUERY_TYPE_MICROMAP_COMPACTED_SIZE_EXT = 1000396001,
+ VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF
+} VkQueryType;
+
+typedef enum {
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1,
VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2,
@@ -180,6 +236,121 @@ typedef enum {
} VkPipelineStageFlagBits;
typedef VkFlags VkPipelineStageFlags;
+typedef enum { // NOTE(review): several enumerators below exceed the range of int (e.g. 0x100000000ULL); before C23 that relies on a compiler extension - confirm all supported compilers accept it
+ VK_PIPELINE_STAGE_2_NONE = 0ULL,
+ VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT = 0x00000001ULL,
+ VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT = 0x00000002ULL,
+ VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT = 0x00000004ULL,
+ VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT = 0x00000008ULL,
+ VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT = 0x00000010ULL,
+ VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT = 0x00000020ULL,
+ VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT = 0x00000040ULL,
+ VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT = 0x00000080ULL,
+ VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT = 0x00000100ULL,
+ VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT = 0x00000200ULL,
+ VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400ULL,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT = 0x00000800ULL,
+ VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT = 0x00001000ULL,
+ VK_PIPELINE_STAGE_2_TRANSFER_BIT = 0x00001000ULL,
+ VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT = 0x00002000ULL,
+ VK_PIPELINE_STAGE_2_HOST_BIT = 0x00004000ULL,
+ VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT = 0x00008000ULL,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT = 0x00010000ULL,
+ VK_PIPELINE_STAGE_2_COPY_BIT = 0x100000000ULL,
+ VK_PIPELINE_STAGE_2_RESOLVE_BIT = 0x200000000ULL,
+ VK_PIPELINE_STAGE_2_BLIT_BIT = 0x400000000ULL,
+ VK_PIPELINE_STAGE_2_CLEAR_BIT = 0x800000000ULL,
+ VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT = 0x1000000000ULL,
+ VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT = 0x2000000000ULL,
+ VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT = 0x4000000000ULL,
+ VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR = 0x04000000ULL,
+ VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR = 0x08000000ULL,
+ VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT = 0x01000000ULL,
+ VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT = 0x00040000ULL,
+ VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV = 0x00020000ULL,
+ VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT = 0x00020000ULL,
+ VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x00400000ULL,
+ VK_PIPELINE_STAGE_2_SHADING_RATE_IMAGE_BIT_NV = 0x00400000ULL,
+ VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR = 0x02000000ULL,
+ VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR = 0x00200000ULL,
+ VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_NV = 0x00200000ULL,
+ VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_NV = 0x02000000ULL,
+ VK_PIPELINE_STAGE_2_FRAGMENT_DENSITY_PROCESS_BIT_EXT = 0x00800000ULL,
+ VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV = 0x00080000ULL,
+ VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV = 0x00100000ULL,
+ VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT = 0x00080000ULL,
+ VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT = 0x00100000ULL,
+ VK_PIPELINE_STAGE_2_SUBPASS_SHADER_BIT_HUAWEI = 0x8000000000ULL,
+ VK_PIPELINE_STAGE_2_INVOCATION_MASK_BIT_HUAWEI = 0x10000000000ULL,
+ VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR = 0x10000000ULL,
+ VK_PIPELINE_STAGE_2_MICROMAP_BUILD_BIT_EXT = 0x40000000ULL,
+ VK_PIPELINE_STAGE_2_CLUSTER_CULLING_SHADER_BIT_HUAWEI = 0x20000000000ULL,
+ VK_PIPELINE_STAGE_2_OPTICAL_FLOW_BIT_NV = 0x20000000ULL,
+ VK_PIPELINE_STAGE_2_CONVERT_COOPERATIVE_VECTOR_MATRIX_BIT_NV = 0x100000000000ULL,
+ VK_PIPELINE_STAGE_2_DATA_GRAPH_BIT_ARM = 0x40000000000ULL,
+ VK_PIPELINE_STAGE_2_COPY_INDIRECT_BIT_KHR = 0x400000000000ULL,
+ VK_PIPELINE_STAGE_2_MEMORY_DECOMPRESSION_BIT_EXT = 0x200000000000ULL,
+} VkPipelineStageFlagBits2;
+typedef VkFlags64 VkPipelineStageFlags2; // 64-bit mask built from the VkPipelineStageFlagBits2 values above
+
+typedef enum { // NOTE(review): several enumerators below exceed the range of int (e.g. 0x100000000ULL); before C23 that relies on a compiler extension - confirm all supported compilers accept it
+ VK_ACCESS_2_NONE = 0ULL,
+ VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT = 0x00000001ULL,
+ VK_ACCESS_2_INDEX_READ_BIT = 0x00000002ULL,
+ VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT = 0x00000004ULL,
+ VK_ACCESS_2_UNIFORM_READ_BIT = 0x00000008ULL,
+ VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT = 0x00000010ULL,
+ VK_ACCESS_2_SHADER_READ_BIT = 0x00000020ULL,
+ VK_ACCESS_2_SHADER_WRITE_BIT = 0x00000040ULL,
+ VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT = 0x00000080ULL,
+ VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT = 0x00000100ULL,
+ VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT = 0x00000200ULL,
+ VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = 0x00000400ULL,
+ VK_ACCESS_2_TRANSFER_READ_BIT = 0x00000800ULL,
+ VK_ACCESS_2_TRANSFER_WRITE_BIT = 0x00001000ULL,
+ VK_ACCESS_2_HOST_READ_BIT = 0x00002000ULL,
+ VK_ACCESS_2_HOST_WRITE_BIT = 0x00004000ULL,
+ VK_ACCESS_2_MEMORY_READ_BIT = 0x00008000ULL,
+ VK_ACCESS_2_MEMORY_WRITE_BIT = 0x00010000ULL,
+ VK_ACCESS_2_SHADER_SAMPLED_READ_BIT = 0x100000000ULL,
+ VK_ACCESS_2_SHADER_STORAGE_READ_BIT = 0x200000000ULL,
+ VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT = 0x400000000ULL,
+ VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR = 0x800000000ULL,
+ VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR = 0x1000000000ULL,
+ VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR = 0x2000000000ULL,
+ VK_ACCESS_2_VIDEO_ENCODE_WRITE_BIT_KHR = 0x4000000000ULL,
+ VK_ACCESS_2_SHADER_TILE_ATTACHMENT_READ_BIT_QCOM = 0x8000000000000ULL,
+ VK_ACCESS_2_SHADER_TILE_ATTACHMENT_WRITE_BIT_QCOM = 0x10000000000000ULL,
+ VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT = 0x02000000ULL,
+ VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT = 0x04000000ULL,
+ VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT = 0x08000000ULL,
+ VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT = 0x00100000ULL,
+ VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_NV = 0x00020000ULL,
+ VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV = 0x00040000ULL,
+ VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_EXT = 0x00020000ULL,
+ VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT = 0x00040000ULL,
+ VK_ACCESS_2_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR = 0x00800000ULL,
+ VK_ACCESS_2_SHADING_RATE_IMAGE_READ_BIT_NV = 0x00800000ULL,
+ VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR = 0x00200000ULL,
+ VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR = 0x00400000ULL,
+ VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_NV = 0x00200000ULL,
+ VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_NV = 0x00400000ULL,
+ VK_ACCESS_2_FRAGMENT_DENSITY_MAP_READ_BIT_EXT = 0x01000000ULL,
+ VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT = 0x00080000ULL,
+ VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT = 0x20000000000ULL,
+ VK_ACCESS_2_INVOCATION_MASK_READ_BIT_HUAWEI = 0x8000000000ULL,
+ VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR = 0x10000000000ULL,
+ VK_ACCESS_2_MICROMAP_READ_BIT_EXT = 0x100000000000ULL,
+ VK_ACCESS_2_MICROMAP_WRITE_BIT_EXT = 0x200000000000ULL,
+ VK_ACCESS_2_OPTICAL_FLOW_READ_BIT_NV = 0x40000000000ULL,
+ VK_ACCESS_2_OPTICAL_FLOW_WRITE_BIT_NV = 0x80000000000ULL,
+ VK_ACCESS_2_DATA_GRAPH_READ_BIT_ARM = 0x800000000000ULL,
+ VK_ACCESS_2_DATA_GRAPH_WRITE_BIT_ARM = 0x1000000000000ULL,
+ VK_ACCESS_2_MEMORY_DECOMPRESSION_READ_BIT_EXT = 0x80000000000000ULL,
+ VK_ACCESS_2_MEMORY_DECOMPRESSION_WRITE_BIT_EXT = 0x100000000000000ULL,
+} VkAccessFlagBits2;
+typedef VkFlags64 VkAccessFlags2; // 64-bit mask built from the VkAccessFlagBits2 values above
+
typedef VkFlags VkDeviceCreateFlags;
typedef enum {
@@ -189,6 +360,128 @@ typedef enum {
} VkPointClippingBehavior;
typedef enum {
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001,
+ VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT = 0x00000002,
+ VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT = 0x00000004,
+ VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000008,
+ VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT = 0x00000010,
+ VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT = 0x00000020,
+ VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT = 0x00000040,
+ VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT = 0x00000080,
+ VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT = 0x00000100,
+ VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000200,
+ VK_FORMAT_FEATURE_BLIT_SRC_BIT = 0x00000400,
+ VK_FORMAT_FEATURE_BLIT_DST_BIT = 0x00000800,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT = 0x00001000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT = 0x00002000,
+ VK_FORMAT_FEATURE_TRANSFER_SRC_BIT = 0x00004000,
+ VK_FORMAT_FEATURE_TRANSFER_DST_BIT = 0x00008000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT = 0x00010000,
+ VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT = 0x00020000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT = 0x00040000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT = 0x00080000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_BIT = 0x00100000,
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_FORCEABLE_BIT = 0x00200000,
+ VK_FORMAT_FEATURE_DISJOINT_BIT = 0x00400000,
+ VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT = 0x00800000,
+ VK_FORMAT_FEATURE_FRAGMENT_DENSITY_MAP_BIT_EXT = 0x01000000,
+ VK_FORMAT_FEATURE_VIDEO_DECODE_OUTPUT_BIT_KHR = 0x02000000,
+ VK_FORMAT_FEATURE_VIDEO_DECODE_DPB_BIT_KHR = 0x04000000,
+ VK_FORMAT_FEATURE_VIDEO_ENCODE_DPB_BIT_KHR = 0x10000000,
+ VK_FORMAT_FEATURE_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR = 0x20000000,
+ VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x40000000,
+ VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkFormatFeatureFlagBits;
+typedef VkFlags VkFormatFeatureFlags;
+
+
+typedef enum {
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT = (1ULL << 0),
+ VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT = (1ULL << 1),
+ VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT = (1ULL << 2),
+ VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT = (1ULL << 3),
+ VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT = (1ULL << 4),
+ VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_ATOMIC_BIT = (1ULL << 5),
+ VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT = (1ULL << 6),
+ VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT = (1ULL << 7),
+ VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT = (1ULL << 8),
+ VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT = (1ULL << 9),
+ VK_FORMAT_FEATURE_2_BLIT_SRC_BIT = (1ULL << 10),
+ VK_FORMAT_FEATURE_2_BLIT_DST_BIT = (1ULL << 11),
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT = (1ULL << 12),
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_CUBIC_BIT = (1ULL << 13),
+ VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT = (1ULL << 14),
+ VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT = (1ULL << 15),
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_MINMAX_BIT = (1ULL << 16),
+ VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT = (1ULL << 17),
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT = (1ULL << 18),
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT = (1ULL << 19),
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_BIT = (1ULL << 20),
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_FORCEABLE_BIT = (1ULL << 21),
+ VK_FORMAT_FEATURE_2_DISJOINT_BIT = (1ULL << 22),
+ VK_FORMAT_FEATURE_2_COSITED_CHROMA_SAMPLES_BIT = (1ULL << 23),
+ VK_FORMAT_FEATURE_2_FRAGMENT_DENSITY_MAP_BIT_EXT = (1ULL << 24),
+ VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR = (1ULL << 25),
+ VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR = (1ULL << 26),
+ VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR = (1ULL << 27),
+ VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR = (1ULL << 28),
+ VK_FORMAT_FEATURE_2_ACCELERATION_STRUCTURE_VERTEX_BUFFER_BIT_KHR = (1ULL << 29),
+ VK_FORMAT_FEATURE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = (1ULL << 30),
+ VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT = (1ULL << 31),
+ VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT = (1ULL << 32),
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_DEPTH_COMPARISON_BIT = (1ULL << 33),
+ VK_FORMAT_FEATURE_2_WEIGHT_IMAGE_BIT_QCOM = (1ULL << 34),
+ VK_FORMAT_FEATURE_2_WEIGHT_SAMPLED_IMAGE_BIT_QCOM = (1ULL << 35),
+ VK_FORMAT_FEATURE_2_BLOCK_MATCHING_BIT_QCOM = (1ULL << 36),
+ VK_FORMAT_FEATURE_2_BOX_FILTER_SAMPLED_BIT_QCOM = (1ULL << 37),
+ VK_FORMAT_FEATURE_2_LINEAR_COLOR_ATTACHMENT_BIT_NV = (1ULL << 38),
+ VK_FORMAT_FEATURE_2_TENSOR_SHADER_BIT_ARM = (1ULL << 39),
+ VK_FORMAT_FEATURE_2_OPTICAL_FLOW_IMAGE_BIT_NV = (1ULL << 40),
+ VK_FORMAT_FEATURE_2_OPTICAL_FLOW_VECTOR_BIT_NV = (1ULL << 41),
+ VK_FORMAT_FEATURE_2_OPTICAL_FLOW_COST_BIT_NV = (1ULL << 42),
+ VK_FORMAT_FEATURE_2_TENSOR_IMAGE_ALIASING_BIT_ARM = (1ULL << 43),
+
+ VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT = (1ULL << 46),
+
+ VK_FORMAT_FEATURE_2_TENSOR_DATA_GRAPH_BIT_ARM = (1ULL << 48),
+ VK_FORMAT_FEATURE_2_VIDEO_ENCODE_QUANTIZATION_DELTA_MAP_BIT_KHR = (1ULL << 49),
+ VK_FORMAT_FEATURE_2_VIDEO_ENCODE_EMPHASIS_MAP_BIT_KHR = (1ULL << 50),
+ VK_FORMAT_FEATURE_2_ACCELERATION_STRUCTURE_RADIUS_BUFFER_BIT_NV = (1ULL << 51),
+ VK_FORMAT_FEATURE_2_DEPTH_COPY_ON_COMPUTE_QUEUE_BIT_KHR = (1ULL << 52),
+ VK_FORMAT_FEATURE_2_DEPTH_COPY_ON_TRANSFER_QUEUE_BIT_KHR = (1ULL << 53),
+ VK_FORMAT_FEATURE_2_STENCIL_COPY_ON_COMPUTE_QUEUE_BIT_KHR = (1ULL << 54),
+ VK_FORMAT_FEATURE_2_STENCIL_COPY_ON_TRANSFER_QUEUE_BIT_KHR = (1ULL << 55),
+
+ VK_FORMAT_FEATURE_2_COPY_IMAGE_INDIRECT_DST_BIT_KHR = (1ULL << 59),
+} VkFormatFeatureFlagBits2;
+typedef VkFlags64 VkFormatFeatureFlags2;
+
+typedef enum {
+ VK_IMAGE_CREATE_SPARSE_BINDING_BIT = 0x00000001,
+ VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002,
+ VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004,
+ VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008,
+ VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010,
+ VK_IMAGE_CREATE_ALIAS_BIT = 0x00000400,
+ VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT = 0x00000040,
+ VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT = 0x00000020,
+ VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT = 0x00000080,
+ VK_IMAGE_CREATE_EXTENDED_USAGE_BIT = 0x00000100,
+ VK_IMAGE_CREATE_PROTECTED_BIT = 0x00000800,
+ VK_IMAGE_CREATE_DISJOINT_BIT = 0x00000200,
+ VK_IMAGE_CREATE_CORNER_SAMPLED_BIT_NV = 0x00002000,
+ VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT = 0x00001000,
+ VK_IMAGE_CREATE_SUBSAMPLED_BIT_EXT = 0x00004000,
+ VK_IMAGE_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT = 0x00010000,
+ VK_IMAGE_CREATE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_BIT_EXT = 0x00040000,
+ VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT = 0x00020000,
+ VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR = 0x00100000,
+ VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_EXT = 0x00008000,
+ VK_IMAGE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkImageCreateFlagBits;
+typedef VkFlags VkImageCreateFlags;
+
+typedef enum {
VK_SAMPLE_COUNT_1_BIT = 0x00000001,
VK_SAMPLE_COUNT_2_BIT = 0x00000002,
VK_SAMPLE_COUNT_4_BIT = 0x00000004,
@@ -253,6 +546,14 @@ typedef enum {
VK_IMAGE_VIEW_TYPE_MAX_ENUM = 0x7FFFFFFF
} VkImageViewType;
+typedef enum VkIndexType {
+ VK_INDEX_TYPE_UINT16 = 0,
+ VK_INDEX_TYPE_UINT32 = 1,
+ VK_INDEX_TYPE_UINT8 = 1000265000,
+ VK_INDEX_TYPE_NONE_KHR = 1000165000,
+ VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF
+} VkIndexType;
+
typedef enum {
VK_BLEND_FACTOR_ZERO = 0,
VK_BLEND_FACTOR_ONE = 1,
@@ -336,9 +637,22 @@ typedef enum {
VK_FENCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
} VkFenceCreateFlagBits;
typedef VkFlags VkFenceCreateFlags;
+
+typedef enum {
+ VK_QUERY_POOL_CREATE_RESET_BIT_KHR = 0x00000001,
+ VK_QUERY_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkQueryPoolCreateFlagBits;
+typedef VkFlags VkQueryPoolCreateFlags;
+
typedef VkFlags VkSemaphoreCreateFlags;
typedef enum {
+ VK_SEMAPHORE_WAIT_ANY_BIT = 0x00000001,
+ VK_SEMAPHORE_WAIT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSemaphoreWaitFlagBits;
+typedef VkFlags VkSemaphoreWaitFlags;
+
+typedef enum {
VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DYNAMIC_BIT_EXT = 0x00000001,
VK_IMAGE_VIEW_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT = 0x00000004,
VK_IMAGE_VIEW_CREATE_FRAGMENT_DENSITY_MAP_DEFERRED_BIT_EXT = 0x00000002,
@@ -816,6 +1130,20 @@ typedef enum {
} VkFormat;
typedef enum {
+ VK_IMAGE_TILING_OPTIMAL = 0,
+ VK_IMAGE_TILING_LINEAR = 1,
+ VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT = 1000158000,
+ VK_IMAGE_TILING_MAX_ENUM = 0x7FFFFFFF
+} VkImageTiling;
+
+typedef enum {
+ VK_IMAGE_TYPE_1D = 0,
+ VK_IMAGE_TYPE_2D = 1,
+ VK_IMAGE_TYPE_3D = 2,
+ VK_IMAGE_TYPE_MAX_ENUM = 0x7FFFFFFF
+} VkImageType;
+
+typedef enum {
VK_COLOR_SPACE_SRGB_NONLINEAR_KHR = 0,
VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT = 1000104001,
VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT = 1000104002,
@@ -843,6 +1171,61 @@ typedef enum {
} VkSharingMode;
typedef enum {
+ VK_QUERY_RESULT_64_BIT = 0x00000001,
+ VK_QUERY_RESULT_WAIT_BIT = 0x00000002,
+ VK_QUERY_RESULT_WITH_AVAILABILITY_BIT = 0x00000004,
+ VK_QUERY_RESULT_PARTIAL_BIT = 0x00000008,
+ VK_QUERY_RESULT_WITH_STATUS_BIT_KHR = 0x00000010,
+ VK_QUERY_RESULT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkQueryResultFlagBits;
+typedef VkFlags VkQueryResultFlags;
+
+typedef enum {
+ VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001,
+ VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002,
+ VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004,
+ VK_BUFFER_CREATE_PROTECTED_BIT = 0x00000008,
+ VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT = 0x00000010,
+ VK_BUFFER_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT = 0x00000020,
+ VK_BUFFER_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR = 0x00000040,
+ VK_BUFFER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkBufferCreateFlagBits;
+typedef VkFlags VkBufferCreateFlags;
+
+typedef enum {
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT = 0x00000001,
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT = 0x00000002,
+ VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000004,
+ VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT = 0x00000008,
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT = 0x00000010,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT = 0x00000020,
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT = 0x00000040,
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT = 0x00000080,
+ VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT = 0x00000100,
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT = 0x00020000,
+ VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR = 0x00002000,
+ VK_BUFFER_USAGE_VIDEO_DECODE_DST_BIT_KHR = 0x00004000,
+ VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT = 0x00000800,
+ VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT = 0x00001000,
+ VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT = 0x00000200,
+ VK_BUFFER_USAGE_EXECUTION_GRAPH_SCRATCH_BIT_AMDX = 0x02000000,
+ VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR = 0x00080000,
+ VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR = 0x00100000,
+ VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR = 0x00000400,
+ VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR = 0x00008000,
+ VK_BUFFER_USAGE_VIDEO_ENCODE_SRC_BIT_KHR = 0x00010000,
+ VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT = 0x00200000,
+ VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT = 0x00400000,
+ VK_BUFFER_USAGE_PUSH_DESCRIPTORS_DESCRIPTOR_BUFFER_BIT_EXT = 0x04000000,
+ VK_BUFFER_USAGE_MICROMAP_BUILD_INPUT_READ_ONLY_BIT_EXT = 0x00800000,
+ VK_BUFFER_USAGE_MICROMAP_STORAGE_BIT_EXT = 0x01000000,
+ VK_BUFFER_USAGE_TILE_MEMORY_BIT_QCOM = 0x08000000,
+ VK_BUFFER_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkBufferUsageFlagBits;
+typedef VkFlags VkBufferUsageFlags;
+typedef VkFlags VkBufferViewCreateFlags;
+
+typedef enum {
VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT = 0x00000001,
VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT = 0x00000002,
VK_PIPELINE_SHADER_STAGE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
@@ -1079,12 +1462,6 @@ typedef enum {
typedef VkFlags VkDescriptorSetLayoutCreateFlags;
typedef enum {
- VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001,
- VK_ATTACHMENT_DESCRIPTION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
-} VkAttachmentDescriptionFlagBits;
-typedef VkFlags VkAttachmentDescriptionFlags;
-
-typedef enum {
VK_DESCRIPTOR_TYPE_SAMPLER = 0,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER = 1,
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE = 2,
@@ -1138,13 +1515,6 @@ typedef enum {
} VkCommandBufferLevel;
typedef enum {
- VK_SUBPASS_CONTENTS_INLINE = 0,
- VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS = 1,
- VK_SUBPASS_CONTENTS_INLINE_AND_SECONDARY_COMMAND_BUFFERS_KHR = 1000451000,
- VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF
-} VkSubpassContents;
-
-typedef enum {
VK_IMAGE_LAYOUT_UNDEFINED = 0,
VK_IMAGE_LAYOUT_GENERAL = 1,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL = 2,
@@ -1195,26 +1565,6 @@ typedef enum {
typedef VkFlags VkFramebufferCreateFlags;
typedef enum {
- VK_RENDER_PASS_CREATE_TRANSFORM_BIT_QCOM = 0x00000002,
- VK_RENDER_PASS_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
-} VkRenderPassCreateFlagBits;
-typedef VkFlags VkRenderPassCreateFlags;
-
-typedef enum {
- VK_SUBPASS_DESCRIPTION_PER_VIEW_ATTRIBUTES_BIT_NVX = 0x00000001,
- VK_SUBPASS_DESCRIPTION_PER_VIEW_POSITION_X_ONLY_BIT_NVX = 0x00000002,
- VK_SUBPASS_DESCRIPTION_FRAGMENT_REGION_BIT_QCOM = 0x00000004,
- VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM = 0x00000008,
- VK_SUBPASS_DESCRIPTION_TILE_SHADING_APRON_BIT_QCOM = 0x00000100,
- VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT = 0x00000010,
- VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_EXT = 0x00000020,
- VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_EXT = 0x00000040,
- VK_SUBPASS_DESCRIPTION_ENABLE_LEGACY_DITHERING_BIT_EXT = 0x00000080,
- VK_SUBPASS_DESCRIPTION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
-} VkSubpassDescriptionFlagBits;
-typedef VkFlags VkSubpassDescriptionFlags;
-
-typedef enum {
VK_COMMAND_POOL_CREATE_TRANSIENT_BIT = 0x00000001,
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002,
VK_COMMAND_POOL_CREATE_PROTECTED_BIT = 0x00000004,
@@ -1296,24 +1646,24 @@ typedef enum {
typedef VkFlags VkMemoryAllocateFlags;
typedef enum {
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT = 0x00000001,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT = 0x00000002,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT = 0x00000004,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT = 0x00000008,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT = 0x00000010,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT = 0x00000020,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT = 0x00000040,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT = 0x00000200,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID = 0x00000400,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT = 0x00000080,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT = 0x00000100,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_ZIRCON_VMO_BIT_FUCHSIA = 0x00000800,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_RDMA_ADDRESS_BIT_NV = 0x00001000,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_SCREEN_BUFFER_BIT_QNX = 0x00004000,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_EXT = 0x00010000,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_EXT = 0x00020000,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT = 0x00040000,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT = 0x00000001,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT = 0x00000002,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT = 0x00000004,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT = 0x00000008,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT = 0x00000010,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT = 0x00000020,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT = 0x00000040,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT = 0x00000200,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID = 0x00000400,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT = 0x00000080,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT = 0x00000100,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ZIRCON_VMO_BIT_FUCHSIA = 0x00000800,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_RDMA_ADDRESS_BIT_NV = 0x00001000,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_SCREEN_BUFFER_BIT_QNX = 0x00004000,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLBUFFER_BIT_EXT = 0x00010000,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLTEXTURE_BIT_EXT = 0x00020000,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT = 0x00040000,
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF,
} VkExternalMemoryHandleTypeFlagBits;
typedef VkFlags VkExternalMemoryHandleTypeFlags;
@@ -1328,6 +1678,44 @@ typedef enum {
} VkExternalSemaphoreHandleTypeFlagBits;
typedef VkFlags VkExternalSemaphoreHandleTypeFlags;
+typedef enum {
+ VK_SEMAPHORE_TYPE_BINARY = 0,
+ VK_SEMAPHORE_TYPE_TIMELINE = 1,
+ VK_SEMAPHORE_TYPE_MAX_ENUM = 0x7FFFFFFF
+} VkSemaphoreType;
+
+typedef enum {
+ VK_RESOLVE_MODE_NONE = 0,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT = 0x00000001,
+ VK_RESOLVE_MODE_AVERAGE_BIT = 0x00000002,
+ VK_RESOLVE_MODE_MIN_BIT = 0x00000004,
+ VK_RESOLVE_MODE_MAX_BIT = 0x00000008,
+ VK_RESOLVE_MODE_EXTERNAL_FORMAT_DOWNSAMPLE_BIT_ANDROID = 0x00000010,
+ VK_RESOLVE_MODE_CUSTOM_BIT_EXT = 0x00000020,
+ VK_RESOLVE_MODE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkResolveModeFlagBits;
+typedef VkFlags VkResolveModeFlags;
+
+typedef enum {
+ VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT = 0x00000001,
+ VK_RENDERING_SUSPENDING_BIT = 0x00000002,
+ VK_RENDERING_RESUMING_BIT = 0x00000004,
+ VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT = 0x00000008,
+ VK_RENDERING_CONTENTS_INLINE_BIT_KHR = 0x00000010,
+ VK_RENDERING_PER_LAYER_FRAGMENT_DENSITY_BIT_VALVE = 0x00000020,
+ VK_RENDERING_FRAGMENT_REGION_BIT_EXT = 0x00000040,
+ VK_RENDERING_CUSTOM_RESOLVE_BIT_EXT = 0x00000080,
+ VK_RENDERING_LOCAL_READ_CONCURRENT_ACCESS_CONTROL_BIT_KHR = 0x00000100,
+ VK_RENDERING_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkRenderingFlagBits;
+typedef VkFlags VkRenderingFlags;
+
+typedef enum {
+ VK_SUBMIT_PROTECTED_BIT = 0x00000001,
+ VK_SUBMIT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSubmitFlagBits;
+typedef VkFlags VkSubmitFlags;
+
typedef struct {
uint32_t width;
uint32_t height;
@@ -1371,18 +1759,6 @@ typedef struct {
} VkLayerProperties;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- uint32_t waitSemaphoreCount;
- const VkSemaphore * pWaitSemaphores;
- const VkPipelineStageFlags * pWaitDstStageMask;
- uint32_t commandBufferCount;
- const VkCommandBuffer * pCommandBuffers;
- uint32_t signalSemaphoreCount;
- const VkSemaphore * pSignalSemaphores;
-} VkSubmitInfo;
-
-typedef struct {
VkStructureType sType;
const void * pNext;
const char * pApplicationName;
@@ -1393,6 +1769,26 @@ typedef struct {
} VkApplicationInfo;
typedef struct {
+ VkFormatFeatureFlags linearTilingFeatures;
+ VkFormatFeatureFlags optimalTilingFeatures;
+ VkFormatFeatureFlags bufferFeatures;
+} VkFormatProperties;
+
+typedef struct {
+ VkStructureType sType;
+ void * pNext;
+ VkFormatProperties formatProperties;
+} VkFormatProperties2;
+
+typedef struct {
+ VkStructureType sType;
+ void * pNext;
+ VkFormatFeatureFlags2 linearTilingFeatures;
+ VkFormatFeatureFlags2 optimalTilingFeatures;
+ VkFormatFeatureFlags2 bufferFeatures;
+} VkFormatProperties3;
+
+typedef struct {
VkStructureType sType;
const void * pNext;
VkInstanceCreateFlags flags;
@@ -1640,6 +2036,107 @@ typedef struct {
} VkPhysicalDeviceFeatures;
typedef struct {
+ VkStructureType sType;
+ void * pNext;
+ VkBool32 storageBuffer16BitAccess;
+ VkBool32 uniformAndStorageBuffer16BitAccess;
+ VkBool32 storagePushConstant16;
+ VkBool32 storageInputOutput16;
+ VkBool32 multiview;
+ VkBool32 multiviewGeometryShader;
+ VkBool32 multiviewTessellationShader;
+ VkBool32 variablePointersStorageBuffer;
+ VkBool32 variablePointers;
+ VkBool32 protectedMemory;
+ VkBool32 samplerYcbcrConversion;
+ VkBool32 shaderDrawParameters;
+} VkPhysicalDeviceVulkan11Features;
+
+typedef struct {
+ VkStructureType sType;
+ void * pNext;
+ VkBool32 samplerMirrorClampToEdge;
+ VkBool32 drawIndirectCount;
+ VkBool32 storageBuffer8BitAccess;
+ VkBool32 uniformAndStorageBuffer8BitAccess;
+ VkBool32 storagePushConstant8;
+ VkBool32 shaderBufferInt64Atomics;
+ VkBool32 shaderSharedInt64Atomics;
+ VkBool32 shaderFloat16;
+ VkBool32 shaderInt8;
+ VkBool32 descriptorIndexing;
+ VkBool32 shaderInputAttachmentArrayDynamicIndexing;
+ VkBool32 shaderUniformTexelBufferArrayDynamicIndexing;
+ VkBool32 shaderStorageTexelBufferArrayDynamicIndexing;
+ VkBool32 shaderUniformBufferArrayNonUniformIndexing;
+ VkBool32 shaderSampledImageArrayNonUniformIndexing;
+ VkBool32 shaderStorageBufferArrayNonUniformIndexing;
+ VkBool32 shaderStorageImageArrayNonUniformIndexing;
+ VkBool32 shaderInputAttachmentArrayNonUniformIndexing;
+ VkBool32 shaderUniformTexelBufferArrayNonUniformIndexing;
+ VkBool32 shaderStorageTexelBufferArrayNonUniformIndexing;
+ VkBool32 descriptorBindingUniformBufferUpdateAfterBind;
+ VkBool32 descriptorBindingSampledImageUpdateAfterBind;
+ VkBool32 descriptorBindingStorageImageUpdateAfterBind;
+ VkBool32 descriptorBindingStorageBufferUpdateAfterBind;
+ VkBool32 descriptorBindingUniformTexelBufferUpdateAfterBind;
+ VkBool32 descriptorBindingStorageTexelBufferUpdateAfterBind;
+ VkBool32 descriptorBindingUpdateUnusedWhilePending;
+ VkBool32 descriptorBindingPartiallyBound;
+ VkBool32 descriptorBindingVariableDescriptorCount;
+ VkBool32 runtimeDescriptorArray;
+ VkBool32 samplerFilterMinmax;
+ VkBool32 scalarBlockLayout;
+ VkBool32 imagelessFramebuffer;
+ VkBool32 uniformBufferStandardLayout;
+ VkBool32 shaderSubgroupExtendedTypes;
+ VkBool32 separateDepthStencilLayouts;
+ VkBool32 hostQueryReset;
+ VkBool32 timelineSemaphore;
+ VkBool32 bufferDeviceAddress;
+ VkBool32 bufferDeviceAddressCaptureReplay;
+ VkBool32 bufferDeviceAddressMultiDevice;
+ VkBool32 vulkanMemoryModel;
+ VkBool32 vulkanMemoryModelDeviceScope;
+ VkBool32 vulkanMemoryModelAvailabilityVisibilityChains;
+ VkBool32 shaderOutputViewportIndex;
+ VkBool32 shaderOutputLayer;
+ VkBool32 subgroupBroadcastDynamicId;
+} VkPhysicalDeviceVulkan12Features;
+
+typedef struct {
+ VkStructureType sType;
+ void * pNext;
+ VkBool32 robustImageAccess;
+ VkBool32 inlineUniformBlock;
+ VkBool32 descriptorBindingInlineUniformBlockUpdateAfterBind;
+ VkBool32 pipelineCreationCacheControl;
+ VkBool32 privateData;
+ VkBool32 shaderDemoteToHelperInvocation;
+ VkBool32 shaderTerminateInvocation;
+ VkBool32 subgroupSizeControl;
+ VkBool32 computeFullSubgroups;
+ VkBool32 synchronization2;
+ VkBool32 textureCompressionASTC_HDR;
+ VkBool32 shaderZeroInitializeWorkgroupMemory;
+ VkBool32 dynamicRendering;
+ VkBool32 shaderIntegerDotProduct;
+ VkBool32 maintenance4;
+} VkPhysicalDeviceVulkan13Features;
+
+typedef struct {
+ VkStructureType sType;
+ void * pNext;
+ VkBool32 shaderRelaxedExtendedInstruction;
+} VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR;
+
+typedef struct {
+ VkStructureType sType;
+ void * pNext;
+ VkPhysicalDeviceFeatures features;
+} VkPhysicalDeviceFeatures2;
+
+typedef struct {
VkQueueFlags queueFlags;
uint32_t queueCount;
uint32_t timestampValidBits;
@@ -1718,7 +2215,6 @@ typedef struct {
VkColorSpaceKHR colorSpace;
} VkSurfaceFormatKHR;
-
typedef struct {
VkStructureType sType;
const void * pNext;
@@ -1726,39 +2222,82 @@ typedef struct {
} VkFenceCreateInfo;
typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkQueryPoolCreateFlags flags;
+ VkQueryType queryType;
+ uint32_t queryCount;
+ VkQueryPipelineStatisticFlags pipelineStatistics;
+} VkQueryPoolCreateInfo;
+
+typedef struct {
VkStructureType sType;
const void * pNext;
VkSemaphoreCreateFlags flags;
} VkSemaphoreCreateInfo;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkExternalSemaphoreHandleTypeFlags handleTypes;
+ VkStructureType sType;
+ const void * pNext;
+ VkSemaphoreType semaphoreType;
+ uint64_t initialValue;
+} VkSemaphoreTypeCreateInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ uint32_t waitSemaphoreValueCount;
+ const uint64_t * pWaitSemaphoreValues;
+ uint32_t signalSemaphoreValueCount;
+ const uint64_t * pSignalSemaphoreValues;
+} VkTimelineSemaphoreSubmitInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkSemaphoreWaitFlags flags;
+ uint32_t semaphoreCount;
+ const VkSemaphore * pSemaphores;
+ const uint64_t * pValues;
+} VkSemaphoreWaitInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkSemaphore semaphore;
+ uint64_t value;
+} VkSemaphoreSignalInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkSemaphore semaphore;
+ uint64_t value;
+ VkPipelineStageFlags2 stageMask;
+ uint32_t deviceIndex;
+} VkSemaphoreSubmitInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkExternalSemaphoreHandleTypeFlags handleTypes;
} VkExportSemaphoreCreateInfo;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkSemaphore semaphore;
- VkExternalSemaphoreHandleTypeFlagBits handleType;
+ VkStructureType sType;
+ const void * pNext;
+ VkSemaphore semaphore;
+ VkExternalSemaphoreHandleTypeFlagBits handleType;
} VkSemaphoreGetWin32HandleInfoKHR;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkSemaphore semaphore;
- VkExternalSemaphoreHandleTypeFlagBits handleType;
+ VkStructureType sType;
+ const void * pNext;
+ VkSemaphore semaphore;
+ VkExternalSemaphoreHandleTypeFlagBits handleType;
} VkSemaphoreGetFdInfoKHR;
typedef struct {
- VkComponentSwizzle r;
- VkComponentSwizzle g;
- VkComponentSwizzle b;
- VkComponentSwizzle a;
-} VkComponentMapping;
-
-typedef struct {
VkImageAspectFlags aspectMask;
uint32_t baseMipLevel;
uint32_t levelCount;
@@ -1767,6 +2306,64 @@ typedef struct {
} VkImageSubresourceRange;
typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkPipelineStageFlags2 srcStageMask;
+ VkAccessFlags2 srcAccessMask;
+ VkPipelineStageFlags2 dstStageMask;
+ VkAccessFlags2 dstAccessMask;
+} VkMemoryBarrier2;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkPipelineStageFlags2 srcStageMask;
+ VkAccessFlags2 srcAccessMask;
+ VkPipelineStageFlags2 dstStageMask;
+ VkAccessFlags2 dstAccessMask;
+ uint32_t srcQueueFamilyIndex;
+ uint32_t dstQueueFamilyIndex;
+ VkBuffer buffer;
+ VkDeviceSize offset;
+ VkDeviceSize size;
+} VkBufferMemoryBarrier2;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkPipelineStageFlags2 srcStageMask;
+ VkAccessFlags2 srcAccessMask;
+ VkPipelineStageFlags2 dstStageMask;
+ VkAccessFlags2 dstAccessMask;
+ VkImageLayout oldLayout;
+ VkImageLayout newLayout;
+ uint32_t srcQueueFamilyIndex;
+ uint32_t dstQueueFamilyIndex;
+ VkImage image;
+ VkImageSubresourceRange subresourceRange;
+} VkImageMemoryBarrier2;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkDependencyFlags dependencyFlags;
+ uint32_t memoryBarrierCount;
+ const VkMemoryBarrier2 * pMemoryBarriers;
+ uint32_t bufferMemoryBarrierCount;
+ const VkBufferMemoryBarrier2 * pBufferMemoryBarriers;
+ uint32_t imageMemoryBarrierCount;
+ const VkImageMemoryBarrier2 * pImageMemoryBarriers;
+} VkDependencyInfo;
+
+
+typedef struct {
+ VkComponentSwizzle r;
+ VkComponentSwizzle g;
+ VkComponentSwizzle b;
+ VkComponentSwizzle a;
+} VkComponentMapping;
+
+typedef struct {
VkStructureType sType;
const void * pNext;
VkImageViewCreateFlags flags;
@@ -1778,6 +2375,35 @@ typedef struct {
} VkImageViewCreateInfo;
typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkBufferCreateFlags flags;
+ VkDeviceSize size;
+ VkBufferUsageFlags usage;
+ VkSharingMode sharingMode;
+ uint32_t queueFamilyIndexCount;
+ const uint32_t * pQueueFamilyIndices;
+} VkBufferCreateInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkImageCreateFlags flags;
+ VkImageType imageType;
+ VkFormat format;
+ VkExtent3D extent;
+ uint32_t mipLevels;
+ uint32_t arrayLayers;
+ VkSampleCountFlagBits samples;
+ VkImageTiling tiling;
+ VkImageUsageFlags usage;
+ VkSharingMode sharingMode;
+ uint32_t queueFamilyIndexCount;
+ const uint32_t * pQueueFamilyIndices;
+ VkImageLayout initialLayout;
+} VkImageCreateInfo;
+
+typedef struct {
VkStructureType sType;
const void * pNext;
VkShaderModuleCreateFlags flags;
@@ -1965,69 +2591,57 @@ typedef struct {
const VkPushConstantRange * pPushConstantRanges;
} VkPipelineLayoutCreateInfo;
-typedef struct {
- VkAttachmentDescriptionFlags flags;
- VkFormat format;
- VkSampleCountFlagBits samples;
- VkAttachmentLoadOp loadOp;
- VkAttachmentStoreOp storeOp;
- VkAttachmentLoadOp stencilLoadOp;
- VkAttachmentStoreOp stencilStoreOp;
- VkImageLayout initialLayout;
- VkImageLayout finalLayout;
-} VkAttachmentDescription;
+typedef union {
+ float float32[4];
+ int32_t int32[4];
+ uint32_t uint32[4];
+} VkClearColorValue;
typedef struct {
- uint32_t attachment;
- VkImageLayout layout;
-} VkAttachmentReference;
+ float depth;
+ uint32_t stencil;
+} VkClearDepthStencilValue;
+
+typedef union {
+ VkClearColorValue color;
+ VkClearDepthStencilValue depthStencil;
+} VkClearValue;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkFramebufferCreateFlags flags;
- VkRenderPass renderPass;
- uint32_t attachmentCount;
- const VkImageView * pAttachments;
- uint32_t width;
- uint32_t height;
- uint32_t layers;
-} VkFramebufferCreateInfo;
-
-typedef struct {
- VkSubpassDescriptionFlags flags;
- VkPipelineBindPoint pipelineBindPoint;
- uint32_t inputAttachmentCount;
- const VkAttachmentReference * pInputAttachments;
- uint32_t colorAttachmentCount;
- const VkAttachmentReference * pColorAttachments;
- const VkAttachmentReference * pResolveAttachments;
- const VkAttachmentReference * pDepthStencilAttachment;
- uint32_t preserveAttachmentCount;
- const uint32_t * pPreserveAttachments;
-} VkSubpassDescription;
-
-typedef struct {
- uint32_t srcSubpass;
- uint32_t dstSubpass;
- VkPipelineStageFlags srcStageMask;
- VkPipelineStageFlags dstStageMask;
- VkAccessFlags srcAccessMask;
- VkAccessFlags dstAccessMask;
- VkDependencyFlags dependencyFlags;
-} VkSubpassDependency;
+ VkStructureType sType;
+ const void * pNext;
+ VkImageView imageView;
+ VkImageLayout imageLayout;
+ VkResolveModeFlagBits resolveMode;
+ VkImageView resolveImageView;
+ VkImageLayout resolveImageLayout;
+ VkAttachmentLoadOp loadOp;
+ VkAttachmentStoreOp storeOp;
+ VkClearValue clearValue;
+} VkRenderingAttachmentInfo;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkRenderPassCreateFlags flags;
- uint32_t attachmentCount;
- const VkAttachmentDescription * pAttachments;
- uint32_t subpassCount;
- const VkSubpassDescription * pSubpasses;
- uint32_t dependencyCount;
- const VkSubpassDependency * pDependencies;
-} VkRenderPassCreateInfo;
+ VkStructureType sType;
+ const void * pNext;
+ VkRenderingFlags flags;
+ VkRect2D renderArea;
+ uint32_t layerCount;
+ uint32_t viewMask;
+ uint32_t colorAttachmentCount;
+ const VkRenderingAttachmentInfo * pColorAttachments;
+ const VkRenderingAttachmentInfo * pDepthAttachment;
+ const VkRenderingAttachmentInfo * pStencilAttachment;
+} VkRenderingInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ uint32_t viewMask;
+ uint32_t colorAttachmentCount;
+ const VkFormat * pColorAttachmentFormats;
+ VkFormat depthAttachmentFormat;
+ VkFormat stencilAttachmentFormat;
+} VkPipelineRenderingCreateInfo;
typedef struct {
VkStructureType sType;
@@ -2062,21 +2676,41 @@ typedef struct {
const VkCommandBufferInheritanceInfo * pInheritanceInfo;
} VkCommandBufferBeginInfo;
-typedef union {
- float float32[4];
- int32_t int32[4];
- uint32_t uint32[4];
-} VkClearColorValue;
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkCommandBuffer commandBuffer;
+ uint32_t deviceMask;
+} VkCommandBufferSubmitInfo;
typedef struct {
- float depth;
- uint32_t stencil;
-} VkClearDepthStencilValue;
+ VkStructureType sType;
+ const void * pNext;
+ VkSubmitFlags flags;
+ uint32_t waitSemaphoreInfoCount;
+ const VkSemaphoreSubmitInfo * pWaitSemaphoreInfos;
+ uint32_t commandBufferInfoCount;
+ const VkCommandBufferSubmitInfo * pCommandBufferInfos;
+ uint32_t signalSemaphoreInfoCount;
+ const VkSemaphoreSubmitInfo * pSignalSemaphoreInfos;
+} VkSubmitInfo2;
-typedef union {
- VkClearColorValue color;
- VkClearDepthStencilValue depthStencil;
-} VkClearValue;
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkDeviceSize srcOffset;
+ VkDeviceSize dstOffset;
+ VkDeviceSize size;
+} VkBufferCopy2;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkBuffer srcBuffer;
+ VkBuffer dstBuffer;
+ uint32_t regionCount;
+ const VkBufferCopy2 * pRegions;
+} VkCopyBufferInfo2;
typedef struct {
VkStructureType sType;
@@ -2126,25 +2760,44 @@ typedef struct {
} VkAllocationCallbacks;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkDeviceMemory memory;
- VkDeviceSize offset;
- VkDeviceSize size;
+ VkStructureType sType;
+ const void * pNext;
+ VkDeviceMemory memory;
+ VkDeviceSize offset;
+ VkDeviceSize size;
} VkMappedMemoryRange;
typedef struct {
VkStructureType sType;
const void * pNext;
+ VkBuffer buffer;
+} VkBufferDeviceAddressInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
VkDeviceSize allocationSize;
uint32_t memoryTypeIndex;
} VkMemoryAllocateInfo;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkMemoryAllocateFlags flags;
- uint32_t deviceMask;
+ VkDeviceSize size;
+ VkDeviceSize alignment;
+ uint32_t memoryTypeBits;
+} VkMemoryRequirements;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkImage image;
+ VkBuffer buffer;
+} VkMemoryDedicatedAllocateInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkMemoryAllocateFlags flags;
+ uint32_t deviceMask;
} VkMemoryAllocateFlagsInfo;
typedef struct {
@@ -2154,17 +2807,23 @@ typedef struct {
} VkExportMemoryAllocateInfo;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkDeviceMemory memory;
- VkExternalMemoryHandleTypeFlagBits handleType;
+ VkStructureType sType;
+ const void * pNext;
+ VkExternalMemoryHandleTypeFlags handleTypes;
+} VkExternalMemoryImageCreateInfo;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ VkDeviceMemory memory;
+ VkExternalMemoryHandleTypeFlagBits handleType;
} VkMemoryGetWin32HandleInfoKHR;
typedef struct {
- VkStructureType sType;
- const void * pNext;
- VkDeviceMemory memory;
- VkExternalMemoryHandleTypeFlagBits handleType;
+ VkStructureType sType;
+ const void * pNext;
+ VkDeviceMemory memory;
+ VkExternalMemoryHandleTypeFlagBits handleType;
} VkMemoryGetFdInfoKHR;
typedef struct {
@@ -2193,6 +2852,36 @@ typedef struct {
const VkDescriptorSetLayoutBinding * pBindings;
} VkDescriptorSetLayoutCreateInfo;
+typedef enum {
+ VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0,
+ VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1,
+ VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT = 2,
+ VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT = 3,
+ VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT = 4,
+ VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkValidationFeatureEnableEXT;
+
+typedef enum {
+ VK_VALIDATION_FEATURE_DISABLE_ALL_EXT = 0,
+ VK_VALIDATION_FEATURE_DISABLE_SHADERS_EXT = 1,
+ VK_VALIDATION_FEATURE_DISABLE_THREAD_SAFETY_EXT = 2,
+ VK_VALIDATION_FEATURE_DISABLE_API_PARAMETERS_EXT = 3,
+ VK_VALIDATION_FEATURE_DISABLE_OBJECT_LIFETIMES_EXT = 4,
+ VK_VALIDATION_FEATURE_DISABLE_CORE_CHECKS_EXT = 5,
+ VK_VALIDATION_FEATURE_DISABLE_UNIQUE_HANDLES_EXT = 6,
+ VK_VALIDATION_FEATURE_DISABLE_SHADER_VALIDATION_CACHE_EXT = 7,
+ VK_VALIDATION_FEATURE_DISABLE_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkValidationFeatureDisableEXT;
+
+typedef struct {
+ VkStructureType sType;
+ const void * pNext;
+ uint32_t enabledValidationFeatureCount;
+ const VkValidationFeatureEnableEXT * pEnabledValidationFeatures;
+ uint32_t disabledValidationFeatureCount;
+ const VkValidationFeatureDisableEXT * pDisabledValidationFeatures;
+} VkValidationFeaturesEXT;
+
/* X(name, ret, params) */
#define VkLoaderProcedureList \
@@ -2208,6 +2897,8 @@ typedef struct {
X(vkEnumerateDeviceExtensionProperties, VkResult, (VkPhysicalDevice physicalDevice, const char *pLayerName, uint32_t *pPropertyCount, VkExtensionProperties *pProperties)) \
X(vkEnumeratePhysicalDevices, VkResult, (VkInstance instance, uint32_t *pPhysicalDeviceCount, VkPhysicalDevice *pPhysicalDevices)) \
X(vkGetDeviceProcAddr, void *, (VkDevice device, const char *pName)) \
+ X(vkGetPhysicalDeviceFeatures2, void, (VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2 *pFeatures)) \
+ X(vkGetPhysicalDeviceFormatProperties2, void, (VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2 *pFormatProperties)) \
X(vkGetPhysicalDeviceMemoryProperties2, void, (VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)) \
X(vkGetPhysicalDeviceProperties2, void, (VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2 *pProperties)) \
X(vkGetPhysicalDeviceQueueFamilyProperties, void, (VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, VkQueueFamilyProperties *pQueueFamilyProperties)) \
@@ -2215,24 +2906,58 @@ typedef struct {
/* X(name, ret, params) */
#define VkDeviceProcedureList \
- X(vkAllocateMemory, VkResult, (VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo, const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMemory)) \
- X(vkCreateComputePipelines, VkResult, (VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)) \
- X(vkCreatePipelineLayout, VkResult, (VkDevice device, const VkPipelineLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout)) \
- X(vkCreateSemaphore, VkResult, (VkDevice device, const VkSemaphoreCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore)) \
- X(vkCreateShaderModule, VkResult, (VkDevice device, const VkShaderModuleCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkShaderModule *pShaderModule)) \
- X(vkDestroyBuffer, void, (VkDevice device, VkBuffer buffer, const VkAllocationCallbacks *pAllocator)) \
- X(vkDestroyPipeline, void, (VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks *pAllocator)) \
- X(vkDestroyPipelineLayout, void, (VkDevice device, VkPipelineLayout pipelineLayout, const VkAllocationCallbacks *pAllocator)) \
- X(vkDestroyShaderModule, void, (VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks *pAllocator)) \
- X(vkFlushMappedMemoryRanges, VkResult, (VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges)) \
- X(vkFreeMemory, void, (VkDevice device, VkDeviceMemory memory, const VkAllocationCallbacks *pAllocator)) \
- X(vkGetDeviceQueue, void, (VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue *pQueue)) \
- X(vkGetMemoryFdKHR, VkResult, (VkDevice device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFd)) \
- X(vkGetMemoryWin32HandleKHR, VkResult, (VkDevice device, const VkMemoryGetWin32HandleInfoKHR *pGetWin32HandleInfo, void **pHandle)) \
- X(vkGetSemaphoreFdKHR, VkResult, (VkDevice device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd)) \
- X(vkGetSemaphoreWin32HandleKHR, VkResult, (VkDevice device, const VkSemaphoreGetWin32HandleInfoKHR *pGetWin32HandleInfo, void **pHandle)) \
- X(vkMapMemory, VkResult, (VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void **ppData)) \
- X(vkUnmapMemory, void, (VkDevice device, VkDeviceMemory memory)) \
+ X(vkAllocateCommandBuffers, VkResult, (VkDevice device, const VkCommandBufferAllocateInfo *pAllocateInfo, VkCommandBuffer *pCommandBuffers)) \
+ X(vkAllocateMemory, VkResult, (VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo, const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMemory)) \
+ X(vkBindBufferMemory, VkResult, (VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset)) \
+ X(vkBindImageMemory, VkResult, (VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset)) \
+ X(vkCreateBuffer, VkResult, (VkDevice device, const VkBufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)) \
+ X(vkCreateCommandPool, VkResult, (VkDevice device, const VkCommandPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkCommandPool *pCommandPool)) \
+ X(vkCreateComputePipelines, VkResult, (VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)) \
+ X(vkCreateGraphicsPipelines, VkResult, (VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)) \
+ X(vkCreateImage, VkResult, (VkDevice device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkImage *pImage)) \
+ X(vkCreateImageView, VkResult, (VkDevice device, const VkImageViewCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkImageView *pView)) \
+ X(vkCreatePipelineLayout, VkResult, (VkDevice device, const VkPipelineLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout)) \
+ X(vkCreateQueryPool, VkResult, (VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool)) \
+ X(vkCreateSemaphore, VkResult, (VkDevice device, const VkSemaphoreCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore)) \
+ X(vkCreateShaderModule, VkResult, (VkDevice device, const VkShaderModuleCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkShaderModule *pShaderModule)) \
+ X(vkDestroyBuffer, void, (VkDevice device, VkBuffer buffer, const VkAllocationCallbacks *pAllocator)) \
+ X(vkDestroyImage, void, (VkDevice device, VkImage image, const VkAllocationCallbacks *pAllocator)) \
+ X(vkDestroyImageView, void, (VkDevice device, VkImageView imageView, const VkAllocationCallbacks *pAllocator)) \
+ X(vkDestroyPipeline, void, (VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks *pAllocator)) \
+ X(vkDestroyPipelineLayout, void, (VkDevice device, VkPipelineLayout pipelineLayout, const VkAllocationCallbacks *pAllocator)) \
+ X(vkDestroyShaderModule, void, (VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks *pAllocator)) \
+ X(vkFlushMappedMemoryRanges, VkResult, (VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges)) \
+ X(vkFreeMemory, void, (VkDevice device, VkDeviceMemory memory, const VkAllocationCallbacks *pAllocator)) \
+ X(vkGetBufferDeviceAddress, VkDeviceAddress, (VkDevice device, const VkBufferDeviceAddressInfo *pInfo)) \
+ X(vkGetBufferMemoryRequirements, void, (VkDevice device, VkBuffer buffer, VkMemoryRequirements *pMemoryRequirements)) \
+ X(vkGetDeviceQueue, void, (VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue *pQueue)) \
+ X(vkGetImageMemoryRequirements, void, (VkDevice device, VkImage image, VkMemoryRequirements *pMemoryRequirements)) \
+ X(vkGetMemoryFdKHR, VkResult, (VkDevice device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFd)) \
+ X(vkGetMemoryWin32HandleKHR, VkResult, (VkDevice device, const VkMemoryGetWin32HandleInfoKHR *pGetWin32HandleInfo, void **pHandle)) \
+ X(vkGetQueryPoolResults, VkResult, (VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, size_t dataSize, void *pData, VkDeviceSize stride, VkQueryResultFlags flags)) \
+ X(vkGetSemaphoreFdKHR, VkResult, (VkDevice device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd)) \
+ X(vkGetSemaphoreWin32HandleKHR, VkResult, (VkDevice device, const VkSemaphoreGetWin32HandleInfoKHR *pGetWin32HandleInfo, void **pHandle)) \
+ X(vkInvalidateMappedMemoryRanges, VkResult, (VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges)) \
+ X(vkMapMemory, VkResult, (VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void **ppData)) \
+ X(vkSignalSemaphore, VkResult, (VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo)) \
+ X(vkUnmapMemory, void, (VkDevice device, VkDeviceMemory memory)) \
+ X(vkWaitSemaphores, VkResult, (VkDevice device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)) \
+ X(vkBeginCommandBuffer, VkResult, (VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo)) \
+ X(vkCmdBeginRendering, void, (VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)) \
+ X(vkCmdBindIndexBuffer2, void, (VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, VkIndexType indexType)) \
+ X(vkCmdBindPipeline, void, (VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline)) \
+ X(vkCmdCopyBuffer2, void, (VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)) \
+ X(vkCmdDispatch, void, (VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ)) \
+ X(vkCmdDrawIndexed, void, (VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)) \
+ X(vkCmdEndRendering, void, (VkCommandBuffer commandBuffer)) \
+ X(vkCmdPipelineBarrier2, void, (VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo)) \
+ X(vkCmdPushConstants, void, (VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, const void *pValues)) \
+ X(vkCmdResetQueryPool, void, (VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)) \
+ X(vkCmdSetScissor, void, (VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount, const VkRect2D *pScissors)) \
+ X(vkCmdSetViewport, void, (VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, const VkViewport *pViewports)) \
+ X(vkCmdWriteTimestamp2, void, (VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query)) \
+ X(vkEndCommandBuffer, VkResult, (VkCommandBuffer commandBuffer)) \
+ X(vkQueueSubmit2, VkResult, (VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence fence)) \
#define X(name, ret, params) typedef ret name##_fn params;