beamformer_internal.h (16802B)
1 /* See LICENSE for license details. */ 2 #ifndef BEAMFORMER_INTERNAL_H 3 #define BEAMFORMER_INTERNAL_H 4 5 #include "beamformer.h" 6 7 #include "util.h" 8 #include "opengl.h" 9 10 #include "generated/beamformer.meta.c" 11 #include "generated/beamformer_shaders.c" 12 13 #include "external/raylib/src/raylib.h" 14 #include "external/raylib/src/rlgl.h" 15 16 #define beamformer_info(s) s8("[info] " s "\n") 17 18 #define os_path_separator() (s8){.data = &os_system_info()->path_separator_byte, .len = 1} 19 20 typedef struct { u64 value[1]; } VulkanHandle; 21 22 typedef enum { 23 VulkanTimeline_Graphics, 24 VulkanTimeline_Compute, 25 VulkanTimeline_Transfer, 26 VulkanTimeline_Count, 27 } VulkanTimeline; 28 29 typedef enum { 30 VulkanShaderKind_Vertex, 31 VulkanShaderKind_Mesh, 32 VulkanShaderKind_Fragment, 33 VulkanShaderKind_Compute, 34 VulkanShaderKind_Count, 35 } VulkanShaderKind; 36 37 typedef enum { 38 VulkanImageUsage_None, 39 VulkanImageUsage_Colour, 40 VulkanImageUsage_DepthStencil, 41 VulkanImageUsage_Count, 42 } VulkanImageUsage; 43 44 typedef enum { 45 VulkanUsageFlag_ImageSampling = 1 << 0, 46 VulkanUsageFlag_HostReadWrite = 1 << 1, // NOTE: not valid on images 47 /* NOTE: uses: 48 * - image-image copy operations 49 * - buffer-buffer copy operations 50 */ 51 VulkanUsageFlag_TransferSource = 1 << 2, 52 VulkanUsageFlag_TransferDestination = 1 << 3, 53 } VulkanUsageFlags; 54 55 typedef struct { 56 VulkanShaderKind kind; 57 s8 text; 58 s8 name; 59 } VulkanPipelineCreateInfo; 60 61 typedef struct { 62 VulkanHandle handle; 63 u64 gpu_pointer; 64 i64 size; 65 66 // NOTE: only used for render models 67 u64 index_count; 68 } GPUBuffer; 69 70 typedef struct { 71 VulkanHandle image; 72 u32 width; 73 u32 height; 74 u32 samples; 75 u32 mip_map_levels; 76 // TODO(rnp): this is only here for importing from OpenGL, move it back into handle later 77 u64 memory_size; 78 } GPUImage; 79 80 typedef enum { 81 GPUVendor_AMD = 0x1002, 82 GPUVendor_NVIDIA = 0x10DE, 83 GPUVendor_Qualcomm = 0x5143, 84 GPUVendor_Intel = 0x8086, 85 } GPUVendor; 86 87 typedef struct { 88 s8 name; 89 GPUVendor vendor; 90 91 f32 timestamp_period_ns; 92 93 u32 max_compute_shared_memory_size; 94 u16 max_msaa_samples; 95 u16 subgroup_size; 96 97 b32 cooperative_matrix; 98 99 u32 max_image_dimension_2D; 100 // NOTE(rnp): vulkan compute will output to a buffer so this won't be relevant 101 u32 max_image_dimension_3D; 102 103 u64 gpu_heap_size; 104 u64 gpu_heap_used; 105 } GPUInfo; 106 107 typedef struct { 108 i64 size; 109 VulkanUsageFlags flags; 110 111 // NOTE(rnp): only required if buffer will be used on multiple timelines 112 VulkanTimeline *timelines_used; 113 u32 timeline_count; 114 115 s8 label; 116 } GPUBufferAllocateInfo; 117 118 typedef struct { 119 GPUBuffer *gpu_buffer; 120 u64 offset; 121 u64 size; 122 } GPUMemoryBarrierInfo; 123 124 typedef struct { 125 GPUBuffer model; 126 u32 vertex_count; 127 u32 normals_offset; 128 } RenderModel; 129 130 typedef struct { 131 BeamformerShaderResourceKind kind; 132 VulkanHandle handle; 133 u32 slot; 134 } BeamformerShaderResourceInfo; 135 136 #include "threads.c" 137 #include "util_os_ui.c" 138 #include "util_os.c" 139 140 /////////////////////////// 141 // NOTE: vulkan layer API 142 DEBUG_IMPORT void vk_load(OSLibrary vulkan, Arena *memory, Stream *error); 143 144 DEBUG_IMPORT GPUInfo *vk_gpu_info(void); 145 146 DEBUG_IMPORT void vk_buffer_allocate(GPUBuffer *, GPUBufferAllocateInfo *info); 147 DEBUG_IMPORT void vk_buffer_release(GPUBuffer *); 148 DEBUG_IMPORT void vk_buffer_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal); 149 DEBUG_IMPORT void vk_buffer_range_download(void *output, GPUBuffer *, u64 source_offset, u64 size, b32 non_temporal); 150 DEBUG_IMPORT u64 vk_round_up_to_sync_size(u64, u64 min); 151 152 // NOTE: images are 2D only, any other use case should just use a buffer and index in the shader 153 DEBUG_IMPORT void vk_image_allocate(GPUImage *, u32 width, u32 height, u32 mips, u32 samples, VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label); 154 DEBUG_IMPORT void vk_image_release(GPUImage *); 155 156 DEBUG_IMPORT void vk_render_model_allocate(GPUBuffer *, void *indices, u64 index_count, u64 model_size, s8 label); 157 DEBUG_IMPORT void vk_render_model_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal); 158 DEBUG_IMPORT void vk_render_model_release(GPUBuffer *); 159 160 DEBUG_IMPORT void vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count); 161 162 /* NOTE: Pipelines do not have bindings. Data should be passed using push constants. 163 * In particular the push constants should contain pointers to gpu memory using the 164 * BufferDeviceAddress extension. */ 165 // TODO(rnp): change this to accept SPIR-V directly and accept BakeParameters as specialization data 166 DEBUG_IMPORT VulkanHandle vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size); 167 DEBUG_IMPORT b32 vk_pipeline_valid(VulkanHandle); 168 DEBUG_IMPORT void vk_pipeline_release(VulkanHandle); 169 170 DEBUG_IMPORT b32 vk_buffer_needs_sync(GPUBuffer *); 171 172 DEBUG_IMPORT VulkanHandle vk_create_semaphore(OSHandle *export); 173 174 DEBUG_IMPORT b32 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns); 175 DEBUG_IMPORT u64 vk_host_signal_timeline(VulkanTimeline timeline); 176 177 DEBUG_IMPORT VulkanHandle vk_command_begin(VulkanTimeline timeline); 178 DEBUG_IMPORT void vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline); 179 DEBUG_IMPORT void vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count); 180 DEBUG_IMPORT void vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch); 181 DEBUG_IMPORT void vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values); 182 DEBUG_IMPORT void vk_command_timestamp(VulkanHandle command); 183 DEBUG_IMPORT void vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value); 184 // NOTE: extra semaphores only exist for synchronization with OpenGL and will be removed in the future 185 DEBUG_IMPORT u64 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore); 186 187 DEBUG_IMPORT void vk_command_begin_rendering(VulkanHandle command, GPUImage *restrict colour, GPUImage *restrict depth, GPUImage *restrict resolve); 188 DEBUG_IMPORT void vk_command_draw(VulkanHandle command, GPUBuffer *model); 189 DEBUG_IMPORT void vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset); 190 DEBUG_IMPORT void vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth); 191 DEBUG_IMPORT void vk_command_end_rendering(VulkanHandle command); 192 193 DEBUG_IMPORT void vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination, GPUBuffer *restrict source, u64 source_offset, i64 size); 194 195 // NOTE: returns array of valid timestamps + 1, first element is the count. 196 // Calling thread may stall until results available. 197 DEBUG_IMPORT u64 * vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena); 198 199 #if BEAMFORMER_RENDERDOC_HOOKS 200 DEBUG_IMPORT void * vk_renderdoc_instance_handle(void); 201 202 DEBUG_IMPORT renderdoc_start_frame_capture_fn *start_frame_capture; 203 DEBUG_IMPORT renderdoc_set_capture_path_template_fn *set_capture_path_template; 204 DEBUG_IMPORT renderdoc_end_frame_capture_fn *end_frame_capture; 205 #define start_renderdoc_capture() do { \ 206 if (set_capture_path_template) set_capture_path_template("captures/ogl.rdc"); \ 207 if (start_frame_capture) start_frame_capture(vk_renderdoc_instance_handle(), 0); \ 208 } while(0) 209 #define end_renderdoc_capture() if (end_frame_capture) end_frame_capture(vk_renderdoc_instance_handle(), 0) 210 #define renderdoc_attached(...) (start_frame_capture != 0) 211 212 #else 213 #define start_renderdoc_capture(...) 214 #define end_renderdoc_capture(...) 215 #define renderdoc_attached(...) (0) 216 #endif 217 218 /////////////////////////////// 219 // NOTE: CUDA Library Bindings 220 221 #define CUDA_INIT_FN(name) void name(u32 *input_dims, u32 *decoded_dims) 222 typedef CUDA_INIT_FN(cuda_init_fn); 223 CUDA_INIT_FN(cuda_init_stub) {} 224 225 #define CUDA_REGISTER_BUFFERS_FN(name) void name(u32 *rf_data_ssbos, u32 rf_buffer_count, u32 raw_data_ssbo) 226 typedef CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_fn); 227 CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_stub) {} 228 229 #define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx) 230 typedef CUDA_HILBERT_FN(cuda_hilbert_fn); 231 CUDA_HILBERT_FN(cuda_hilbert_stub) {} 232 233 #define CUDA_SET_CHANNEL_MAPPING_FN(name) void name(i16 *channel_mapping) 234 typedef CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_fn); 235 CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_stub) {} 236 237 #define CUDALibraryProcedureList \ 238 X(hilbert, "cuda_hilbert") \ 239 X(init, "init_cuda_configuration") \ 240 X(register_buffers, "register_cuda_buffers") \ 241 X(set_channel_mapping, "cuda_set_channel_mapping") 242 243 #define X(name, ...) DEBUG_IMPORT cuda_## name ##_fn *cuda_## name; 244 CUDALibraryProcedureList 245 #undef X 246 247 ///////////////////////////////////// 248 // NOTE: Core Beamformer Definitions 249 250 #include "beamformer_parameters.h" 251 #include "beamformer_shared_memory.c" 252 253 typedef struct { 254 BeamformerFilterParameters parameters; 255 f32 time_delay; 256 i32 length; 257 GPUBuffer buffer; 258 } BeamformerFilter; 259 260 // X(kind, format, elements) 261 #define BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST \ 262 X(Hadamard, f16, BeamformerMaxChannelCount * BeamformerMaxChannelCount) \ 263 X(FocalVectors, v2, BeamformerMaxChannelCount) \ 264 X(SparseElements, i16, BeamformerMaxChannelCount) \ 265 X(TransmitReceiveOrientations, u16, BeamformerMaxChannelCount) \ 266 267 typedef enum { 268 #define X(k, ...) BeamformerComputeArrayParameterKind_##k, 269 BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST 270 #undef X 271 BeamformerComputeArrayParameterKind_Count 272 } BeamformerComputeArrayParameterKind; 273 274 // NOTE(rnp): only used to calculate offsets, never used directly 275 #define X(name, type, elements) alignas(64) type name[elements]; 276 typedef struct {BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST} BeamformerComputeArrayParameters; 277 #undef X 278 279 typedef struct { 280 uv3 layout; 281 uv3 dispatch; 282 BeamformerDataKind input_data_kind; 283 BeamformerDataKind output_data_kind; 284 BeamformerShaderBakeParameters bake; 285 } BeamformerShaderDescriptor; 286 287 typedef struct BeamformerComputePlan BeamformerComputePlan; 288 struct BeamformerComputePlan { 289 BeamformerComputePipeline pipeline; 290 291 VulkanHandle vulkan_pipelines[BeamformerMaxComputeShaderStages]; 292 293 u32 first_image_shader_index; 294 u32 channel_count; 295 u32 raw_channel_byte_stride; 296 297 u32 dirty_programs; 298 299 BeamformerAcquisitionKind acquisition_kind; 300 u32 acquisition_count; 301 302 u32 rf_size; 303 i32 hadamard_order; 304 b32 iq_pipeline; 305 306 m4 voxel_transform; 307 m4 ui_voxel_transform; 308 309 iv3 output_points; 310 i32 average_frames; 311 312 // TODO(rnp): specialization constants 313 v2 xdc_element_pitch; 314 m4 xdc_transform; 315 // TODO(rnp): probably just compute this everytime 316 m4 das_voxel_transform; 317 318 GPUBuffer array_parameters; 319 320 BeamformerFilter filters[BeamformerFilterSlots]; 321 322 u128 shader_hashes[BeamformerMaxComputeShaderStages]; 323 BeamformerShaderDescriptor shader_descriptors[BeamformerMaxComputeShaderStages]; 324 325 BeamformerComputePlan *next; 326 }; 327 328 typedef struct { 329 u64 upload_complete_values[BeamformerMaxRawDataFramesInFlight]; 330 u64 compute_complete_values[BeamformerMaxRawDataFramesInFlight]; 331 332 GPUBuffer buffer; 333 334 u32 active_rf_size; 335 336 u64 timestamp; 337 338 u64 insertion_index; 339 u64 compute_index; 340 } BeamformerRFBuffer; 341 342 typedef struct { 343 BeamformerComputeStatsTable table; 344 f32 average_times[BeamformerShaderKind_Count]; 345 346 u64 last_rf_timer_count; 347 f32 rf_time_delta_average; 348 349 u32 latest_frame_index; 350 u32 latest_rf_index; 351 } ComputeShaderStats; 352 353 /* TODO(rnp): maybe this also gets used for CPU timing info as well */ 354 typedef enum { 355 ComputeTimingInfoKind_ComputeFrameBegin, 356 ComputeTimingInfoKind_ComputeFrameEnd, 357 ComputeTimingInfoKind_Shader, 358 ComputeTimingInfoKind_RF_Data, 359 } ComputeTimingInfoKind; 360 361 typedef struct { 362 u64 timer_count; 363 ComputeTimingInfoKind kind; 364 union { 365 struct { 366 static_assert(BeamformerShaderKind_Count <= U16_MAX, ""); 367 u16 shader; 368 u16 shader_slot; 369 }; 370 }; 371 } ComputeTimingInfo; 372 373 typedef struct { 374 u32 write_index; 375 u32 read_index; 376 b32 compute_frame_active; 377 378 u32 in_flight_shader_count; 379 BeamformerShaderKind in_flight_shader_ids[BeamformerMaxComputeShaderStages]; 380 381 ComputeTimingInfo buffer[4096]; 382 } ComputeTimingTable; 383 384 typedef struct { 385 BeamformerRFBuffer *rf_buffer; 386 BeamformerSharedMemory *shared_memory; 387 i64 shared_memory_size; 388 ComputeTimingTable *compute_timing_table; 389 i32 *compute_worker_sync; 390 } BeamformerUploadThreadContext; 391 392 typedef struct { 393 u64 buffer_offset; 394 u64 timeline_valid_value; 395 396 /* NOTE: for use when displaying either prebeamformed frames or on the current frame 397 * when we intend to recompute on the next frame */ 398 m4 voxel_transform; 399 400 iv3 points; 401 402 u32 id; 403 u32 compound_count; 404 BeamformerDataKind data_kind; 405 BeamformerAcquisitionKind acquisition_kind; 406 BeamformerViewPlaneTag view_plane_tag; 407 } BeamformerFrame; 408 409 /* NOTE(rnp): backing storage for beamformed frames. The amount of backlog frames 410 * is dependant on the currently requested output size. */ 411 typedef struct { 412 GPUBuffer buffer[1]; 413 414 u64 next_offset; 415 u64 counter; 416 417 BeamformerFrame frames[BeamformerMaxBacklogFrames]; 418 } BeamformerFrameBacklog; 419 420 typedef struct { 421 BeamformerRFBuffer rf_buffer; 422 423 BeamformerComputePlan *compute_plans[BeamformerMaxParameterBlocks]; 424 BeamformerComputePlan *compute_plan_freelist; 425 426 VulkanHandle compute_internal_pipelines[BeamformerShaderKind_ComputeInternalCount]; 427 428 /* NOTE(rnp): used to ping pong data between compute stages. 429 * 430 * Allocate one extra slot for DAS output to allow overlap with the next 431 * channel chunk batch. To obtain optimal overlap we need 2 extra slots 432 * and we need to ping pong submissions between queues. This is not 433 * implemented so we only do 1 extra slot for now. 434 */ 435 #define PING_PONG_BUFFER_SLOTS (2 + 1) 436 GPUBuffer ping_pong_buffer; 437 u32 ping_pong_input_index; 438 439 f32 processing_progress; 440 b32 processing_compute; 441 442 BeamformerFrameBacklog backlog; 443 } BeamformerComputeContext; 444 445 typedef struct { 446 OSThread handle; 447 448 Arena arena; 449 iptr user_context; 450 i32 sync_variable; 451 b32 awake; 452 } GLWorkerThreadContext; 453 454 typedef enum { 455 BeamformerState_Uninitialized = 0, 456 BeamformerState_Running, 457 BeamformerState_ShouldClose, 458 BeamformerState_Terminated, 459 } BeamformerState; 460 461 typedef struct { 462 BeamformerState state; 463 464 iv2 window_size; 465 466 Arena arena; 467 Arena ui_backing_store; 468 void *ui; 469 u32 ui_dirty_parameter_blocks; 470 471 u64 frame_timestamp; 472 473 Stream error_stream; 474 475 BeamformerSharedMemory *shared_memory; 476 i64 shared_memory_size; 477 478 BeamformerFrame *latest_frame; 479 480 // TODO(rnp): track elsewhere 481 b32 render_shader_updated; 482 483 /* NOTE: this will only be used when we are averaging */ 484 u32 averaged_frame_index; 485 BeamformerFrame averaged_frames[2]; 486 487 GLWorkerThreadContext upload_worker; 488 GLWorkerThreadContext compute_worker; 489 490 BeamformerComputeContext compute_context; 491 492 ComputeShaderStats compute_shader_stats[1]; 493 ComputeTimingTable compute_timing_table[1]; 494 495 BeamformWorkQueue beamform_work_queue[1]; 496 } BeamformerCtx; 497 #define BeamformerContextMemory(m) (BeamformerCtx *)align_pointer_up((m), alignof(BeamformerCtx)); 498 499 typedef enum { 500 BeamformerFileReloadKind_ComputeInternalShader, 501 BeamformerFileReloadKind_ComputeShader, 502 BeamformerFileReloadKind_RenderShader, 503 } BeamformerFileReloadKind; 504 505 typedef struct { 506 BeamformerShaderKind shader; 507 VulkanHandle * pipeline; 508 } BeamformerShaderReloadData; 509 510 typedef struct { 511 BeamformerShaderKind shader; 512 VulkanShaderKind shader_kind; 513 514 // NOTE(rnp): based on BakeShaders compile time value 515 s8 filename_or_data; 516 517 BeamformerShaderDescriptor *shader_descriptor; 518 519 uv3 layout; 520 } BeamformerShaderReloadInfo; 521 522 typedef struct { 523 BeamformerFileReloadKind kind; 524 union { 525 BeamformerShaderReloadData shader_reload; 526 }; 527 } BeamformerFileReloadContext; 528 529 #define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(BeamformerCtx *ctx, Arena *arena) 530 typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn); 531 532 #define BEAMFORMER_RF_UPLOAD_FN(name) void name(BeamformerUploadThreadContext *ctx) 533 typedef BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload_fn); 534 535 #define BEAMFORMER_DEBUG_UI_DEINIT_FN(name) void name(BeamformerCtx *ctx) 536 typedef BEAMFORMER_DEBUG_UI_DEINIT_FN(beamformer_debug_ui_deinit_fn); 537 538 #endif /* BEAMFORMER_INTERNAL_H */