ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

beamformer_internal.h (16802B)


      1 /* See LICENSE for license details. */
      2 #ifndef BEAMFORMER_INTERNAL_H
      3 #define BEAMFORMER_INTERNAL_H
      4 
      5 #include "beamformer.h"
      6 
      7 #include "util.h"
      8 #include "opengl.h"
      9 
     10 #include "generated/beamformer.meta.c"
     11 #include "generated/beamformer_shaders.c"
     12 
     13 #include "external/raylib/src/raylib.h"
     14 #include "external/raylib/src/rlgl.h"
     15 
     16 #define beamformer_info(s) s8("[info] " s "\n")
     17 
     18 #define os_path_separator() (s8){.data = &os_system_info()->path_separator_byte, .len = 1}
     19 
     20 typedef struct { u64 value[1]; } VulkanHandle; // handle type passed to/returned by the vk_* layer API; wraps a single u64
     21 
     22 typedef enum { // selects a timeline for the vk_*_timeline calls, vk_command_begin() and vk_command_read_timestamps()
     23 	VulkanTimeline_Graphics,
     24 	VulkanTimeline_Compute,
     25 	VulkanTimeline_Transfer,
     26 	VulkanTimeline_Count, // number of timelines above; only correct while it stays the last enumerator
     27 } VulkanTimeline;
     28 
     29 typedef enum { // shader stage selector stored in VulkanPipelineCreateInfo.kind
     30 	VulkanShaderKind_Vertex,
     31 	VulkanShaderKind_Mesh,
     32 	VulkanShaderKind_Fragment,
     33 	VulkanShaderKind_Compute,
     34 	VulkanShaderKind_Count, // number of shader kinds above; only correct while it stays the last enumerator
     35 } VulkanShaderKind;
     36 
     37 typedef enum { // `usage` argument of vk_image_allocate()
     38 	VulkanImageUsage_None, // first enumerator, so a zeroed value means None
     39 	VulkanImageUsage_Colour,
     40 	VulkanImageUsage_DepthStencil,
     41 	VulkanImageUsage_Count, // number of usages above; only correct while it stays the last enumerator
     42 } VulkanImageUsage;
     43 
     44 typedef enum { // bit flags: every enumerator is a distinct bit, so values can be OR'd together
     45 	VulkanUsageFlag_ImageSampling       = 1 << 0,
     46 	VulkanUsageFlag_HostReadWrite       = 1 << 1, // NOTE: not valid on images
     47 	/* NOTE: uses:
     48 	 * - image-image copy operations
     49 	 * - buffer-buffer copy operations
     50 	 */
     51 	VulkanUsageFlag_TransferSource      = 1 << 2,
     52 	VulkanUsageFlag_TransferDestination = 1 << 3,
     53 } VulkanUsageFlags; // taken by GPUBufferAllocateInfo.flags and by vk_image_allocate()
     54 
     55 typedef struct { // one element of the `infos` array passed to vk_pipeline()
     56 	VulkanShaderKind kind;
     57 	s8               text; // NOTE(review): presumably shader source text (the TODO on vk_pipeline mentions moving to SPIR-V) - confirm
     58 	s8               name;
     59 } VulkanPipelineCreateInfo;
     60 
     61 typedef struct { // buffer object operated on by the vk_buffer_*, vk_render_model_* and vk_command_* calls
     62 	VulkanHandle handle;
     63 	u64          gpu_pointer; // NOTE(review): presumably the GPU address handed to shaders via push constants (see vk_pipeline note) - confirm
     64 	i64          size;        // signed, same type as GPUBufferAllocateInfo.size
     65 
     66 	// NOTE: only used for render models
     67 	u64          index_count;
     68 } GPUBuffer;
     69 
     70 typedef struct { // image object operated on by vk_image_allocate()/vk_image_release() and vk_command_begin_rendering()
     71 	VulkanHandle image;
     72 	u32          width;
     73 	u32          height;
     74 	u32          samples;
     75 	u32          mip_map_levels; // vk_image_allocate() takes a matching `mips` argument
     76 	// TODO(rnp): this is only here for importing from OpenGL, move it back into handle later
     77 	u64          memory_size;
     78 } GPUImage;
     79 
     80 typedef enum { // stored in GPUInfo.vendor; NOTE(review): values look like PCI vendor IDs as reported by the graphics driver - confirm
     81 	GPUVendor_AMD      = 0x1002,
     82 	GPUVendor_NVIDIA   = 0x10DE,
     83 	GPUVendor_Qualcomm = 0x5143,
     84 	GPUVendor_Intel    = 0x8086,
     85 } GPUVendor;
     86 
     87 typedef struct { // device description; vk_gpu_info() returns a pointer to one of these
     88 	s8        name;
     89 	GPUVendor vendor;
     90 
     91 	f32 timestamp_period_ns; // NOTE(review): presumably nanoseconds per GPU timestamp tick - confirm
     92 
     93 	u32 max_compute_shared_memory_size;
     94 	u16 max_msaa_samples;
     95 	u16 subgroup_size;
     96 
     97 	b32 cooperative_matrix; // NOTE(review): presumably nonzero when cooperative matrix support is available - confirm
     98 
     99 	u32 max_image_dimension_2D;
    100 	// NOTE(rnp): vulkan compute will output to a buffer so this won't be relevant
    101 	u32 max_image_dimension_3D;
    102 
    103 	u64 gpu_heap_size;
    104 	u64 gpu_heap_used;
    105 } GPUInfo;
    106 
    107 typedef struct { // allocation request passed as `info` to vk_buffer_allocate()
    108 	i64               size;
    109 	VulkanUsageFlags  flags;
    110 
    111 	// NOTE(rnp): only required if buffer will be used on multiple timelines
    112 	VulkanTimeline   *timelines_used;
    113 	u32               timeline_count; // NOTE(review): presumably the element count of timelines_used - confirm
    114 
    115 	s8                label;
    116 } GPUBufferAllocateInfo;
    117 
    118 typedef struct { // one element of the `barriers` array passed to vk_command_buffer_memory_barriers()
    119 	GPUBuffer *gpu_buffer;
    120 	u64        offset; // NOTE(review): offset/size presumably select a byte range inside gpu_buffer - confirm
    121 	u64        size;
    122 } GPUMemoryBarrierInfo;
    123 
    124 typedef struct { // a GPUBuffer plus vertex bookkeeping for a render model
    125 	GPUBuffer model; // the vk_render_model_* and vk_command_draw() calls take a GPUBuffer *
    126 	u32       vertex_count;
    127 	u32       normals_offset; // NOTE(review): presumably where the normals start inside `model`; units not visible here - confirm
    128 } RenderModel;
    129 
    130 typedef struct { // one element of the `infos` array passed to vk_bind_shader_resources()
    131 	BeamformerShaderResourceKind kind;
    132 	VulkanHandle                 handle;
    133 	u32                          slot;
    134 } BeamformerShaderResourceInfo;
    135 
    136 #include "threads.c"
    137 #include "util_os_ui.c"
    138 #include "util_os.c"
    139 
    140 ///////////////////////////
    141 // NOTE: vulkan layer API
    142 DEBUG_IMPORT void vk_load(OSLibrary vulkan, Arena *memory, Stream *error); // NOTE(review): DEBUG_IMPORT is defined outside this file
    143 
    144 DEBUG_IMPORT GPUInfo *vk_gpu_info(void);
    145 
    146 DEBUG_IMPORT void vk_buffer_allocate(GPUBuffer *, GPUBufferAllocateInfo *info);
    147 DEBUG_IMPORT void vk_buffer_release(GPUBuffer *);
    148 DEBUG_IMPORT void vk_buffer_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal); // NOTE(review): non_temporal presumably selects streaming (cache-bypassing) copies - confirm
    149 DEBUG_IMPORT void vk_buffer_range_download(void *output, GPUBuffer *, u64 source_offset, u64 size, b32 non_temporal);
    150 DEBUG_IMPORT u64  vk_round_up_to_sync_size(u64, u64 min);
    151 
    152 // NOTE: images are 2D only, any other use case should just use a buffer and index in the shader
    153 DEBUG_IMPORT void vk_image_allocate(GPUImage *, u32 width, u32 height, u32 mips, u32 samples, VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label);
    154 DEBUG_IMPORT void vk_image_release(GPUImage *);
    155 
    156 DEBUG_IMPORT void vk_render_model_allocate(GPUBuffer *, void *indices, u64 index_count, u64 model_size, s8 label);
    157 DEBUG_IMPORT void vk_render_model_range_upload(GPUBuffer *, void *data, u64 offset, u64 size, b32 non_temporal); // same parameter list as vk_buffer_range_upload()
    158 DEBUG_IMPORT void vk_render_model_release(GPUBuffer *);
    159 
    160 DEBUG_IMPORT void vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count);
    161 
    162 /* NOTE: Pipelines do not have bindings. Data should be passed using push constants.
    163  * In particular the push constants should contain pointers to gpu memory using the
    164  * BufferDeviceAddress extension. */
    165 // TODO(rnp): change this to accept SPIR-V directly and accept BakeParameters as specialization data
    166 DEBUG_IMPORT VulkanHandle vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size); // `infos` is an array of `count` shader descriptions
    167 DEBUG_IMPORT b32          vk_pipeline_valid(VulkanHandle);
    168 DEBUG_IMPORT void         vk_pipeline_release(VulkanHandle);
    169 
    170 DEBUG_IMPORT b32 vk_buffer_needs_sync(GPUBuffer *);
    171 
    172 DEBUG_IMPORT VulkanHandle vk_create_semaphore(OSHandle *export);
    173 
    174 DEBUG_IMPORT b32          vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns); // NOTE(review): b32 result presumably reports whether `value` was reached before the timeout - confirm
    175 DEBUG_IMPORT u64          vk_host_signal_timeline(VulkanTimeline timeline);
    176 
    177 DEBUG_IMPORT VulkanHandle vk_command_begin(VulkanTimeline timeline); // the returned handle is the `command` argument of the vk_command_* calls below
    178 DEBUG_IMPORT void         vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline);
    179 DEBUG_IMPORT void         vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count);
    180 DEBUG_IMPORT void         vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch);
    181 DEBUG_IMPORT void         vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values);
    182 DEBUG_IMPORT void         vk_command_timestamp(VulkanHandle command);
    183 DEBUG_IMPORT void         vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value);
    184 // NOTE: extra semaphores only exist for synchronization with OpenGL and will be removed in the future
    185 DEBUG_IMPORT u64          vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore); // NOTE(review): u64 result is presumably the timeline value to wait on for completion - confirm
    186 
    187 DEBUG_IMPORT void         vk_command_begin_rendering(VulkanHandle command, GPUImage *restrict colour, GPUImage *restrict depth, GPUImage *restrict resolve); // restrict: the three image pointers must not alias each other
    188 DEBUG_IMPORT void         vk_command_draw(VulkanHandle command, GPUBuffer *model);
    189 DEBUG_IMPORT void         vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset);
    190 DEBUG_IMPORT void         vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth);
    191 DEBUG_IMPORT void         vk_command_end_rendering(VulkanHandle command);
    192 
    193 DEBUG_IMPORT void         vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination, GPUBuffer *restrict source, u64 source_offset, i64 size); // restrict: destination and source must be different buffers; note size is signed
    194 
    195 // NOTE: returns array of valid timestamps + 1, first element is the count.
    196 //       Calling thread may stall until results available.
    197 DEBUG_IMPORT u64 *        vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena); // NOTE(review): result is presumably allocated from `arena` - confirm
    198 
    199 #if BEAMFORMER_RENDERDOC_HOOKS // hooks enabled: macros call the imported RenderDoc function pointers when they are non-null
    200 DEBUG_IMPORT void *       vk_renderdoc_instance_handle(void);
    201 
    202 DEBUG_IMPORT renderdoc_start_frame_capture_fn       *start_frame_capture;
    203 DEBUG_IMPORT renderdoc_set_capture_path_template_fn *set_capture_path_template;
    204 DEBUG_IMPORT renderdoc_end_frame_capture_fn         *end_frame_capture;
    205 #define start_renderdoc_capture()  do { \
    206 	if (set_capture_path_template) set_capture_path_template("captures/ogl.rdc"); \
    207 	if (start_frame_capture)       start_frame_capture(vk_renderdoc_instance_handle(), 0); \
    208 } while(0)
    209 #define end_renderdoc_capture()   do { if (end_frame_capture) end_frame_capture(vk_renderdoc_instance_handle(), 0); } while(0) /* NOTE: do/while(0) so the macro is a single statement and cannot capture a following `else` */
    210 #define renderdoc_attached(...)   (start_frame_capture != 0) /* nonzero when the start_frame_capture pointer is set */
    211 
    212 #else // hooks disabled: the capture macros expand to nothing and renderdoc_attached() is constant 0
    213 #define start_renderdoc_capture(...)
    214 #define end_renderdoc_capture(...)
    215 #define renderdoc_attached(...) (0)
    216 #endif
    217 
    218 ///////////////////////////////
    219 // NOTE: CUDA Library Bindings
    220 
    221 #define CUDA_INIT_FN(name) void name(u32 *input_dims, u32 *decoded_dims) // each *_FN(name) macro expands to a prototype/definition header named `name`
    222 typedef CUDA_INIT_FN(cuda_init_fn);
    223 CUDA_INIT_FN(cuda_init_stub) {} // no-op with the cuda_init_fn signature; NOTE(review): presumably installed when the CUDA library is unavailable - confirm
    224 
    225 #define CUDA_REGISTER_BUFFERS_FN(name) void name(u32 *rf_data_ssbos, u32 rf_buffer_count, u32 raw_data_ssbo)
    226 typedef CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_fn);
    227 CUDA_REGISTER_BUFFERS_FN(cuda_register_buffers_stub) {} // no-op with the cuda_register_buffers_fn signature
    228 
    229 #define CUDA_HILBERT_FN(name) void name(u32 input_buffer_idx, u32 output_buffer_idx)
    230 typedef CUDA_HILBERT_FN(cuda_hilbert_fn);
    231 CUDA_HILBERT_FN(cuda_hilbert_stub) {} // no-op with the cuda_hilbert_fn signature
    232 
    233 #define CUDA_SET_CHANNEL_MAPPING_FN(name) void name(i16 *channel_mapping)
    234 typedef CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_fn);
    235 CUDA_SET_CHANNEL_MAPPING_FN(cuda_set_channel_mapping_stub) {} // no-op with the cuda_set_channel_mapping_fn signature
    236 
    237 #define CUDALibraryProcedureList \
    238 	X(hilbert,             "cuda_hilbert")             \
    239 	X(init,                "init_cuda_configuration")  \
    240 	X(register_buffers,    "register_cuda_buffers")    \
    241 	X(set_channel_mapping, "cuda_set_channel_mapping")
    242 
    243 #define X(name, ...) DEBUG_IMPORT cuda_## name ##_fn *cuda_## name; // declares one cuda_<name> function pointer per list entry; the string argument is ignored by this expansion
    244 CUDALibraryProcedureList
    245 #undef X
    246 
    247 /////////////////////////////////////
    248 // NOTE: Core Beamformer Definitions
    249 
    250 #include "beamformer_parameters.h"
    251 #include "beamformer_shared_memory.c"
    252 
    253 typedef struct { // one filter slot; BeamformerComputePlan holds BeamformerFilterSlots of these
    254 	BeamformerFilterParameters parameters;
    255 	f32                        time_delay;
    256 	i32                        length;
    257 	GPUBuffer                  buffer; // NOTE(review): presumably the GPU copy of the filter coefficients - confirm
    258 } BeamformerFilter;
    259 
    260 // X(kind, format, elements)
    261 #define BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST \
    262 	X(Hadamard,                    f16, BeamformerMaxChannelCount * BeamformerMaxChannelCount) \
    263 	X(FocalVectors,                v2,  BeamformerMaxChannelCount) \
    264 	X(SparseElements,              i16, BeamformerMaxChannelCount) \
    265 	X(TransmitReceiveOrientations, u16, BeamformerMaxChannelCount) \
    266 
    267 typedef enum { // one enumerator per list entry, in list order
    268 	#define X(k, ...) BeamformerComputeArrayParameterKind_##k,
    269 	BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST
    270 	#undef X
    271 	BeamformerComputeArrayParameterKind_Count // number of list entries
    272 } BeamformerComputeArrayParameterKind;
    273 
    274 // NOTE(rnp): only used to calculate offsets, never used directly
    275 #define X(name, type, elements) alignas(64) type name[elements]; // each list entry becomes a 64-byte aligned array member
    276 typedef struct {BEAMFORMER_COMPUTE_ARRAY_PARAMETERS_LIST} BeamformerComputeArrayParameters;
    277 #undef X
    278 
    279 typedef struct { // per-shader description; stored per stage in BeamformerComputePlan.shader_descriptors
    280 	uv3 layout;
    281 	uv3 dispatch; // same type as the `dispatch` argument of vk_command_dispatch_compute()
    282 	BeamformerDataKind input_data_kind;
    283 	BeamformerDataKind output_data_kind;
    284 	BeamformerShaderBakeParameters bake;
    285 } BeamformerShaderDescriptor;
    286 
    287 typedef struct BeamformerComputePlan BeamformerComputePlan; // forward typedef so the struct can point at itself via `next`
    288 struct BeamformerComputePlan {
    289 	BeamformerComputePipeline pipeline;
    290 
    291 	VulkanHandle vulkan_pipelines[BeamformerMaxComputeShaderStages]; // one handle slot per possible compute shader stage
    292 
    293 	u32 first_image_shader_index;
    294 	u32 channel_count;
    295 	u32 raw_channel_byte_stride;
    296 
    297 	u32 dirty_programs; // NOTE(review): presumably a bitmask of stages needing a rebuild - confirm
    298 
    299 	BeamformerAcquisitionKind acquisition_kind;
    300 	u32                       acquisition_count;
    301 
    302 	u32 rf_size;
    303 	i32 hadamard_order;
    304 	b32 iq_pipeline;
    305 
    306 	m4  voxel_transform;
    307 	m4  ui_voxel_transform;
    308 
    309 	iv3 output_points;
    310 	i32 average_frames;
    311 
    312 	// TODO(rnp): specialization constants
    313 	v2  xdc_element_pitch;
    314 	m4  xdc_transform;
    315 	// TODO(rnp): probably just compute this every time
    316 	m4  das_voxel_transform;
    317 
    318 	GPUBuffer array_parameters; // NOTE(review): presumably laid out like BeamformerComputeArrayParameters - confirm
    319 
    320 	BeamformerFilter filters[BeamformerFilterSlots];
    321 
    322 	u128 shader_hashes[BeamformerMaxComputeShaderStages]; // same length as vulkan_pipelines
    323 	BeamformerShaderDescriptor shader_descriptors[BeamformerMaxComputeShaderStages]; // same length as vulkan_pipelines
    324 
    325 	BeamformerComputePlan *next; // NOTE(review): presumably links plans on BeamformerComputeContext.compute_plan_freelist - confirm
    326 };
    327 
    328 typedef struct { // RF data buffer state; referenced by BeamformerUploadThreadContext and embedded in BeamformerComputeContext
    329 	u64 upload_complete_values[BeamformerMaxRawDataFramesInFlight];  // one value per in-flight raw data frame
    330 	u64 compute_complete_values[BeamformerMaxRawDataFramesInFlight]; // one value per in-flight raw data frame
    331 
    332 	GPUBuffer buffer;
    333 
    334 	u32 active_rf_size;
    335 
    336 	u64 timestamp;
    337 
    338 	u64 insertion_index; // NOTE(review): insertion/compute indices presumably select slots in the arrays above - confirm
    339 	u64 compute_index;
    340 } BeamformerRFBuffer;
    341 
    342 typedef struct { // compute timing statistics; BeamformerCtx holds one instance
    343 	BeamformerComputeStatsTable table;
    344 	f32 average_times[BeamformerShaderKind_Count]; // one entry per BeamformerShaderKind
    345 
    346 	u64 last_rf_timer_count;
    347 	f32 rf_time_delta_average;
    348 
    349 	u32 latest_frame_index;
    350 	u32 latest_rf_index;
    351 } ComputeShaderStats;
    352 
    353 /* TODO(rnp): maybe this also gets used for CPU timing info as well */
    354 typedef enum { // tag stored in ComputeTimingInfo.kind
    355 	ComputeTimingInfoKind_ComputeFrameBegin,
    356 	ComputeTimingInfoKind_ComputeFrameEnd,
    357 	ComputeTimingInfoKind_Shader,
    358 	ComputeTimingInfoKind_RF_Data,
    359 } ComputeTimingInfoKind;
    360 
    361 typedef struct { // one timing record; ComputeTimingTable.buffer is an array of these
    362 	u64 timer_count;
    363 	ComputeTimingInfoKind kind;
    364 	union {
    365 		struct { // NOTE(review): presumably only meaningful when kind == ComputeTimingInfoKind_Shader - confirm
    366 			static_assert(BeamformerShaderKind_Count <= U16_MAX, ""); // guarantees every shader kind fits in the u16 below
    367 			u16 shader;
    368 			u16 shader_slot;
    369 		};
    370 	};
    371 } ComputeTimingInfo;
    372 
    373 typedef struct { // fixed-capacity store of ComputeTimingInfo records
    374 	u32 write_index; // NOTE(review): write/read indices suggest `buffer` is used as a ring buffer - confirm
    375 	u32 read_index;
    376 	b32 compute_frame_active;
    377 
    378 	u32                  in_flight_shader_count;
    379 	BeamformerShaderKind in_flight_shader_ids[BeamformerMaxComputeShaderStages];
    380 
    381 	ComputeTimingInfo buffer[4096];
    382 } ComputeTimingTable;
    383 
    384 typedef struct { // argument type of beamformer_rf_upload_fn; holds pointers only
    385 	BeamformerRFBuffer      *rf_buffer;
    386 	BeamformerSharedMemory  *shared_memory;
    387 	i64                      shared_memory_size;
    388 	ComputeTimingTable      *compute_timing_table;
    389 	i32                     *compute_worker_sync; // NOTE(review): presumably points at the compute worker's sync_variable - confirm
    390 } BeamformerUploadThreadContext;
    391 
    392 typedef struct { // metadata for one frame; used by BeamformerFrameBacklog.frames and BeamformerCtx
    393 	u64 buffer_offset;        // NOTE(review): presumably an offset into the backlog's GPU buffer - confirm
    394 	u64 timeline_valid_value; // NOTE(review): presumably the timeline value after which the frame data is valid - confirm
    395 
    396 	/* NOTE: for use when displaying either prebeamformed frames or on the current frame
    397 	 * when we intend to recompute on the next frame */
    398 	m4  voxel_transform;
    399 
    400 	iv3 points;
    401 
    402 	u32                       id;
    403 	u32                       compound_count;
    404 	BeamformerDataKind        data_kind;
    405 	BeamformerAcquisitionKind acquisition_kind;
    406 	BeamformerViewPlaneTag    view_plane_tag;
    407 } BeamformerFrame;
    408 
    409 /* NOTE(rnp): backing storage for beamformed frames. The amount of backlog frames
    410 * is dependent on the currently requested output size. */
    411 typedef struct {
    412 	GPUBuffer   buffer[1]; // single-element array, so `buffer` can be used where a GPUBuffer * is expected
    413 
    414 	u64         next_offset;
    415 	u64         counter;
    416 
    417 	BeamformerFrame frames[BeamformerMaxBacklogFrames]; // fixed-capacity frame metadata
    418 } BeamformerFrameBacklog;
    419 
    420 typedef struct { // compute-side state; embedded in BeamformerCtx as compute_context
    421 	BeamformerRFBuffer rf_buffer;
    422 
    423 	BeamformerComputePlan *compute_plans[BeamformerMaxParameterBlocks]; // one plan pointer per parameter block
    424 	BeamformerComputePlan *compute_plan_freelist;
    425 
    426 	VulkanHandle compute_internal_pipelines[BeamformerShaderKind_ComputeInternalCount];
    427 
    428 	/* NOTE(rnp): used to ping pong data between compute stages.
    429 	 *
    430 	 * Allocate one extra slot for DAS output to allow overlap with the next
    431 	 * channel chunk batch. To obtain optimal overlap we need 2 extra slots
    432 	 * and we need to ping pong submissions between queues. This is not
    433 	 * implemented so we only do 1 extra slot for now.
    434 	 */
    435 	#define PING_PONG_BUFFER_SLOTS (2 + 1) /* 2 ping pong slots + 1 extra DAS output slot, per the note above */
    436 	GPUBuffer ping_pong_buffer;
    437 	u32 ping_pong_input_index;
    438 
    439 	f32 processing_progress;
    440 	b32 processing_compute;
    441 
    442 	BeamformerFrameBacklog backlog;
    443 } BeamformerComputeContext;
    444 
    445 typedef struct { // per-worker thread state; BeamformerCtx holds one for upload_worker and one for compute_worker
    446 	OSThread handle;
    447 
    448 	Arena arena;
    449 	iptr  user_context; // pointer-sized integer; NOTE(review): presumably carries a pointer to the worker's context - confirm
    450 	i32   sync_variable;
    451 	b32   awake;
    452 } GLWorkerThreadContext;
    453 
    454 typedef enum { // stored in BeamformerCtx.state
    455 	BeamformerState_Uninitialized = 0, // explicitly 0, so a zeroed BeamformerCtx starts Uninitialized
    456 	BeamformerState_Running,
    457 	BeamformerState_ShouldClose,
    458 	BeamformerState_Terminated,
    459 } BeamformerState;
    460 
    461 typedef struct { // top-level application state; passed to the beamformer_*_fn entry points declared at the end of this header
    462 	BeamformerState state;
    463 
    464 	iv2 window_size;
    465 
    466 	Arena  arena;
    467 	Arena  ui_backing_store;
    468 	void  *ui; // untyped here; the UI type is not visible in this header
    469 	u32    ui_dirty_parameter_blocks;
    470 
    471 	u64    frame_timestamp;
    472 
    473 	Stream error_stream;
    474 
    475 	BeamformerSharedMemory *shared_memory;
    476 	i64                     shared_memory_size;
    477 
    478 	BeamformerFrame *latest_frame;
    479 
    480 	// TODO(rnp): track elsewhere
    481 	b32 render_shader_updated;
    482 
    483 	/* NOTE: this will only be used when we are averaging */
    484 	u32             averaged_frame_index;
    485 	BeamformerFrame averaged_frames[2];
    486 
    487 	GLWorkerThreadContext  upload_worker;
    488 	GLWorkerThreadContext  compute_worker;
    489 
    490 	BeamformerComputeContext compute_context;
    491 
    492 	ComputeShaderStats compute_shader_stats[1]; // single-element arrays: the member name can be used directly as a pointer
    493 	ComputeTimingTable compute_timing_table[1];
    494 
    495 	BeamformWorkQueue  beamform_work_queue[1];
    496 } BeamformerCtx;
    497 #define BeamformerContextMemory(m) ((BeamformerCtx *)align_pointer_up((m), alignof(BeamformerCtx))) /* NOTE: passes m through align_pointer_up() with BeamformerCtx's alignment and casts the result; expression macro, so the caller supplies the ';' */
    498 
    499 typedef enum { // tag stored in BeamformerFileReloadContext.kind
    500 	BeamformerFileReloadKind_ComputeInternalShader,
    501 	BeamformerFileReloadKind_ComputeShader,
    502 	BeamformerFileReloadKind_RenderShader,
    503 } BeamformerFileReloadKind;
    504 
    505 typedef struct { // payload of BeamformerFileReloadContext (the shader_reload member)
    506 	BeamformerShaderKind shader;
    507 	VulkanHandle *       pipeline; // NOTE(review): presumably the pipeline slot to overwrite after a reload - confirm
    508 } BeamformerShaderReloadData;
    509 
    510 typedef struct { // description of one shader to (re)load
    511 	BeamformerShaderKind  shader;
    512 	VulkanShaderKind      shader_kind;
    513 
    514 	// NOTE(rnp): based on BakeShaders compile time value
    515 	s8                    filename_or_data; // either a file name or the shader data itself, per the note above
    516 
    517 	BeamformerShaderDescriptor *shader_descriptor;
    518 
    519 	uv3 layout;
    520 } BeamformerShaderReloadInfo;
    521 
    522 typedef struct { // tagged record: `kind` plus a payload union
    523 	BeamformerFileReloadKind kind;
    524 	union { // currently has a single member
    525 		BeamformerShaderReloadData shader_reload;
    526 	};
    527 } BeamformerFileReloadContext;
    528 
    529 #define BEAMFORMER_COMPLETE_COMPUTE_FN(name) void name(BeamformerCtx *ctx, Arena *arena) // each *_FN(name) macro expands to the function header for `name`
    530 typedef BEAMFORMER_COMPLETE_COMPUTE_FN(beamformer_complete_compute_fn); // function type: void (BeamformerCtx *, Arena *)
    531 
    532 #define BEAMFORMER_RF_UPLOAD_FN(name) void name(BeamformerUploadThreadContext *ctx)
    533 typedef BEAMFORMER_RF_UPLOAD_FN(beamformer_rf_upload_fn); // function type: void (BeamformerUploadThreadContext *)
    534 
    535 #define BEAMFORMER_DEBUG_UI_DEINIT_FN(name) void name(BeamformerCtx *ctx)
    536 typedef BEAMFORMER_DEBUG_UI_DEINIT_FN(beamformer_debug_ui_deinit_fn); // function type: void (BeamformerCtx *)
    537 
    538 #endif /* BEAMFORMER_INTERNAL_H */