ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

beamformer_shared_memory.c (9706B)


      1 /* See LICENSE for license details. */
      /* NOTE: version tag for the shared memory layout declared in this file.
       * Presumably compared against BeamformerSharedMemory.version - confirm with users. */
      #define BEAMFORMER_SHARED_MEMORY_VERSION (31UL)
      3 
      /* NOTE: tag stored in BeamformWork.kind; it identifies what a queued
       * work item is asking for. */
      typedef enum {
      	BeamformerWorkKind_Compute         = 0,
      	BeamformerWorkKind_ComputeIndirect = 1,
      	BeamformerWorkKind_CreateFilter    = 2,
      	BeamformerWorkKind_ExportBuffer    = 3,
      } BeamformerWorkKind;
     10 
     11 typedef struct {
     12 	BeamformerFilterParameters parameters;
     13 	u8 filter_slot;
     14 	u8 parameter_block;
     15 	static_assert(BeamformerFilterSlots        <= 255, "CreateFilterContext only supports 255 filter slots");
     16 	static_assert(BeamformerMaxParameterBlocks <= 255, "CreateFilterContext only supports 255 parameter blocks");
     17 } BeamformerCreateFilterContext;
     18 
     /* NOTE: tag stored in BeamformerExportContext.kind */
     typedef enum {
     	BeamformerExportKind_BeamformedData = 0,
     	BeamformerExportKind_Stats          = 1,
     } BeamformerExportKind;
     23 
     24 typedef struct {
     25 	BeamformerExportKind kind;
     26 	u32 size;
     27 } BeamformerExportContext;
     28 
     /* NOTE: X-macro list of the named shared memory locks. Each entry becomes one
      * BeamformerSharedMemoryLockKind enumerator; the enumerator value is used as
      * an index into BeamformerSharedMemory.locks. */
     #define BEAMFORMER_SHARED_MEMORY_LOCKS \
     	X(ScratchSpace)                    \
     	X(UploadRF)                        \
     	X(ExportSync)                      \
     	X(DispatchCompute)

     #define X(name) BeamformerSharedMemoryLockKind_##name,
     typedef enum {
     	BEAMFORMER_SHARED_MEMORY_LOCKS
     	/* NOTE: number of named locks; per parameter block locks are indexed from here */
     	BeamformerSharedMemoryLockKind_Count
     } BeamformerSharedMemoryLockKind;
     #undef X
     38 
     39 typedef struct {
     40 	BeamformerViewPlaneTag view_plane;
     41 	u32                    parameter_block;
     42 } BeamformerComputeWorkContext;
     43 
     44 /* NOTE: discriminated union based on type */
     45 typedef struct {
     46 	BeamformerWorkKind kind;
     47 	BeamformerSharedMemoryLockKind lock;
     48 	union {
     49 		void                                 *generic;
     50 		BeamformerComputeWorkContext          compute_context;
     51 		BeamformerCreateFilterContext         create_filter_context;
     52 		BeamformerExportContext               export_context;
     53 		BeamformerShaderKind                  reload_shader;
     54 	};
     55 } BeamformWork;
     56 
     57 typedef struct {
     58 	union {
     59 		u64 queue;
     60 		struct {u32 widx, ridx;};
     61 	};
     62 	BeamformWork work_items[1 << 6];
     63 } BeamformWorkQueue;
     64 
     /* NOTE: bit flags generated from BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST
      * (defined outside this file); entry `id` becomes bit `id`. */
     #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id),
     typedef enum {
     	BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST
     } BeamformerLiveImagingDirtyFlags;
     #undef X
     68 
     /* NOTE: X-macro list of the sub regions of a BeamformerParameterBlock.
      * X(Name, field): `Name` is the enumerator suffix and `field` is the
      * BeamformerParameterBlock member where the region starts. */
     #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST                    \
     	X(ComputePipeline,             pipeline)                      \
     	X(ChannelMapping,              channel_mapping)               \
     	X(FocalVectors,                focal_vectors)                 \
     	X(Parameters,                  parameters)                    \
     	X(SparseElements,              sparse_elements)               \
     	X(TransmitReceiveOrientations, transmit_receive_orientations)

     /* NOTE: every region above plus the flag-only NotifyUI entry, which has no
      * backing field in the block. */
     #define BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST \
     	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST          \
     	X(NotifyUI)
     80 
     81 typedef enum {
     82 	#define X(k, ...) BeamformerParameterBlockRegion_##k,
     83 	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
     84 	#undef X
     85 	BeamformerParameterBlockRegion_Count
     86 } BeamformerParameterBlockRegions;
     87 
     88 typedef enum {
     89 	#define X(k, ...) BeamformerParameterRegionFlag_##k,
     90 	BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST
     91 	#undef X
     92 	BeamformerParameterRegionFlag_Count,
     93 } BeamformerParameterRegionFlags;
     94 
     95 typedef union {
     96 	u8 filter_slot;
     97 } BeamformerShaderParameters;
     98 
     99 typedef struct {
    100 	BeamformerShaderKind       shaders[BeamformerMaxComputeShaderStages];
    101 	BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages];
    102 	u32                        shader_count;
    103 	BeamformerDataKind         data_kind;
    104 } BeamformerComputePipeline;
    105 
    106 typedef struct {
    107 	alignas(16) union {
    108 		BeamformerParameters parameters;
    109 		struct {
    110 			BeamformerParametersHead  parameters_head;
    111 			BeamformerUIParameters    parameters_ui;
    112 			BeamformerExtraParameters parameters_extra;
    113 		};
    114 	};
    115 
    116 	/* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */
    117 	u32 region_update_flags;
    118 	static_assert(BeamformerParameterRegionFlag_Count <= 32, "");
    119 
    120 	BeamformerComputePipeline pipeline;
    121 
    122 	alignas(16) i16 channel_mapping[BeamformerMaxChannelCount];
    123 	alignas(16) i16 sparse_elements[BeamformerMaxChannelCount];
    124 	alignas(16) u8  transmit_receive_orientations[BeamformerMaxChannelCount];
    125 	/* NOTE(rnp): interleaved transmit angle, focal depth pairs */
    126 	alignas(16) v2  focal_vectors[BeamformerMaxChannelCount];
    127 } BeamformerParameterBlock;
    128 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0,
    129               "sizeof(BeamformerParametersBlock) must be a multiple of its alignment");
    130 
    131 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field),
    132 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = {
    133 	BEAMFORMER_PARAMETER_BLOCK_REGION_LIST
    134 };
    135 #undef X
    136 
    137 typedef struct {
    138 	u32 version;
    139 
    140 	/* NOTE(rnp): causes future library calls to fail.
    141 	 * see note in beamformer_invalidate_shared_memory() */
    142 	b32 invalid;
    143 
    144 	/* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of
    145 	 * the lock without leaving userspace. */
    146 	i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlocks];
    147 
    148 	/* NOTE(rnp): total number of parameter block regions the client has requested.
    149 	 * used to calculate offset to scratch space and to track number of allocated
    150 	 * semaphores on w32. Defaults to 1 but can be changed at runtime */
    151 	u32 reserved_parameter_blocks;
    152 
    153 	/* TODO(rnp): this is really sucky. we need a better way to communicate this */
    154 	u64 rf_block_rf_size;
    155 
    156 	// NOTE(rnp): currently this cannot be directly user readable. its interpretation
    157 	// requires beamformer implementation details
    158 	u64 beamformed_frame_buffer_size;
    159 
    160 	struct {
    161 		u64 max_rf_data_size;
    162 		b8  cuda;
    163 		b8  hilbert;
    164 	} capabilities;
    165 
    166 	BeamformerLiveImagingParameters live_imaging_parameters;
    167 	BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags;
    168 
    169 	BeamformWorkQueue external_work_queue;
    170 } BeamformerSharedMemory;
    171 
    172 function BeamformWork *
    173 beamform_work_queue_pop(BeamformWorkQueue *q)
    174 {
    175 	BeamformWork *result = 0;
    176 
    177 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    178 	u64 val  = atomic_load_u64(&q->queue);
    179 	u64 mask = countof(q->work_items) - 1;
    180 	u64 widx = val       & mask;
    181 	u64 ridx = val >> 32 & mask;
    182 
    183 	if (ridx != widx)
    184 		result = q->work_items + ridx;
    185 
    186 	return result;
    187 }
    188 
    189 function void
    190 beamform_work_queue_pop_commit(BeamformWorkQueue *q)
    191 {
    192 	atomic_add_u64(&q->queue, 0x100000000ULL);
    193 }
    194 
    195 function BeamformWork *
    196 beamform_work_queue_push(BeamformWorkQueue *q)
    197 {
    198 	BeamformWork *result = 0;
    199 
    200 	static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2");
    201 	u64 val  = atomic_load_u64(&q->queue);
    202 	u64 mask = countof(q->work_items) - 1;
    203 	u64 widx = val        & mask;
    204 	u64 ridx = val >> 32  & mask;
    205 	u64 next = (widx + 1) & mask;
    206 
    207 	if (val & 0x80000000)
    208 		atomic_and_u64(&q->queue, ~0x80000000);
    209 
    210 	if (next != ridx) {
    211 		result = q->work_items + widx;
    212 		zero_struct(result);
    213 	}
    214 
    215 	return result;
    216 }
    217 
    218 function void
    219 beamform_work_queue_push_commit(BeamformWorkQueue *q)
    220 {
    221 	atomic_add_u64(&q->queue, 1);
    222 }
    223 
    #if OS_WINDOWS
    /* NOTE(rnp): extra state needed on w32 to watch a value across processes while
     * yielding control back to the kernel. There are user level CPU instructions
     * that allow this, so it is unclear why w32 does not offer it in kernel mode.
     * There is one semaphore per entry of BeamformerSharedMemory.locks. */
    DEBUG_IMPORT OSW32Semaphore os_w32_shared_memory_semaphores[countof(((BeamformerSharedMemory *)0)->locks)];
    #endif
    230 
    231 function b32
    232 beamformer_shared_memory_take_lock(BeamformerSharedMemory *sm, i32 lock, u32 timeout_ms)
    233 {
    234 #if OS_WINDOWS
    235 	b32 result = os_w32_semaphore_wait(os_w32_shared_memory_semaphores[lock], timeout_ms);
    236 	if (result) atomic_store_u32(sm->locks + lock, 1);
    237 #else
    238 	b32 result = take_lock(sm->locks + lock, timeout_ms);
    239 #endif
    240 	return result;
    241 }
    242 
    243 function void
    244 beamformer_shared_memory_release_lock(BeamformerSharedMemory *sm, i32 lock)
    245 {
    246 	release_lock(sm->locks + lock);
    247 #if OS_WINDOWS
    248 	os_w32_semaphore_release(os_w32_shared_memory_semaphores[lock], 1);
    249 #endif
    250 }
    251 
    252 function BeamformerParameterBlock *
    253 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block)
    254 {
    255 	assert(sm->reserved_parameter_blocks > block);
    256 	BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result));
    257 	return result;
    258 }
    259 
    260 function b32
    261 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block)
    262 {
    263 	b32 result = beamformer_parameter_block(sm, block)->region_update_flags != 0;
    264 	return result;
    265 }
    266 
    267 function BeamformerParameterBlock *
    268 beamformer_parameter_block_lock(BeamformerSharedMemory *sm, u32 block, i32 timeout_ms)
    269 {
    270 	assert(block < BeamformerMaxParameterBlocks);
    271 	BeamformerParameterBlock *result = 0;
    272 	if (beamformer_shared_memory_take_lock(sm, BeamformerSharedMemoryLockKind_Count + block, (u32)timeout_ms))
    273 		result = beamformer_parameter_block(sm, block);
    274 	return result;
    275 }
    276 
    277 function void
    278 beamformer_parameter_block_unlock(BeamformerSharedMemory *sm, u32 block)
    279 {
    280 	assert(block < BeamformerMaxParameterBlocks);
    281 	beamformer_shared_memory_release_lock(sm, BeamformerSharedMemoryLockKind_Count + block);
    282 }
    283 
    284 function Arena
    285 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm, i64 shared_memory_size)
    286 {
    287 	assert(sm->reserved_parameter_blocks > 0);
    288 	BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks - 1);
    289 	Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + shared_memory_size};
    290 	result.beg = arena_aligned_start(result, KB(4));
    291 	return result;
    292 }
    293 
    294 function void
    295 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region)
    296 {
    297 	BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block);
    298 	atomic_or_u32(&pb->region_update_flags, 1u << region);
    299 }
    300 
    301 function void
    302 post_sync_barrier(BeamformerSharedMemory *sm, BeamformerSharedMemoryLockKind lock)
    303 {
    304 	/* NOTE(rnp): debug: here it is not a bug to release the lock if it
    305 	 * isn't held but elswhere it is */
    306 	DEBUG_DECL(if (sm->locks[lock])) {
    307 		beamformer_shared_memory_release_lock(sm, lock);
    308 	}
    309 }