beamformer_shared_memory.c (9706B)
1 /* See LICENSE for license details. */ 2 #define BEAMFORMER_SHARED_MEMORY_VERSION (31UL) 3 4 typedef enum { 5 BeamformerWorkKind_Compute, 6 BeamformerWorkKind_ComputeIndirect, 7 BeamformerWorkKind_CreateFilter, 8 BeamformerWorkKind_ExportBuffer, 9 } BeamformerWorkKind; 10 11 typedef struct { 12 BeamformerFilterParameters parameters; 13 u8 filter_slot; 14 u8 parameter_block; 15 static_assert(BeamformerFilterSlots <= 255, "CreateFilterContext only supports 255 filter slots"); 16 static_assert(BeamformerMaxParameterBlocks <= 255, "CreateFilterContext only supports 255 parameter blocks"); 17 } BeamformerCreateFilterContext; 18 19 typedef enum { 20 BeamformerExportKind_BeamformedData, 21 BeamformerExportKind_Stats, 22 } BeamformerExportKind; 23 24 typedef struct { 25 BeamformerExportKind kind; 26 u32 size; 27 } BeamformerExportContext; 28 29 #define BEAMFORMER_SHARED_MEMORY_LOCKS \ 30 X(ScratchSpace) \ 31 X(UploadRF) \ 32 X(ExportSync) \ 33 X(DispatchCompute) 34 35 #define X(name) BeamformerSharedMemoryLockKind_##name, 36 typedef enum {BEAMFORMER_SHARED_MEMORY_LOCKS BeamformerSharedMemoryLockKind_Count} BeamformerSharedMemoryLockKind; 37 #undef X 38 39 typedef struct { 40 BeamformerViewPlaneTag view_plane; 41 u32 parameter_block; 42 } BeamformerComputeWorkContext; 43 44 /* NOTE: discriminated union based on type */ 45 typedef struct { 46 BeamformerWorkKind kind; 47 BeamformerSharedMemoryLockKind lock; 48 union { 49 void *generic; 50 BeamformerComputeWorkContext compute_context; 51 BeamformerCreateFilterContext create_filter_context; 52 BeamformerExportContext export_context; 53 BeamformerShaderKind reload_shader; 54 }; 55 } BeamformWork; 56 57 typedef struct { 58 union { 59 u64 queue; 60 struct {u32 widx, ridx;}; 61 }; 62 BeamformWork work_items[1 << 6]; 63 } BeamformWorkQueue; 64 65 #define X(name, id) BeamformerLiveImagingDirtyFlags_##name = (1 << id), 66 typedef enum {BEAMFORMER_LIVE_IMAGING_DIRTY_FLAG_LIST} BeamformerLiveImagingDirtyFlags; 67 #undef X 68 69 #define BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 70 X(ComputePipeline, pipeline) \ 71 X(ChannelMapping, channel_mapping) \ 72 X(FocalVectors, focal_vectors) \ 73 X(Parameters, parameters) \ 74 X(SparseElements, sparse_elements) \ 75 X(TransmitReceiveOrientations, transmit_receive_orientations) \ 76 77 #define BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST \ 78 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST \ 79 X(NotifyUI) \ 80 81 typedef enum { 82 #define X(k, ...) BeamformerParameterBlockRegion_##k, 83 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 84 #undef X 85 BeamformerParameterBlockRegion_Count 86 } BeamformerParameterBlockRegions; 87 88 typedef enum { 89 #define X(k, ...) BeamformerParameterRegionFlag_##k, 90 BEAMFORMER_PARAMETER_BLOCK_REGION_FLAG_LIST 91 #undef X 92 BeamformerParameterRegionFlag_Count, 93 } BeamformerParameterRegionFlags; 94 95 typedef union { 96 u8 filter_slot; 97 } BeamformerShaderParameters; 98 99 typedef struct { 100 BeamformerShaderKind shaders[BeamformerMaxComputeShaderStages]; 101 BeamformerShaderParameters parameters[BeamformerMaxComputeShaderStages]; 102 u32 shader_count; 103 BeamformerDataKind data_kind; 104 } BeamformerComputePipeline; 105 106 typedef struct { 107 alignas(16) union { 108 BeamformerParameters parameters; 109 struct { 110 BeamformerParametersHead parameters_head; 111 BeamformerUIParameters parameters_ui; 112 BeamformerExtraParameters parameters_extra; 113 }; 114 }; 115 116 /* NOTE(rnp): signals to the beamformer that a subregion of a block has been updated */ 117 u32 region_update_flags; 118 static_assert(BeamformerParameterRegionFlag_Count <= 32, ""); 119 120 BeamformerComputePipeline pipeline; 121 122 alignas(16) i16 channel_mapping[BeamformerMaxChannelCount]; 123 alignas(16) i16 sparse_elements[BeamformerMaxChannelCount]; 124 alignas(16) u8 transmit_receive_orientations[BeamformerMaxChannelCount]; 125 /* NOTE(rnp): interleaved transmit angle, focal depth pairs */ 126 alignas(16) v2 focal_vectors[BeamformerMaxChannelCount]; 127 } BeamformerParameterBlock; 128 static_assert(sizeof(BeamformerParameterBlock) % alignof(BeamformerParameterBlock) == 0, 129 "sizeof(BeamformerParametersBlock) must be a multiple of its alignment"); 130 131 #define X(k, field) [BeamformerParameterBlockRegion_##k] = offsetof(BeamformerParameterBlock, field), 132 read_only global u16 BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_Count] = { 133 BEAMFORMER_PARAMETER_BLOCK_REGION_LIST 134 }; 135 #undef X 136 137 typedef struct { 138 u32 version; 139 140 /* NOTE(rnp): causes future library calls to fail. 141 * see note in beamformer_invalidate_shared_memory() */ 142 b32 invalid; 143 144 /* NOTE(rnp): not used for locking on w32 but we can use these to peek at the status of 145 * the lock without leaving userspace. */ 146 i32 locks[(u32)BeamformerSharedMemoryLockKind_Count + (u32)BeamformerMaxParameterBlocks]; 147 148 /* NOTE(rnp): total number of parameter block regions the client has requested. 149 * used to calculate offset to scratch space and to track number of allocated 150 * semaphores on w32. Defaults to 1 but can be changed at runtime */ 151 u32 reserved_parameter_blocks; 152 153 /* TODO(rnp): this is really sucky. we need a better way to communicate this */ 154 u64 rf_block_rf_size; 155 156 // NOTE(rnp): currently this cannot be directly user readable. its interpretation 157 // requires beamformer implementation details 158 u64 beamformed_frame_buffer_size; 159 160 struct { 161 u64 max_rf_data_size; 162 b8 cuda; 163 b8 hilbert; 164 } capabilities; 165 166 BeamformerLiveImagingParameters live_imaging_parameters; 167 BeamformerLiveImagingDirtyFlags live_imaging_dirty_flags; 168 169 BeamformWorkQueue external_work_queue; 170 } BeamformerSharedMemory; 171 172 function BeamformWork * 173 beamform_work_queue_pop(BeamformWorkQueue *q) 174 { 175 BeamformWork *result = 0; 176 177 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 178 u64 val = atomic_load_u64(&q->queue); 179 u64 mask = countof(q->work_items) - 1; 180 u64 widx = val & mask; 181 u64 ridx = val >> 32 & mask; 182 183 if (ridx != widx) 184 result = q->work_items + ridx; 185 186 return result; 187 } 188 189 function void 190 beamform_work_queue_pop_commit(BeamformWorkQueue *q) 191 { 192 atomic_add_u64(&q->queue, 0x100000000ULL); 193 } 194 195 function BeamformWork * 196 beamform_work_queue_push(BeamformWorkQueue *q) 197 { 198 BeamformWork *result = 0; 199 200 static_assert(ISPOWEROF2(countof(q->work_items)), "queue capacity must be a power of 2"); 201 u64 val = atomic_load_u64(&q->queue); 202 u64 mask = countof(q->work_items) - 1; 203 u64 widx = val & mask; 204 u64 ridx = val >> 32 & mask; 205 u64 next = (widx + 1) & mask; 206 207 if (val & 0x80000000) 208 atomic_and_u64(&q->queue, ~0x80000000); 209 210 if (next != ridx) { 211 result = q->work_items + widx; 212 zero_struct(result); 213 } 214 215 return result; 216 } 217 218 function void 219 beamform_work_queue_push_commit(BeamformWorkQueue *q) 220 { 221 atomic_add_u64(&q->queue, 1); 222 } 223 224 #if OS_WINDOWS 225 // NOTE(rnp): junk needed on w32 to watch a value across processes while yielding 226 // control back to the kernel. There are user level CPU instructions that allow 227 // this so why w32 can't do it in kernel mode sounds like shitty design to me. 228 DEBUG_IMPORT OSW32Semaphore os_w32_shared_memory_semaphores[countof(((BeamformerSharedMemory *)0)->locks)]; 229 #endif 230 231 function b32 232 beamformer_shared_memory_take_lock(BeamformerSharedMemory *sm, i32 lock, u32 timeout_ms) 233 { 234 #if OS_WINDOWS 235 b32 result = os_w32_semaphore_wait(os_w32_shared_memory_semaphores[lock], timeout_ms); 236 if (result) atomic_store_u32(sm->locks + lock, 1); 237 #else 238 b32 result = take_lock(sm->locks + lock, timeout_ms); 239 #endif 240 return result; 241 } 242 243 function void 244 beamformer_shared_memory_release_lock(BeamformerSharedMemory *sm, i32 lock) 245 { 246 release_lock(sm->locks + lock); 247 #if OS_WINDOWS 248 os_w32_semaphore_release(os_w32_shared_memory_semaphores[lock], 1); 249 #endif 250 } 251 252 function BeamformerParameterBlock * 253 beamformer_parameter_block(BeamformerSharedMemory *sm, u32 block) 254 { 255 assert(sm->reserved_parameter_blocks > block); 256 BeamformerParameterBlock *result = (typeof(result))((u8 *)(sm + 1) + block * sizeof(*result)); 257 return result; 258 } 259 260 function b32 261 beamformer_parameter_block_dirty(BeamformerSharedMemory *sm, u32 block) 262 { 263 b32 result = beamformer_parameter_block(sm, block)->region_update_flags != 0; 264 return result; 265 } 266 267 function BeamformerParameterBlock * 268 beamformer_parameter_block_lock(BeamformerSharedMemory *sm, u32 block, i32 timeout_ms) 269 { 270 assert(block < BeamformerMaxParameterBlocks); 271 BeamformerParameterBlock *result = 0; 272 if (beamformer_shared_memory_take_lock(sm, BeamformerSharedMemoryLockKind_Count + block, (u32)timeout_ms)) 273 result = beamformer_parameter_block(sm, block); 274 return result; 275 } 276 277 function void 278 beamformer_parameter_block_unlock(BeamformerSharedMemory *sm, u32 block) 279 { 280 assert(block < BeamformerMaxParameterBlocks); 281 beamformer_shared_memory_release_lock(sm, BeamformerSharedMemoryLockKind_Count + block); 282 } 283 284 function Arena 285 beamformer_shared_memory_scratch_arena(BeamformerSharedMemory *sm, i64 shared_memory_size) 286 { 287 assert(sm->reserved_parameter_blocks > 0); 288 BeamformerParameterBlock *last = beamformer_parameter_block(sm, sm->reserved_parameter_blocks - 1); 289 Arena result = {.beg = (u8 *)(last + 1), .end = (u8 *)sm + shared_memory_size}; 290 result.beg = arena_aligned_start(result, KB(4)); 291 return result; 292 } 293 294 function void 295 mark_parameter_block_region_dirty(BeamformerSharedMemory *sm, u32 block, BeamformerParameterBlockRegions region) 296 { 297 BeamformerParameterBlock *pb = beamformer_parameter_block(sm, block); 298 atomic_or_u32(&pb->region_update_flags, 1u << region); 299 } 300 301 function void 302 post_sync_barrier(BeamformerSharedMemory *sm, BeamformerSharedMemoryLockKind lock) 303 { 304 /* NOTE(rnp): debug: here it is not a bug to release the lock if it 305 * isn't held but elswhere it is */ 306 DEBUG_DECL(if (sm->locks[lock])) { 307 beamformer_shared_memory_release_lock(sm, lock); 308 } 309 }