ogl_beamformer_lib.c (26804B)
1 /* See LICENSE for license details. */ 2 #include "../compiler.h" 3 4 #define BEAMFORMER_IMPORT static 5 6 #include "../beamformer.h" 7 8 #include "../util.h" 9 10 #include "../generated/beamformer.meta.c" 11 #include "../beamformer_parameters.h" 12 #include "ogl_beamformer_lib_base.h" 13 14 #if OS_LINUX 15 #include "../os_linux.c" 16 #elif OS_WINDOWS 17 #include "../os_win32.c" 18 19 W32(iptr) OpenFileMappingA(u32, b32, c8 *); 20 21 #else 22 #error Unsupported Platform 23 #endif 24 25 #include "../util_os.c" 26 #include "../beamformer_shared_memory.c" 27 28 global struct { 29 BeamformerSharedMemory *bp; 30 i32 timeout_ms; 31 BeamformerLibErrorKind last_error; 32 i64 shared_memory_size; 33 } g_beamformer_library_context; 34 35 #if OS_LINUX 36 37 function s8 38 os_open_shared_memory_area(char *name) 39 { 40 s8 result = {0}; 41 i32 fd = shm_open(name, O_RDWR, S_IRUSR|S_IWUSR); 42 if (fd > 0) { 43 struct stat sb; 44 if (fstat(fd, &sb) != -1) { 45 void *new = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 46 if (new != MAP_FAILED) { 47 result.data = new; 48 result.len = sb.st_size; 49 } 50 } 51 close(fd); 52 } 53 return result; 54 } 55 56 function void 57 os_close_shared_memory_area(void *memory, i64 size) 58 { 59 munmap(memory, size); 60 } 61 62 #elif OS_WINDOWS 63 64 W32(u64) VirtualQuery(void *base_address, void *memory_basic_info, u64 memory_basic_info_size); 65 W32(b32) UnmapViewOfFile(void *); 66 67 function b32 68 os_reserve_region_locks(void) 69 { 70 u8 buffer[1024]; 71 Stream sb = {.data = buffer, .cap = countof(buffer)}; 72 stream_append_s8(&sb, s8(OS_SHARED_MEMORY_NAME "_lock_")); 73 74 i32 start_index = sb.widx; 75 u32 reserved_count = 0; 76 for EachElement(os_w32_shared_memory_semaphores, it) { 77 stream_reset(&sb, start_index); 78 stream_append_u64(&sb, it); 79 stream_append_byte(&sb, 0); 80 os_w32_shared_memory_semaphores[it] = os_w32_create_semaphore((c8 *)sb.data, 1, 1); 81 if InvalidHandle(os_w32_shared_memory_semaphores[it]) 82 break; 83 reserved_count++; 84 } 85 86 b32 result = reserved_count == countof(os_w32_shared_memory_semaphores); 87 if (!result) { 88 for (u32 i = 0; i < reserved_count; i++) 89 CloseHandle(os_w32_shared_memory_semaphores[i].value[0]); 90 } 91 92 return result; 93 } 94 95 function s8 96 os_open_shared_memory_area(char *name) 97 { 98 struct alignas(16) { 99 void *BaseAddress; 100 void *AllocationBase; 101 u32 AllocationProtect; 102 u32 __alignment1; 103 u64 RegionSize; 104 u32 State; 105 u32 Protect; 106 u32 Type; 107 u32 __alignment2; 108 } memory_basic_info; 109 110 s8 result = {0}; 111 iptr h = OpenFileMappingA(FILE_MAP_ALL_ACCESS, 0, name); 112 if (h != INVALID_FILE) { 113 // NOTE(rnp): a size of 0 maps the whole region, we can determine its size after 114 void *new = MapViewOfFile(h, FILE_MAP_ALL_ACCESS, 0, 0, 0); 115 if (new && 116 VirtualQuery(new, &memory_basic_info, sizeof(memory_basic_info)) == sizeof(memory_basic_info) && 117 os_reserve_region_locks()) 118 { 119 result.data = new; 120 result.len = (i64)memory_basic_info.RegionSize; 121 } 122 123 if (new && !result.data) 124 UnmapViewOfFile(new); 125 126 CloseHandle(h); 127 } 128 return result; 129 } 130 131 function void 132 os_close_shared_memory_area(void *memory, i64 size) 133 { 134 UnmapViewOfFile(memory); 135 } 136 137 #endif 138 139 #define lib_error_check(c, e) lib_error_check_(c, BeamformerLibErrorKind_##e) 140 function b32 141 lib_error_check_(b32 condition, BeamformerLibErrorKind error_kind) 142 { 143 b32 result = condition; 144 if (!result) g_beamformer_library_context.last_error = error_kind; 145 assert(result); 146 return result; 147 } 148 149 function b32 150 check_shared_memory(void) 151 { 152 b32 result = g_beamformer_library_context.bp != 0; 153 if unlikely(!g_beamformer_library_context.bp) { 154 s8 shared_memory = os_open_shared_memory_area(OS_SHARED_MEMORY_NAME); 155 if (lib_error_check(shared_memory.data != 0, SharedMemory)) { 156 BeamformerSharedMemory *bp = (BeamformerSharedMemory *)shared_memory.data; 157 result = lib_error_check(bp->version == BEAMFORMER_SHARED_MEMORY_VERSION, VersionMismatch); 158 if (result) { 159 g_beamformer_library_context.bp = bp; 160 g_beamformer_library_context.shared_memory_size = shared_memory.len; 161 } else { 162 os_close_shared_memory_area(shared_memory.data, shared_memory.len); 163 } 164 } 165 } 166 167 if likely(g_beamformer_library_context.bp) 168 result = lib_error_check(likely(!g_beamformer_library_context.bp->invalid), InvalidAccess); 169 return result; 170 } 171 172 function b32 173 valid_parameter_block(u32 block) 174 { 175 b32 result = check_shared_memory(); 176 if (result) { 177 result = lib_error_check(block < g_beamformer_library_context.bp->reserved_parameter_blocks, 178 ParameterBlockUnallocated); 179 } 180 return result; 181 } 182 183 function BeamformWork * 184 try_push_work_queue(void) 185 { 186 BeamformWork *result = beamform_work_queue_push(&g_beamformer_library_context.bp->external_work_queue); 187 lib_error_check(result != 0, WorkQueueFull); 188 return result; 189 } 190 191 function b32 192 lib_try_lock(i32 lock, i32 timeout_ms) 193 { 194 b32 result = beamformer_shared_memory_take_lock(g_beamformer_library_context.bp, lock, (u32)timeout_ms); 195 lib_error_check(result, SyncVariable); 196 return result; 197 } 198 199 function void 200 lib_release_lock(i32 lock) 201 { 202 beamformer_shared_memory_release_lock(g_beamformer_library_context.bp, lock); 203 } 204 205 u32 206 beamformer_get_api_version(void) 207 { 208 return BEAMFORMER_SHARED_MEMORY_VERSION; 209 } 210 211 const char * 212 beamformer_error_string(BeamformerLibErrorKind kind) 213 { 214 #define X(type, num, string) string, 215 local_persist const char *error_string_table[] = {BEAMFORMER_LIB_ERRORS "invalid error kind"}; 216 #undef X 217 return error_string_table[MIN(kind, countof(error_string_table) - 1)]; 218 } 219 220 BeamformerLibErrorKind 221 beamformer_get_last_error(void) 222 { 223 return g_beamformer_library_context.last_error; 224 } 225 226 const char * 227 beamformer_get_last_error_string(void) 228 { 229 return beamformer_error_string(beamformer_get_last_error()); 230 } 231 232 void 233 beamformer_set_global_timeout(u32 timeout_ms) 234 { 235 g_beamformer_library_context.timeout_ms = timeout_ms; 236 } 237 238 b32 239 beamformer_reserve_parameter_blocks(uint32_t count) 240 { 241 b32 result = 0; 242 if (check_shared_memory() && 243 lib_error_check(count <= BeamformerMaxParameterBlocks, ParameterBlockOverflow)) 244 { 245 g_beamformer_library_context.bp->reserved_parameter_blocks = count; 246 result = 1; 247 } 248 return result; 249 } 250 251 function b32 252 validate_parameters(BeamformerParameters *bp) 253 { 254 if (!lib_error_check(Between(bp->contrast_mode, 0, BeamformerContrastMode_Count - 1), InvalidContrastMode)) 255 return 0; 256 257 u32 contrast_raw_sample_count = bp->acquisition_count * bp->sample_count * beamformer_contrast_mode_samples[bp->contrast_mode]; 258 if (!lib_error_check(contrast_raw_sample_count <= bp->raw_data_dimensions.x, DataSizeMismatch)) 259 return 0; 260 261 // NOTE(rnp): frame size checks 262 { 263 // TODO(rnp): this check is overly conservative, what if we are exporting something smaller than Float32Complex 264 u64 buffer_size = g_beamformer_library_context.bp->beamformed_frame_buffer_size; 265 u64 frame_size = Max(1, bp->output_points.x) * Max(1, bp->output_points.y) * Max(1, bp->output_points.z) 266 * beamformer_data_kind_byte_size[BeamformerDataKind_Float32Complex]; 267 u64 incoherent_size = frame_size / 2; 268 if (bp->coherency_weighting) 269 buffer_size -= incoherent_size; 270 271 if (!lib_error_check(frame_size <= buffer_size, FrameSizeOverflow)) 272 return 0; 273 } 274 275 return 1; 276 } 277 278 function b32 279 validate_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind) 280 { 281 b32 data_kind_test = Between(data_kind, 0, BeamformerDataKind_Count - 1); 282 if (!lib_error_check(data_kind_test, InvalidDataKind)) 283 return 0; 284 285 if (!lib_error_check(shader_count <= BeamformerMaxComputeShaderStages, ComputeStageOverflow)) 286 return 0; 287 288 for (u32 i = 0; i < shader_count; i++) { 289 b32 stage_test = Between(shaders[i], BeamformerShaderKind_ComputeFirst, BeamformerShaderKind_ComputeLast); 290 if (!lib_error_check(stage_test, InvalidComputeStage)) 291 return 0; 292 293 if (shaders[i] == BeamformerShaderKind_Hilbert && 294 !lib_error_check(g_beamformer_library_context.bp->capabilities.hilbert != 0, InvalidComputeStage)) 295 return 0; 296 297 if (shaders[i] == BeamformerShaderKind_Demodulate && 298 !lib_error_check(!beamformer_data_kind_complex[data_kind], InvalidDemodulationDataKind)) 299 { 300 return 0; 301 } 302 } 303 304 b32 start_stage_test = shaders[0] == BeamformerShaderKind_Demodulate || 305 shaders[0] == BeamformerShaderKind_Decode; 306 if (!lib_error_check(start_stage_test, InvalidStartShader)) 307 return 0; 308 309 return 1; 310 } 311 312 u64 313 beamformer_maximum_rf_data_size(void) 314 { 315 u64 result = U64_MAX; 316 if (check_shared_memory()) { 317 Arena sm = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp, 318 g_beamformer_library_context.shared_memory_size); 319 result = Min((u64)arena_capacity(&sm, u8), g_beamformer_library_context.bp->capabilities.max_rf_data_size); 320 } 321 return result; 322 } 323 324 u64 325 beamformer_maximum_frames_for_parameters(BeamformerParameters *bp) 326 { 327 u64 result = U64_MAX; 328 if (check_shared_memory() && validate_parameters(bp)) { 329 // TODO(rnp): overly conservative frame size check 330 u64 buffer_size = g_beamformer_library_context.bp->beamformed_frame_buffer_size; 331 u64 frame_size = Max(1, bp->output_points.x) * Max(1, bp->output_points.y) * Max(1, bp->output_points.z) 332 * beamformer_data_kind_byte_size[BeamformerDataKind_Float32Complex]; 333 u64 incoherent_size = frame_size / 2; 334 if (bp->coherency_weighting) 335 buffer_size -= incoherent_size; 336 result = buffer_size / frame_size; 337 } 338 return result; 339 } 340 341 u64 342 beamformer_maximum_frames_for_simple_parameters(BeamformerSimpleParameters *bp) 343 { 344 u64 result = beamformer_maximum_frames_for_parameters((BeamformerParameters *)bp); 345 return result; 346 } 347 348 function b32 349 parameter_block_region_upload(void *data, u32 size, u32 block, BeamformerParameterBlockRegions region_id, 350 u32 block_offset, i32 timeout_ms) 351 { 352 i32 lock = BeamformerSharedMemoryLockKind_Count + (i32)block; 353 b32 result = valid_parameter_block(block) && lib_try_lock(lock, timeout_ms); 354 if (result) { 355 mem_copy((u8 *)beamformer_parameter_block(g_beamformer_library_context.bp, block) + block_offset, 356 data, size); 357 mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block, region_id); 358 lib_release_lock(lock); 359 } 360 return result; 361 } 362 363 b32 364 beamformer_set_pipeline_stage_parameters_at(u32 stage_index, i32 parameter, u32 block) 365 { 366 u32 offset = BeamformerParameterBlockRegionOffsets[BeamformerParameterBlockRegion_ComputePipeline]; 367 offset += offsetof(BeamformerComputePipeline, parameters); 368 offset += (stage_index % BeamformerMaxComputeShaderStages) * sizeof(BeamformerShaderParameters); 369 b32 result = parameter_block_region_upload(¶meter, sizeof(BeamformerShaderParameters), block, 370 BeamformerParameterBlockRegion_ComputePipeline, offset, 371 g_beamformer_library_context.timeout_ms); 372 return result; 373 } 374 375 b32 376 beamformer_set_pipeline_stage_parameters(u32 stage_index, i32 parameter) 377 { 378 b32 result = beamformer_set_pipeline_stage_parameters_at(stage_index, parameter, 0); 379 return result; 380 } 381 382 b32 383 beamformer_push_pipeline_at(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind, u32 block) 384 { 385 b32 result = 0; 386 if (check_shared_memory() && validate_pipeline(shaders, shader_count, data_kind)) { 387 i32 lock = BeamformerSharedMemoryLockKind_Count + (i32)block; 388 if (valid_parameter_block(block) && lib_try_lock(lock, g_beamformer_library_context.timeout_ms)) { 389 BeamformerParameterBlock *b = beamformer_parameter_block(g_beamformer_library_context.bp, block); 390 mem_copy(&b->pipeline.shaders, shaders, shader_count * sizeof(*shaders)); 391 mark_parameter_block_region_dirty(g_beamformer_library_context.bp, block, 392 BeamformerParameterBlockRegion_ComputePipeline); 393 b->pipeline.shader_count = shader_count; 394 b->pipeline.data_kind = data_kind; 395 lib_release_lock(lock); 396 result = 1; 397 } 398 } 399 return result; 400 } 401 402 b32 403 beamformer_push_pipeline(i32 *shaders, u32 shader_count, BeamformerDataKind data_kind) 404 { 405 b32 result = beamformer_push_pipeline_at(shaders, shader_count, data_kind, 0); 406 return result; 407 } 408 409 b32 410 beamformer_create_filter(BeamformerFilterParameters *filter, u8 filter_slot, u8 parameter_block) 411 { 412 b32 result = 0; 413 if (lib_error_check(filter->kind >= 0 && filter->kind < BeamformerFilterKind_Count, InvalidFilterKind)) { 414 if (check_shared_memory()) { 415 BeamformWork *work = try_push_work_queue(); 416 if (work) { 417 BeamformerCreateFilterContext *ctx = &work->create_filter_context; 418 work->kind = BeamformerWorkKind_CreateFilter; 419 ctx->parameters = *filter; 420 ctx->filter_slot = filter_slot % BeamformerFilterSlots; 421 ctx->parameter_block = parameter_block % BeamformerMaxParameterBlocks; 422 beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue); 423 result = 1; 424 } 425 } 426 } 427 return result; 428 } 429 430 function void 431 beamformer_flush_commands(void) 432 { 433 i32 lock = BeamformerSharedMemoryLockKind_DispatchCompute; 434 beamformer_shared_memory_take_lock(g_beamformer_library_context.bp, lock, 0); 435 } 436 437 #define BEAMFORMER_UPLOAD_FNS \ 438 X(channel_mapping, i16, 1, ChannelMapping) \ 439 X(focal_vectors, f32, 2, FocalVectors) \ 440 X(sparse_elements, i16, 1, SparseElements) \ 441 X(transmit_receive_orientations, u8, 1, TransmitReceiveOrientations) 442 443 #define X(name, dtype, elements, region_name) \ 444 b32 beamformer_push_##name ##_at(dtype *data, u32 count, u32 block) { \ 445 b32 result = 0; \ 446 if (lib_error_check(count <= countof(((BeamformerParameterBlock *)0)->name), BufferOverflow)) { \ 447 result = parameter_block_region_upload(data, count * elements * sizeof(dtype), block, \ 448 BeamformerParameterBlockRegion_##region_name, \ 449 offsetof(BeamformerParameterBlock, name), \ 450 g_beamformer_library_context.timeout_ms); \ 451 } \ 452 return result; \ 453 } 454 BEAMFORMER_UPLOAD_FNS 455 #undef X 456 457 #define X(name, dtype, ...) \ 458 b32 beamformer_push_##name (dtype *data, u32 count) { \ 459 b32 result = beamformer_push_##name ##_at(data, count, 0); \ 460 return result; \ 461 } 462 BEAMFORMER_UPLOAD_FNS 463 #undef X 464 465 #define BEAMFORMER_REDUCE_A1S2_CONTRAST_FN(name) void name(void *restrict output_v, \ 466 void *restrict input_v, \ 467 u32 sample_count) 468 typedef BEAMFORMER_REDUCE_A1S2_CONTRAST_FN(beamformer_reduce_a1s2_contrast_fn); 469 470 #define BEAMFORMER_REDUCE_A1S2_CONTRAST_LIST \ 471 X(i16) \ 472 X(f32) \ 473 X(f16) \ 474 475 static_assert(BeamformerDataKind_Float16Complex == (BeamformerDataKind_Count - 1), ""); 476 477 #define X(type, ...) \ 478 function BEAMFORMER_REDUCE_A1S2_CONTRAST_FN(beamformer_reduce_a1s2_contrast_##type) \ 479 { \ 480 type *input_a = (type *)input_v + 0 * sample_count; \ 481 type *input_b = (type *)input_v + 1 * sample_count; \ 482 type *input_c = (type *)input_v + 2 * sample_count; \ 483 type *output = (type *)output_v; \ 484 for (u32 sample = 0; sample < sample_count; sample++) \ 485 output[sample] = input_a[sample] - input_b[sample] - input_c[sample]; \ 486 } 487 BEAMFORMER_REDUCE_A1S2_CONTRAST_LIST 488 #undef X 489 490 function b32 491 beamformer_push_data_base(void *data, u32 data_size, i32 timeout_ms, u32 block) 492 { 493 b32 result = 0; 494 Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp, 495 g_beamformer_library_context.shared_memory_size); 496 BeamformerParameterBlock *b = beamformer_parameter_block(g_beamformer_library_context.bp, block); 497 BeamformerParameters *bp = &b->parameters; 498 BeamformerDataKind data_kind = b->pipeline.data_kind; 499 BeamformerContrastMode contrast_mode = bp->contrast_mode; 500 501 502 u64 max_rf_size = g_beamformer_library_context.bp->capabilities.max_rf_data_size; 503 u32 rf_size = bp->acquisition_count * bp->sample_count * bp->channel_count * beamformer_data_kind_byte_size[data_kind]; 504 u32 raw_size = bp->raw_data_dimensions.x * bp->raw_data_dimensions.y * beamformer_data_kind_byte_size[data_kind]; 505 506 // TODO(rnp): support multi push upload so that max_rf_size is actual limit 507 if (lib_error_check(rf_size <= arena_capacity(&scratch, u8), BufferOverflow) && 508 lib_error_check(rf_size <= max_rf_size, RFDataSizeOverflow) && 509 lib_error_check(rf_size <= data_size && data_size == raw_size, DataSizeMismatch)) 510 { 511 if (lib_try_lock(BeamformerSharedMemoryLockKind_UploadRF, timeout_ms)) { 512 if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) { 513 u32 channel_count = bp->channel_count; 514 u32 out_channel_stride = beamformer_data_kind_byte_size[data_kind] * bp->sample_count * bp->acquisition_count; 515 u32 in_channel_stride = beamformer_data_kind_byte_size[data_kind] * bp->raw_data_dimensions.x; 516 517 for (u32 channel = 0; channel < channel_count; channel++) { 518 u16 data_channel = (u16)b->channel_mapping[channel]; 519 u32 out_off = out_channel_stride * channel; 520 u32 in_off = in_channel_stride * data_channel; 521 switch (contrast_mode) { 522 default:{ 523 /* NOTE(rnp): non temporal copy would be better, but we can't ensure 524 * 64 byte boundaries. */ 525 memory_copy(scratch.beg + out_off, (u8 *)data + in_off, out_channel_stride); 526 }break; 527 528 case BeamformerContrastMode_A1S2:{ 529 read_only local_persist u8 reduce_a1s2_index_map[] = { 530 [BeamformerDataKind_Int16] = 0, 531 [BeamformerDataKind_Int16Complex] = 0, 532 [BeamformerDataKind_Float32] = 1, 533 [BeamformerDataKind_Float32Complex] = 1, 534 [BeamformerDataKind_Float16] = 2, 535 [BeamformerDataKind_Float16Complex] = 2, 536 }; 537 static_assert(BeamformerDataKind_Float16Complex == (BeamformerDataKind_Count - 1), ""); 538 539 read_only local_persist beamformer_reduce_a1s2_contrast_fn *reduce_a1s2_fn_table[] = { 540 #define X(type, ...) beamformer_reduce_a1s2_contrast_##type, 541 BEAMFORMER_REDUCE_A1S2_CONTRAST_LIST 542 #undef X 543 }; 544 545 // TODO(rnp): HACK: for some unknown reason loading contrast data after loading 546 // non-contrast data causes the dataset to not be stored correctly (it looks 547 // like mix of the old and new dataset). Putting this here fixes the issue. 548 // Counter-intuitively this improves throughput on my zen4 test computer, 549 // however it obviously should not be needed. 550 memory_clear(scratch.beg + out_off, 0, out_channel_stride); 551 552 u32 sample_count = bp->sample_count * beamformer_data_kind_element_count[data_kind]; 553 reduce_a1s2_fn_table[reduce_a1s2_index_map[data_kind]](scratch.beg + out_off, 554 (u8 *)data + in_off, 555 sample_count); 556 }break; 557 } 558 } 559 560 lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace); 561 /* TODO(rnp): need a better way to communicate this */ 562 u64 rf_block_rf_size = (u64)block << 32ULL | (u64)rf_size; 563 atomic_store_u64(&g_beamformer_library_context.bp->rf_block_rf_size, rf_block_rf_size); 564 result = 1; 565 } 566 } 567 } 568 return result; 569 } 570 571 b32 572 beamformer_push_data_with_compute(void *data, u32 data_size, u32 image_plane_tag, u32 parameter_slot) 573 { 574 b32 result = 0; 575 if (check_shared_memory()) { 576 u32 reserved_blocks = g_beamformer_library_context.bp->reserved_parameter_blocks; 577 if (lib_error_check(image_plane_tag < BeamformerViewPlaneTag_Count, InvalidImagePlane) && 578 lib_error_check(parameter_slot < reserved_blocks, ParameterBlockUnallocated) && 579 beamformer_push_data_base(data, data_size, g_beamformer_library_context.timeout_ms, parameter_slot)) 580 { 581 BeamformWork *work = try_push_work_queue(); 582 if (work) { 583 work->kind = BeamformerWorkKind_ComputeIndirect; 584 work->compute_context.view_plane = image_plane_tag; 585 work->compute_context.parameter_block = parameter_slot; 586 beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue); 587 beamformer_flush_commands(); 588 result = 1; 589 } 590 } 591 } 592 return result; 593 } 594 595 b32 596 beamformer_push_parameters_at(BeamformerParameters *bp, u32 block) 597 { 598 b32 result = check_shared_memory() && validate_parameters(bp); 599 if (result) { 600 result = parameter_block_region_upload(bp, sizeof(*bp), block, 601 BeamformerParameterBlockRegion_Parameters, 602 offsetof(BeamformerParameterBlock, parameters), 603 g_beamformer_library_context.timeout_ms); 604 if (result) { 605 BeamformerParameterBlock *pb = beamformer_parameter_block(g_beamformer_library_context.bp, block); 606 atomic_or_u32(&pb->region_update_flags, 1u << BeamformerParameterRegionFlag_NotifyUI); 607 } 608 } 609 return result; 610 } 611 612 b32 613 beamformer_push_parameters(BeamformerParameters *bp) 614 { 615 b32 result = beamformer_push_parameters_at(bp, 0); 616 return result; 617 } 618 619 b32 620 beamformer_push_simple_parameters_at(BeamformerSimpleParameters *bp, u32 block) 621 { 622 b32 result = check_shared_memory(); 623 if (result) { 624 alignas(64) v2 focal_vectors[countof(bp->steering_angles)]; 625 for (u32 i = 0; i < countof(bp->steering_angles); i++) 626 focal_vectors[i] = (v2){{bp->steering_angles[i], bp->focal_depths[i]}}; 627 628 result &= beamformer_push_parameters_at((BeamformerParameters *)bp, block); 629 result &= beamformer_push_pipeline_at(bp->compute_stages, bp->compute_stages_count, (BeamformerDataKind)bp->data_kind, block); 630 result &= beamformer_push_channel_mapping_at(bp->channel_mapping, bp->channel_count, block); 631 result &= beamformer_push_focal_vectors_at((f32 *)focal_vectors, countof(focal_vectors), block); 632 result &= beamformer_push_transmit_receive_orientations_at(bp->transmit_receive_orientations, 633 bp->acquisition_count, block); 634 635 if (bp->acquisition_kind == BeamformerAcquisitionKind_UFORCES || 636 bp->acquisition_kind == BeamformerAcquisitionKind_UHERCULES) 637 { 638 result &= beamformer_push_sparse_elements_at(bp->sparse_elements, bp->acquisition_count, block); 639 } 640 641 for (u32 stage = 0; stage < bp->compute_stages_count; stage++) 642 result &= beamformer_set_pipeline_stage_parameters_at(stage, bp->compute_stage_parameters[stage], block); 643 } 644 return result; 645 } 646 647 b32 648 beamformer_push_simple_parameters(BeamformerSimpleParameters *bp) 649 { 650 b32 result = beamformer_push_simple_parameters_at(bp, 0); 651 return result; 652 } 653 654 function b32 655 beamformer_export_buffer(BeamformerExportContext export_context) 656 { 657 BeamformWork *work = try_push_work_queue(); 658 b32 result = work && lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, 0); 659 if (result) { 660 work->export_context = export_context; 661 work->kind = BeamformerWorkKind_ExportBuffer; 662 work->lock = BeamformerSharedMemoryLockKind_ScratchSpace; 663 beamform_work_queue_push_commit(&g_beamformer_library_context.bp->external_work_queue); 664 } 665 return result; 666 } 667 668 function b32 669 beamformer_export(BeamformerExportContext export, void *out, i32 timeout_ms) 670 { 671 b32 result = 0; 672 if (beamformer_export_buffer(export)) { 673 /* NOTE(rnp): if this fails it just means that the work from push_data hasn't 674 * started yet. This is here to catch the other case where the work started 675 * and finished before we finished queuing the export work item */ 676 beamformer_flush_commands(); 677 678 if (lib_try_lock(BeamformerSharedMemoryLockKind_ExportSync, timeout_ms)) { 679 if (lib_try_lock(BeamformerSharedMemoryLockKind_ScratchSpace, 0)) { 680 Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp, 681 g_beamformer_library_context.shared_memory_size); 682 mem_copy(out, scratch.beg, export.size); 683 lib_release_lock(BeamformerSharedMemoryLockKind_ScratchSpace); 684 result = 1; 685 } 686 lib_release_lock(BeamformerSharedMemoryLockKind_ExportSync); 687 } 688 } 689 return result; 690 } 691 692 b32 693 beamformer_beamform_data(BeamformerSimpleParameters *bp, void *data, uint32_t data_size, 694 void *out_data, int32_t timeout_ms) 695 { 696 b32 result = beamformer_push_simple_parameters(bp); 697 if (result) { 698 iv3 output_points = bp->output_points.xyz; 699 output_points.E[0] = Max(1, output_points.E[0]); 700 output_points.E[1] = Max(1, output_points.E[1]); 701 output_points.E[2] = Max(1, output_points.E[2]); 702 703 b32 complex = 0; 704 for (u32 stage = 0; stage < bp->compute_stages_count; stage++) { 705 BeamformerShaderKind shader = (BeamformerShaderKind)bp->compute_stages[stage]; 706 complex |= shader == BeamformerShaderKind_Demodulate || shader == BeamformerShaderKind_Hilbert; 707 } 708 709 u64 output_size = output_points.x * output_points.y * output_points.z * sizeof(f32); 710 if (complex) output_size *= 2; 711 712 Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp, 713 g_beamformer_library_context.shared_memory_size); 714 if (result && out_data) result &= lib_error_check((iz)output_size <= arena_capacity(&scratch, u8), ExportSpaceOverflow); 715 716 if (result) { 717 result = beamformer_push_data_with_compute(data, data_size, 0, 0); 718 if (result && out_data) { 719 BeamformerExportContext export; 720 export.kind = BeamformerExportKind_BeamformedData; 721 export.size = (u32)output_size; 722 result = beamformer_export(export, out_data, timeout_ms); 723 } 724 } 725 } 726 return result; 727 } 728 729 b32 730 beamformer_compute_timings(BeamformerComputeStatsTable *output, i32 timeout_ms) 731 { 732 b32 result = 0; 733 if (check_shared_memory()) { 734 Arena scratch = beamformer_shared_memory_scratch_arena(g_beamformer_library_context.bp, 735 g_beamformer_library_context.shared_memory_size); 736 if (lib_error_check((iz)sizeof(*output) <= arena_capacity(&scratch, u8), ExportSpaceOverflow)) { 737 BeamformerExportContext export; 738 export.kind = BeamformerExportKind_Stats; 739 export.size = sizeof(*output); 740 result = beamformer_export(export, output, timeout_ms); 741 } 742 } 743 return result; 744 } 745 746 i32 747 beamformer_live_parameters_get_dirty_flag(void) 748 { 749 i32 result = -1; 750 if (check_shared_memory()) { 751 u32 flag = ctz_u64(g_beamformer_library_context.bp->live_imaging_dirty_flags); 752 if (flag != 64) { 753 atomic_and_u32(&g_beamformer_library_context.bp->live_imaging_dirty_flags, ~(1u << flag)); 754 result = (i32)flag; 755 } 756 } 757 return result; 758 } 759 760 BeamformerLiveImagingParameters * 761 beamformer_get_live_parameters(void) 762 { 763 BeamformerLiveImagingParameters *result = 0; 764 if (check_shared_memory()) result = &g_beamformer_library_context.bp->live_imaging_parameters; 765 return result; 766 } 767 768 b32 769 beamformer_set_live_parameters(BeamformerLiveImagingParameters *new) 770 { 771 b32 result = 0; 772 if (check_shared_memory()) { 773 mem_copy(&g_beamformer_library_context.bp->live_imaging_parameters, new, sizeof(*new)); 774 store_fence(); 775 result = 1; 776 } 777 return result; 778 }