vulkan.c (99298B)
1 /* See LICENSE for license details. */ 2 // TODO(rnp) 3 // [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays 4 // [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float 5 6 #include "beamformer_internal.h" 7 #include "vulkan.h" 8 #include "external/glslang/glslang/Include/glslang_c_interface.h" 9 10 #define ForceSingleQueue (0) 11 12 #define glslang_info(s) s8("[glslang] " s) 13 #define vulkan_info(s) s8("[vulkan] " s) 14 15 #define ValidVulkanHandle(h) ((h).value[0] != 0) 16 17 #define MaxCommandBuffersInFlight BeamformerMaxRawDataFramesInFlight 18 #define MaxCommandBufferTimestamps (1024) 19 20 typedef enum { 21 VulkanQueueKind_Graphics, 22 VulkanQueueKind_Compute, 23 VulkanQueueKind_Transfer, 24 VulkanQueueKind_Count, 25 } VulkanQueueKind; 26 27 typedef enum { 28 VulkanMemoryKind_Device, 29 VulkanMemoryKind_BAR, 30 VulkanMemoryKind_Host, 31 VulkanMemoryKind_Count, 32 } VulkanMemoryKind; 33 34 typedef struct { 35 VkDeviceMemory memory; 36 VkBuffer buffer; 37 u64 memory_size; 38 39 void * host_pointer; 40 41 VulkanMemoryKind memory_kind; 42 43 // NOTE: only used when the buffer is backing a VulkanRenderModel. 44 VkIndexType index_type; 45 } VulkanBuffer; 46 47 typedef struct { 48 VkDeviceMemory memory; 49 VkImage image; 50 VkImageView view; 51 } VulkanImage; 52 53 typedef struct { 54 VkPipeline pipeline; 55 VkPipelineLayout layout; 56 VkShaderStageFlags stage_flags; 57 } VulkanPipeline; 58 59 typedef struct { 60 VkSemaphore semaphore; 61 u64 value; 62 } VulkanSemaphore; 63 64 typedef struct { 65 VulkanTimeline timeline; 66 u32 buffer_index; 67 68 // NOTE(rnp): since there may not be QueueKind_Count queues, when putting values into this 69 // array you must be careful to map through the queue_indices array in the vulkan_context. 70 u64 in_flight_wait_values[VulkanQueueKind_Count]; 71 } VulkanCommandBuffer; 72 73 typedef enum { 74 VulkanEntityKind_Buffer, 75 VulkanEntityKind_CommandBuffer, 76 VulkanEntityKind_Image, 77 VulkanEntityKind_Pipeline, 78 VulkanEntityKind_RenderModel, 79 VulkanEntityKind_Semaphore, 80 } VulkanEntityKind; 81 82 typedef struct VulkanEntity VulkanEntity; 83 struct VulkanEntity { 84 VulkanEntity * next; 85 VulkanEntityKind kind; 86 union { 87 VulkanBuffer buffer; 88 VulkanCommandBuffer command_buffer; 89 VulkanImage image; 90 VulkanPipeline pipeline; 91 VulkanSemaphore semaphore; 92 } as; 93 }; 94 95 typedef alignas(64) struct { 96 i32 lock; 97 98 u16 queue_family; 99 u16 queue_index; 100 VkQueue queue; 101 102 VulkanSemaphore timeline_semaphore; 103 104 VkPipelineStageFlags2 pipeline_stage_flags; 105 } VulkanQueue; 106 static_assert(alignof(VulkanQueue) == 64, "VulkanQueue must be placed on its own cacheline"); 107 108 typedef alignas(64) struct { 109 i32 lock; 110 u32 next_index; 111 112 VulkanPipeline *bound_pipeline; 113 114 VkCommandPool handle; 115 VkQueryPool query_pool; 116 VkCommandBuffer buffers[MaxCommandBuffersInFlight]; 117 118 u64 submission_values[MaxCommandBuffersInFlight]; 119 u32 queries_occupied[MaxCommandBuffersInFlight]; 120 } VulkanCommandPool; 121 122 typedef struct { 123 Arena arena; 124 i32 arena_lock; 125 126 VkInstance handle; 127 VkDevice device; 128 VkPhysicalDevice physical_device; 129 130 VkDescriptorPool descriptor_pool; 131 VkDescriptorSetLayout descriptor_set_layouts[BeamformerShaderResourceKind_Count]; 132 VkDescriptorSet descriptor_sets[BeamformerShaderResourceKind_Count]; 133 // NOTE(rnp): must store these if we want to allow partial updates easily 134 VkDescriptorBufferInfo descriptor_buffer_infos[BeamformerShaderBufferSlot_Count]; 135 136 // NOTE(rnp): fallback for when a shader fails to compile 137 VulkanPipeline default_compute_pipeline; 138 VulkanPipeline default_graphics_pipeline; 139 140 GPUInfo gpu_info; 141 142 struct { 143 u64 max_allocation_size; 144 u64 non_coherent_atom_size; 145 u8 gpu_heap_index; 146 i8 memory_type_indices[VulkanMemoryKind_Count]; 147 b8 memory_host_coherent[VulkanMemoryKind_Count]; 148 static_assert(VK_MAX_MEMORY_HEAPS < I8_MAX, ""); 149 static_assert(VK_MAX_MEMORY_TYPES < U8_MAX, ""); 150 } memory_info; 151 152 VulkanCommandPool * command_pools[VulkanTimeline_Count]; 153 VulkanQueue * queues[VulkanQueueKind_Count]; 154 // NOTE(rnp): there are a few places in the code where simply going through the queues map 155 // is not sufficient. those places need to know of the unique queues which unique queue 156 // is being referred to. that code uses this map instead. 157 u16 queue_indices[VulkanQueueKind_Count]; 158 u16 unique_queues; 159 160 VkFormat swap_chain_image_format; 161 VkFormat depth_stencil_format; 162 163 VulkanEntity * entity_freelist; 164 Arena entity_arena; 165 i32 entity_lock; 166 } VulkanContext; 167 168 read_only global const char *vk_required_instance_extensions[] = { 169 }; 170 171 #if OS_WINDOWS 172 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \ 173 X("VK_KHR_external_memory_win32") \ 174 X("VK_KHR_external_semaphore_win32") \ 175 176 #else 177 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \ 178 X("VK_KHR_external_memory_fd") \ 179 X("VK_KHR_external_semaphore_fd") \ 180 181 #endif 182 183 #define VK_REQUIRED_DEVICE_EXTENSIONS_LIST \ 184 X("VK_KHR_16bit_storage") \ 185 X("VK_KHR_external_memory") \ 186 X("VK_KHR_external_semaphore") \ 187 X("VK_KHR_storage_buffer_storage_class") \ 188 X("VK_KHR_timeline_semaphore") \ 189 VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST 190 191 #define X(str) s8_comp(str), 192 read_only global s8 vk_required_device_extensions[] = {VK_REQUIRED_DEVICE_EXTENSIONS_LIST}; 193 #undef X 194 195 #define VK_OPTIONAL_DEVICE_EXTENSIONS_LIST \ 196 X(VK_KHR, cooperative_matrix) \ 197 198 #define X(p, s, ...) s8_comp(#p "_" #s), 199 read_only global s8 vk_optional_device_extensions[] = {VK_OPTIONAL_DEVICE_EXTENSIONS_LIST}; 200 #undef X 201 202 #define VK_REQUIRED_PHYSICAL_FEATURES \ 203 X(shaderInt16) \ 204 X(shaderInt64) \ 205 206 #define VK_REQUIRED_PHYSICAL_11_FEATURES \ 207 X(storageBuffer16BitAccess) \ 208 209 #define VK_REQUIRED_PHYSICAL_12_FEATURES \ 210 X(bufferDeviceAddress) \ 211 X(shaderFloat16) \ 212 X(timelineSemaphore) \ 213 X(vulkanMemoryModel) \ 214 215 #define VK_REQUIRED_PHYSICAL_13_FEATURES \ 216 X(dynamicRendering) \ 217 X(synchronization2) \ 218 219 #define VK_DEBUG_EXTENSIONS \ 220 X(VK_KHR, shader_non_semantic_info) \ 221 X(VK_KHR, shader_relaxed_extended_instruction) \ 222 223 #define X(p, s, ...) s8_comp(#p "_" #s), 224 read_only global s8 vk_debug_extensions[] = {VK_DEBUG_EXTENSIONS}; 225 #undef X 226 227 #define VK_INSTANCE_DEBUG_EXTENSIONS_LIST \ 228 X(VK_EXT, debug_utils) \ 229 230 #define X(p, s, ...) s8_comp(#p "_" #s), 231 read_only global s8 vk_instance_debug_extensions[] = {VK_INSTANCE_DEBUG_EXTENSIONS_LIST}; 232 #undef X 233 234 global struct { 235 union { 236 struct { 237 #define X(_, name, ...) b8 name; 238 VK_OPTIONAL_DEVICE_EXTENSIONS_LIST 239 #undef X 240 }; 241 b8 E[countof(vk_optional_device_extensions)]; 242 } optional; 243 244 union { 245 struct { 246 #define X(_, name, ...) b8 name; 247 VK_DEBUG_EXTENSIONS 248 #undef X 249 }; 250 b8 E[countof(vk_debug_extensions)]; 251 } debug; 252 253 union { 254 struct { 255 #define X(_, name, ...) b8 name; 256 VK_INSTANCE_DEBUG_EXTENSIONS_LIST 257 #undef X 258 }; 259 b8 E[countof(vk_instance_debug_extensions)]; 260 } instance; 261 } vulkan_config; 262 263 #define MAX_ENABLED_EXTENSIONS ( countof(vk_required_device_extensions) \ 264 + countof(vk_optional_device_extensions) \ 265 + countof(vk_debug_extensions) \ 266 ) 267 268 global VulkanContext vulkan_context[1]; 269 270 /* NOTE(rnp): the idea here is to set reasonable development constraints. 271 * They should probably not match one to one with the maximums of the dev 272 * machine's hardware. Instead these are here to cause compile time failure 273 * for features which are not expected to work everywhere. */ 274 global glslang_resource_t glslc_resource_constraints[1] = {{ 275 .max_compute_work_group_count_x = 65535, 276 .max_compute_work_group_count_y = 65535, 277 .max_compute_work_group_count_z = 65535, 278 .max_compute_work_group_size_x = 1024, 279 .max_compute_work_group_size_y = 1024, 280 .max_compute_work_group_size_z = 1024, 281 282 // NOTE: taken from glslang defaults 283 .max_lights = 32, 284 .max_clip_planes = 6, 285 .max_texture_units = 32, 286 .max_texture_coords = 32, 287 .max_vertex_attribs = 64, 288 .max_vertex_uniform_components = 4096, 289 .max_varying_floats = 64, 290 .max_vertex_texture_image_units = 32, 291 .max_combined_texture_image_units = 80, 292 .max_texture_image_units = 32, 293 .max_fragment_uniform_components = 4096, 294 .max_draw_buffers = 32, 295 .max_vertex_uniform_vectors = 128, 296 .max_varying_vectors = 8, 297 .max_fragment_uniform_vectors = 16, 298 .max_vertex_output_vectors = 16, 299 .max_fragment_input_vectors = 15, 300 .min_program_texel_offset = -8, 301 .max_program_texel_offset = 7, 302 .max_clip_distances = 8, 303 .max_compute_uniform_components = 1024, 304 .max_compute_texture_image_units = 16, 305 .max_compute_image_uniforms = 8, 306 .max_compute_atomic_counters = 8, 307 .max_compute_atomic_counter_buffers = 1, 308 .max_varying_components = 60, 309 .max_vertex_output_components = 64, 310 .max_fragment_input_components = 128, 311 .max_image_units = 8, 312 .max_combined_image_units_and_fragment_outputs = 8, 313 .max_combined_shader_output_resources = 8, 314 .max_image_samples = 0, 315 .max_vertex_image_uniforms = 0, 316 .max_fragment_image_uniforms = 8, 317 .max_combined_image_uniforms = 8, 318 .max_viewports = 16, 319 .max_vertex_atomic_counters = 0, 320 .max_fragment_atomic_counters = 8, 321 .max_combined_atomic_counters = 8, 322 .max_atomic_counter_bindings = 1, 323 .max_vertex_atomic_counter_buffers = 0, 324 .max_fragment_atomic_counter_buffers = 1, 325 .max_combined_atomic_counter_buffers = 1, 326 .max_atomic_counter_buffer_size = 16384, 327 .max_transform_feedback_buffers = 4, 328 .max_transform_feedback_interleaved_components = 64, 329 .max_cull_distances = 8, 330 .max_combined_clip_and_cull_distances = 8, 331 .max_samples = 4, 332 .max_mesh_output_vertices_ext = 256, 333 .max_mesh_output_primitives_ext = 256, 334 .max_mesh_work_group_size_x_ext = 128, 335 .max_mesh_work_group_size_y_ext = 128, 336 .max_mesh_work_group_size_z_ext = 128, 337 .max_task_work_group_size_x_ext = 128, 338 .max_task_work_group_size_y_ext = 128, 339 .max_task_work_group_size_z_ext = 128, 340 .max_mesh_view_count_ext = 4, 341 .max_dual_source_draw_buffers_ext = 1, 342 343 .limits = { 344 .non_inductive_for_loops = 1, 345 .while_loops = 1, 346 .do_while_loops = 1, 347 .general_uniform_indexing = 1, 348 .general_attribute_matrix_vector_indexing = 1, 349 .general_varying_indexing = 1, 350 .general_sampler_indexing = 1, 351 .general_variable_indexing = 1, 352 .general_constant_matrix_vector_indexing = 1, 353 }, 354 }}; 355 356 #if BEAMFORMER_RENDERDOC_HOOKS 357 DEBUG_IMPORT void * 358 vk_renderdoc_instance_handle(void) 359 { 360 return *((void **)vulkan_context->handle); 361 } 362 #endif 363 364 #if BEAMFORMER_DEBUG 365 #define vk_label_object(k, h, label, extra) vk_label_object_(VK_OBJECT_TYPE_##k, (u64)h, label, extra) 366 function void 367 vk_label_object_(VkObjectType kind, u64 handle, s8 label, s8 extra) 368 { 369 local_persist u8 buffer[1024]; 370 Stream sb = arena_stream(arena_from_memory(buffer, sizeof(buffer))); 371 if (vulkan_config.instance.debug_utils && label.len > 0) { 372 stream_append_s8s(&sb, label, s8(" ("), extra, s8(")")); 373 stream_append_byte(&sb, 0); 374 if (!sb.errors) { 375 VkDebugUtilsObjectNameInfoEXT object_name_info = { 376 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, 377 .objectType = kind, 378 .objectHandle = handle, 379 .pObjectName = (char *)sb.data, 380 }; 381 vkSetDebugUtilsObjectNameEXT(vulkan_context->device, &object_name_info); 382 } 383 } 384 } 385 #else 386 #define vk_label_object(...) 387 #define vk_label_object_(...) 388 #endif 389 390 function VulkanEntity * 391 vk_entity_allocate(VulkanEntityKind kind) 392 { 393 VulkanEntity *result = 0; 394 DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock)) 395 { 396 result = SLLPopFreelist(vulkan_context->entity_freelist); 397 if (!result) result = push_array_no_zero(&vulkan_context->entity_arena, VulkanEntity, 1); 398 } 399 400 zero_struct(result); 401 result->kind = kind; 402 return result; 403 } 404 405 function void 406 vk_entity_release(VulkanEntity *entity) 407 { 408 DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock)) 409 { 410 SLLStackPush(vulkan_context->entity_freelist, entity, next); 411 } 412 } 413 414 function void * 415 vk_entity_data(VulkanHandle h, VulkanEntityKind kind) 416 { 417 VulkanEntity *e = (VulkanEntity *)h.value[0]; 418 assert(ValidVulkanHandle(h) && e->kind == kind); 419 return &e->as; 420 } 421 422 function VkCommandBuffer 423 vk_command_buffer(VulkanHandle h) 424 { 425 VulkanCommandBuffer *vcb = vk_entity_data(h, VulkanEntityKind_CommandBuffer); 426 VulkanCommandPool *vcp = vulkan_context->command_pools[vcb->timeline]; 427 VkCommandBuffer result = vcp->buffers[vcb->buffer_index]; 428 return result; 429 } 430 431 #define glslang_log(a, ...) glslang_log_(a, arg_list(s8, __VA_ARGS__)) 432 function void 433 glslang_log_(Arena arena, s8 *items, uz count) 434 { 435 Stream sb = arena_stream(arena); 436 stream_append_s8(&sb, glslang_info("")); 437 stream_append_s8s_(&sb, items, count); 438 if (sb.data[sb.widx - 1] != '\n') stream_append_byte(&sb, '\n'); 439 os_console_log(sb.data, sb.widx); 440 } 441 442 function s8 443 glsl_to_spirv(Arena *arena, u32 kind, s8 shader_text, s8 name) 444 { 445 /* NOTE(rnp): glslang's garbage c interface doesn't expose internal usage of strings with length */ 446 assert(shader_text.data[shader_text.len] == 0); 447 448 glslang_input_t input = { 449 .language = GLSLANG_SOURCE_GLSL, 450 .stage = kind, 451 .client = GLSLANG_CLIENT_VULKAN, 452 .client_version = GLSLANG_TARGET_VULKAN_1_4, 453 .target_language = GLSLANG_TARGET_SPV, 454 .target_language_version = GLSLANG_TARGET_SPV_1_6, 455 .code = (c8 *)shader_text.data, 456 .default_version = 460, 457 .default_profile = GLSLANG_NO_PROFILE, 458 .force_default_version_and_profile = 0, 459 .forward_compatible = 0, 460 .messages = GLSLANG_MSG_DEFAULT_BIT, 461 .resource = glslc_resource_constraints, 462 }; 463 glslang_shader_t *shader = glslang_shader_create(&input); 464 465 s8 error = {0}; 466 if (glslang_shader_preprocess(shader, &input)) { 467 if (!glslang_shader_parse(shader, &input)) 468 error = s8("parsing failed"); 469 } else { 470 error = s8("preprocessing failed"); 471 } 472 473 if (error.len) { 474 glslang_log(*arena, name, s8(": "), error, s8("\n"), 475 c_str_to_s8((c8 *)glslang_shader_get_info_log(shader)), 476 c_str_to_s8((c8 *)glslang_shader_get_info_debug_log(shader))); 477 glslang_shader_delete(shader); 478 shader = 0; 479 } 480 481 s8 result = {0}; 482 if (shader) { 483 glslang_program_t *program = glslang_program_create(); 484 glslang_program_add_shader(program, shader); 485 i32 messages = GLSLANG_MSG_DEBUG_INFO_BIT|GLSLANG_MSG_SPV_RULES_BIT|GLSLANG_MSG_VULKAN_RULES_BIT; 486 if (glslang_program_link(program, messages)) { 487 glslang_spv_options_t options = {.validate = 1,}; 488 489 if (vulkan_config.debug.shader_non_semantic_info) { 490 options.generate_debug_info = 1; 491 options.emit_nonsemantic_shader_debug_info = 1; 492 options.emit_nonsemantic_shader_debug_source = 1; 493 } 494 495 glslang_program_add_source_text(program, kind, (c8 *)shader_text.data, shader_text.len); 496 glslang_program_SPIRV_generate_with_options(program, kind, &options); 497 498 u32 words = glslang_program_SPIRV_get_size(program); 499 result.data = (u8 *)push_array(arena, u32, words); 500 result.len = words * sizeof(u32); 501 glslang_program_SPIRV_get(program, (u32 *)result.data); 502 503 s8 spirv_msg = c_str_to_s8((c8 *)glslang_program_SPIRV_get_messages(program)); 504 if (spirv_msg.len) glslang_log(*arena, name, s8(": spirv info: "), spirv_msg); 505 } else { 506 glslang_log(*arena, name, s8(": shader linking failed\n"), 507 c_str_to_s8((c8 *)glslang_program_get_info_log(program)), 508 c_str_to_s8((c8 *)glslang_program_get_info_debug_log(program))); 509 } 510 glslang_shader_delete(shader); 511 glslang_program_delete(program); 512 } 513 514 return result; 515 } 516 517 function u32 518 vk_shader_kind_to_glslang_shader_kind(u32 kind) 519 { 520 u32 result = ctz_u64(kind); 521 return result; 522 } 523 524 function VkShaderModule 525 vk_compile_shader_module(Arena arena, u32 kind, s8 text, s8 name) 526 { 527 VkShaderModule result = {0}; 528 s8 spirv = glsl_to_spirv(&arena, vk_shader_kind_to_glslang_shader_kind(kind), text, name); 529 VkShaderModuleCreateInfo create_info = { 530 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 531 .codeSize = (uz)spirv.len, 532 .pCode = (u32 *)spirv.data, 533 }; 534 if (spirv.len > 0) vkCreateShaderModule(vulkan_context->device, &create_info, 0, &result); 535 536 return result; 537 } 538 539 function VkShaderStageFlags 540 vk_stage_flags_from_shader_kind(VulkanShaderKind kind) 541 { 542 read_only local_persist VkShaderStageFlags map[VulkanShaderKind_Count + 1] = { 543 [VulkanShaderKind_Vertex] = VK_SHADER_STAGE_VERTEX_BIT, 544 [VulkanShaderKind_Mesh] = VK_SHADER_STAGE_MESH_BIT_EXT, 545 [VulkanShaderKind_Fragment] = VK_SHADER_STAGE_FRAGMENT_BIT, 546 [VulkanShaderKind_Compute] = VK_SHADER_STAGE_COMPUTE_BIT, 547 [VulkanShaderKind_Count] = 0, 548 }; 549 VkShaderStageFlags result = map[Clamp((u32)kind, 0, VulkanShaderKind_Count)]; 550 return result; 551 } 552 553 function VulkanPipeline 554 vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_constants_size) 555 { 556 VulkanPipeline result = {.stage_flags = VK_SHADER_STAGE_COMPUTE_BIT}; 557 VkShaderModule module = vk_compile_shader_module(arena, VK_SHADER_STAGE_COMPUTE_BIT, text, name); 558 if (module) { 559 VkPushConstantRange push_constant_range = { 560 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 561 .offset = 0, 562 .size = push_constants_size, 563 }; 564 565 VkPipelineLayoutCreateInfo pipeline_layout_create_info = { 566 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 567 .setLayoutCount = countof(vulkan_context->descriptor_set_layouts), 568 .pSetLayouts = vulkan_context->descriptor_set_layouts, 569 .pushConstantRangeCount = push_constants_size ? 1 : 0, 570 .pPushConstantRanges = push_constants_size ? &push_constant_range : 0, 571 }; 572 573 vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_create_info, 0, &result.layout); 574 575 VkComputePipelineCreateInfo pipeline_create_info = { 576 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 577 .layout = result.layout, 578 .stage = { 579 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 580 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 581 .module = module, 582 .pName = "main", 583 }, 584 }; 585 586 vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline); 587 588 vk_label_object(PIPELINE, result.pipeline, name, s8("Pipeline")); 589 vk_label_object(PIPELINE_LAYOUT, result.layout, name, s8("Pipeline Layout")); 590 vk_label_object(SHADER_MODULE, module, name, s8("Module")); 591 592 vkDestroyShaderModule(vulkan_context->device, module, 0); 593 } 594 if (result.pipeline == 0) result = vulkan_context->default_compute_pipeline; 595 596 return result; 597 } 598 599 function VulkanPipeline 600 vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size) 601 { 602 assume(count == 2); 603 604 VulkanPipeline result = {0}; 605 VkShaderModule modules[2]; 606 607 modules[0] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[0].kind), 608 infos[0].text, infos[0].name); 609 modules[1] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[1].kind), 610 infos[1].text, infos[1].name); 611 if (modules[0] && modules[1]) { 612 result.stage_flags = vk_stage_flags_from_shader_kind(infos[0].kind) 613 | vk_stage_flags_from_shader_kind(infos[1].kind); 614 615 VkPushConstantRange pcr = { 616 .stageFlags = result.stage_flags, 617 .offset = 0, 618 .size = push_constants_size, 619 }; 620 621 VkPipelineLayoutCreateInfo pipeline_layout_info = { 622 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 623 .setLayoutCount = countof(vulkan_context->descriptor_set_layouts), 624 .pSetLayouts = vulkan_context->descriptor_set_layouts, 625 .pushConstantRangeCount = push_constants_size ? 1 : 0, 626 .pPushConstantRanges = push_constants_size ? &pcr : 0, 627 }; 628 629 vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_info, 0, &result.layout); 630 631 VkPipelineShaderStageCreateInfo shader_stage_create_infos[2] = { 632 { 633 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 634 .stage = vk_stage_flags_from_shader_kind(infos[0].kind), 635 .module = modules[0], 636 .pName = "main", 637 }, 638 { 639 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 640 .stage = vk_stage_flags_from_shader_kind(infos[1].kind), 641 .module = modules[1], 642 .pName = "main", 643 }, 644 }; 645 646 VkPipelineVertexInputStateCreateInfo vertex_input_info = { 647 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 648 }; 649 650 VkPipelineInputAssemblyStateCreateInfo input_assembly_info = { 651 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 652 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 653 }; 654 655 VkPipelineViewportStateCreateInfo viewport_info = { 656 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, 657 .viewportCount = 1, 658 .scissorCount = 1, 659 }; 660 661 VkPipelineRasterizationStateCreateInfo rasterization_info = { 662 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 663 .polygonMode = VK_POLYGON_MODE_FILL, 664 .lineWidth = 1.0f, 665 .cullMode = VK_CULL_MODE_BACK_BIT, 666 .frontFace = VK_FRONT_FACE_CLOCKWISE, 667 }; 668 669 VkPipelineMultisampleStateCreateInfo multisampling_info = { 670 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 671 .rasterizationSamples = vulkan_context->gpu_info.max_msaa_samples, 672 }; 673 674 VkPipelineDepthStencilStateCreateInfo depth_test_create_info = { 675 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 676 .depthTestEnable = 1, 677 .depthWriteEnable = 1, 678 .depthCompareOp = VK_COMPARE_OP_LESS, 679 .depthBoundsTestEnable = 1, 680 .stencilTestEnable = 0, 681 .front = {0}, 682 .back = {0}, 683 .minDepthBounds = 0.0f, 684 .maxDepthBounds = 1.0f, 685 }; 686 687 u32 colour_mask = VK_COLOR_COMPONENT_R_BIT|VK_COLOR_COMPONENT_G_BIT|VK_COLOR_COMPONENT_B_BIT|VK_COLOR_COMPONENT_A_BIT; 688 VkPipelineColorBlendAttachmentState blend_state = { 689 .colorWriteMask = colour_mask, 690 .blendEnable = 1, 691 .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA, 692 .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, 693 .colorBlendOp = VK_BLEND_OP_ADD, 694 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, 695 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, 696 .alphaBlendOp = VK_BLEND_OP_ADD, 697 }; 698 699 VkPipelineColorBlendStateCreateInfo colour_blend_state_create = { 700 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 701 .logicOpEnable = 0, 702 .logicOp = VK_LOGIC_OP_COPY, 703 .attachmentCount = 1, 704 .pAttachments = &blend_state, 705 }; 706 707 VkDynamicState dynamic_states[] = { 708 VK_DYNAMIC_STATE_VIEWPORT, 709 VK_DYNAMIC_STATE_SCISSOR, 710 }; 711 712 VkPipelineDynamicStateCreateInfo dynamic_state_info = { 713 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 714 .dynamicStateCount = countof(dynamic_states), 715 .pDynamicStates = dynamic_states, 716 }; 717 718 //VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_SRGB; 719 VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_UNORM; 720 VkPipelineRenderingCreateInfo rendering_create_info = { 721 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, 722 .colorAttachmentCount = 1, 723 .pColorAttachmentFormats = &colour_attachment_format, 724 .depthAttachmentFormat = vulkan_context->depth_stencil_format, 725 .stencilAttachmentFormat = vulkan_context->depth_stencil_format, 726 }; 727 728 VkGraphicsPipelineCreateInfo pci = { 729 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 730 .pNext = &rendering_create_info, 731 .stageCount = countof(shader_stage_create_infos), 732 .pStages = shader_stage_create_infos, 733 .pVertexInputState = &vertex_input_info, 734 .pInputAssemblyState = &input_assembly_info, 735 .pViewportState = &viewport_info, 736 .pRasterizationState = &rasterization_info, 737 .pMultisampleState = &multisampling_info, 738 .pDepthStencilState = &depth_test_create_info, 739 .pColorBlendState = &colour_blend_state_create, 740 .pDynamicState = &dynamic_state_info, 741 .layout = result.layout, 742 }; 743 744 vkCreateGraphicsPipelines(vulkan_context->device, 0, 1, &pci,0, &result.pipeline); 745 746 s8 extras[] = { 747 [VulkanShaderKind_Vertex] = s8_comp("Vertex Module"), 748 [VulkanShaderKind_Mesh] = s8_comp("Mesh Module"), 749 [VulkanShaderKind_Fragment] = s8_comp("Fragment Module"), 750 }; 751 assert(infos[0].kind < countof(extras)); 752 assert(infos[1].kind < countof(extras)); 753 754 vk_label_object(PIPELINE, result.pipeline, infos[0].name, s8("Pipeline")); 755 vk_label_object(PIPELINE_LAYOUT, result.layout, infos[0].name, s8("Pipeline Layout")); 756 //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[0], infos[0].name, extras[infos[0].kind]); 757 //vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[1], infos[1].name, extras[infos[1].kind]); 758 } 759 760 if (modules[0]) vkDestroyShaderModule(vulkan_context->device, modules[0], 0); 761 if (modules[1]) vkDestroyShaderModule(vulkan_context->device, modules[1], 0); 762 763 if (result.pipeline == 0) result = vulkan_context->default_graphics_pipeline; 764 765 return result; 766 } 767 768 function VulkanSemaphore 769 vk_make_semaphore(OSHandle *export) 770 { 771 VulkanContext *vk = vulkan_context; 772 773 VkSemaphoreCreateInfo sci = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; 774 VkExportSemaphoreCreateInfo esci = { 775 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, 776 .handleTypes = OS_WINDOWS ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT 777 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, 778 }; 779 VkSemaphoreTypeCreateInfo stc = { 780 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, 781 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, 782 }; 783 784 if (export) sci.pNext = &esci; 785 else sci.pNext = &stc; 786 787 VulkanSemaphore result = {0}; 788 789 vkCreateSemaphore(vk->device, &sci, 0, &result.semaphore); 790 791 if (export) { 792 if (OS_WINDOWS) { 793 VkSemaphoreGetWin32HandleInfoKHR ghi = { 794 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, 795 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT, 796 .semaphore = result.semaphore, 797 }; 798 void *handle; 799 vkGetSemaphoreWin32HandleKHR(vk->device, &ghi, &handle); 800 export->value[0] = (u64)handle; 801 } else { 802 VkSemaphoreGetFdInfoKHR ghi = { 803 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, 804 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, 805 .semaphore = result.semaphore, 806 }; 807 i32 handle; 808 vkGetSemaphoreFdKHR(vk->device, &ghi, &handle); 809 export->value[0] = (u64)handle; 810 } 811 } 812 813 return result; 814 } 815 816 function void 817 vk_release_memory(VkDeviceMemory memory, u64 size) 818 { 819 VulkanContext *vk = vulkan_context; 820 vkFreeMemory(vk->device, memory, 0); 821 atomic_add_u64(&vk->gpu_info.gpu_heap_used, -size); 822 } 823 824 function b32 825 vk_allocate_memory(VkDeviceMemory *memory, u64 size, VulkanMemoryKind kind, VkMemoryAllocateFlags flags, 826 VkMemoryDedicatedAllocateInfo *dedicated_allocate_info, OSHandle *export) 827 { 828 VulkanContext *vk = vulkan_context; 829 830 VkExportMemoryAllocateInfo export_info = { 831 .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, 832 .handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 833 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 834 }; 835 836 VkMemoryAllocateFlagsInfo memory_allocate_flags_info = { 837 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, 838 .flags = flags, 839 .pNext = dedicated_allocate_info, 840 }; 841 842 if (export) { 843 export_info.pNext = dedicated_allocate_info; 844 memory_allocate_flags_info.pNext = &export_info; 845 } 846 847 VkMemoryAllocateInfo memory_allocate_info = { 848 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, 849 .allocationSize = size, 850 .memoryTypeIndex = vk->memory_info.memory_type_indices[kind], 851 .pNext = &memory_allocate_flags_info, 852 }; 853 854 b32 result = vkAllocateMemory(vk->device, &memory_allocate_info, 0, memory) == VK_SUCCESS; 855 if (result) { 856 atomic_add_u64(&vk->gpu_info.gpu_heap_used, memory_allocate_info.allocationSize); 857 858 if (export) { 859 if (OS_WINDOWS) { 860 VkMemoryGetWin32HandleInfoKHR handle_info = { 861 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, 862 .memory = *memory, 863 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, 864 }; 865 void *handle; 866 vkGetMemoryWin32HandleKHR(vk->device, &handle_info, &handle); 867 export->value[0] = (u64)handle; 868 } else { 869 VkMemoryGetFdInfoKHR fd_info = { 870 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, 871 .memory = *memory, 872 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 873 }; 874 i32 fd; 875 vkGetMemoryFdKHR(vk->device, &fd_info, &fd); 876 export->value[0] = (u64)fd; 877 } 878 } 879 } 880 return result; 881 } 882 883 function u32 884 vk_index_size(VkIndexType type) 885 { 886 u32 result = 0; 887 switch (type) { 888 case VK_INDEX_TYPE_UINT16:{ result = 2; }break; 889 case VK_INDEX_TYPE_UINT32:{ result = 4; }break; 890 InvalidDefaultCase; 891 } 892 return result; 893 } 894 895 typedef struct { 896 GPUBuffer *gpu_buffer; 897 u64 size; 898 VulkanUsageFlags flags; 899 u32 queue_family_count; 900 u32 queue_family_indices[VulkanTimeline_Count]; 901 VkIndexType index_type; 902 s8 label; 903 } VulkanBufferAllocateInfo; 904 905 function b32 906 vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai) 907 { 908 VulkanContext *vk = vulkan_context; 909 910 // TODO(rnp): this probably should be handled, its usually 4GB. likely 911 // need to chain multiple allocations and handle it in shader code 912 u64 clamp_size = vk->memory_info.max_allocation_size & ~(vk->memory_info.non_coherent_atom_size - 1); 913 914 // NOTE(rnp): renderdoc can't handle buffers that are too close to the allocation size limit 915 if (renderdoc_attached()) 916 clamp_size -= MB(8); 917 918 u64 size = Min(ai->size, clamp_size); 919 920 VkBufferCreateInfo buffer_create_info = { 921 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 922 .usage = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT|VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 923 .size = size, 924 .sharingMode = ai->queue_family_count > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE, 925 .queueFamilyIndexCount = ai->queue_family_count, 926 .pQueueFamilyIndices = ai->queue_family_indices, 927 }; 928 929 if (ai->flags & VulkanUsageFlag_TransferSource) 930 buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; 931 932 if (ai->flags & VulkanUsageFlag_TransferDestination) 933 buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; 934 935 if (ai->index_type != VK_INDEX_TYPE_NONE_KHR) 936 buffer_create_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; 937 938 vkCreateBuffer(vk->device, &buffer_create_info, 0, &vb->buffer); 939 vk_label_object(BUFFER, vb->buffer, ai->label, s8("Buffer")); 940 941 VkMemoryRequirements memory_requirements; 942 vkGetBufferMemoryRequirements(vk->device, vb->buffer, &memory_requirements); 943 944 assert((u64)size <= memory_requirements.size); 945 size = memory_requirements.size; 946 947 VkMemoryDedicatedAllocateInfo dedicated_allocate_info = { 948 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 949 .buffer = vb->buffer, 950 }; 951 952 /* NOTE(rnp): to create a CPU writable buffer: 953 * 1. try to allocate and map the entire buffer 954 * - this may fail if the buffer is bigger than the BAR size 955 * (unknowable from vulkan), or the memory space has become 956 * too fragmented (unlikely) 957 * 2. if allocation or mapping fails we must chain a host buffer 958 * for staging. If this happens in practice we should add 959 * the ability to import an existing external allocation 960 */ 961 b32 host_read_write = (ai->flags & VulkanUsageFlag_HostReadWrite) != 0; 962 vb->memory_kind = host_read_write ? VulkanMemoryKind_BAR : VulkanMemoryKind_Device; 963 964 b32 result = 0; 965 // TODO(rnp): this may fail if the allocation is too big for the BAR size 966 // it needs to handled properly 967 if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, 0)) { 968 result = 1; 969 ai->gpu_buffer->size = size; 970 vb->memory_size = size; 971 972 vb->index_type = ai->index_type; 973 974 vk_label_object(DEVICE_MEMORY, vb->memory, ai->label, s8("Memory")); 975 976 if (host_read_write) 977 vkMapMemory(vk->device, vb->memory, 0, size, 0, &vb->host_pointer); 978 979 vkBindBufferMemory(vk->device, vb->buffer, vb->memory, 0); 980 VkBufferDeviceAddressInfo buffer_device_address_info = { 981 .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, 982 .buffer = vb->buffer, 983 }; 984 ai->gpu_buffer->gpu_pointer = vkGetBufferDeviceAddress(vk->device, &buffer_device_address_info); 985 } 986 return result; 987 } 988 989 function void 990 vk_load_instance(Arena arena, Stream *err) 991 { 992 #define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(0, #name); 993 VkBaseProcedureList 994 #undef X 995 996 s8 validation_layers[] = { 997 #if BEAMFORMER_DEBUG 998 s8_comp("VK_LAYER_KHRONOS_validation"), 999 #endif 1000 }; 1001 1002 u32 enabled_validation_layers_count = 0; 1003 const char *enabled_validation_layers[countof(validation_layers)]; 1004 1005 u32 enabled_instance_extensions_count = 0; 1006 const char *enabled_instance_extensions[countof(vk_required_instance_extensions) + countof(vk_instance_debug_extensions)]; 1007 1008 static_assert(countof(vk_required_instance_extensions) == 0, ""); 1009 //for EachElement(vk_required_instance_extensions, it) 1010 // enabled_instance_extensions[enabled_instance_extensions_count++] = vk_required_instance_extensions[it]; 1011 1012 #if BEAMFORMER_DEBUG 1013 { 1014 u32 layer_count = 0; 1015 vkEnumerateInstanceLayerProperties(&layer_count, 0); 1016 1017 VkLayerProperties *layers = push_array(&arena, VkLayerProperties, layer_count); 1018 s8 *layer_s8s = push_array(&arena, s8, layer_count); 1019 vkEnumerateInstanceLayerProperties(&layer_count, layers); 1020 1021 for (u32 i = 0; i < layer_count; i++) 1022 layer_s8s[i] = c_str_to_s8(layers[i].layerName); 1023 1024 b32 supported_layers[countof(validation_layers)] = {0}; 1025 for EachElement(validation_layers, it) { 1026 for(u32 i = 0; i < layer_count; i++) { 1027 if (s8_equal(validation_layers[it], layer_s8s[i])) { 1028 u32 index = enabled_validation_layers_count++; 1029 enabled_validation_layers[index] = (char *)validation_layers[it].data; 1030 supported_layers[it] = 1; 1031 break; 1032 } 1033 } 1034 } 1035 1036 if (countof(validation_layers) != enabled_validation_layers_count) { 1037 i32 missing_count = countof(validation_layers) - enabled_validation_layers_count; 1038 stream_append_s8s(err, vulkan_info("missing validation layer"), 1039 missing_count > 1 ? s8("s:") : s8(":"), s8("\n")); 1040 1041 for EachElement(validation_layers, it) { 1042 if (supported_layers[it] == 0) 1043 stream_append_s8s(err, s8(" "), validation_layers[it], s8("\n")); 1044 } 1045 } 1046 1047 u32 instance_extension_count = 0; 1048 vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, 0); 1049 1050 VkExtensionProperties *instance_extensions = push_array(&arena, VkExtensionProperties, instance_extension_count); 1051 s8 *instance_ext_s8s = push_array(&arena, s8, instance_extension_count); 1052 vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, instance_extensions); 1053 for EachIndex(instance_extension_count, it) 1054 instance_ext_s8s[it] = c_str_to_s8(instance_extensions[it].extensionName); 1055 1056 for EachElement(vk_instance_debug_extensions, it) { 1057 for EachIndex(instance_extension_count, i) { 1058 if (s8_equal(vk_instance_debug_extensions[it], instance_ext_s8s[i])) { 1059 u32 index = enabled_instance_extensions_count++; 1060 enabled_instance_extensions[index] = (char *)vk_instance_debug_extensions[it].data; 1061 vulkan_config.instance.E[it] = 1; 1062 break; 1063 } 1064 } 1065 } 1066 } 1067 #endif 1068 1069 VkApplicationInfo app_info = { 1070 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, 1071 .pApplicationName = BEAMFORMER_NAME_STRING, 1072 .applicationVersion = 0, 1073 .pEngineName = "No Engine", 1074 .engineVersion = 0, 1075 .apiVersion = VK_MAKE_API_VERSION(1, 3, 0, 0), 1076 }; 1077 1078 VkInstanceCreateInfo instance_create_info = { 1079 .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, 1080 .pApplicationInfo = &app_info, 1081 .ppEnabledExtensionNames = enabled_instance_extensions, 1082 .enabledExtensionCount = enabled_instance_extensions_count, 1083 .ppEnabledLayerNames = enabled_validation_layers, 1084 .enabledLayerCount = enabled_validation_layers_count, 1085 }; 1086 1087 #if 0 && BEAMFORMER_DEBUG 1088 VkValidationFeatureEnableEXT validation_feature_enables[] = { 1089 VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, 1090 VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, 1091 VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT, 1092 VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, 1093 }; 1094 1095 VkValidationFeaturesEXT validation_features = { 1096 .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, 1097 .enabledValidationFeatureCount = countof(validation_feature_enables), 1098 .pEnabledValidationFeatures = validation_feature_enables, 1099 }; 1100 1101 instance_create_info.pNext = &validation_features; 1102 #endif 1103 1104 vkCreateInstance(&instance_create_info, 0, &vulkan_context->handle); 1105 1106 #define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(vulkan_context->handle, #name); 1107 VkInstanceProcedureList 1108 #undef X 1109 } 1110 1111 function void 1112 vk_load_physical_device(Arena arena, Stream *err) 1113 { 1114 VulkanContext *vk = vulkan_context; 1115 1116 u32 device_count; 1117 vkEnumeratePhysicalDevices(vk->handle, &device_count, 0); 1118 1119 VkPhysicalDevice *devices = push_array(&arena, typeof(*devices), device_count); 1120 vkEnumeratePhysicalDevices(vk->handle, &device_count, devices); 1121 1122 i32 best_index = -1, best_score = -1; 1123 for (u32 i = 0; i < device_count; i++) { 1124 Arena scratch = arena; 1125 VkPhysicalDeviceProperties2 *dp = push_struct(&scratch, typeof(*dp)); 1126 dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; 1127 vkGetPhysicalDeviceProperties2(devices[i], dp); 1128 1129 i32 score = 0; 1130 if (dp->properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) 1131 score++; 1132 1133 if (score > best_score) { 1134 best_score = score; 1135 best_index = (i32)i; 1136 } 1137 } 1138 1139 vk->physical_device = best_index >= 0 ? devices[best_index] : 0; 1140 if (!vk->physical_device) 1141 fatal(vulkan_info("failed to find a suitable GPU\n")); 1142 1143 VkPhysicalDeviceProperties2 dp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; 1144 VkPhysicalDeviceVulkan11Properties v11p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES}; 1145 dp.pNext = &v11p; 1146 1147 vkGetPhysicalDeviceProperties2(vk->physical_device, &dp); 1148 1149 stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n")); 1150 1151 { 1152 Arena scratch = arena; 1153 u32 extension_count = 0; 1154 vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, 0); 1155 VkExtensionProperties *extensions = push_array(&scratch, VkExtensionProperties, extension_count); 1156 vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, extensions); 1157 1158 s8 *ext_str8s = push_array(&scratch, s8, extension_count); 1159 for (u32 index = 0; index < extension_count; index++) 1160 ext_str8s[index] = c_str_to_s8(extensions[index].extensionName); 1161 1162 b8 *supported = push_array(&scratch, b8, countof(vk_required_device_extensions)); 1163 for EachIndex(extension_count, index) 1164 for EachElement(vk_required_device_extensions, it) 1165 supported[it] |= s8_equal(vk_required_device_extensions[it], ext_str8s[index]); 1166 1167 u32 supported_count = 0; 1168 for EachElement(vk_required_device_extensions, it) 1169 supported_count += supported[it]; 1170 1171 u32 missing_count = countof(vk_required_device_extensions) - supported_count; 1172 if (missing_count) { 1173 stream_append_s8s(err, vulkan_info("fatal error: missing required device extension"), 1174 missing_count > 1 ? s8("s") : s8(""), s8(":\n")); 1175 for EachElement(vk_required_device_extensions, it) { 1176 if (!supported[it]) { 1177 s8 name = vk_required_device_extensions[it]; 1178 stream_append_s8s(err, vulkan_info(" "), name, s8("\n")); 1179 } 1180 } 1181 fatal(stream_to_s8(err)); 1182 } 1183 1184 for EachIndex(extension_count, index) 1185 for EachElement(vk_optional_device_extensions, it) 1186 vulkan_config.optional.E[it] |= s8_equal(vk_optional_device_extensions[it], ext_str8s[index]); 1187 1188 #if BEAMFORMER_DEBUG 1189 for EachIndex(extension_count, index) 1190 for EachElement(vk_debug_extensions, it) 1191 vulkan_config.debug.E[it] |= s8_equal(vk_debug_extensions[it], ext_str8s[index]); 1192 #endif 1193 } 1194 1195 { 1196 VkPhysicalDeviceFeatures2 df = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; 1197 VkPhysicalDeviceVulkan11Features v11f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES}; 1198 VkPhysicalDeviceVulkan12Features v12f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES}; 1199 VkPhysicalDeviceVulkan13Features v13f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES}; 1200 df.pNext = &v11f; 1201 v11f.pNext = &v12f; 1202 v12f.pNext = &v13f; 1203 vkGetPhysicalDeviceFeatures2(vk->physical_device, &df); 1204 1205 { 1206 b32 all_supported = 1; 1207 #define X(name, ...) all_supported &= df.features.name; 1208 VK_REQUIRED_PHYSICAL_FEATURES 1209 #undef X 1210 1211 if (!all_supported) { 1212 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1213 #define X(name, ...) if (!df.features.name) stream_append_s8(err, s8(" " #name "\n")); 1214 VK_REQUIRED_PHYSICAL_FEATURES 1215 #undef X 1216 fatal(stream_to_s8(err)); 1217 } 1218 } 1219 1220 { 1221 b32 all_supported = 1; 1222 #define X(name, ...) all_supported &= v11f.name; 1223 VK_REQUIRED_PHYSICAL_11_FEATURES 1224 #undef X 1225 1226 if (!all_supported) { 1227 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1228 #define X(name, ...) if (!v11f.name) stream_append_s8(err, s8(" " #name "\n")); 1229 VK_REQUIRED_PHYSICAL_11_FEATURES 1230 #undef X 1231 fatal(stream_to_s8(err)); 1232 } 1233 } 1234 1235 { 1236 b32 all_supported = 1; 1237 #define X(name, ...) all_supported &= v12f.name; 1238 VK_REQUIRED_PHYSICAL_12_FEATURES 1239 #undef X 1240 1241 if (!all_supported) { 1242 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1243 #define X(name, ...) if (!v12f.name) stream_append_s8(err, s8(" " #name "\n")); 1244 VK_REQUIRED_PHYSICAL_12_FEATURES 1245 #undef X 1246 fatal(stream_to_s8(err)); 1247 } 1248 } 1249 1250 { 1251 b32 all_supported = 1; 1252 #define X(name, ...) all_supported &= v13f.name; 1253 VK_REQUIRED_PHYSICAL_13_FEATURES 1254 #undef X 1255 1256 if (!all_supported) { 1257 stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n")); 1258 #define X(name, ...) if (!v13f.name) stream_append_s8(err, s8(" " #name "\n")); 1259 VK_REQUIRED_PHYSICAL_13_FEATURES 1260 #undef X 1261 fatal(stream_to_s8(err)); 1262 } 1263 } 1264 1265 if (vulkan_config.optional.cooperative_matrix) { 1266 Arena scratch = arena; 1267 u32 property_count = 0; 1268 vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, 0); 1269 1270 VkCooperativeMatrixPropertiesKHR *mat = push_array(&scratch, VkCooperativeMatrixPropertiesKHR, property_count); 1271 1272 // NOTE(rnp): validation layer stupidity 1273 for EachIndex(property_count, it) 1274 mat[it].sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR; 1275 1276 vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, mat); 1277 b32 supported = 0; 1278 // TODO(rnp): for now the requirements are hardcoded, it is possible to support a couple 1279 // variations if needed. 1280 for EachIndex(property_count, it) { 1281 b32 match = 1; 1282 supported &= mat[it].scope == VK_SCOPE_SUBGROUP_KHR; 1283 1284 supported &= mat[it].MSize == 16; 1285 supported &= mat[it].NSize == 16; 1286 supported &= mat[it].KSize == 16; 1287 1288 supported &= mat[it].AType == VK_COMPONENT_TYPE_FLOAT16_KHR; 1289 supported &= mat[it].BType == VK_COMPONENT_TYPE_FLOAT16_KHR; 1290 supported &= mat[it].CType == VK_COMPONENT_TYPE_FLOAT32_KHR; 1291 supported &= mat[it].ResultType == VK_COMPONENT_TYPE_FLOAT32_KHR; 1292 1293 supported |= match; 1294 } 1295 vk->gpu_info.cooperative_matrix = supported; 1296 } 1297 } 1298 1299 VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2}; 1300 vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp); 1301 1302 VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties; 1303 1304 // NOTE(rnp): vulkan spec says that highest performance memory types must 1305 // come first. just take the first one found. 1306 1307 for (u32 i = 0; i < bmp->memoryHeapCount; i++) { 1308 if (bmp->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { 1309 vk->memory_info.gpu_heap_index = i; 1310 break; 1311 } 1312 } 1313 1314 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1315 if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { 1316 assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index); 1317 vk->memory_info.memory_type_indices[VulkanMemoryKind_Device] = i; 1318 break; 1319 } 1320 } 1321 1322 // TODO(rnp): it is possible that this isn't available. for devices like that we would need 1323 // to copy into a staging buffer then DMA. For now that is unsupported. 1324 u32 bar_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; 1325 i32 bar_index = -1; 1326 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1327 if ((bmp->memoryTypes[i].propertyFlags & bar_flags) == bar_flags) { 1328 assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index); 1329 bar_index = (i32)i; 1330 break; 1331 } 1332 } 1333 1334 // TODO(rnp): this shouldn't be fatal 1335 if (bar_index == -1) { 1336 stream_append_s8(err, vulkan_info("fatal error: GPU does not support host bar memory\n")); 1337 fatal(stream_to_s8(err)); 1338 } 1339 1340 vk->memory_info.memory_type_indices[VulkanMemoryKind_BAR] = bar_index; 1341 1342 for (u32 i = 0; i < bmp->memoryTypeCount; i++) { 1343 if ((bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == 0) { 1344 assert(bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); 1345 vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = i; 1346 break; 1347 } 1348 } 1349 1350 for EachElement(vk->memory_info.memory_type_indices, it) { 1351 u32 ti = vk->memory_info.memory_type_indices[it]; 1352 u32 flags = bmp->memoryTypes[ti].propertyFlags; 1353 vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; 1354 } 1355 1356 vk->memory_info.max_allocation_size = v11p.maxMemoryAllocationSize; 1357 vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize; 1358 vk->gpu_info.vendor = dp.properties.vendorID; 1359 vk->gpu_info.gpu_heap_size = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size; 1360 vk->gpu_info.timestamp_period_ns = dp.properties.limits.timestampPeriod; 1361 vk->gpu_info.max_image_dimension_2D = dp.properties.limits.maxImageDimension2D; 1362 vk->gpu_info.max_image_dimension_3D = dp.properties.limits.maxImageDimension3D; 1363 vk->gpu_info.max_msaa_samples = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts); 1364 vk->gpu_info.subgroup_size = v11p.subgroupSize; 1365 vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize; 1366 1367 // IMPORTANT(rnp): memory must only be pushed at the end of the function 1368 vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName)); 1369 } 1370 1371 function void 1372 vk_load_queues(Arena *memory, Stream *err) 1373 { 1374 /////////////////////////////////////////////////////// 1375 // NOTE(rnp): try to allocate an appropriate queue for 1376 // each of the following tasks: 1377 // * UI Rendering (Graphics) 1378 // * Beamforming (Compute) 1379 // * Upload (Transfer) 1380 // Then create a logical device ready for use 1381 1382 VulkanContext *vk = vulkan_context; 1383 1384 u32 queue_family_count; 1385 vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, 0); 1386 1387 TempArena arena_save = begin_temp_arena(memory); 1388 VkQueueFamilyProperties *queues = push_array(memory, typeof(*queues), queue_family_count); 1389 vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, queues); 1390 1391 i32 queue_indices[VulkanQueueKind_Count]; 1392 for EachElement(queue_indices, it) queue_indices[it] = -1; 1393 1394 /////////////////////////////////////////////////////////////// 1395 // NOTE(rnp): start by assigning queue families for each queue 1396 1397 /* NOTE(rnp): try for exclusive transfer queue */ 1398 #if !ForceSingleQueue 1399 { 1400 u32 mask = VK_QUEUE_GRAPHICS_BIT|VK_QUEUE_COMPUTE_BIT|VK_QUEUE_TRANSFER_BIT; 1401 u32 max_timestamp_bits = 0; 1402 for (u32 index = 0; index < queue_family_count; index++) { 1403 if ((queues[index].queueFlags & mask) == VK_QUEUE_TRANSFER_BIT) { 1404 if (queues[index].timestampValidBits > max_timestamp_bits) { 1405 max_timestamp_bits = queues[index].timestampValidBits; 1406 queue_indices[VulkanQueueKind_Transfer] = (i32)index; 1407 } 1408 } 1409 } 1410 } 1411 1412 /* NOTE(rnp): try for compute separate from graphics */ 1413 for (u32 index = 0; index < queue_family_count; index++) { 1414 if ((queues[index].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0 && 1415 (queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0) 1416 { 1417 queue_indices[VulkanQueueKind_Compute] = (i32)index; 1418 break; 1419 } 1420 } 1421 #endif /* !ForceSingleQueue */ 1422 1423 /* NOTE(rnp): find graphics family and verify it is exclusive */ 1424 b32 multi_graphics = 0; 1425 for (u32 index = 0; index < queue_family_count; index++) { 1426 if ((queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) { 1427 // TODO(rnp): check for presentation support 1428 multi_graphics = queue_indices[VulkanQueueKind_Graphics] != -1; 1429 queue_indices[VulkanQueueKind_Graphics] = (i32)index; 1430 } 1431 } 1432 1433 if (multi_graphics) 1434 stream_append_s8(err, vulkan_info("warning: multiple queue families reported graphics support\n")); 1435 1436 if (queue_indices[VulkanQueueKind_Graphics] == -1) { 1437 stream_append_s8(err, vulkan_info("fatal error: GPU does not support graphics presentation\n")); 1438 fatal(stream_to_s8(err)); 1439 } 1440 1441 if (queue_indices[VulkanQueueKind_Compute] == -1) 1442 if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0) 1443 queue_indices[VulkanQueueKind_Compute] = queue_indices[VulkanQueueKind_Graphics]; 1444 1445 if (queue_indices[VulkanQueueKind_Compute] == -1) { 1446 stream_append_s8(err, vulkan_info("fatal error: GPU does not support compute\n")); 1447 fatal(stream_to_s8(err)); 1448 } 1449 1450 if (queue_indices[VulkanQueueKind_Transfer] == -1) { 1451 if ((queues[queue_indices[VulkanQueueKind_Compute]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0) 1452 queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Compute]; 1453 else if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0) 1454 queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Graphics]; 1455 } 1456 1457 if (queue_indices[VulkanQueueKind_Transfer] == -1) { 1458 stream_append_s8(err, vulkan_info("fatal error: GPU does not support data transfer\n")); 1459 fatal(stream_to_s8(err)); 1460 } 1461 1462 ///////////////////////////////////////////////////////////////// 1463 // NOTE(rnp): if queues share families try to allocate subqueues 1464 1465 u32 assigned_subindices[VulkanQueueKind_Count] = {0}; 1466 i32 queue_subindices[VulkanQueueKind_Count] = {0}; 1467 1468 assigned_subindices[VulkanQueueKind_Graphics] += 1; 1469 1470 if (queue_indices[VulkanQueueKind_Compute] == queue_indices[VulkanQueueKind_Graphics]) { 1471 if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount) 1472 queue_subindices[VulkanQueueKind_Compute] = assigned_subindices[VulkanQueueKind_Graphics]++; 1473 } else { 1474 assigned_subindices[VulkanQueueKind_Compute] += 1; 1475 } 1476 1477 if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Graphics]) { 1478 if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount) 1479 queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Graphics]++; 1480 } else if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Compute]) { 1481 if (assigned_subindices[VulkanQueueKind_Compute] < queues[queue_indices[VulkanQueueKind_Compute]].queueCount) 1482 queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Compute]++; 1483 } else { 1484 assigned_subindices[VulkanQueueKind_Transfer] += 1; 1485 } 1486 1487 for EachElement(assigned_subindices, it) 1488 vk->unique_queues += assigned_subindices[it]; 1489 1490 end_temp_arena(arena_save); 1491 1492 ///////////////////////////////////////////// 1493 // NOTE(rnp): fill in info and create device 1494 for EachElement(vk->queues, it) { 1495 u32 index = queue_subindices[it]; 1496 for (i32 i = 0; i < queue_indices[it]; i++) 1497 index += assigned_subindices[i]; 1498 vk->queue_indices[it] = index; 1499 } 1500 1501 for EachElement(vk->queues, it) { 1502 if (vk->queues[vk->queue_indices[it]] == 0) { 1503 vk->queues[vk->queue_indices[it]] = push_struct(memory, VulkanQueue); 1504 vk->queues[vk->queue_indices[it]]->queue_family = queue_indices[it]; 1505 vk->queues[vk->queue_indices[it]]->queue_index = queue_subindices[it]; 1506 } 1507 vk->queues[it] = vk->queues[vk->queue_indices[it]]; 1508 } 1509 1510 for EachElement(vk->command_pools, it) 1511 vk->command_pools[it] = push_struct(memory, VulkanCommandPool); 1512 1513 VkDeviceQueueCreateInfo queue_create_infos[VulkanQueueKind_Count]; 1514 1515 f32 queue_priorities[VulkanQueueKind_Count][VulkanQueueKind_Count]; 1516 for (u32 i = 0; i < VulkanQueueKind_Count; i++) 1517 for (u32 j = 0; j < VulkanQueueKind_Count; j++) 1518 queue_priorities[i][j] = 1.0f; 1519 queue_priorities[queue_indices[VulkanQueueKind_Compute]][queue_subindices[VulkanQueueKind_Compute]] = 0.5f; 1520 1521 u32 queue_create_index = 0; 1522 b32 queue_info_filled[VulkanQueueKind_Count] = {0}; 1523 for (u32 q = 0; q < vk->unique_queues; q++) { 1524 u32 base_q = queue_indices[q]; 1525 if (!queue_info_filled[base_q]) { 1526 queue_create_infos[queue_create_index++] = (VkDeviceQueueCreateInfo){ 1527 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, 1528 .queueFamilyIndex = base_q, 1529 .queueCount = assigned_subindices[q], 1530 .pQueuePriorities = queue_priorities[q], 1531 }; 1532 } 1533 queue_info_filled[base_q] = 1; 1534 } 1535 1536 u32 enabled_count = 0; 1537 const char *enabled_extensions[MAX_ENABLED_EXTENSIONS]; 1538 1539 for EachElement(vk_required_device_extensions, it) 1540 enabled_extensions[enabled_count++] = (char *)vk_required_device_extensions[it].data; 1541 1542 for EachElement(vk_optional_device_extensions, it) 1543 if (vulkan_config.optional.E[it]) 1544 enabled_extensions[enabled_count++] = (char *)vk_optional_device_extensions[it].data; 1545 1546 for EachElement(vk_debug_extensions, it) 1547 if (vulkan_config.debug.E[it]) 1548 enabled_extensions[enabled_count++] = (char *)vk_debug_extensions[it].data; 1549 1550 VkDeviceCreateInfo device_create_info = { 1551 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, 1552 .pQueueCreateInfos = queue_create_infos, 1553 .queueCreateInfoCount = queue_create_index, 1554 .ppEnabledExtensionNames = enabled_extensions, 1555 .enabledExtensionCount = enabled_count, 1556 }; 1557 1558 VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR pdsre = { 1559 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR, 1560 .shaderRelaxedExtendedInstruction = 1, 1561 }; 1562 if (vulkan_config.debug.shader_relaxed_extended_instruction) { 1563 pdsre.pNext = (void *)device_create_info.pNext; 1564 device_create_info.pNext = &pdsre; 1565 } 1566 1567 VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_mat_features = { 1568 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR, 1569 .cooperativeMatrix = 1, 1570 .cooperativeMatrixRobustBufferAccess = 0, 1571 }; 1572 if (vk->gpu_info.cooperative_matrix) { 1573 coop_mat_features.pNext = (void *)device_create_info.pNext; 1574 device_create_info.pNext = &coop_mat_features; 1575 } 1576 1577 VkPhysicalDeviceVulkan13Features v13f = { 1578 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, 1579 .pNext = (void *)device_create_info.pNext, 1580 #define X(name, ...) .name = 1, 1581 VK_REQUIRED_PHYSICAL_13_FEATURES 1582 #undef X 1583 }; 1584 device_create_info.pNext = &v13f; 1585 1586 VkPhysicalDeviceVulkan12Features v12f = { 1587 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, 1588 .pNext = (void *)device_create_info.pNext, 1589 #define X(name, ...) .name = 1, 1590 VK_REQUIRED_PHYSICAL_12_FEATURES 1591 #undef X 1592 }; 1593 device_create_info.pNext = &v12f; 1594 1595 VkPhysicalDeviceVulkan11Features v11f = { 1596 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, 1597 .pNext = (void *)device_create_info.pNext, 1598 #define X(name, ...) .name = 1, 1599 VK_REQUIRED_PHYSICAL_11_FEATURES 1600 #undef X 1601 }; 1602 device_create_info.pNext = &v11f; 1603 1604 VkPhysicalDeviceFeatures2 device_features = { 1605 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, 1606 .pNext = (void *)device_create_info.pNext, 1607 .features = { 1608 #define X(name, ...) .name = 1, 1609 VK_REQUIRED_PHYSICAL_FEATURES 1610 #undef X 1611 }, 1612 }; 1613 device_create_info.pNext = &device_features; 1614 1615 vkCreateDevice(vk->physical_device, &device_create_info, 0, &vk->device); 1616 1617 #define X(name, ...) name = (name##_fn *)vkGetDeviceProcAddr(vk->device, #name); 1618 VkDeviceProcedureList 1619 #undef X 1620 1621 for (u32 q = 0; q < vk->unique_queues; q++) { 1622 VulkanQueue *qp = vk->queues[q]; 1623 vkGetDeviceQueue(vk->device, qp->queue_family, qp->queue_index, &qp->queue); 1624 1625 qp->timeline_semaphore = vk_make_semaphore(0); 1626 } 1627 1628 vk->queues[VulkanQueueKind_Graphics]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT; 1629 vk->queues[VulkanQueueKind_Compute]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; 1630 1631 for EachElement(vk->command_pools, it) { 1632 VulkanCommandPool *vcp = vk->command_pools[it]; 1633 1634 VkCommandPoolCreateInfo command_pool_create_info = { 1635 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, 1636 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, 1637 .queueFamilyIndex = vk->queues[it]->queue_family, 1638 }; 1639 1640 vkCreateCommandPool(vk->device, &command_pool_create_info, 0, &vcp->handle); 1641 1642 VkCommandBufferAllocateInfo command_buffer_allocate_info = { 1643 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 1644 .commandPool = vcp->handle, 1645 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1646 .commandBufferCount = countof(vcp->buffers), 1647 }; 1648 vkAllocateCommandBuffers(vk->device, &command_buffer_allocate_info, vcp->buffers); 1649 1650 VkQueryPoolCreateInfo query_pool_create_info = { 1651 .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, 1652 .queryType = VK_QUERY_TYPE_TIMESTAMP, 1653 .queryCount = MaxCommandBuffersInFlight * MaxCommandBufferTimestamps, 1654 }; 1655 vkCreateQueryPool(vk->device, &query_pool_create_info, 0, &vcp->query_pool); 1656 } 1657 } 1658 1659 function void 1660 vk_load_graphics(void) 1661 { 1662 VulkanContext *vk = vulkan_context; 1663 1664 // NOTE: swap chain image format 1665 { 1666 } 1667 1668 // NOTE: depth/stencil format 1669 { 1670 VkFormat depth_formats[] = { 1671 VK_FORMAT_D32_SFLOAT_S8_UINT, 1672 VK_FORMAT_D24_UNORM_S8_UINT, 1673 VK_FORMAT_D16_UNORM_S8_UINT, 1674 }; 1675 1676 vk->depth_stencil_format = VK_FORMAT_UNDEFINED; 1677 for EachElement(depth_formats, it) { 1678 VkFormatProperties3 format_properties3 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3}; 1679 VkFormatProperties2 format_properties2 = { 1680 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, 1681 .pNext = &format_properties3, 1682 }; 1683 vkGetPhysicalDeviceFormatProperties2(vk->physical_device, depth_formats[it], &format_properties2); 1684 if (format_properties3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT) { 1685 vk->depth_stencil_format = depth_formats[it]; 1686 break; 1687 } 1688 } 1689 } 1690 } 1691 1692 function void 1693 vk_load_descriptor_block(void) 1694 { 1695 // NOTE(rnp): 1696 // * One Descriptor Pool 1697 // * One Descriptor Set Per Resource Kind 1698 // * Shaders know the ResourceKind enumeration 1699 // * Shaders know the per set binding points 1700 1701 VulkanContext *vk = vulkan_context; 1702 1703 // NOTE(rnp): Pool 1704 VkDescriptorPoolSize pool_sizes[] = { 1705 { 1706 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1707 .descriptorCount = BeamformerShaderBufferSlot_Count, 1708 }, 1709 }; 1710 static_assert(countof(pool_sizes) == BeamformerShaderResourceKind_Count, ""); 1711 1712 VkDescriptorPoolCreateInfo pool_create_info = { 1713 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 1714 .maxSets = BeamformerShaderResourceKind_Count, 1715 .poolSizeCount = countof(pool_sizes), 1716 .pPoolSizes = pool_sizes, 1717 }; 1718 1719 vkCreateDescriptorPool(vk->device, &pool_create_info, 0, &vk->descriptor_pool); 1720 1721 // NOTE(rnp): Set Layouts 1722 VkDescriptorSetLayoutCreateInfo layout_create_info = { 1723 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 1724 }; 1725 1726 { 1727 VkDescriptorSetLayoutBinding layout_bindings[BeamformerShaderBufferSlot_Count]; 1728 for EachEnumValue(BeamformerShaderBufferSlot, it) { 1729 layout_bindings[it] = (VkDescriptorSetLayoutBinding){ 1730 .binding = it, 1731 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1732 .descriptorCount = 1, 1733 .stageFlags = VK_SHADER_STAGE_ALL, 1734 }; 1735 } 1736 layout_create_info.bindingCount = countof(layout_bindings), 1737 layout_create_info.pBindings = layout_bindings, 1738 vkCreateDescriptorSetLayout(vk->device, &layout_create_info, 0, 1739 vk->descriptor_set_layouts + BeamformerShaderResourceKind_Buffer); 1740 } 1741 1742 // NOTE(rnp): Sets 1743 VkDescriptorSetAllocateInfo set_allocate_info = { 1744 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 1745 .descriptorPool = vk->descriptor_pool, 1746 .descriptorSetCount = countof(vk->descriptor_sets), 1747 .pSetLayouts = vk->descriptor_set_layouts, 1748 }; 1749 static_assert(countof(vk->descriptor_set_layouts) == countof(vk->descriptor_sets), ""); 1750 vkAllocateDescriptorSets(vk->device, &set_allocate_info, vk->descriptor_sets); 1751 1752 vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, s8("Beamformer Resources"), s8("Pool")); 1753 1754 DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) { 1755 Arena scratch = vk->arena; 1756 for EachElement(vk->descriptor_sets, it) { 1757 Stream sb = arena_stream(scratch); 1758 stream_append_s8s(&sb, s8("Beamformer "), beamformer_shader_resource_kind_strings[it], s8("s")); 1759 vk_label_object(DESCRIPTOR_SET, vk->descriptor_sets[it], stream_to_s8(&sb), s8("Set")); 1760 vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_s8(&sb), s8("Set Layout")); 1761 } 1762 } 1763 } 1764 1765 /////////////////////// 1766 // NOTE(rnp): User API 1767 1768 DEBUG_IMPORT void 1769 vk_load(OSLibrary vulkan_library_handle, Arena *memory, Stream *err) 1770 { 1771 #define X(name, ...) name = (name##_fn *)os_lookup_symbol(vulkan_library_handle, #name); 1772 VkLoaderProcedureList 1773 #undef X 1774 1775 if (!vkGetInstanceProcAddr) { 1776 stream_append_s8(err, vulkan_info("fatal error: failed to find \"vkGetInstanceProcAddr\"\n")); 1777 fatal(stream_to_s8(err)); 1778 } 1779 1780 VulkanContext *vk = vulkan_context; 1781 vk->entity_arena = sub_arena_end(memory, KB(64), KB(4)); 1782 vk->arena = sub_arena_end(memory, KB(96), KB(4)); 1783 1784 vk_load_instance(vk->arena, err); 1785 vk_load_physical_device(vk->arena, err); 1786 vk_load_queues(&vk->arena, err); 1787 vk_load_graphics(); 1788 vk_load_descriptor_block(); 1789 1790 read_only local_persist s8 default_compute_shader = s8("" 1791 "#version 430 core\n" 1792 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1793 "void main() {}\n" 1794 "\n"); 1795 vk->default_compute_pipeline = vk_compute_pipeline_from_shader_text(vk->arena, default_compute_shader, 1796 s8("error_compute_shader"), 256); 1797 1798 read_only local_persist s8 default_vertex_shader = s8("" 1799 "#version 430 core\n" 1800 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1801 "void main() {gl_Position = vec4(0);}\n" 1802 "\n"); 1803 read_only local_persist s8 default_fragment_shader = s8("" 1804 "#version 430 core\n" 1805 "layout(location = 0) out vec4 out_colour;" 1806 "layout(push_constant) uniform pc { uint data[256 / 4]; };\n" 1807 "void main() {out_colour = vec4(0.5f, 0.0f, 0.5f, 1.0f);}\n" 1808 "\n"); 1809 1810 VulkanPipelineCreateInfo pipeline_create_infos[2] = { 1811 { 1812 .kind = VulkanShaderKind_Vertex, 1813 .text = default_vertex_shader, 1814 .name = s8("error_vertex_shader"), 1815 }, 1816 { 1817 .kind = VulkanShaderKind_Fragment, 1818 .text = default_fragment_shader, 1819 .name = s8("error_fragment_shader"), 1820 }, 1821 }; 1822 vk->default_graphics_pipeline = vk_graphics_pipeline_from_infos(vk->arena, pipeline_create_infos, 2, 256); 1823 1824 // TODO: setup ui render pipeline 1825 1826 if (err->widx > 0) { 1827 os_console_log(err->data, err->widx); 1828 stream_reset(err, 0); 1829 } 1830 } 1831 1832 DEBUG_IMPORT GPUInfo * 1833 vk_gpu_info(void) 1834 { 1835 return &vulkan_context->gpu_info; 1836 } 1837 1838 function void 1839 vk_vulkan_buffer_release(VulkanBuffer *vb) 1840 { 1841 VulkanContext *vk = vulkan_context; 1842 VulkanEntity *e = (VulkanEntity *)((u8 *)vb - offsetof(VulkanEntity, as)); 1843 // TODO(rnp): this happens implicitly, probably just delete this if block 1844 if (vb->host_pointer) 1845 vkUnmapMemory(vk->device, vb->memory); 1846 1847 if (vb->buffer) 1848 vkDestroyBuffer(vk->device, vb->buffer, 0); 1849 1850 vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? vb->memory_size : 0); 1851 vk_entity_release(e); 1852 } 1853 1854 DEBUG_IMPORT void 1855 vk_buffer_release(GPUBuffer *b) 1856 { 1857 if ValidVulkanHandle(b->handle) 1858 vk_vulkan_buffer_release(vk_entity_data(b->handle, VulkanEntityKind_Buffer)); 1859 zero_struct(b); 1860 } 1861 1862 DEBUG_IMPORT void 1863 vk_buffer_allocate(GPUBuffer *b, GPUBufferAllocateInfo *info) 1864 { 1865 VulkanContext *vk = vulkan_context; 1866 1867 vk_buffer_release(b); 1868 1869 assert(info->size > 0); 1870 1871 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Buffer); 1872 VulkanBufferAllocateInfo vulkan_buffer_allocate_info = { 1873 .gpu_buffer = b, 1874 .size = (u64)info->size, 1875 .flags = info->flags, 1876 .index_type = VK_INDEX_TYPE_NONE_KHR, 1877 .label = info->label, 1878 }; 1879 1880 u32 queue_index_hit_count[VulkanQueueKind_Count] = {0}; 1881 for (u32 it = 0; it < info->timeline_count; it++) 1882 queue_index_hit_count[vk->queue_indices[info->timelines_used[it]]]++; 1883 1884 for EachElement(queue_index_hit_count, it) { 1885 if (queue_index_hit_count[it] > 0) { 1886 u32 index = vulkan_buffer_allocate_info.queue_family_count++; 1887 vulkan_buffer_allocate_info.queue_family_indices[index] = vk->queues[vk->queue_indices[it]]->queue_family; 1888 } 1889 } 1890 1891 if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) { 1892 b->handle.value[0] = (u64)e; 1893 } else { 1894 vk_entity_release(e); 1895 } 1896 } 1897 1898 DEBUG_IMPORT b32 1899 vk_buffer_needs_sync(GPUBuffer *b) 1900 { 1901 b32 result = 0; 1902 if ValidVulkanHandle(b->handle) { 1903 VulkanBuffer *vb = vk_entity_data(b->handle, VulkanEntityKind_Buffer); 1904 1905 // TODO(rnp): not correct check. need to check if we used transfer queue 1906 result = vb->memory_kind != VulkanMemoryKind_BAR; 1907 } 1908 1909 return result; 1910 } 1911 1912 DEBUG_IMPORT u64 1913 vk_round_up_to_sync_size(u64 size, u64 min) 1914 { 1915 iz round = (iz)Max(min, vulkan_context->memory_info.non_coherent_atom_size); 1916 u64 result = (u64)round_up_to((iz)size, round); 1917 return result; 1918 } 1919 1920 function force_inline void 1921 vk_buffer_buffer_copy(VulkanBuffer *destination, VulkanBuffer *source, u64 destination_offset, u64 source_offset, u64 size, b32 non_temporal) 1922 { 1923 VulkanContext *vk = vulkan_context; 1924 1925 switch (source->memory_kind) { 1926 case VulkanMemoryKind_BAR: 1927 { 1928 switch (destination->memory_kind) { 1929 case VulkanMemoryKind_Host:{ 1930 if (destination->memory) { 1931 // TODO(rnp): there is likely a more efficient way of doing this in this case 1932 InvalidCodePath; 1933 } else { 1934 assert(source->host_pointer); 1935 b32 coherent = vk->memory_info.memory_host_coherent[source->memory_kind]; 1936 if (!coherent) { 1937 u64 nca_size = vk->memory_info.non_coherent_atom_size; 1938 VkMappedMemoryRange mrs[1] = {{ 1939 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 1940 .memory = source->memory, 1941 .offset = source_offset - (source_offset % nca_size), 1942 .size = vk_round_up_to_sync_size(size, nca_size), 1943 }}; 1944 vkInvalidateMappedMemoryRanges(vk->device, countof(mrs), mrs); 1945 } 1946 1947 void *dest = (u8 *)destination->host_pointer + destination_offset; 1948 void *src = (u8 *)source->host_pointer + source_offset; 1949 1950 // NOTE(rnp): don't trash the CPU cache for large data stores 1951 if (non_temporal) memory_copy_non_temporal(dest, src, size); 1952 else mem_copy(dest, src, size); 1953 } 1954 }break; 1955 InvalidDefaultCase; 1956 } 1957 }break; 1958 1959 case VulkanMemoryKind_Host:{ 1960 switch (destination->memory_kind) { 1961 case VulkanMemoryKind_BAR:{ 1962 assert(destination->host_pointer); 1963 1964 void *dest = (u8 *)destination->host_pointer + destination_offset; 1965 void *src = (u8 *)source->host_pointer + source_offset; 1966 1967 // NOTE(rnp): don't trash the CPU cache for large data stores 1968 if (non_temporal) memory_copy_non_temporal(dest, src, size); 1969 else mem_copy(dest, src, size); 1970 1971 b32 coherent = vk->memory_info.memory_host_coherent[destination->memory_kind]; 1972 if (!coherent) { 1973 u64 nca_size = vk->memory_info.non_coherent_atom_size; 1974 VkMappedMemoryRange mrs[1] = {{ 1975 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, 1976 .memory = destination->memory, 1977 .offset = destination_offset - (destination_offset % nca_size), 1978 .size = vk_round_up_to_sync_size(size, nca_size), 1979 }}; 1980 vkFlushMappedMemoryRanges(vk->device, countof(mrs), mrs); 1981 } 1982 }break; 1983 InvalidDefaultCase; 1984 1985 } 1986 }break; 1987 1988 // TODO(rnp): use transfer queue when not mapped 1989 InvalidDefaultCase; 1990 } 1991 } 1992 1993 DEBUG_IMPORT void 1994 vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal) 1995 { 1996 VulkanBuffer *db = vk_entity_data(b->handle, VulkanEntityKind_Buffer); 1997 VulkanBuffer sb = { 1998 .host_pointer = data, 1999 .memory_kind = VulkanMemoryKind_Host, 2000 }; 2001 vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal); 2002 } 2003 2004 DEBUG_IMPORT void 2005 vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 size, b32 non_temporal) 2006 { 2007 VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer); 2008 VulkanBuffer db = { 2009 .host_pointer = destination, 2010 .memory_kind = VulkanMemoryKind_Host, 2011 }; 2012 vk_buffer_buffer_copy(&db, sb, 0, offset, size, non_temporal); 2013 } 2014 2015 DEBUG_IMPORT void 2016 vk_render_model_release(GPUBuffer *model) 2017 { 2018 if ValidVulkanHandle(model->handle) 2019 vk_vulkan_buffer_release(vk_entity_data(model->handle, VulkanEntityKind_RenderModel)); 2020 zero_struct(model); 2021 } 2022 2023 DEBUG_IMPORT void 2024 vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 model_size, s8 label) 2025 { 2026 vk_render_model_release(model); 2027 2028 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_RenderModel); 2029 2030 assert(index_count <= U32_MAX); 2031 VkIndexType index_type; 2032 if (index_count <= U16_MAX) index_type = VK_INDEX_TYPE_UINT16; 2033 else index_type = VK_INDEX_TYPE_UINT32; 2034 2035 i64 indices_size = round_up_to(vk_index_size(index_type) * index_count, 64); 2036 2037 i64 size = round_up_to(model_size + indices_size, 64); 2038 assert(size > 0); 2039 2040 VulkanBufferAllocateInfo vulkan_buffer_allocate_info = { 2041 .gpu_buffer = model, 2042 .size = (u64)size, 2043 .flags = VulkanUsageFlag_HostReadWrite, 2044 .index_type = index_type, 2045 .label = label, 2046 .queue_family_count = 1, 2047 .queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family, 2048 }; 2049 if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) { 2050 model->handle.value[0] = (u64)e; 2051 model->index_count = index_count; 2052 model->gpu_pointer += indices_size; 2053 2054 VulkanBuffer sb = { 2055 .host_pointer = indices, 2056 .memory_kind = VulkanMemoryKind_Host, 2057 }; 2058 2059 vk_buffer_buffer_copy(&e->as.buffer, &sb, 0, 0, vk_index_size(index_type) * index_count, 0); 2060 } else { 2061 vk_entity_release(e); 2062 } 2063 } 2064 2065 DEBUG_IMPORT void 2066 vk_render_model_range_upload(GPUBuffer *model, void *data, u64 offset, u64 size, b32 non_temporal) 2067 { 2068 VulkanBuffer *db = vk_entity_data(model->handle, VulkanEntityKind_RenderModel); 2069 VulkanBuffer sb = { 2070 .host_pointer = data, 2071 .memory_kind = VulkanMemoryKind_Host, 2072 }; 2073 2074 offset += round_up_to(vk_index_size(db->index_type) * model->index_count, 64); 2075 2076 vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal); 2077 } 2078 2079 DEBUG_IMPORT void 2080 vk_image_release(GPUImage *image) 2081 { 2082 if ValidVulkanHandle(image->image) { 2083 VulkanContext *vk = vulkan_context; 2084 VulkanImage *vi = vk_entity_data(image->image, VulkanEntityKind_Image); 2085 2086 vkDestroyImageView(vk->device, vi->view, 0); 2087 vkDestroyImage(vk->device, vi->image, 0); 2088 vk_release_memory(vi->memory, image->memory_size); 2089 2090 vk_entity_release((VulkanEntity *)image->image.value[0]); 2091 } 2092 zero_struct(image); 2093 } 2094 2095 DEBUG_IMPORT void 2096 vk_image_allocate(GPUImage *image, u32 width, u32 height, u32 mips, u32 samples, 2097 VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label) 2098 { 2099 assert(IsPowerOfTwo(samples)); 2100 2101 vk_image_release(image); 2102 2103 VulkanContext *vk = vulkan_context; 2104 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Image); 2105 VulkanImage *vi = &e->as.image; 2106 2107 image->image.value[0] = (u64)e; 2108 image->width = Min(width, vk->gpu_info.max_image_dimension_2D); 2109 image->height = Min(height, vk->gpu_info.max_image_dimension_2D); 2110 image->mip_map_levels = Max(mips, 1); 2111 image->samples = Min(samples, vk->gpu_info.max_msaa_samples); 2112 2113 VkFormat usage_format_map[VulkanImageUsage_Count + 1] = { 2114 [VulkanImageUsage_None] = VK_FORMAT_UNDEFINED, 2115 //[VulkanImageUsage_Colour] = VK_FORMAT_R8G8B8A8_SRGB, 2116 [VulkanImageUsage_Colour] = VK_FORMAT_R8G8B8A8_UNORM, 2117 [VulkanImageUsage_DepthStencil] = vk->depth_stencil_format, 2118 [VulkanImageUsage_Count] = VK_FORMAT_UNDEFINED, 2119 }; 2120 2121 read_only local_persist VkImageUsageFlagBits usage_extra_bit_map[VulkanImageUsage_Count + 1] = { 2122 [VulkanImageUsage_None] = 0, 2123 [VulkanImageUsage_Colour] = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 2124 [VulkanImageUsage_DepthStencil] = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 2125 [VulkanImageUsage_Count] = 0, 2126 }; 2127 2128 read_only local_persist VkImageAspectFlags usage_image_aspect_map[VulkanImageUsage_Count + 1] = { 2129 [VulkanImageUsage_None] = 0, 2130 [VulkanImageUsage_Colour] = VK_IMAGE_ASPECT_COLOR_BIT, 2131 [VulkanImageUsage_DepthStencil] = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT, 2132 [VulkanImageUsage_Count] = 0, 2133 }; 2134 2135 usage = Clamp((u32)usage, 0, VulkanImageUsage_Count); 2136 VkImageUsageFlagBits usage_flags = usage_extra_bit_map[usage]; 2137 2138 if (flags & VulkanUsageFlag_ImageSampling) usage_flags |= VK_IMAGE_USAGE_SAMPLED_BIT; 2139 if (flags & VulkanUsageFlag_TransferSource) usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; 2140 if (flags & VulkanUsageFlag_TransferDestination) usage_flags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; 2141 2142 u32 queue_family = vk->queues[VulkanQueueKind_Graphics]->queue_family; 2143 VkImageCreateInfo image_create_info = { 2144 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 2145 .flags = export ? VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT : 0, 2146 .imageType = VK_IMAGE_TYPE_2D, 2147 .format = usage_format_map[usage], 2148 .extent = {image->width, image->height, 1}, 2149 .mipLevels = image->mip_map_levels, 2150 .arrayLayers = 1, 2151 .samples = image->samples, 2152 .tiling = VK_IMAGE_TILING_OPTIMAL, 2153 .usage = usage_flags, 2154 // NOTE(rnp): needed if multiple queue families are accessed 2155 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 2156 .queueFamilyIndexCount = 1, 2157 .pQueueFamilyIndices = &queue_family, 2158 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2159 }; 2160 2161 VkExternalMemoryImageCreateInfo external_memory_image_create_info = { 2162 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, 2163 .handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT 2164 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, 2165 }; 2166 2167 if (export) image_create_info.pNext = &external_memory_image_create_info; 2168 2169 vkCreateImage(vk->device, &image_create_info, 0, &vi->image); 2170 2171 VkMemoryRequirements memory_requirements; 2172 vkGetImageMemoryRequirements(vk->device, vi->image, &memory_requirements); 2173 2174 VkMemoryDedicatedAllocateInfo dedicated_allocate_info = { 2175 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, 2176 .image = vi->image, 2177 }; 2178 2179 if (vk_allocate_memory(&vi->memory, memory_requirements.size, VulkanMemoryKind_Device, 0, &dedicated_allocate_info, export)) { 2180 image->memory_size = memory_requirements.size; 2181 vkBindImageMemory(vk->device, vi->image, vi->memory, 0); 2182 2183 VkImageViewCreateInfo image_view_info = { 2184 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 2185 .image = vi->image, 2186 .viewType = VK_IMAGE_VIEW_TYPE_2D, 2187 .format = usage_format_map[usage], 2188 .subresourceRange = { 2189 .aspectMask = usage_image_aspect_map[usage], 2190 .baseMipLevel = 0, 2191 .levelCount = 1, 2192 .baseArrayLayer = 0, 2193 .layerCount = 1, 2194 }, 2195 }; 2196 vkCreateImageView(vk->device, &image_view_info, 0, &vi->view); 2197 2198 vk_label_object(IMAGE, vi->image, label, s8("Image")); 2199 vk_label_object(IMAGE_VIEW, vi->view, label, s8("Image View")); 2200 vk_label_object(DEVICE_MEMORY, vi->memory, label, s8("Memory")); 2201 } else { 2202 vkDestroyImage(vk->device, vi->image, 0); 2203 vk_entity_release(e); 2204 zero_struct(image); 2205 } 2206 } 2207 2208 DEBUG_IMPORT VulkanHandle 2209 vk_create_semaphore(OSHandle *export) 2210 { 2211 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Semaphore); 2212 e->as.semaphore = vk_make_semaphore(export); 2213 VulkanHandle result = {(u64)e}; 2214 return result; 2215 } 2216 2217 DEBUG_IMPORT b32 2218 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns) 2219 { 2220 b32 result = 0; 2221 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2222 VulkanContext *vk = vulkan_context; 2223 VulkanQueue *vq = vk->queues[timeline]; 2224 VkSemaphoreWaitInfo semaphore_wait_info = { 2225 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, 2226 .pSemaphores = &vq->timeline_semaphore.semaphore, 2227 .semaphoreCount = 1, 2228 .pValues = &value, 2229 }; 2230 result = vkWaitSemaphores(vk->device, &semaphore_wait_info, timeout_ns) == VK_SUCCESS; 2231 } 2232 return result; 2233 } 2234 2235 DEBUG_IMPORT u64 2236 vk_host_signal_timeline(VulkanTimeline timeline) 2237 { 2238 u64 result = -1; 2239 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2240 VulkanContext *vk = vulkan_context; 2241 VulkanQueue *vq = vk->queues[timeline]; 2242 VulkanSemaphore *vs = &vq->timeline_semaphore; 2243 result = ++vs->value; 2244 VkSemaphoreSignalInfo ssi = { 2245 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, 2246 .semaphore = vs->semaphore, 2247 .value = result, 2248 }; 2249 vkSignalSemaphore(vk->device, &ssi); 2250 } 2251 return result; 2252 } 2253 2254 DEBUG_IMPORT VulkanHandle 2255 vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size) 2256 { 2257 assert(Between(count, 1, 2)); 2258 assert(count == 2 || infos[0].kind == VulkanShaderKind_Compute); 2259 2260 VulkanHandle result = {0}; 2261 DeferLoop(take_lock(&vulkan_context->arena_lock, -1), release_lock(&vulkan_context->arena_lock)) 2262 { 2263 Arena arena = vulkan_context->arena; 2264 2265 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Pipeline); 2266 result = (VulkanHandle){(u64)e}; 2267 2268 if (count == 2) e->as.pipeline = vk_graphics_pipeline_from_infos(arena, infos, count, push_constants_size); 2269 else e->as.pipeline = vk_compute_pipeline_from_shader_text(arena, infos[0].text, infos[0].name, push_constants_size); 2270 } 2271 return result; 2272 } 2273 2274 DEBUG_IMPORT b32 2275 vk_pipeline_valid(VulkanHandle h) 2276 { 2277 b32 result = 0; 2278 if ValidVulkanHandle(h) { 2279 VulkanPipeline *vp = vk_entity_data(h, VulkanEntityKind_Pipeline); 2280 if (vp->stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) 2281 result = vp->pipeline != vulkan_context->default_compute_pipeline.pipeline; 2282 else 2283 result = vp->pipeline != vulkan_context->default_graphics_pipeline.pipeline; 2284 } 2285 return result; 2286 } 2287 2288 DEBUG_IMPORT void 2289 vk_pipeline_release(VulkanHandle h) 2290 { 2291 if (vk_pipeline_valid(h)) { 2292 VulkanEntity *e = (VulkanEntity *)h.value[0]; 2293 VulkanTimeline timeline; 2294 if (e->as.pipeline.stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) timeline = VulkanTimeline_Compute; 2295 else timeline = VulkanTimeline_Graphics; 2296 2297 // NOTE(rnp): block more command buffers from being recorded 2298 VulkanCommandPool *vcp = vulkan_context->command_pools[timeline]; 2299 DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) { 2300 u32 index = (vcp->next_index - 1) % countof(vcp->buffers); 2301 vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL); 2302 vkDestroyPipeline(vulkan_context->device, e->as.pipeline.pipeline, 0); 2303 vkDestroyPipelineLayout(vulkan_context->device, e->as.pipeline.layout, 0); 2304 2305 if (&e->as.pipeline == vcp->bound_pipeline) 2306 vcp->bound_pipeline = 0; 2307 } 2308 vk_entity_release(e); 2309 } 2310 } 2311 2312 DEBUG_IMPORT void 2313 vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count) 2314 { 2315 VulkanContext *vk = vulkan_context; 2316 2317 VkWriteDescriptorSet write_sets[BeamformerShaderResourceKind_Count] = {0}; 2318 2319 for EachIndex(info_count, it) { 2320 switch (infos[it].kind) { 2321 case BeamformerShaderResourceKind_Buffer:{ 2322 VulkanBuffer *vb = vk_entity_data(infos[it].handle, VulkanEntityKind_Buffer); 2323 vk->descriptor_buffer_infos[infos[it].slot].buffer = vb->buffer; 2324 vk->descriptor_buffer_infos[infos[it].slot].offset = 0; 2325 vk->descriptor_buffer_infos[infos[it].slot].range = vb->memory_size; 2326 }break; 2327 2328 InvalidDefaultCase; 2329 } 2330 } 2331 2332 write_sets[BeamformerShaderResourceKind_Buffer].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; 2333 write_sets[BeamformerShaderResourceKind_Buffer].dstSet = vk->descriptor_sets[BeamformerShaderResourceKind_Buffer]; 2334 write_sets[BeamformerShaderResourceKind_Buffer].dstBinding = 0; 2335 write_sets[BeamformerShaderResourceKind_Buffer].descriptorCount = countof(vk->descriptor_buffer_infos); 2336 write_sets[BeamformerShaderResourceKind_Buffer].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 2337 write_sets[BeamformerShaderResourceKind_Buffer].pBufferInfo = vk->descriptor_buffer_infos; 2338 2339 vkUpdateDescriptorSets(vk->device, countof(write_sets), write_sets, 0, 0); 2340 } 2341 2342 DEBUG_IMPORT VulkanHandle 2343 vk_command_begin(VulkanTimeline timeline) 2344 { 2345 VulkanHandle result = {0}; 2346 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2347 VulkanContext *vk = vulkan_context; 2348 VulkanCommandPool *vcp = vk->command_pools[timeline]; 2349 2350 take_lock(&vcp->lock, -1); 2351 2352 VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_CommandBuffer); 2353 VulkanCommandBuffer *vcb = &e->as.command_buffer; 2354 vcb->timeline = timeline; 2355 vcb->buffer_index = vcp->next_index++ % countof(vcp->buffers); 2356 2357 u32 index = vcb->buffer_index; 2358 // TODO(rnp): probably not the best to have this here but it will likely not be hit 2359 b32 wait_result = vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL); 2360 assert(wait_result); 2361 2362 vcp->queries_occupied[index] = 0; 2363 2364 VkCommandBufferBeginInfo buffer_begin_info = { 2365 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 2366 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 2367 }; 2368 2369 vkBeginCommandBuffer(vcp->buffers[index], &buffer_begin_info); 2370 vkCmdResetQueryPool(vcp->buffers[index], vcp->query_pool, index * MaxCommandBufferTimestamps, 2371 MaxCommandBufferTimestamps); 2372 2373 result = (VulkanHandle){(u64)e}; 2374 } 2375 return result; 2376 } 2377 2378 DEBUG_IMPORT void 2379 vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline) 2380 { 2381 if ValidVulkanHandle(command) { 2382 VulkanContext *vk = vulkan_context; 2383 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2384 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2385 2386 VulkanPipeline *vp = 0; 2387 if ValidVulkanHandle(pipeline) { 2388 vp = vk_entity_data(pipeline, VulkanEntityKind_Pipeline); 2389 } else if (vcb->timeline == VulkanTimeline_Compute) { 2390 vp = &vk->default_compute_pipeline; 2391 } else if (vcb->timeline == VulkanTimeline_Graphics) { 2392 vp = &vk->default_graphics_pipeline; 2393 } else { 2394 InvalidCodePath; 2395 } 2396 2397 read_only local_persist VkPipelineBindPoint bind_point_lut[VulkanTimeline_Count] = { 2398 [VulkanTimeline_Graphics] = VK_PIPELINE_BIND_POINT_GRAPHICS, 2399 [VulkanTimeline_Compute] = VK_PIPELINE_BIND_POINT_COMPUTE, 2400 [VulkanTimeline_Transfer] = -1, 2401 }; 2402 2403 VkPipelineBindPoint bind_point = bind_point_lut[vcb->timeline]; 2404 assert(bind_point != (VkPipelineBindPoint)-1); 2405 2406 vkCmdBindPipeline(vcp->buffers[vcb->buffer_index], bind_point, vp->pipeline); 2407 vkCmdBindDescriptorSets(vcp->buffers[vcb->buffer_index], bind_point, vp->layout, 2408 0, countof(vk->descriptor_sets), vk->descriptor_sets, 0, 0); 2409 vcp->bound_pipeline = vp; 2410 } 2411 } 2412 2413 DEBUG_IMPORT void 2414 vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count) 2415 { 2416 if ValidVulkanHandle(command) { 2417 VulkanContext *vk = vulkan_context; 2418 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2419 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2420 VulkanQueue *vq = vk->queues[vcb->timeline]; 2421 2422 DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) 2423 { 2424 Arena arena = vk->arena; 2425 u32 valid_count = 0; 2426 VkBufferMemoryBarrier2 *memory_barriers = push_array(&arena, VkBufferMemoryBarrier2, count); 2427 for (u64 it = 0; it < count; it++) { 2428 if ValidVulkanHandle(barriers[it].gpu_buffer->handle) { 2429 u32 index = valid_count++; 2430 VulkanBuffer *vb = vk_entity_data(barriers[it].gpu_buffer->handle, VulkanEntityKind_Buffer); 2431 memory_barriers[index].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2; 2432 memory_barriers[index].srcStageMask = vq->pipeline_stage_flags; 2433 memory_barriers[index].srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT; 2434 memory_barriers[index].dstStageMask = vq->pipeline_stage_flags; 2435 memory_barriers[index].dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT; 2436 memory_barriers[index].srcQueueFamilyIndex = vq->queue_family; 2437 memory_barriers[index].dstQueueFamilyIndex = vq->queue_family; 2438 memory_barriers[index].buffer = vb->buffer; 2439 memory_barriers[index].offset = barriers[it].offset; 2440 memory_barriers[index].size = barriers[it].size; 2441 } 2442 } 2443 2444 VkDependencyInfo dependancy_info = { 2445 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, 2446 .bufferMemoryBarrierCount = valid_count, 2447 .pBufferMemoryBarriers = memory_barriers, 2448 }; 2449 2450 vkCmdPipelineBarrier2(vcp->buffers[vcb->buffer_index], &dependancy_info); 2451 } 2452 } 2453 } 2454 2455 DEBUG_IMPORT void 2456 vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch) 2457 { 2458 assert(dispatch.x <= U16_MAX); 2459 assert(dispatch.y <= U16_MAX); 2460 assert(dispatch.z <= U16_MAX); 2461 if ValidVulkanHandle(command) { 2462 VkCommandBuffer cmd = vk_command_buffer(command); 2463 vkCmdDispatch(cmd, dispatch.x, dispatch.y, dispatch.z); 2464 } 2465 } 2466 2467 DEBUG_IMPORT void 2468 vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values) 2469 { 2470 if ValidVulkanHandle(command) { 2471 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2472 VulkanCommandPool *vcp = vulkan_context->command_pools[vcb->timeline]; 2473 VulkanPipeline *vp = vcp->bound_pipeline; 2474 2475 assert(vp); 2476 2477 vkCmdPushConstants(vcp->buffers[vcb->buffer_index], vp->layout, vp->stage_flags, offset, size, values); 2478 } 2479 } 2480 2481 DEBUG_IMPORT void 2482 vk_command_timestamp(VulkanHandle command) 2483 { 2484 if ValidVulkanHandle(command) { 2485 VulkanContext *vk = vulkan_context; 2486 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2487 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2488 2489 read_only local_persist VkPipelineStageFlags2 stage_lut[VulkanTimeline_Count] = { 2490 [VulkanTimeline_Graphics] = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT, 2491 [VulkanTimeline_Compute] = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, 2492 [VulkanTimeline_Transfer] = -1, 2493 }; 2494 2495 VkPipelineStageFlags2 stage = stage_lut[vcb->timeline]; 2496 assert(stage != (VkPipelineStageFlags2)-1); 2497 2498 if (vcp->queries_occupied[vcb->buffer_index] < MaxCommandBufferTimestamps) { 2499 u32 query_index = vcp->queries_occupied[vcb->buffer_index]++; 2500 vkCmdWriteTimestamp2(vcp->buffers[vcb->buffer_index], stage, vcp->query_pool, 2501 vcb->buffer_index * MaxCommandBufferTimestamps + query_index); 2502 } 2503 } 2504 } 2505 2506 DEBUG_IMPORT void 2507 vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value) 2508 { 2509 if (ValidVulkanHandle(command) && Between(timeline, 0, VulkanTimeline_Count - 1)) { 2510 VulkanContext *vk = vulkan_context; 2511 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2512 2513 u32 wait_index = vk->queue_indices[timeline]; 2514 vcb->in_flight_wait_values[wait_index] = Max(value, vcb->in_flight_wait_values[wait_index]); 2515 } 2516 } 2517 2518 DEBUG_IMPORT u64 2519 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore) 2520 { 2521 u64 result = -1; 2522 if ValidVulkanHandle(command) { 2523 VulkanContext *vk = vulkan_context; 2524 VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer); 2525 VulkanCommandPool *vcp = vk->command_pools[vcb->timeline]; 2526 VulkanQueue *vq = vk->queues[vcb->timeline]; 2527 VulkanSemaphore *vs = &vq->timeline_semaphore; 2528 2529 vkEndCommandBuffer(vcp->buffers[vcb->buffer_index]); 2530 2531 DeferLoop(take_lock(&vq->lock, -1), release_lock(&vq->lock)) { 2532 VkCommandBufferSubmitInfo command_buffer_submit_info = { 2533 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, 2534 .commandBuffer = vcp->buffers[vcb->buffer_index], 2535 }; 2536 2537 result = ++vs->value; 2538 2539 u32 signal_submit_info_count = 1; 2540 VkSemaphoreSubmitInfo signal_submit_infos[2] = {{ 2541 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2542 .semaphore = vs->semaphore, 2543 .value = result, 2544 .stageMask = vq->pipeline_stage_flags, 2545 }}; 2546 2547 if ValidVulkanHandle(finished_semaphore) { 2548 VulkanSemaphore *fs = vk_entity_data(finished_semaphore, VulkanEntityKind_Semaphore); 2549 signal_submit_infos[signal_submit_info_count++] = (VkSemaphoreSubmitInfo){ 2550 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2551 .semaphore = fs->semaphore, 2552 .stageMask = vq->pipeline_stage_flags, 2553 }; 2554 } 2555 2556 u32 wait_submit_info_count = 0; 2557 VkSemaphoreSubmitInfo wait_submit_infos[VulkanQueueKind_Count + 1]; 2558 for (u32 i = 0; i < vk->unique_queues; i++) { 2559 u32 queue_index = vk->queue_indices[i]; 2560 if (vcb->in_flight_wait_values[queue_index] > 0) { 2561 VulkanQueue *q = vk->queues[queue_index]; 2562 VkSemaphoreSubmitInfo wait_ssi = { 2563 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2564 .semaphore = q->timeline_semaphore.semaphore, 2565 .value = vcb->in_flight_wait_values[queue_index], 2566 .stageMask = q->pipeline_stage_flags, 2567 }; 2568 wait_submit_infos[wait_submit_info_count++] = wait_ssi; 2569 } 2570 } 2571 2572 if ValidVulkanHandle(wait_semaphore) { 2573 VulkanSemaphore *ws = vk_entity_data(wait_semaphore, VulkanEntityKind_Semaphore); 2574 wait_submit_infos[wait_submit_info_count++] = (VkSemaphoreSubmitInfo){ 2575 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, 2576 .semaphore = ws->semaphore, 2577 .stageMask = vq->pipeline_stage_flags, 2578 }; 2579 } 2580 2581 VkSubmitInfo2 submit_info = { 2582 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, 2583 .commandBufferInfoCount = 1, 2584 .pCommandBufferInfos = &command_buffer_submit_info, 2585 .waitSemaphoreInfoCount = wait_submit_info_count, 2586 .pWaitSemaphoreInfos = wait_submit_infos, 2587 .signalSemaphoreInfoCount = signal_submit_info_count, 2588 .pSignalSemaphoreInfos = signal_submit_infos, 2589 }; 2590 2591 vkQueueSubmit2(vq->queue, 1, &submit_info, 0); 2592 2593 vcp->bound_pipeline = 0; 2594 2595 atomic_store_u64(vcp->submission_values + vcb->buffer_index, result); 2596 } 2597 2598 release_lock(&vcp->lock); 2599 2600 vk_entity_release((VulkanEntity *)command.value[0]); 2601 } 2602 return result; 2603 } 2604 2605 DEBUG_IMPORT void 2606 vk_command_begin_rendering(VulkanHandle command, GPUImage *colour, GPUImage *depth, GPUImage *resolve) 2607 { 2608 if ValidVulkanHandle(command) { 2609 VkCommandBuffer cmd = vk_command_buffer(command); 2610 2611 assert((colour->width == depth->width) && (colour->height == depth->height)); 2612 2613 VulkanImage *ci = vk_entity_data(colour->image, VulkanEntityKind_Image); 2614 VulkanImage *di = vk_entity_data(depth->image, VulkanEntityKind_Image); 2615 VulkanImage *ri = 0; 2616 if (resolve) ri = vk_entity_data(resolve->image, VulkanEntityKind_Image); 2617 2618 // NOTE: Layout Transitions 2619 { 2620 u32 image_memory_barrier_count = 2; 2621 VkImageMemoryBarrier2 image_memory_barriers[3] = { 2622 { 2623 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2624 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, 2625 .srcAccessMask = 0, 2626 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT, 2627 .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, 2628 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2629 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2630 .image = ci->image, 2631 .subresourceRange = { 2632 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 2633 .baseMipLevel = 0, 2634 .levelCount = 1, 2635 .baseArrayLayer = 0, 2636 .layerCount = 1, 2637 }, 2638 }, 2639 { 2640 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2641 .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, 2642 .srcAccessMask = 0, 2643 .dstStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT, 2644 .dstAccessMask = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 2645 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2646 .newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 2647 .image = di->image, 2648 .subresourceRange = { 2649 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT, 2650 .baseMipLevel = 0, 2651 .levelCount = 1, 2652 .baseArrayLayer = 0, 2653 .layerCount = 1, 2654 }, 2655 }, 2656 }; 2657 2658 if (resolve) image_memory_barriers[image_memory_barrier_count++] = (VkImageMemoryBarrier2){ 2659 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, 2660 .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, 2661 .srcAccessMask = 0, 2662 .dstStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT|VK_PIPELINE_STAGE_2_RESOLVE_BIT, 2663 .dstAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, 2664 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, 2665 .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2666 .image = ri->image, 2667 .subresourceRange = { 2668 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 2669 .baseMipLevel = 0, 2670 .levelCount = 1, 2671 .baseArrayLayer = 0, 2672 .layerCount = 1, 2673 }, 2674 }; 2675 2676 VkDependencyInfo dependency_info = { 2677 .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, 2678 .imageMemoryBarrierCount = image_memory_barrier_count, 2679 .pImageMemoryBarriers = image_memory_barriers, 2680 }; 2681 2682 vkCmdPipelineBarrier2(cmd, &dependency_info); 2683 } 2684 2685 VkRenderingAttachmentInfo colour_attachment = { 2686 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, 2687 .imageView = ci->view, 2688 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2689 .resolveMode = ri ? VK_RESOLVE_MODE_AVERAGE_BIT : 0, 2690 .resolveImageView = ri ? ri->view : 0, 2691 .resolveImageLayout = ri ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : 0, 2692 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, 2693 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 2694 .clearValue = {.color = {{0.0f, 0.0f, 0.0f, 0.0f}}}, 2695 }; 2696 2697 VkRenderingAttachmentInfo depth_stencil_attachment = { 2698 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, 2699 .imageView = di->view, 2700 .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, 2701 .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, 2702 .storeOp = VK_ATTACHMENT_STORE_OP_STORE, 2703 .clearValue = {.depthStencil = {1.0f, 0}}, 2704 }; 2705 2706 VkRenderingInfo rendering_info = { 2707 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, 2708 .renderArea = {.offset = {0}, .extent = {colour->width, colour->height}}, 2709 .layerCount = 1, 2710 .colorAttachmentCount = 1, 2711 .pColorAttachments = &colour_attachment, 2712 .pDepthAttachment = &depth_stencil_attachment, 2713 .pStencilAttachment = &depth_stencil_attachment, 2714 }; 2715 2716 vkCmdBeginRendering(cmd, &rendering_info); 2717 } 2718 } 2719 2720 DEBUG_IMPORT void 2721 vk_command_draw(VulkanHandle command, GPUBuffer *model) 2722 { 2723 if (ValidVulkanHandle(command) && ValidVulkanHandle(model->handle)) { 2724 VkCommandBuffer cmd = vk_command_buffer(command); 2725 VulkanBuffer *vb = vk_entity_data(model->handle, VulkanEntityKind_RenderModel); 2726 vkCmdBindIndexBuffer2(cmd, vb->buffer, 0, vk_index_size(vb->index_type) * model->index_count, vb->index_type); 2727 vkCmdDrawIndexed(cmd, model->index_count, 1, 0, 0, 0); 2728 } 2729 } 2730 2731 DEBUG_IMPORT void 2732 vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset) 2733 { 2734 if ValidVulkanHandle(command) { 2735 VkCommandBuffer cmd = vk_command_buffer(command); 2736 VkRect2D scissor = {.offset = {x_offset, y_offset}, .extent = {width, height}}; 2737 vkCmdSetScissor(cmd, 0, 1, &scissor); 2738 } 2739 } 2740 2741 DEBUG_IMPORT void 2742 vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth) 2743 { 2744 if ValidVulkanHandle(command) { 2745 VkCommandBuffer cmd = vk_command_buffer(command); 2746 VkViewport viewport = {x_offset, y_offset, width, height, min_depth, max_depth}; 2747 vkCmdSetViewport(cmd, 0, 1, &viewport); 2748 } 2749 } 2750 2751 DEBUG_IMPORT void 2752 vk_command_end_rendering(VulkanHandle command) 2753 { 2754 if ValidVulkanHandle(command) vkCmdEndRendering(vk_command_buffer(command)); 2755 } 2756 2757 DEBUG_IMPORT void 2758 vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination, 2759 GPUBuffer *restrict source, u64 source_offset, i64 size) 2760 { 2761 if (ValidVulkanHandle(command) && ValidVulkanHandle(destination->handle) && ValidVulkanHandle(source->handle)) { 2762 VkCommandBuffer cmd = vk_command_buffer(command); 2763 VulkanBuffer *db = vk_entity_data(destination->handle, VulkanEntityKind_Buffer); 2764 VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer); 2765 2766 VkBufferCopy2 buffer_copy = { 2767 .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2, 2768 .srcOffset = source_offset, 2769 .dstOffset = 0, 2770 .size = size, 2771 }; 2772 2773 VkCopyBufferInfo2 copy_buffer_info = { 2774 .sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2, 2775 .srcBuffer = sb->buffer, 2776 .dstBuffer = db->buffer, 2777 .regionCount = 1, 2778 .pRegions = &buffer_copy, 2779 }; 2780 2781 vkCmdCopyBuffer2(cmd, ©_buffer_info); 2782 } 2783 } 2784 2785 DEBUG_IMPORT u64 * 2786 vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena) 2787 { 2788 u64 *result = 0; 2789 if Between(timeline, 0, VulkanTimeline_Count - 1) { 2790 VulkanContext *vk = vulkan_context; 2791 VulkanCommandPool *vcp = vk->command_pools[timeline]; 2792 DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) { 2793 u32 index = (vcp->next_index - 1) % countof(vcp->buffers); 2794 u32 count = vcp->queries_occupied[index]; 2795 if (count > 0) { 2796 result = push_array(arena, u64, count + 1); 2797 result[0] = count; 2798 2799 vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL); 2800 2801 vkGetQueryPoolResults(vk->device, vcp->query_pool, index * MaxCommandBufferTimestamps, count, 2802 count * sizeof(u64), result + 1, 8, VK_QUERY_RESULT_WAIT_BIT); 2803 } 2804 } 2805 } else { 2806 result = push_array(arena, u64, 1); 2807 } 2808 return result; 2809 }