ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

vulkan.c (99298B)


      1 /* See LICENSE for license details. */
      2 // TODO(rnp)
      3 // [ ]: what is needed for HDR? I think it makes sense to just default to it nowadays
      4 // [ ]: once opengl is removed switch images to SRGB and/or 16 bit Float
      5 
      6 #include "beamformer_internal.h"
      7 #include "vulkan.h"
      8 #include "external/glslang/glslang/Include/glslang_c_interface.h"
      9 
        // NOTE: compile time switch, currently 0. The code which consults it is not in this
        // part of the file; presumably non-zero collapses all queue kinds onto one queue.
     10 #define ForceSingleQueue (0)
     11 
        // NOTE: build an s8 from a string literal with the subsystem tag prepended
     12 #define glslang_info(s) s8("[glslang] " s)
     13 #define vulkan_info(s)  s8("[vulkan]  " s)
     14 
        // NOTE: a handle counts as valid when its first value slot is non-zero
     15 #define ValidVulkanHandle(h) ((h).value[0] != 0)
     16 
        // NOTE: MaxCommandBuffersInFlight sizes the per-slot arrays in VulkanCommandPool.
        // MaxCommandBufferTimestamps is presumably the timestamp query budget - TODO confirm.
     17 #define MaxCommandBuffersInFlight  BeamformerMaxRawDataFramesInFlight
     18 #define MaxCommandBufferTimestamps (1024)
     19 
        // NOTE: the roles a queue can fill. Count must stay last: it sizes the arrays which
        // are indexed by queue kind (e.g. VulkanContext queues and queue_indices).
     20 typedef enum {
     21 	VulkanQueueKind_Graphics,
     22 	VulkanQueueKind_Compute,
     23 	VulkanQueueKind_Transfer,
     24 	VulkanQueueKind_Count,
     25 } VulkanQueueKind;
     26 
        // NOTE: categories of memory an allocation can be requested from. Count must stay
        // last: it sizes the per kind tables in VulkanContext memory_info.
        // NOTE(review): presumably Device = device local, BAR = host visible device memory,
        // Host = system memory; the selection code is not in this part of the file - confirm.
     27 typedef enum {
     28 	VulkanMemoryKind_Device,
     29 	VulkanMemoryKind_BAR,
     30 	VulkanMemoryKind_Host,
     31 	VulkanMemoryKind_Count,
     32 } VulkanMemoryKind;
     33 
        // NOTE: payload of a VulkanEntityKind_Buffer entity: a VkBuffer together with the
        // memory object backing it.
     34 typedef struct {
     35 	VkDeviceMemory    memory;
     36 	VkBuffer          buffer;
        	// NOTE(review): presumably the size in bytes of the backing allocation - confirm
     37 	u64               memory_size;
     38 
        	// NOTE(review): presumably the mapped address for host visible memory - confirm
     39 	void *            host_pointer;
     40 
     41 	VulkanMemoryKind  memory_kind;
     42 
     43 	// NOTE: only used when the buffer is backing a VulkanRenderModel.
     44 	VkIndexType       index_type;
     45 } VulkanBuffer;
     46 
        // NOTE: payload of a VulkanEntityKind_Image entity: the image, the memory object
        // stored alongside it, and a view of the image.
     47 typedef struct {
     48 	VkDeviceMemory    memory;
     49 	VkImage           image;
     50 	VkImageView       view;
     51 } VulkanImage;
     52 
        // NOTE: a pipeline bundled with the layout it was created against. stage_flags holds
        // the shader stages the pipeline was built from; the pipeline constructors in this
        // file also use the same flags for the push constant range.
     53 typedef struct {
     54 	VkPipeline         pipeline;
     55 	VkPipelineLayout   layout;
     56 	VkShaderStageFlags stage_flags;
     57 } VulkanPipeline;
     58 
        // NOTE: a semaphore handle paired with a 64 bit counter.
        // NOTE(review): value is presumably the last timeline value signalled or awaited;
        // the code which updates it is not in this part of the file - confirm.
     59 typedef struct {
     60 	VkSemaphore semaphore;
     61 	u64         value;
     62 } VulkanSemaphore;
     63 
        // NOTE: payload of a VulkanEntityKind_CommandBuffer entity. It does not store the
        // VkCommandBuffer itself: timeline selects the pool in vulkan_context->command_pools
        // and buffer_index selects the slot in that pool's buffers array
        // (see vk_command_buffer()).
     64 typedef struct {
     65 	VulkanTimeline timeline;
     66 	u32            buffer_index;
     67 
     68 	// NOTE(rnp): since there may not be QueueKind_Count queues, when putting values into this
     69 	// array you must be careful to map through the queue_indices array in the vulkan_context.
     70 	u64 in_flight_wait_values[VulkanQueueKind_Count];
     71 } VulkanCommandBuffer;
     72 
        // NOTE: tag stored in every VulkanEntity; vk_entity_data() asserts on it before
        // handing out the payload.
        // NOTE(review): RenderModel has no union member of its own in VulkanEntity;
        // presumably it reuses the buffer payload (see VulkanBuffer index_type) - confirm.
     73 typedef enum {
     74 	VulkanEntityKind_Buffer,
     75 	VulkanEntityKind_CommandBuffer,
     76 	VulkanEntityKind_Image,
     77 	VulkanEntityKind_Pipeline,
     78 	VulkanEntityKind_RenderModel,
     79 	VulkanEntityKind_Semaphore,
     80 } VulkanEntityKind;
     81 
        // NOTE: tagged union behind every VulkanHandle: the first value slot of a handle is
        // a pointer to one of these (see vk_entity_data()).
     82 typedef struct VulkanEntity VulkanEntity;
     83 struct VulkanEntity {
        	// NOTE: link used while the entity sits on vulkan_context->entity_freelist
     84 	VulkanEntity *   next;
        	// NOTE: selects which member of `as` is meaningful
     85 	VulkanEntityKind kind;
     86 	union {
     87 		VulkanBuffer        buffer;
     88 		VulkanCommandBuffer command_buffer;
     89 		VulkanImage         image;
     90 		VulkanPipeline      pipeline;
     91 		VulkanSemaphore     semaphore;
     92 	} as;
     93 };
     94 
        // NOTE: one device queue plus the state kept with it. The type is aligned to 64
        // bytes and the static_assert below enforces that, so that each queue starts on its
        // own cacheline.
     95 typedef alignas(64) struct {
        	// NOTE(review): presumably taken around use of `queue` - confirm
     96 	i32 lock;
     97 
     98 	u16     queue_family;
     99 	u16     queue_index;
    100 	VkQueue queue;
    101 
    102 	VulkanSemaphore timeline_semaphore;
    103 
        	// NOTE(review): presumably the pipeline stages this queue's family supports - confirm
    104 	VkPipelineStageFlags2 pipeline_stage_flags;
    105 } VulkanQueue;
    106 static_assert(alignof(VulkanQueue) == 64, "VulkanQueue must be placed on its own cacheline");
    107 
        // NOTE: a command pool together with a fixed set of MaxCommandBuffersInFlight
        // command buffers allocated from it. The three arrays are parallel: slot i of
        // submission_values and queries_occupied belongs to buffers[i].
        // The type is aligned to 64 bytes like VulkanQueue.
    108 typedef alignas(64) struct {
    109 	i32             lock;
        	// NOTE(review): presumably the next slot of `buffers` to hand out - confirm
    110 	u32             next_index;
    111 
    112 	VulkanPipeline *bound_pipeline;
    113 
    114 	VkCommandPool   handle;
    115 	VkQueryPool     query_pool;
    116 	VkCommandBuffer buffers[MaxCommandBuffersInFlight];
    117 
    118 	u64             submission_values[MaxCommandBuffersInFlight];
    119 	u32             queries_occupied[MaxCommandBuffersInFlight];
    120 } VulkanCommandPool;
    121 
    122 typedef struct {
    123 	Arena             arena;
    124 	i32               arena_lock;
    125 
    126 	VkInstance        handle;
    127 	VkDevice          device;
    128 	VkPhysicalDevice  physical_device;
    129 
    130 	VkDescriptorPool       descriptor_pool;
    131 	VkDescriptorSetLayout  descriptor_set_layouts[BeamformerShaderResourceKind_Count];
    132 	VkDescriptorSet        descriptor_sets[BeamformerShaderResourceKind_Count];
    133 	// NOTE(rnp): must store these if we want to allow partial updates easily
    134 	VkDescriptorBufferInfo descriptor_buffer_infos[BeamformerShaderBufferSlot_Count];
    135 
    136 	// NOTE(rnp): fallback for when a shader fails to compile
    137 	VulkanPipeline    default_compute_pipeline;
    138 	VulkanPipeline    default_graphics_pipeline;
    139 
    140 	GPUInfo           gpu_info;
    141 
    142 	struct {
    143 		u64             max_allocation_size;
    144 		u64             non_coherent_atom_size;
    145 		u8              gpu_heap_index;
    146 		i8              memory_type_indices[VulkanMemoryKind_Count];
    147 		b8              memory_host_coherent[VulkanMemoryKind_Count];
    148 		static_assert(VK_MAX_MEMORY_HEAPS < I8_MAX, "");
    149 		static_assert(VK_MAX_MEMORY_TYPES < U8_MAX, "");
    150 	} memory_info;
    151 
    152 	VulkanCommandPool * command_pools[VulkanTimeline_Count];
    153 	VulkanQueue *       queues[VulkanQueueKind_Count];
    154 	// NOTE(rnp): there are a few places in the code where simply going through the queues map
    155 	// is not sufficient. those places need to know of the unique queues which unique queue
    156 	// is being referred to. that code uses this map instead.
    157 	u16               queue_indices[VulkanQueueKind_Count];
    158 	u16               unique_queues;
    159 
    160 	VkFormat          swap_chain_image_format;
    161 	VkFormat          depth_stencil_format;
    162 
    163 	VulkanEntity *    entity_freelist;
    164 	Arena             entity_arena;
    165 	i32               entity_lock;
    166 } VulkanContext;
    167 
        // NOTE: currently empty: no instance extension is unconditionally required
    168 read_only global const char *vk_required_instance_extensions[] = {
    169 };
    170 
        // NOTE: OS specific part of the required device extension list: the win32 variants
        // of external memory/semaphore on Windows, the fd variants everywhere else
    171 #if OS_WINDOWS
    172 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \
    173 	X("VK_KHR_external_memory_win32") \
    174 	X("VK_KHR_external_semaphore_win32") \
    175 
    176 #else
    177 #define VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST \
    178 	X("VK_KHR_external_memory_fd") \
    179 	X("VK_KHR_external_semaphore_fd") \
    180 
    181 #endif
    182 
        // NOTE: X-macro list of required device extensions; the OS specific entries are
        // appended at the end
    183 #define VK_REQUIRED_DEVICE_EXTENSIONS_LIST \
    184 	X("VK_KHR_16bit_storage") \
    185 	X("VK_KHR_external_memory") \
    186 	X("VK_KHR_external_semaphore") \
    187 	X("VK_KHR_storage_buffer_storage_class") \
    188 	VK_OS_REQUIRED_DEVICE_EXTENSIONS_LIST
    189 
        // NOTE: expand the list into an array of s8 extension names
    190 #define X(str) s8_comp(str),
    191 read_only global s8 vk_required_device_extensions[] = {VK_REQUIRED_DEVICE_EXTENSIONS_LIST};
    192 #undef X
    194 
        // NOTE: X-macro list of optional device extensions as (prefix, name) pairs. The pair
        // form lets the same list produce both the extension string "prefix_name" (below)
        // and a flag named `name` in vulkan_config.optional.
    195 #define VK_OPTIONAL_DEVICE_EXTENSIONS_LIST \
    196 	X(VK_KHR, cooperative_matrix) \
    197 
    198 #define X(p, s, ...) s8_comp(#p "_" #s),
    199 read_only global s8 vk_optional_device_extensions[] = {VK_OPTIONAL_DEVICE_EXTENSIONS_LIST};
    200 #undef X
    201 
        // NOTE: X-macro lists of required feature names, grouped by the Vulkan version named
        // in each macro. The lists are not expanded in this part of the file.
        // NOTE(review): presumably each name is a member of VkPhysicalDeviceFeatures and
        // VkPhysicalDeviceVulkan11/12/13Features respectively - confirm at the expansion site.
    202 #define VK_REQUIRED_PHYSICAL_FEATURES \
    203 	X(shaderInt16) \
    204 	X(shaderInt64) \
    205 
    206 #define VK_REQUIRED_PHYSICAL_11_FEATURES \
    207 	X(storageBuffer16BitAccess) \
    208 
    209 #define VK_REQUIRED_PHYSICAL_12_FEATURES \
    210 	X(bufferDeviceAddress) \
    211 	X(shaderFloat16) \
    212 	X(timelineSemaphore) \
    213 	X(vulkanMemoryModel) \
    214 
    215 #define VK_REQUIRED_PHYSICAL_13_FEATURES \
    216 	X(dynamicRendering) \
    217 	X(synchronization2) \
    218 
    218 
        // NOTE: device extensions associated with debugging, as (prefix, name) pairs. The
        // same list generates the flags in vulkan_config.debug; glsl_to_spirv() reads
        // shader_non_semantic_info to decide whether to emit shader debug info.
    219 #define VK_DEBUG_EXTENSIONS \
    220 	X(VK_KHR, shader_non_semantic_info) \
    221 	X(VK_KHR, shader_relaxed_extended_instruction) \
    222 
    223 #define X(p, s, ...) s8_comp(#p "_" #s),
    224 read_only global s8 vk_debug_extensions[] = {VK_DEBUG_EXTENSIONS};
    225 #undef X
    226 
        // NOTE: instance extensions associated with debugging. The same list generates the
        // flags in vulkan_config.instance; vk_label_object_() only names objects when
        // debug_utils is set.
    227 #define VK_INSTANCE_DEBUG_EXTENSIONS_LIST \
    228 	X(VK_EXT, debug_utils) \
    229 
    230 #define X(p, s, ...) s8_comp(#p "_" #s),
    231 read_only global s8 vk_instance_debug_extensions[] = {VK_INSTANCE_DEBUG_EXTENSIONS_LIST};
    232 #undef X
    233 
        // NOTE: one b8 flag per entry of each extension X-list above. Every union overlays
        // the named flags with an array E of the same length as the matching
        // vk_*_extensions array, so a flag can be addressed either by name or by the index
        // of its extension string.
        // NOTE(review): the code which sets the flags is not in this part of the file;
        // presumably a flag is set when the extension was found and enabled - confirm.
    234 global struct {
    235 	union {
    236 		struct {
    237 			#define X(_, name, ...) b8 name;
    238 			VK_OPTIONAL_DEVICE_EXTENSIONS_LIST
    239 			#undef X
    240 		};
    241 		b8 E[countof(vk_optional_device_extensions)];
    242 	} optional;
    243 
    244 	union {
    245 		struct {
    246 			#define X(_, name, ...) b8 name;
    247 			VK_DEBUG_EXTENSIONS
    248 			#undef X
    249 		};
    250 		b8 E[countof(vk_debug_extensions)];
    251 	} debug;
    252 
    253 	union {
    254 		struct {
    255 			#define X(_, name, ...) b8 name;
    256 			VK_INSTANCE_DEBUG_EXTENSIONS_LIST
    257 			#undef X
    258 		};
    259 		b8 E[countof(vk_instance_debug_extensions)];
    260 	} instance;
    261 } vulkan_config;
    262 
        // NOTE: upper bound on the number of device extensions which can be enabled at
        // once: every required, optional, and debug device extension. Instance extensions
        // are not counted.
    263 #define MAX_ENABLED_EXTENSIONS (  countof(vk_required_device_extensions) \
    264                                 + countof(vk_optional_device_extensions) \
    265                                 + countof(vk_debug_extensions) \
    266                                )
    267 
        // NOTE: the single backend context. Declared as a one element array so that the
        // name can be used with pointer syntax (vulkan_context->device).
    268 global VulkanContext vulkan_context[1];
    269 
    270 /* NOTE(rnp): the idea here is to set reasonable development constraints.
    271  * They should probably not match one to one with the maximums of the dev
    272  * machine's hardware. Instead these are here to cause compile time failure
    273  * for features which are not expected to work everywhere. */
        /* NOTE: passed to glslang as the `resource` member of glslang_input_t in
         * glsl_to_spirv(). */
    274 global glslang_resource_t glslc_resource_constraints[1] = {{
        	// NOTE: compute limits chosen by this project (not the glslang defaults)
    275 	.max_compute_work_group_count_x = 65535,
    276 	.max_compute_work_group_count_y = 65535,
    277 	.max_compute_work_group_count_z = 65535,
    278 	.max_compute_work_group_size_x  = 1024,
    279 	.max_compute_work_group_size_y  = 1024,
    280 	.max_compute_work_group_size_z  = 1024,
    281 
    282 	// NOTE: taken from glslang defaults
    283 	.max_lights = 32,
    284 	.max_clip_planes = 6,
    285 	.max_texture_units = 32,
    286 	.max_texture_coords = 32,
    287 	.max_vertex_attribs = 64,
    288 	.max_vertex_uniform_components = 4096,
    289 	.max_varying_floats = 64,
    290 	.max_vertex_texture_image_units = 32,
    291 	.max_combined_texture_image_units = 80,
    292 	.max_texture_image_units = 32,
    293 	.max_fragment_uniform_components = 4096,
    294 	.max_draw_buffers = 32,
    295 	.max_vertex_uniform_vectors = 128,
    296 	.max_varying_vectors = 8,
    297 	.max_fragment_uniform_vectors = 16,
    298 	.max_vertex_output_vectors = 16,
    299 	.max_fragment_input_vectors = 15,
    300 	.min_program_texel_offset = -8,
    301 	.max_program_texel_offset = 7,
    302 	.max_clip_distances = 8,
    303 	.max_compute_uniform_components = 1024,
    304 	.max_compute_texture_image_units = 16,
    305 	.max_compute_image_uniforms = 8,
    306 	.max_compute_atomic_counters = 8,
    307 	.max_compute_atomic_counter_buffers = 1,
    308 	.max_varying_components = 60,
    309 	.max_vertex_output_components = 64,
    310 	.max_fragment_input_components = 128,
    311 	.max_image_units = 8,
    312 	.max_combined_image_units_and_fragment_outputs = 8,
    313 	.max_combined_shader_output_resources = 8,
    314 	.max_image_samples = 0,
    315 	.max_vertex_image_uniforms = 0,
    316 	.max_fragment_image_uniforms = 8,
    317 	.max_combined_image_uniforms = 8,
    318 	.max_viewports = 16,
    319 	.max_vertex_atomic_counters = 0,
    320 	.max_fragment_atomic_counters = 8,
    321 	.max_combined_atomic_counters = 8,
    322 	.max_atomic_counter_bindings = 1,
    323 	.max_vertex_atomic_counter_buffers = 0,
    324 	.max_fragment_atomic_counter_buffers = 1,
    325 	.max_combined_atomic_counter_buffers = 1,
    326 	.max_atomic_counter_buffer_size = 16384,
    327 	.max_transform_feedback_buffers = 4,
    328 	.max_transform_feedback_interleaved_components = 64,
    329 	.max_cull_distances = 8,
    330 	.max_combined_clip_and_cull_distances = 8,
    331 	.max_samples = 4,
    332 	.max_mesh_output_vertices_ext = 256,
    333 	.max_mesh_output_primitives_ext = 256,
    334 	.max_mesh_work_group_size_x_ext = 128,
    335 	.max_mesh_work_group_size_y_ext = 128,
    336 	.max_mesh_work_group_size_z_ext = 128,
    337 	.max_task_work_group_size_x_ext = 128,
    338 	.max_task_work_group_size_y_ext = 128,
    339 	.max_task_work_group_size_z_ext = 128,
    340 	.max_mesh_view_count_ext = 4,
    341 	.max_dual_source_draw_buffers_ext = 1,
    342 
        	// NOTE: every language construct toggle in the limits table is switched on
    343 	.limits = {
    344 		.non_inductive_for_loops                  = 1,
    345 		.while_loops                              = 1,
    346 		.do_while_loops                           = 1,
    347 		.general_uniform_indexing                 = 1,
    348 		.general_attribute_matrix_vector_indexing = 1,
    349 		.general_varying_indexing                 = 1,
    350 		.general_sampler_indexing                 = 1,
    351 		.general_variable_indexing                = 1,
    352 		.general_constant_matrix_vector_indexing  = 1,
    353 	},
    354 }};
    355 
    356 #if BEAMFORMER_RENDERDOC_HOOKS
    357 DEBUG_IMPORT void *
    358 vk_renderdoc_instance_handle(void)
    359 {
    360 	return *((void **)vulkan_context->handle);
    361 }
    362 #endif
    363 
    364 #if BEAMFORMER_DEBUG
    365 #define vk_label_object(k, h, label, extra) vk_label_object_(VK_OBJECT_TYPE_##k, (u64)h, label, extra)
    366 function void
    367 vk_label_object_(VkObjectType kind, u64 handle, s8 label, s8 extra)
    368 {
    369 	local_persist u8 buffer[1024];
    370 	Stream sb = arena_stream(arena_from_memory(buffer, sizeof(buffer)));
    371 	if (vulkan_config.instance.debug_utils && label.len > 0) {
    372 		stream_append_s8s(&sb, label, s8(" ("), extra, s8(")"));
    373 		stream_append_byte(&sb, 0);
    374 		if (!sb.errors) {
    375 			VkDebugUtilsObjectNameInfoEXT object_name_info = {
    376 				.sType        = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
    377 				.objectType   = kind,
    378 				.objectHandle = handle,
    379 				.pObjectName  = (char *)sb.data,
    380 			};
    381 			vkSetDebugUtilsObjectNameEXT(vulkan_context->device, &object_name_info);
    382 		}
    383 	}
    384 }
    385 #else
    386 #define vk_label_object(...)
    387 #define vk_label_object_(...)
    388 #endif
    389 
    390 function VulkanEntity *
    391 vk_entity_allocate(VulkanEntityKind kind)
    392 {
    393 	VulkanEntity *result = 0;
    394 	DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock))
    395 	{
    396 		result = SLLPopFreelist(vulkan_context->entity_freelist);
    397 		if (!result) result = push_array_no_zero(&vulkan_context->entity_arena, VulkanEntity, 1);
    398 	}
    399 
    400 	zero_struct(result);
    401 	result->kind = kind;
    402 	return result;
    403 }
    404 
    405 function void
    406 vk_entity_release(VulkanEntity *entity)
    407 {
    408 	DeferLoop(take_lock(&vulkan_context->entity_lock, -1), release_lock(&vulkan_context->entity_lock))
    409 	{
    410 		SLLStackPush(vulkan_context->entity_freelist, entity, next);
    411 	}
    412 }
    413 
    414 function void *
    415 vk_entity_data(VulkanHandle h, VulkanEntityKind kind)
    416 {
    417 	VulkanEntity *e = (VulkanEntity *)h.value[0];
    418 	assert(ValidVulkanHandle(h) && e->kind == kind);
    419 	return &e->as;
    420 }
    421 
    422 function VkCommandBuffer
    423 vk_command_buffer(VulkanHandle h)
    424 {
    425 	VulkanCommandBuffer *vcb = vk_entity_data(h, VulkanEntityKind_CommandBuffer);
    426 	VulkanCommandPool   *vcp = vulkan_context->command_pools[vcb->timeline];
    427 	VkCommandBuffer result = vcp->buffers[vcb->buffer_index];
    428 	return result;
    429 }
    430 
    431 #define glslang_log(a, ...) glslang_log_(a, arg_list(s8, __VA_ARGS__))
    432 function void
    433 glslang_log_(Arena arena, s8 *items, uz count)
    434 {
    435 	Stream sb = arena_stream(arena);
    436 	stream_append_s8(&sb, glslang_info(""));
    437 	stream_append_s8s_(&sb, items, count);
    438 	if (sb.data[sb.widx - 1] != '\n') stream_append_byte(&sb, '\n');
    439 	os_console_log(sb.data, sb.widx);
    440 }
    441 
    442 function s8
    443 glsl_to_spirv(Arena *arena, u32 kind, s8 shader_text, s8 name)
    444 {
    445 	/* NOTE(rnp): glslang's garbage c interface doesn't expose internal usage of strings with length */
    446 	assert(shader_text.data[shader_text.len] == 0);
    447 
    448 	glslang_input_t input = {
    449 		.language                          = GLSLANG_SOURCE_GLSL,
    450 		.stage                             = kind,
    451 		.client                            = GLSLANG_CLIENT_VULKAN,
    452 		.client_version                    = GLSLANG_TARGET_VULKAN_1_4,
    453 		.target_language                   = GLSLANG_TARGET_SPV,
    454 		.target_language_version           = GLSLANG_TARGET_SPV_1_6,
    455 		.code                              = (c8 *)shader_text.data,
    456 		.default_version                   = 460,
    457 		.default_profile                   = GLSLANG_NO_PROFILE,
    458 		.force_default_version_and_profile = 0,
    459 		.forward_compatible                = 0,
    460 		.messages                          = GLSLANG_MSG_DEFAULT_BIT,
    461 		.resource                          = glslc_resource_constraints,
    462 	};
    463 	glslang_shader_t *shader = glslang_shader_create(&input);
    464 
    465 	s8 error = {0};
    466 	if (glslang_shader_preprocess(shader, &input)) {
    467 		if (!glslang_shader_parse(shader, &input))
    468 			error = s8("parsing failed");
    469 	} else {
    470 		error = s8("preprocessing failed");
    471 	}
    472 
    473 	if (error.len) {
    474 		glslang_log(*arena, name, s8(": "), error, s8("\n"),
    475 		            c_str_to_s8((c8 *)glslang_shader_get_info_log(shader)),
    476 		            c_str_to_s8((c8 *)glslang_shader_get_info_debug_log(shader)));
    477 		glslang_shader_delete(shader);
    478 		shader = 0;
    479 	}
    480 
    481 	s8 result = {0};
    482 	if (shader) {
    483 		glslang_program_t *program = glslang_program_create();
    484 		glslang_program_add_shader(program, shader);
    485 		i32 messages = GLSLANG_MSG_DEBUG_INFO_BIT|GLSLANG_MSG_SPV_RULES_BIT|GLSLANG_MSG_VULKAN_RULES_BIT;
    486 		if (glslang_program_link(program, messages)) {
    487 			glslang_spv_options_t options = {.validate = 1,};
    488 
    489 			if (vulkan_config.debug.shader_non_semantic_info) {
    490 				options.generate_debug_info                  = 1;
    491 				options.emit_nonsemantic_shader_debug_info   = 1;
    492 				options.emit_nonsemantic_shader_debug_source = 1;
    493 			}
    494 
    495 			glslang_program_add_source_text(program, kind, (c8 *)shader_text.data, shader_text.len);
    496 			glslang_program_SPIRV_generate_with_options(program, kind, &options);
    497 
    498 			u32 words   = glslang_program_SPIRV_get_size(program);
    499 			result.data = (u8 *)push_array(arena, u32, words);
    500 			result.len  = words * sizeof(u32);
    501 			glslang_program_SPIRV_get(program, (u32 *)result.data);
    502 
    503 			s8 spirv_msg = c_str_to_s8((c8 *)glslang_program_SPIRV_get_messages(program));
    504 			if (spirv_msg.len) glslang_log(*arena, name, s8(": spirv info: "), spirv_msg);
    505 		} else {
    506 			glslang_log(*arena, name, s8(": shader linking failed\n"),
    507 			            c_str_to_s8((c8 *)glslang_program_get_info_log(program)),
    508 			            c_str_to_s8((c8 *)glslang_program_get_info_debug_log(program)));
    509 		}
    510 		glslang_shader_delete(shader);
    511 		glslang_program_delete(program);
    512 	}
    513 
    514 	return result;
    515 }
    516 
    517 function u32
    518 vk_shader_kind_to_glslang_shader_kind(u32 kind)
    519 {
    520 	u32 result = ctz_u64(kind);
    521 	return result;
    522 }
    523 
    524 function VkShaderModule
    525 vk_compile_shader_module(Arena arena, u32 kind, s8 text, s8 name)
    526 {
    527 	VkShaderModule result = {0};
    528 	s8 spirv = glsl_to_spirv(&arena, vk_shader_kind_to_glslang_shader_kind(kind), text, name);
    529 	VkShaderModuleCreateInfo create_info = {
    530 		.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
    531 		.codeSize = (uz)spirv.len,
    532 		.pCode    = (u32 *)spirv.data,
    533 	};
    534 	if (spirv.len > 0) vkCreateShaderModule(vulkan_context->device, &create_info, 0, &result);
    535 
    536 	return result;
    537 }
    538 
    539 function VkShaderStageFlags
    540 vk_stage_flags_from_shader_kind(VulkanShaderKind kind)
    541 {
    542 	read_only local_persist VkShaderStageFlags map[VulkanShaderKind_Count + 1] = {
    543 		[VulkanShaderKind_Vertex]   = VK_SHADER_STAGE_VERTEX_BIT,
    544 		[VulkanShaderKind_Mesh]     = VK_SHADER_STAGE_MESH_BIT_EXT,
    545 		[VulkanShaderKind_Fragment] = VK_SHADER_STAGE_FRAGMENT_BIT,
    546 		[VulkanShaderKind_Compute]  = VK_SHADER_STAGE_COMPUTE_BIT,
    547 		[VulkanShaderKind_Count]    = 0,
    548 	};
    549 	VkShaderStageFlags result = map[Clamp((u32)kind, 0, VulkanShaderKind_Count)];
    550 	return result;
    551 }
    552 
    553 function VulkanPipeline
    554 vk_compute_pipeline_from_shader_text(Arena arena, s8 text, s8 name, u32 push_constants_size)
    555 {
    556 	VulkanPipeline result = {.stage_flags = VK_SHADER_STAGE_COMPUTE_BIT};
    557 	VkShaderModule module = vk_compile_shader_module(arena, VK_SHADER_STAGE_COMPUTE_BIT, text, name);
    558 	if (module) {
    559 		VkPushConstantRange push_constant_range = {
    560 			.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
    561 			.offset     = 0,
    562 			.size       = push_constants_size,
    563 		};
    564 
    565 		VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
    566 			.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    567 			.setLayoutCount         = countof(vulkan_context->descriptor_set_layouts),
    568 			.pSetLayouts            = vulkan_context->descriptor_set_layouts,
    569 			.pushConstantRangeCount = push_constants_size ? 1 : 0,
    570 			.pPushConstantRanges    = push_constants_size ? &push_constant_range : 0,
    571 		};
    572 
    573 		vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_create_info, 0, &result.layout);
    574 
    575 		VkComputePipelineCreateInfo pipeline_create_info = {
    576 			.sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
    577 			.layout = result.layout,
    578 			.stage  = {
    579 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    580 				.stage  = VK_SHADER_STAGE_COMPUTE_BIT,
    581 				.module = module,
    582 				.pName  = "main",
    583 			},
    584 		};
    585 
    586 		vkCreateComputePipelines(vulkan_context->device, 0, 1, &pipeline_create_info, 0, &result.pipeline);
    587 
    588 		vk_label_object(PIPELINE,        result.pipeline, name, s8("Pipeline"));
    589 		vk_label_object(PIPELINE_LAYOUT, result.layout,   name, s8("Pipeline Layout"));
    590 		vk_label_object(SHADER_MODULE,   module,          name, s8("Module"));
    591 
    592 		vkDestroyShaderModule(vulkan_context->device, module, 0);
    593 	}
    594 	if (result.pipeline == 0) result = vulkan_context->default_compute_pipeline;
    595 
    596 	return result;
    597 }
    598 
    599 function VulkanPipeline
    600 vk_graphics_pipeline_from_infos(Arena arena, VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size)
    601 {
    602 	assume(count == 2);
    603 
    604 	VulkanPipeline result = {0};
    605 	VkShaderModule modules[2];
    606 
    607 	modules[0] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[0].kind),
    608 	                                      infos[0].text, infos[0].name);
    609 	modules[1] = vk_compile_shader_module(arena, vk_stage_flags_from_shader_kind(infos[1].kind),
    610 	                                      infos[1].text, infos[1].name);
    611 	if (modules[0] && modules[1]) {
    612 		result.stage_flags = vk_stage_flags_from_shader_kind(infos[0].kind)
    613 		                     | vk_stage_flags_from_shader_kind(infos[1].kind);
    614 
    615 		VkPushConstantRange pcr = {
    616 			.stageFlags = result.stage_flags,
    617 			.offset     = 0,
    618 			.size       = push_constants_size,
    619 		};
    620 
    621 		VkPipelineLayoutCreateInfo pipeline_layout_info = {
    622 			.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    623 			.setLayoutCount         = countof(vulkan_context->descriptor_set_layouts),
    624 			.pSetLayouts            = vulkan_context->descriptor_set_layouts,
    625 			.pushConstantRangeCount = push_constants_size ? 1    : 0,
    626 			.pPushConstantRanges    = push_constants_size ? &pcr : 0,
    627 		};
    628 
    629 		vkCreatePipelineLayout(vulkan_context->device, &pipeline_layout_info, 0, &result.layout);
    630 
    631 		VkPipelineShaderStageCreateInfo shader_stage_create_infos[2] = {
    632 			{
    633 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    634 				.stage  = vk_stage_flags_from_shader_kind(infos[0].kind),
    635 				.module = modules[0],
    636 				.pName  = "main",
    637 			},
    638 			{
    639 				.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
    640 				.stage  = vk_stage_flags_from_shader_kind(infos[1].kind),
    641 				.module = modules[1],
    642 				.pName  = "main",
    643 			},
    644 		};
    645 
    646 		VkPipelineVertexInputStateCreateInfo vertex_input_info = {
    647 			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
    648 		};
    649 
    650 		VkPipelineInputAssemblyStateCreateInfo input_assembly_info = {
    651 			.sType    = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
    652 			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    653 		};
    654 
    655 		VkPipelineViewportStateCreateInfo viewport_info = {
    656 			.sType         = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
    657 			.viewportCount = 1,
    658 			.scissorCount  = 1,
    659 		};
    660 
    661 		VkPipelineRasterizationStateCreateInfo rasterization_info = {
    662 			.sType       = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
    663 			.polygonMode = VK_POLYGON_MODE_FILL,
    664 			.lineWidth   = 1.0f,
    665 			.cullMode    = VK_CULL_MODE_BACK_BIT,
    666 			.frontFace   = VK_FRONT_FACE_CLOCKWISE,
    667 		};
    668 
    669 		VkPipelineMultisampleStateCreateInfo multisampling_info = {
    670 			.sType                = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
    671 			.rasterizationSamples = vulkan_context->gpu_info.max_msaa_samples,
    672 		};
    673 
    674 		VkPipelineDepthStencilStateCreateInfo depth_test_create_info = {
    675 			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
    676 			.depthTestEnable       = 1,
    677 			.depthWriteEnable      = 1,
    678 			.depthCompareOp        = VK_COMPARE_OP_LESS,
    679 			.depthBoundsTestEnable = 1,
    680 			.stencilTestEnable     = 0,
    681 			.front                 = {0},
    682 			.back                  = {0},
    683 			.minDepthBounds        = 0.0f,
    684 			.maxDepthBounds        = 1.0f,
    685 		};
    686 
    687 		u32 colour_mask = VK_COLOR_COMPONENT_R_BIT|VK_COLOR_COMPONENT_G_BIT|VK_COLOR_COMPONENT_B_BIT|VK_COLOR_COMPONENT_A_BIT;
    688 		VkPipelineColorBlendAttachmentState blend_state = {
    689 			.colorWriteMask      = colour_mask,
    690 			.blendEnable         = 1,
    691 			.srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
    692 			.dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
    693 			.colorBlendOp        = VK_BLEND_OP_ADD,
    694 			.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
    695 			.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
    696 			.alphaBlendOp        = VK_BLEND_OP_ADD,
    697 		};
    698 
    699 		VkPipelineColorBlendStateCreateInfo colour_blend_state_create = {
    700 			.sType           = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
    701 			.logicOpEnable   = 0,
    702 			.logicOp         = VK_LOGIC_OP_COPY,
    703 			.attachmentCount = 1,
    704 			.pAttachments    = &blend_state,
    705 		};
    706 
    707 		VkDynamicState dynamic_states[] = {
    708 			VK_DYNAMIC_STATE_VIEWPORT,
    709 			VK_DYNAMIC_STATE_SCISSOR,
    710 		};
    711 
    712 		VkPipelineDynamicStateCreateInfo dynamic_state_info = {
    713 			.sType             = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
    714 			.dynamicStateCount = countof(dynamic_states),
    715 			.pDynamicStates    = dynamic_states,
    716 		};
    717 
    718 		//VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_SRGB;
    719 		VkFormat colour_attachment_format = VK_FORMAT_R8G8B8A8_UNORM;
    720 		VkPipelineRenderingCreateInfo rendering_create_info = {
    721 			.sType                   = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
    722 			.colorAttachmentCount    = 1,
    723 			.pColorAttachmentFormats = &colour_attachment_format,
    724 			.depthAttachmentFormat   = vulkan_context->depth_stencil_format,
    725 			.stencilAttachmentFormat = vulkan_context->depth_stencil_format,
    726 		};
    727 
    728 		VkGraphicsPipelineCreateInfo pci = {
    729 			.sType               = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
    730 			.pNext               = &rendering_create_info,
    731 			.stageCount          = countof(shader_stage_create_infos),
    732 			.pStages             = shader_stage_create_infos,
    733 			.pVertexInputState   = &vertex_input_info,
    734 			.pInputAssemblyState = &input_assembly_info,
    735 			.pViewportState      = &viewport_info,
    736 			.pRasterizationState = &rasterization_info,
    737 			.pMultisampleState   = &multisampling_info,
    738 			.pDepthStencilState  = &depth_test_create_info,
    739 			.pColorBlendState    = &colour_blend_state_create,
    740 			.pDynamicState       = &dynamic_state_info,
    741 			.layout              = result.layout,
    742 		};
    743 
    744 		vkCreateGraphicsPipelines(vulkan_context->device, 0, 1, &pci,0, &result.pipeline);
    745 
    746 		s8 extras[] = {
    747 			[VulkanShaderKind_Vertex]   = s8_comp("Vertex Module"),
    748 			[VulkanShaderKind_Mesh]     = s8_comp("Mesh Module"),
    749 			[VulkanShaderKind_Fragment] = s8_comp("Fragment Module"),
    750 		};
    751 		assert(infos[0].kind < countof(extras));
    752 		assert(infos[1].kind < countof(extras));
    753 
    754 		vk_label_object(PIPELINE,        result.pipeline, infos[0].name, s8("Pipeline"));
    755 		vk_label_object(PIPELINE_LAYOUT, result.layout,   infos[0].name, s8("Pipeline Layout"));
    756 		//vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[0], infos[0].name, extras[infos[0].kind]);
    757 		//vk_label_object_(VK_OBJECT_TYPE_SHADER_MODULE, (u64)modules[1], infos[1].name, extras[infos[1].kind]);
    758 	}
    759 
    760 	if (modules[0]) vkDestroyShaderModule(vulkan_context->device, modules[0], 0);
    761 	if (modules[1]) vkDestroyShaderModule(vulkan_context->device, modules[1], 0);
    762 
    763 	if (result.pipeline == 0) result = vulkan_context->default_graphics_pipeline;
    764 
    765 	return result;
    766 }
    767 
    768 function VulkanSemaphore
    769 vk_make_semaphore(OSHandle *export)
    770 {
    771 	VulkanContext *vk = vulkan_context;
    772 
    773 	VkSemaphoreCreateInfo       sci  = {.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
    774 	VkExportSemaphoreCreateInfo esci = {
    775 		.sType       = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
    776 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
    777 		                          : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    778 	};
    779 	VkSemaphoreTypeCreateInfo stc = {
    780 		.sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
    781 		.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
    782 	};
    783 
    784 	if (export) sci.pNext = &esci;
    785 	else        sci.pNext = &stc;
    786 
    787 	VulkanSemaphore result = {0};
    788 
    789 	vkCreateSemaphore(vk->device, &sci, 0, &result.semaphore);
    790 
    791 	if (export) {
    792 		if (OS_WINDOWS) {
    793 			VkSemaphoreGetWin32HandleInfoKHR ghi = {
    794 				.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
    795 				.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT,
    796 				.semaphore  = result.semaphore,
    797 			};
    798 			void *handle;
    799 			vkGetSemaphoreWin32HandleKHR(vk->device, &ghi, &handle);
    800 			export->value[0] = (u64)handle;
    801 		} else {
    802 			VkSemaphoreGetFdInfoKHR ghi = {
    803 				.sType      = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
    804 				.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
    805 				.semaphore  = result.semaphore,
    806 			};
    807 			i32 handle;
    808 			vkGetSemaphoreFdKHR(vk->device, &ghi, &handle);
    809 			export->value[0] = (u64)handle;
    810 		}
    811 	}
    812 
    813 	return result;
    814 }
    815 
    816 function void
    817 vk_release_memory(VkDeviceMemory memory, u64 size)
    818 {
    819 	VulkanContext *vk = vulkan_context;
    820 	vkFreeMemory(vk->device, memory, 0);
    821 	atomic_add_u64(&vk->gpu_info.gpu_heap_used, -size);
    822 }
    823 
    824 function b32
    825 vk_allocate_memory(VkDeviceMemory *memory, u64 size, VulkanMemoryKind kind, VkMemoryAllocateFlags flags,
    826                    VkMemoryDedicatedAllocateInfo *dedicated_allocate_info, OSHandle *export)
    827 {
    828 	VulkanContext *vk = vulkan_context;
    829 
    830 	VkExportMemoryAllocateInfo export_info = {
    831 		.sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
    832 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
    833 		                          : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
    834 	};
    835 
    836 	VkMemoryAllocateFlagsInfo memory_allocate_flags_info = {
    837 		.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
    838 		.flags = flags,
    839 		.pNext = dedicated_allocate_info,
    840 	};
    841 
    842 	if (export) {
    843 		export_info.pNext = dedicated_allocate_info;
    844 		memory_allocate_flags_info.pNext = &export_info;
    845 	}
    846 
    847 	VkMemoryAllocateInfo memory_allocate_info = {
    848 		.sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
    849 		.allocationSize  = size,
    850 		.memoryTypeIndex = vk->memory_info.memory_type_indices[kind],
    851 		.pNext           = &memory_allocate_flags_info,
    852 	};
    853 
    854 	b32 result = vkAllocateMemory(vk->device, &memory_allocate_info, 0, memory) == VK_SUCCESS;
    855 	if (result) {
    856 		atomic_add_u64(&vk->gpu_info.gpu_heap_used, memory_allocate_info.allocationSize);
    857 
    858 		if (export) {
    859 			if (OS_WINDOWS) {
    860 				VkMemoryGetWin32HandleInfoKHR handle_info = {
    861 					.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
    862 					.memory     = *memory,
    863 					.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
    864 				};
    865 				void *handle;
    866 				vkGetMemoryWin32HandleKHR(vk->device, &handle_info, &handle);
    867 				export->value[0] = (u64)handle;
    868 			} else {
    869 				VkMemoryGetFdInfoKHR fd_info = {
    870 					.sType      = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
    871 					.memory     = *memory,
    872 					.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
    873 				};
    874 				i32 fd;
    875 				vkGetMemoryFdKHR(vk->device, &fd_info, &fd);
    876 				export->value[0] = (u64)fd;
    877 			}
    878 		}
    879 	}
    880 	return result;
    881 }
    882 
    883 function u32
    884 vk_index_size(VkIndexType type)
    885 {
    886 	u32 result = 0;
    887 	switch (type) {
    888 	case VK_INDEX_TYPE_UINT16:{ result = 2; }break;
    889 	case VK_INDEX_TYPE_UINT32:{ result = 4; }break;
    890 	InvalidDefaultCase;
    891 	}
    892 	return result;
    893 }
    894 
/* Parameters for vk_buffer_allocate_common(). */
typedef struct {
	// NOTE: output; receives the final size and device address on success
	GPUBuffer        *gpu_buffer;
	// NOTE: requested size in bytes; clamped to the device allocation limit
	u64               size;
	// NOTE: VulkanUsageFlag_* bits (transfer source/destination, host read/write)
	VulkanUsageFlags  flags;
	// NOTE: more than 1 family selects concurrent sharing, otherwise exclusive
	u32               queue_family_count;
	u32               queue_family_indices[VulkanTimeline_Count];
	// NOTE: anything other than VK_INDEX_TYPE_NONE_KHR adds index buffer usage
	VkIndexType       index_type;
	// NOTE: name used when labelling the created buffer and memory objects
	s8                label;
} VulkanBufferAllocateInfo;
    904 
    905 function b32
    906 vk_buffer_allocate_common(VulkanBuffer *vb, VulkanBufferAllocateInfo *ai)
    907 {
    908 	VulkanContext *vk = vulkan_context;
    909 
    910 	// TODO(rnp): this probably should be handled, its usually 4GB. likely
    911 	// need to chain multiple allocations and handle it in shader code
    912 	u64 clamp_size = vk->memory_info.max_allocation_size & ~(vk->memory_info.non_coherent_atom_size - 1);
    913 
    914 	// NOTE(rnp): renderdoc can't handle buffers that are too close to the allocation size limit
    915 	if (renderdoc_attached())
    916 		clamp_size -= MB(8);
    917 
    918 	u64 size = Min(ai->size, clamp_size);
    919 
    920 	VkBufferCreateInfo buffer_create_info = {
    921 		.sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
    922 		.usage       = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT|VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
    923 		.size        = size,
    924 		.sharingMode = ai->queue_family_count > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE,
    925 		.queueFamilyIndexCount = ai->queue_family_count,
    926 		.pQueueFamilyIndices   = ai->queue_family_indices,
    927 	};
    928 
    929 	if (ai->flags & VulkanUsageFlag_TransferSource)
    930 		buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
    931 
    932 	if (ai->flags & VulkanUsageFlag_TransferDestination)
    933 		buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    934 
    935 	if (ai->index_type != VK_INDEX_TYPE_NONE_KHR)
    936 		buffer_create_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
    937 
    938 	vkCreateBuffer(vk->device, &buffer_create_info, 0, &vb->buffer);
    939 	vk_label_object(BUFFER, vb->buffer, ai->label, s8("Buffer"));
    940 
    941 	VkMemoryRequirements memory_requirements;
    942 	vkGetBufferMemoryRequirements(vk->device, vb->buffer, &memory_requirements);
    943 
    944 	assert((u64)size <= memory_requirements.size);
    945 	size = memory_requirements.size;
    946 
    947 	VkMemoryDedicatedAllocateInfo dedicated_allocate_info = {
    948 		.sType  = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
    949 		.buffer = vb->buffer,
    950 	};
    951 
    952 	/* NOTE(rnp): to create a CPU writable buffer:
    953 	 * 1. try to allocate and map the entire buffer
    954 	 *    - this may fail if the buffer is bigger than the BAR size
    955 	 *      (unknowable from vulkan), or the memory space has become
    956 	 *      too fragmented (unlikely)
    957 	 * 2. if allocation or mapping fails we must chain a host buffer
    958 	 *    for staging. If this happens in practice we should add
    959 	 *    the ability to import an existing external allocation
    960 	 */
    961 	b32 host_read_write = (ai->flags & VulkanUsageFlag_HostReadWrite) != 0;
    962 	vb->memory_kind = host_read_write ? VulkanMemoryKind_BAR : VulkanMemoryKind_Device;
    963 
    964 	b32 result = 0;
    965 	// TODO(rnp): this may fail if the allocation is too big for the BAR size
    966 	// it needs to handled properly
    967 	if (vk_allocate_memory(&vb->memory, size, vb->memory_kind, VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, &dedicated_allocate_info, 0)) {
    968 		result  = 1;
    969 		ai->gpu_buffer->size = size;
    970 		vb->memory_size = size;
    971 
    972 		vb->index_type = ai->index_type;
    973 
    974 		vk_label_object(DEVICE_MEMORY, vb->memory, ai->label, s8("Memory"));
    975 
    976 		if (host_read_write)
    977 			vkMapMemory(vk->device, vb->memory, 0, size, 0, &vb->host_pointer);
    978 
    979 		vkBindBufferMemory(vk->device, vb->buffer, vb->memory, 0);
    980 		VkBufferDeviceAddressInfo buffer_device_address_info = {
    981 			.sType  = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
    982 			.buffer = vb->buffer,
    983 		};
    984 		ai->gpu_buffer->gpu_pointer = vkGetBufferDeviceAddress(vk->device, &buffer_device_address_info);
    985 	}
    986 	return result;
    987 }
    988 
    989 function void
    990 vk_load_instance(Arena arena, Stream *err)
    991 {
    992 	#define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(0, #name);
    993 	VkBaseProcedureList
    994 	#undef X
    995 
    996 	s8 validation_layers[] = {
    997 		#if BEAMFORMER_DEBUG
    998 		s8_comp("VK_LAYER_KHRONOS_validation"),
    999 		#endif
   1000 	};
   1001 
   1002 	u32 enabled_validation_layers_count = 0;
   1003 	const char *enabled_validation_layers[countof(validation_layers)];
   1004 
   1005 	u32 enabled_instance_extensions_count = 0;
   1006 	const char *enabled_instance_extensions[countof(vk_required_instance_extensions) + countof(vk_instance_debug_extensions)];
   1007 
   1008 	static_assert(countof(vk_required_instance_extensions) == 0, "");
   1009 	//for EachElement(vk_required_instance_extensions, it)
   1010 	//	enabled_instance_extensions[enabled_instance_extensions_count++] = vk_required_instance_extensions[it];
   1011 
   1012 	#if BEAMFORMER_DEBUG
   1013 	{
   1014 		u32 layer_count = 0;
   1015 		vkEnumerateInstanceLayerProperties(&layer_count, 0);
   1016 
   1017 		VkLayerProperties *layers    = push_array(&arena, VkLayerProperties, layer_count);
   1018 		s8                *layer_s8s = push_array(&arena, s8,                layer_count);
   1019 		vkEnumerateInstanceLayerProperties(&layer_count, layers);
   1020 
   1021 		for (u32 i = 0; i < layer_count; i++)
   1022 			layer_s8s[i] = c_str_to_s8(layers[i].layerName);
   1023 
   1024 		b32 supported_layers[countof(validation_layers)] = {0};
   1025 		for EachElement(validation_layers, it) {
   1026 			for(u32 i = 0; i < layer_count; i++) {
   1027 				if (s8_equal(validation_layers[it], layer_s8s[i])) {
   1028 					u32 index = enabled_validation_layers_count++;
   1029 					enabled_validation_layers[index] = (char *)validation_layers[it].data;
   1030 					supported_layers[it] = 1;
   1031 					break;
   1032 				}
   1033 			}
   1034 		}
   1035 
   1036 		if (countof(validation_layers) != enabled_validation_layers_count) {
   1037 			i32 missing_count = countof(validation_layers) - enabled_validation_layers_count;
   1038 			stream_append_s8s(err, vulkan_info("missing validation layer"),
   1039 			                  missing_count > 1 ? s8("s:") : s8(":"), s8("\n"));
   1040 
   1041 			for EachElement(validation_layers, it) {
   1042 				if (supported_layers[it] == 0)
   1043 					stream_append_s8s(err, s8("    "), validation_layers[it], s8("\n"));
   1044 			}
   1045 		}
   1046 
   1047 		u32 instance_extension_count = 0;
   1048 		vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, 0);
   1049 
   1050 		VkExtensionProperties *instance_extensions = push_array(&arena, VkExtensionProperties, instance_extension_count);
   1051 		s8                    *instance_ext_s8s    = push_array(&arena, s8,                    instance_extension_count);
   1052 		vkEnumerateInstanceExtensionProperties(0, &instance_extension_count, instance_extensions);
   1053 		for EachIndex(instance_extension_count, it)
   1054 			instance_ext_s8s[it] = c_str_to_s8(instance_extensions[it].extensionName);
   1055 
   1056 		for EachElement(vk_instance_debug_extensions, it) {
   1057 			for EachIndex(instance_extension_count, i) {
   1058 				if (s8_equal(vk_instance_debug_extensions[it], instance_ext_s8s[i])) {
   1059 					u32 index = enabled_instance_extensions_count++;
   1060 					enabled_instance_extensions[index] = (char *)vk_instance_debug_extensions[it].data;
   1061 					vulkan_config.instance.E[it] = 1;
   1062 					break;
   1063 				}
   1064 			}
   1065 		}
   1066 	}
   1067 	#endif
   1068 
   1069 	VkApplicationInfo app_info = {
   1070 		.sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
   1071 		.pApplicationName   = BEAMFORMER_NAME_STRING,
   1072 		.applicationVersion = 0,
   1073 		.pEngineName        = "No Engine",
   1074 		.engineVersion      = 0,
   1075 		.apiVersion         = VK_MAKE_API_VERSION(1, 3, 0, 0),
   1076 	};
   1077 
   1078 	VkInstanceCreateInfo instance_create_info = {
   1079 		.sType                   = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
   1080 		.pApplicationInfo        = &app_info,
   1081 		.ppEnabledExtensionNames = enabled_instance_extensions,
   1082 		.enabledExtensionCount   = enabled_instance_extensions_count,
   1083 		.ppEnabledLayerNames     = enabled_validation_layers,
   1084 		.enabledLayerCount       = enabled_validation_layers_count,
   1085 	};
   1086 
   1087 	#if 0 && BEAMFORMER_DEBUG
   1088 	VkValidationFeatureEnableEXT validation_feature_enables[] = {
   1089 		VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
   1090 		VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT,
   1091 		VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT,
   1092 		VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
   1093 	};
   1094 
   1095 	VkValidationFeaturesEXT validation_features = {
   1096 		.sType                         = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
   1097 		.enabledValidationFeatureCount = countof(validation_feature_enables),
   1098 		.pEnabledValidationFeatures    = validation_feature_enables,
   1099 	};
   1100 
   1101 	instance_create_info.pNext = &validation_features;
   1102 	#endif
   1103 
   1104 	vkCreateInstance(&instance_create_info, 0, &vulkan_context->handle);
   1105 
   1106 	#define X(name, ...) name = (name##_fn *)vkGetInstanceProcAddr(vulkan_context->handle, #name);
   1107 	VkInstanceProcedureList
   1108 	#undef X
   1109 }
   1110 
   1111 function void
   1112 vk_load_physical_device(Arena arena, Stream *err)
   1113 {
   1114 	VulkanContext *vk = vulkan_context;
   1115 
   1116 	u32 device_count;
   1117 	vkEnumeratePhysicalDevices(vk->handle, &device_count, 0);
   1118 
   1119 	VkPhysicalDevice *devices = push_array(&arena, typeof(*devices), device_count);
   1120 	vkEnumeratePhysicalDevices(vk->handle, &device_count, devices);
   1121 
   1122 	i32 best_index = -1, best_score = -1;
   1123 	for (u32 i = 0; i < device_count; i++) {
   1124 		Arena scratch = arena;
   1125 		VkPhysicalDeviceProperties2 *dp = push_struct(&scratch, typeof(*dp));
   1126 		dp->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
   1127 		vkGetPhysicalDeviceProperties2(devices[i], dp);
   1128 
   1129 		i32 score = 0;
   1130 		if (dp->properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)
   1131 			score++;
   1132 
   1133 		if (score > best_score) {
   1134 			best_score = score;
   1135 			best_index = (i32)i;
   1136 		}
   1137 	}
   1138 
   1139 	vk->physical_device = best_index >= 0 ? devices[best_index] : 0;
   1140 	if (!vk->physical_device)
   1141 		fatal(vulkan_info("failed to find a suitable GPU\n"));
   1142 
   1143 	VkPhysicalDeviceProperties2        dp   = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
   1144 	VkPhysicalDeviceVulkan11Properties v11p = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES};
   1145 	dp.pNext = &v11p;
   1146 
   1147 	vkGetPhysicalDeviceProperties2(vk->physical_device, &dp);
   1148 
   1149 	stream_append_s8s(err, vulkan_info("selecting device: "), c_str_to_s8(dp.properties.deviceName), s8("\n"));
   1150 
   1151 	{
   1152 		Arena scratch = arena;
   1153 		u32 extension_count = 0;
   1154 		vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, 0);
   1155 		VkExtensionProperties *extensions = push_array(&scratch, VkExtensionProperties, extension_count);
   1156 		vkEnumerateDeviceExtensionProperties(vk->physical_device, 0, &extension_count, extensions);
   1157 
   1158 		s8 *ext_str8s = push_array(&scratch, s8, extension_count);
   1159 		for (u32 index = 0; index < extension_count; index++)
   1160 			ext_str8s[index] = c_str_to_s8(extensions[index].extensionName);
   1161 
   1162 		b8 *supported = push_array(&scratch, b8, countof(vk_required_device_extensions));
   1163 		for EachIndex(extension_count, index)
   1164 			for EachElement(vk_required_device_extensions, it)
   1165 				supported[it] |= s8_equal(vk_required_device_extensions[it], ext_str8s[index]);
   1166 
   1167 		u32 supported_count = 0;
   1168 		for EachElement(vk_required_device_extensions, it)
   1169 		 supported_count += supported[it];
   1170 
   1171 		u32 missing_count = countof(vk_required_device_extensions) - supported_count;
   1172 		if (missing_count) {
   1173 			stream_append_s8s(err, vulkan_info("fatal error: missing required device extension"),
   1174 			                  missing_count > 1 ? s8("s") : s8(""), s8(":\n"));
   1175 			for EachElement(vk_required_device_extensions, it) {
   1176 				if (!supported[it]) {
   1177 					s8 name = vk_required_device_extensions[it];
   1178 					stream_append_s8s(err, vulkan_info("    "), name, s8("\n"));
   1179 				}
   1180 			}
   1181 			fatal(stream_to_s8(err));
   1182 		}
   1183 
   1184 		for EachIndex(extension_count, index)
   1185 			for EachElement(vk_optional_device_extensions, it)
   1186 				vulkan_config.optional.E[it] |= s8_equal(vk_optional_device_extensions[it], ext_str8s[index]);
   1187 
   1188 		#if BEAMFORMER_DEBUG
   1189 		for EachIndex(extension_count, index)
   1190 			for EachElement(vk_debug_extensions, it)
   1191 				vulkan_config.debug.E[it] |= s8_equal(vk_debug_extensions[it], ext_str8s[index]);
   1192 		#endif
   1193 	}
   1194 
   1195 	{
   1196 		VkPhysicalDeviceFeatures2        df   = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
   1197 		VkPhysicalDeviceVulkan11Features v11f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES};
   1198 		VkPhysicalDeviceVulkan12Features v12f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
   1199 		VkPhysicalDeviceVulkan13Features v13f = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES};
   1200 		df.pNext   = &v11f;
   1201 		v11f.pNext = &v12f;
   1202 		v12f.pNext = &v13f;
   1203 		vkGetPhysicalDeviceFeatures2(vk->physical_device, &df);
   1204 
   1205 		{
   1206 			b32 all_supported = 1;
   1207 			#define X(name, ...) all_supported &= df.features.name;
   1208 			VK_REQUIRED_PHYSICAL_FEATURES
   1209 			#undef X
   1210 
   1211 			if (!all_supported) {
   1212 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1213 				#define X(name, ...) if (!df.features.name) stream_append_s8(err, s8("    " #name "\n"));
   1214 				VK_REQUIRED_PHYSICAL_FEATURES
   1215 				#undef X
   1216 				fatal(stream_to_s8(err));
   1217 			}
   1218 		}
   1219 
   1220 		{
   1221 			b32 all_supported = 1;
   1222 			#define X(name, ...) all_supported &= v11f.name;
   1223 			VK_REQUIRED_PHYSICAL_11_FEATURES
   1224 			#undef X
   1225 
   1226 			if (!all_supported) {
   1227 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1228 				#define X(name, ...) if (!v11f.name) stream_append_s8(err, s8("    " #name "\n"));
   1229 				VK_REQUIRED_PHYSICAL_11_FEATURES
   1230 				#undef X
   1231 				fatal(stream_to_s8(err));
   1232 			}
   1233 		}
   1234 
   1235 		{
   1236 			b32 all_supported = 1;
   1237 			#define X(name, ...) all_supported &= v12f.name;
   1238 			VK_REQUIRED_PHYSICAL_12_FEATURES
   1239 			#undef X
   1240 
   1241 			if (!all_supported) {
   1242 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1243 				#define X(name, ...) if (!v12f.name) stream_append_s8(err, s8("    " #name "\n"));
   1244 				VK_REQUIRED_PHYSICAL_12_FEATURES
   1245 				#undef X
   1246 				fatal(stream_to_s8(err));
   1247 			}
   1248 		}
   1249 
   1250 		{
   1251 			b32 all_supported = 1;
   1252 			#define X(name, ...) all_supported &= v13f.name;
   1253 			VK_REQUIRED_PHYSICAL_13_FEATURES
   1254 			#undef X
   1255 
   1256 			if (!all_supported) {
   1257 				stream_append_s8(err, vulkan_info("fatal error: missing physical device features:\n"));
   1258 				#define X(name, ...) if (!v13f.name) stream_append_s8(err, s8("    " #name "\n"));
   1259 				VK_REQUIRED_PHYSICAL_13_FEATURES
   1260 				#undef X
   1261 				fatal(stream_to_s8(err));
   1262 			}
   1263 		}
   1264 
   1265 		if (vulkan_config.optional.cooperative_matrix) {
   1266 			Arena scratch = arena;
   1267 			u32 property_count = 0;
   1268 			vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, 0);
   1269 
   1270 			VkCooperativeMatrixPropertiesKHR *mat = push_array(&scratch, VkCooperativeMatrixPropertiesKHR, property_count);
   1271 
   1272 			// NOTE(rnp): validation layer stupidity
   1273 			for EachIndex(property_count, it)
   1274 				mat[it].sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
   1275 
   1276 			vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR(vk->physical_device, &property_count, mat);
   1277 			b32 supported = 0;
   1278 			// TODO(rnp): for now the requirements are hardcoded, it is possible to support a couple
   1279 			// variations if needed.
   1280 			for EachIndex(property_count, it) {
   1281 				b32 match = 1;
   1282 				supported &= mat[it].scope == VK_SCOPE_SUBGROUP_KHR;
   1283 
   1284 				supported &= mat[it].MSize == 16;
   1285 				supported &= mat[it].NSize == 16;
   1286 				supported &= mat[it].KSize == 16;
   1287 
   1288 				supported &= mat[it].AType == VK_COMPONENT_TYPE_FLOAT16_KHR;
   1289 				supported &= mat[it].BType == VK_COMPONENT_TYPE_FLOAT16_KHR;
   1290 				supported &= mat[it].CType == VK_COMPONENT_TYPE_FLOAT32_KHR;
   1291 				supported &= mat[it].ResultType == VK_COMPONENT_TYPE_FLOAT32_KHR;
   1292 
   1293 				supported |= match;
   1294 			}
   1295 			vk->gpu_info.cooperative_matrix = supported;
   1296 		}
   1297 	}
   1298 
   1299 	VkPhysicalDeviceMemoryProperties2 mp = {.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2};
   1300 	vkGetPhysicalDeviceMemoryProperties2(vk->physical_device, &mp);
   1301 
   1302 	VkPhysicalDeviceMemoryProperties *bmp = &mp.memoryProperties;
   1303 
   1304 	// NOTE(rnp): vulkan spec says that highest performance memory types must
   1305 	// come first. just take the first one found.
   1306 
   1307 	for (u32 i = 0; i < bmp->memoryHeapCount; i++) {
   1308 		if (bmp->memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
   1309 			vk->memory_info.gpu_heap_index = i;
   1310 			break;
   1311 		}
   1312 	}
   1313 
   1314 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1315 		if (bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
   1316 			assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index);
   1317 			vk->memory_info.memory_type_indices[VulkanMemoryKind_Device] = i;
   1318 			break;
   1319 		}
   1320 	}
   1321 
   1322 	// TODO(rnp): it is possible that this isn't available. for devices like that we would need
   1323 	// to copy into a staging buffer then DMA. For now that is unsupported.
   1324 	u32 bar_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
   1325 	i32 bar_index = -1;
   1326 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1327 		if ((bmp->memoryTypes[i].propertyFlags & bar_flags) == bar_flags) {
   1328 			assert(bmp->memoryTypes[i].heapIndex == vk->memory_info.gpu_heap_index);
   1329 			bar_index = (i32)i;
   1330 			break;
   1331 		}
   1332 	}
   1333 
   1334 	// TODO(rnp): this shouldn't be fatal
   1335 	if (bar_index == -1) {
   1336 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support host bar memory\n"));
   1337 		fatal(stream_to_s8(err));
   1338 	}
   1339 
   1340 	vk->memory_info.memory_type_indices[VulkanMemoryKind_BAR] = bar_index;
   1341 
   1342 	for (u32 i = 0; i < bmp->memoryTypeCount; i++) {
   1343 		if ((bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == 0) {
   1344 			assert(bmp->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
   1345 			vk->memory_info.memory_type_indices[VulkanMemoryKind_Host] = i;
   1346 			break;
   1347 		}
   1348 	}
   1349 
   1350 	for EachElement(vk->memory_info.memory_type_indices, it) {
   1351 		u32 ti    = vk->memory_info.memory_type_indices[it];
   1352 		u32 flags = bmp->memoryTypes[ti].propertyFlags;
   1353 		vk->memory_info.memory_host_coherent[it] = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
   1354 	}
   1355 
   1356 	vk->memory_info.max_allocation_size    = v11p.maxMemoryAllocationSize;
   1357 	vk->memory_info.non_coherent_atom_size = dp.properties.limits.nonCoherentAtomSize;
   1358 	vk->gpu_info.vendor                    = dp.properties.vendorID;
   1359 	vk->gpu_info.gpu_heap_size             = bmp->memoryHeaps[vk->memory_info.gpu_heap_index].size;
   1360 	vk->gpu_info.timestamp_period_ns       = dp.properties.limits.timestampPeriod;
   1361 	vk->gpu_info.max_image_dimension_2D    = dp.properties.limits.maxImageDimension2D;
   1362 	vk->gpu_info.max_image_dimension_3D    = dp.properties.limits.maxImageDimension3D;
   1363 	vk->gpu_info.max_msaa_samples          = round_down_power_of_two(dp.properties.limits.framebufferColorSampleCounts);
   1364 	vk->gpu_info.subgroup_size             = v11p.subgroupSize;
   1365 	vk->gpu_info.max_compute_shared_memory_size = dp.properties.limits.maxComputeSharedMemorySize;
   1366 
   1367 	// IMPORTANT(rnp): memory must only be pushed at the end of the function
   1368 	vk->gpu_info.name = push_s8(&vk->arena, c_str_to_s8(dp.properties.deviceName));
   1369 }
   1370 
   1371 function void
   1372 vk_load_queues(Arena *memory, Stream *err)
   1373 {
   1374 	///////////////////////////////////////////////////////
   1375 	// NOTE(rnp): try to allocate an appropriate queue for
   1376 	// each of the following tasks:
   1377 	//   * UI Rendering (Graphics)
   1378 	//   * Beamforming  (Compute)
   1379 	//   * Upload       (Transfer)
   1380 	// Then create a logical device ready for use
   1381 
   1382 	VulkanContext *vk = vulkan_context;
   1383 
   1384 	u32 queue_family_count;
   1385 	vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, 0);
   1386 
   1387 	TempArena arena_save = begin_temp_arena(memory);
   1388 	VkQueueFamilyProperties *queues = push_array(memory, typeof(*queues), queue_family_count);
   1389 	vkGetPhysicalDeviceQueueFamilyProperties(vk->physical_device, &queue_family_count, queues);
   1390 
   1391 	i32 queue_indices[VulkanQueueKind_Count];
   1392 	for EachElement(queue_indices, it) queue_indices[it] = -1;
   1393 
   1394 	///////////////////////////////////////////////////////////////
   1395 	// NOTE(rnp): start by assigning queue families for each queue
   1396 
   1397 	/* NOTE(rnp): try for exclusive transfer queue */
   1398 	#if !ForceSingleQueue
   1399 	{
   1400 		u32 mask = VK_QUEUE_GRAPHICS_BIT|VK_QUEUE_COMPUTE_BIT|VK_QUEUE_TRANSFER_BIT;
   1401 		u32 max_timestamp_bits = 0;
   1402 		for (u32 index = 0; index < queue_family_count; index++) {
   1403 			if ((queues[index].queueFlags & mask) == VK_QUEUE_TRANSFER_BIT) {
   1404 				if (queues[index].timestampValidBits > max_timestamp_bits) {
   1405 					max_timestamp_bits = queues[index].timestampValidBits;
   1406 					queue_indices[VulkanQueueKind_Transfer] = (i32)index;
   1407 				}
   1408 			}
   1409 		}
   1410 	}
   1411 
   1412 	/* NOTE(rnp): try for compute separate from graphics */
   1413 	for (u32 index = 0; index < queue_family_count; index++) {
   1414 		if ((queues[index].queueFlags & VK_QUEUE_COMPUTE_BIT)  != 0 &&
   1415 		    (queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) == 0)
   1416 		{
   1417 			queue_indices[VulkanQueueKind_Compute] = (i32)index;
   1418 			break;
   1419 		}
   1420 	}
   1421 	#endif /* !ForceSingleQueue */
   1422 
   1423 	/* NOTE(rnp): find graphics family and verify it is exclusive */
   1424 	b32 multi_graphics = 0;
   1425 	for (u32 index = 0; index < queue_family_count; index++) {
   1426 		if ((queues[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) {
   1427 			// TODO(rnp): check for presentation support
   1428 			multi_graphics = queue_indices[VulkanQueueKind_Graphics] != -1;
   1429 			queue_indices[VulkanQueueKind_Graphics] = (i32)index;
   1430 		}
   1431 	}
   1432 
   1433 	if (multi_graphics)
   1434 		stream_append_s8(err, vulkan_info("warning: multiple queue families reported graphics support\n"));
   1435 
   1436 	if (queue_indices[VulkanQueueKind_Graphics] == -1) {
   1437 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support graphics presentation\n"));
   1438 		fatal(stream_to_s8(err));
   1439 	}
   1440 
   1441 	if (queue_indices[VulkanQueueKind_Compute] == -1)
   1442 		if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0)
   1443 			queue_indices[VulkanQueueKind_Compute] = queue_indices[VulkanQueueKind_Graphics];
   1444 
   1445 	if (queue_indices[VulkanQueueKind_Compute] == -1) {
   1446 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support compute\n"));
   1447 		fatal(stream_to_s8(err));
   1448 	}
   1449 
   1450 	if (queue_indices[VulkanQueueKind_Transfer] == -1) {
   1451 		if ((queues[queue_indices[VulkanQueueKind_Compute]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
   1452 			queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Compute];
   1453 		else if ((queues[queue_indices[VulkanQueueKind_Graphics]].queueFlags & VK_QUEUE_TRANSFER_BIT) != 0)
   1454 			queue_indices[VulkanQueueKind_Transfer] = queue_indices[VulkanQueueKind_Graphics];
   1455 	}
   1456 
   1457 	if (queue_indices[VulkanQueueKind_Transfer] == -1) {
   1458 		stream_append_s8(err, vulkan_info("fatal error: GPU does not support data transfer\n"));
   1459 		fatal(stream_to_s8(err));
   1460 	}
   1461 
   1462 	/////////////////////////////////////////////////////////////////
   1463 	// NOTE(rnp): if queues share families try to allocate subqueues
   1464 
   1465 	u32 assigned_subindices[VulkanQueueKind_Count] = {0};
   1466 	i32 queue_subindices[VulkanQueueKind_Count]    = {0};
   1467 
   1468 	assigned_subindices[VulkanQueueKind_Graphics] += 1;
   1469 
   1470 	if (queue_indices[VulkanQueueKind_Compute] == queue_indices[VulkanQueueKind_Graphics]) {
   1471 		if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount)
   1472 			queue_subindices[VulkanQueueKind_Compute] = assigned_subindices[VulkanQueueKind_Graphics]++;
   1473 	} else {
   1474 		assigned_subindices[VulkanQueueKind_Compute] += 1;
   1475 	}
   1476 
   1477 	if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Graphics]) {
   1478 		if (assigned_subindices[VulkanQueueKind_Graphics] < queues[queue_indices[VulkanQueueKind_Graphics]].queueCount)
   1479 			queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Graphics]++;
   1480 	} else if (queue_indices[VulkanQueueKind_Transfer] == queue_indices[VulkanQueueKind_Compute]) {
   1481 		if (assigned_subindices[VulkanQueueKind_Compute] < queues[queue_indices[VulkanQueueKind_Compute]].queueCount)
   1482 			queue_subindices[VulkanQueueKind_Transfer] = assigned_subindices[VulkanQueueKind_Compute]++;
   1483 	} else {
   1484 		assigned_subindices[VulkanQueueKind_Transfer] += 1;
   1485 	}
   1486 
   1487 	for EachElement(assigned_subindices, it)
   1488 		vk->unique_queues += assigned_subindices[it];
   1489 
   1490 	end_temp_arena(arena_save);
   1491 
   1492 	/////////////////////////////////////////////
   1493 	// NOTE(rnp): fill in info and create device
   1494 	for EachElement(vk->queues, it) {
   1495 		u32 index = queue_subindices[it];
   1496 		for (i32 i = 0; i < queue_indices[it]; i++)
   1497 			index += assigned_subindices[i];
   1498 		vk->queue_indices[it] = index;
   1499 	}
   1500 
   1501 	for EachElement(vk->queues, it) {
   1502 		if (vk->queues[vk->queue_indices[it]] == 0) {
   1503 			vk->queues[vk->queue_indices[it]] = push_struct(memory, VulkanQueue);
   1504 			vk->queues[vk->queue_indices[it]]->queue_family = queue_indices[it];
   1505 			vk->queues[vk->queue_indices[it]]->queue_index  = queue_subindices[it];
   1506 		}
   1507 		vk->queues[it] = vk->queues[vk->queue_indices[it]];
   1508 	}
   1509 
   1510 	for EachElement(vk->command_pools, it)
   1511 		vk->command_pools[it] = push_struct(memory, VulkanCommandPool);
   1512 
   1513 	VkDeviceQueueCreateInfo queue_create_infos[VulkanQueueKind_Count];
   1514 
   1515 	f32 queue_priorities[VulkanQueueKind_Count][VulkanQueueKind_Count];
   1516 	for (u32 i = 0; i < VulkanQueueKind_Count; i++)
   1517 		for (u32 j = 0; j < VulkanQueueKind_Count; j++)
   1518 			queue_priorities[i][j] = 1.0f;
   1519 	queue_priorities[queue_indices[VulkanQueueKind_Compute]][queue_subindices[VulkanQueueKind_Compute]] = 0.5f;
   1520 
   1521 	u32 queue_create_index = 0;
   1522 	b32 queue_info_filled[VulkanQueueKind_Count] = {0};
   1523 	for (u32 q = 0; q < vk->unique_queues; q++) {
   1524 		u32 base_q = queue_indices[q];
   1525 		if (!queue_info_filled[base_q]) {
   1526 			queue_create_infos[queue_create_index++] = (VkDeviceQueueCreateInfo){
   1527 				.sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
   1528 				.queueFamilyIndex = base_q,
   1529 				.queueCount       = assigned_subindices[q],
   1530 				.pQueuePriorities = queue_priorities[q],
   1531 			};
   1532 		}
   1533 		queue_info_filled[base_q] = 1;
   1534 	}
   1535 
   1536 	u32 enabled_count = 0;
   1537 	const char *enabled_extensions[MAX_ENABLED_EXTENSIONS];
   1538 
   1539 	for EachElement(vk_required_device_extensions, it)
   1540 		enabled_extensions[enabled_count++] = (char *)vk_required_device_extensions[it].data;
   1541 
   1542 	for EachElement(vk_optional_device_extensions, it)
   1543 		if (vulkan_config.optional.E[it])
   1544 			enabled_extensions[enabled_count++] = (char *)vk_optional_device_extensions[it].data;
   1545 
   1546 	for EachElement(vk_debug_extensions, it)
   1547 		if (vulkan_config.debug.E[it])
   1548 			enabled_extensions[enabled_count++] = (char *)vk_debug_extensions[it].data;
   1549 
   1550 	VkDeviceCreateInfo device_create_info = {
   1551 		.sType                   = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
   1552 		.pQueueCreateInfos       = queue_create_infos,
   1553 		.queueCreateInfoCount    = queue_create_index,
   1554 		.ppEnabledExtensionNames = enabled_extensions,
   1555 		.enabledExtensionCount   = enabled_count,
   1556 	};
   1557 
   1558 	VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR pdsre = {
   1559 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR,
   1560 		.shaderRelaxedExtendedInstruction = 1,
   1561 	};
   1562 	if (vulkan_config.debug.shader_relaxed_extended_instruction) {
   1563 		pdsre.pNext = (void *)device_create_info.pNext;
   1564 		device_create_info.pNext = &pdsre;
   1565 	}
   1566 
   1567 	VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_mat_features = {
   1568 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR,
   1569 		.cooperativeMatrix = 1,
   1570 		.cooperativeMatrixRobustBufferAccess = 0,
   1571 	};
   1572 	if (vk->gpu_info.cooperative_matrix) {
   1573 		coop_mat_features.pNext = (void *)device_create_info.pNext;
   1574 		device_create_info.pNext = &coop_mat_features;
   1575 	}
   1576 
   1577 	VkPhysicalDeviceVulkan13Features v13f = {
   1578 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   1579 		.pNext = (void *)device_create_info.pNext,
   1580 		#define X(name, ...) .name = 1,
   1581 		VK_REQUIRED_PHYSICAL_13_FEATURES
   1582 		#undef X
   1583 	};
   1584 	device_create_info.pNext = &v13f;
   1585 
   1586 	VkPhysicalDeviceVulkan12Features v12f = {
   1587 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   1588 		.pNext = (void *)device_create_info.pNext,
   1589 		#define X(name, ...) .name = 1,
   1590 		VK_REQUIRED_PHYSICAL_12_FEATURES
   1591 		#undef X
   1592 	};
   1593 	device_create_info.pNext = &v12f;
   1594 
   1595 	VkPhysicalDeviceVulkan11Features v11f = {
   1596 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   1597 		.pNext = (void *)device_create_info.pNext,
   1598 		#define X(name, ...) .name = 1,
   1599 		VK_REQUIRED_PHYSICAL_11_FEATURES
   1600 		#undef X
   1601 	};
   1602 	device_create_info.pNext = &v11f;
   1603 
   1604 	VkPhysicalDeviceFeatures2 device_features = {
   1605 		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
   1606 		.pNext = (void *)device_create_info.pNext,
   1607 		.features = {
   1608 			#define X(name, ...) .name = 1,
   1609 			VK_REQUIRED_PHYSICAL_FEATURES
   1610 			#undef X
   1611 		},
   1612 	};
   1613 	device_create_info.pNext = &device_features;
   1614 
   1615 	vkCreateDevice(vk->physical_device, &device_create_info, 0, &vk->device);
   1616 
   1617 	#define X(name, ...) name = (name##_fn *)vkGetDeviceProcAddr(vk->device, #name);
   1618 	VkDeviceProcedureList
   1619 	#undef X
   1620 
   1621 	for (u32 q = 0; q < vk->unique_queues; q++) {
   1622 		VulkanQueue *qp = vk->queues[q];
   1623 		vkGetDeviceQueue(vk->device, qp->queue_family, qp->queue_index, &qp->queue);
   1624 
   1625 		qp->timeline_semaphore = vk_make_semaphore(0);
   1626 	}
   1627 
   1628 	vk->queues[VulkanQueueKind_Graphics]->pipeline_stage_flags |= VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT;
   1629 	vk->queues[VulkanQueueKind_Compute]->pipeline_stage_flags  |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
   1630 
   1631 	for EachElement(vk->command_pools, it) {
   1632 		VulkanCommandPool *vcp = vk->command_pools[it];
   1633 
   1634 		VkCommandPoolCreateInfo command_pool_create_info = {
   1635 			.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
   1636 			.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   1637 			.queueFamilyIndex = vk->queues[it]->queue_family,
   1638 		};
   1639 
   1640 		vkCreateCommandPool(vk->device, &command_pool_create_info, 0, &vcp->handle);
   1641 
   1642 		VkCommandBufferAllocateInfo command_buffer_allocate_info = {
   1643 			.sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
   1644 			.commandPool        = vcp->handle,
   1645 			.level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
   1646 			.commandBufferCount = countof(vcp->buffers),
   1647 		};
   1648 		vkAllocateCommandBuffers(vk->device, &command_buffer_allocate_info, vcp->buffers);
   1649 
   1650 		VkQueryPoolCreateInfo query_pool_create_info = {
   1651 			.sType      = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
   1652 			.queryType  = VK_QUERY_TYPE_TIMESTAMP,
   1653 			.queryCount = MaxCommandBuffersInFlight * MaxCommandBufferTimestamps,
   1654 		};
   1655 		vkCreateQueryPool(vk->device, &query_pool_create_info, 0, &vcp->query_pool);
   1656 	}
   1657 }
   1658 
   1659 function void
   1660 vk_load_graphics(void)
   1661 {
   1662 	VulkanContext *vk = vulkan_context;
   1663 
   1664 	// NOTE: swap chain image format
   1665 	{
   1666 	}
   1667 
   1668 	// NOTE: depth/stencil format
   1669 	{
   1670 		VkFormat depth_formats[] = {
   1671 			VK_FORMAT_D32_SFLOAT_S8_UINT,
   1672 			VK_FORMAT_D24_UNORM_S8_UINT,
   1673 			VK_FORMAT_D16_UNORM_S8_UINT,
   1674 		};
   1675 
   1676 		vk->depth_stencil_format = VK_FORMAT_UNDEFINED;
   1677 		for EachElement(depth_formats, it) {
   1678 			VkFormatProperties3 format_properties3 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3};
   1679 			VkFormatProperties2 format_properties2 = {
   1680 				.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
   1681 				.pNext = &format_properties3,
   1682 			};
   1683 			vkGetPhysicalDeviceFormatProperties2(vk->physical_device, depth_formats[it], &format_properties2);
   1684 			if (format_properties3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT) {
   1685 				vk->depth_stencil_format = depth_formats[it];
   1686 				break;
   1687 			}
   1688 		}
   1689 	}
   1690 }
   1691 
   1692 function void
   1693 vk_load_descriptor_block(void)
   1694 {
   1695 	// NOTE(rnp):
   1696 	// * One Descriptor Pool
   1697 	// * One Descriptor Set Per Resource Kind
   1698 	// * Shaders know the ResourceKind enumeration
   1699 	// * Shaders know the per set binding points
   1700 
   1701 	VulkanContext *vk = vulkan_context;
   1702 
   1703 	// NOTE(rnp): Pool
   1704 	VkDescriptorPoolSize pool_sizes[] = {
   1705 		{
   1706 			.type            = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
   1707 			.descriptorCount = BeamformerShaderBufferSlot_Count,
   1708 		},
   1709 	};
   1710 	static_assert(countof(pool_sizes) == BeamformerShaderResourceKind_Count, "");
   1711 
   1712 	VkDescriptorPoolCreateInfo pool_create_info = {
   1713 		.sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
   1714 		.maxSets       = BeamformerShaderResourceKind_Count,
   1715 		.poolSizeCount = countof(pool_sizes),
   1716 		.pPoolSizes    = pool_sizes,
   1717 	};
   1718 
   1719 	vkCreateDescriptorPool(vk->device, &pool_create_info, 0, &vk->descriptor_pool);
   1720 
   1721 	// NOTE(rnp): Set Layouts
   1722 	VkDescriptorSetLayoutCreateInfo layout_create_info = {
   1723 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
   1724 	};
   1725 
   1726 	{
   1727 		VkDescriptorSetLayoutBinding layout_bindings[BeamformerShaderBufferSlot_Count];
   1728 		for EachEnumValue(BeamformerShaderBufferSlot, it) {
   1729 			layout_bindings[it] = (VkDescriptorSetLayoutBinding){
   1730 				.binding         = it,
   1731 				.descriptorType  = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
   1732 				.descriptorCount = 1,
   1733 				.stageFlags      = VK_SHADER_STAGE_ALL,
   1734 			};
   1735 		}
   1736 		layout_create_info.bindingCount = countof(layout_bindings),
   1737 		layout_create_info.pBindings    = layout_bindings,
   1738 		vkCreateDescriptorSetLayout(vk->device, &layout_create_info, 0,
   1739 		                            vk->descriptor_set_layouts + BeamformerShaderResourceKind_Buffer);
   1740 	}
   1741 
   1742 	// NOTE(rnp): Sets
   1743 	VkDescriptorSetAllocateInfo set_allocate_info = {
   1744 		.sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
   1745 		.descriptorPool     = vk->descriptor_pool,
   1746 		.descriptorSetCount = countof(vk->descriptor_sets),
   1747 		.pSetLayouts        = vk->descriptor_set_layouts,
   1748 	};
   1749 	static_assert(countof(vk->descriptor_set_layouts) == countof(vk->descriptor_sets), "");
   1750 	vkAllocateDescriptorSets(vk->device, &set_allocate_info, vk->descriptor_sets);
   1751 
   1752 	vk_label_object(DESCRIPTOR_POOL, vk->descriptor_pool, s8("Beamformer Resources"), s8("Pool"));
   1753 
   1754 	DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock)) {
   1755 		Arena scratch = vk->arena;
   1756 		for EachElement(vk->descriptor_sets, it) {
   1757 			Stream sb = arena_stream(scratch);
   1758 			stream_append_s8s(&sb, s8("Beamformer "), beamformer_shader_resource_kind_strings[it], s8("s"));
   1759 			vk_label_object(DESCRIPTOR_SET,        vk->descriptor_sets[it],        stream_to_s8(&sb), s8("Set"));
   1760 			vk_label_object(DESCRIPTOR_SET_LAYOUT, vk->descriptor_set_layouts[it], stream_to_s8(&sb), s8("Set Layout"));
   1761 		}
   1762 	}
   1763 }
   1764 
   1765 ///////////////////////
   1766 // NOTE(rnp): User API
   1767 
   1768 DEBUG_IMPORT void
   1769 vk_load(OSLibrary vulkan_library_handle, Arena *memory, Stream *err)
   1770 {
   1771 	#define X(name, ...) name = (name##_fn *)os_lookup_symbol(vulkan_library_handle, #name);
   1772 	VkLoaderProcedureList
   1773 	#undef X
   1774 
   1775 	if (!vkGetInstanceProcAddr) {
   1776 		stream_append_s8(err, vulkan_info("fatal error: failed to find \"vkGetInstanceProcAddr\"\n"));
   1777 		fatal(stream_to_s8(err));
   1778 	}
   1779 
   1780 	VulkanContext *vk = vulkan_context;
   1781 	vk->entity_arena = sub_arena_end(memory, KB(64), KB(4));
   1782 	vk->arena        = sub_arena_end(memory, KB(96), KB(4));
   1783 
   1784 	vk_load_instance(vk->arena, err);
   1785 	vk_load_physical_device(vk->arena, err);
   1786 	vk_load_queues(&vk->arena, err);
   1787 	vk_load_graphics();
   1788 	vk_load_descriptor_block();
   1789 
   1790 	read_only local_persist s8 default_compute_shader = s8(""
   1791 		"#version 430 core\n"
   1792 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1793 		"void main() {}\n"
   1794 		"\n");
   1795 	vk->default_compute_pipeline = vk_compute_pipeline_from_shader_text(vk->arena, default_compute_shader,
   1796 	                                                                    s8("error_compute_shader"), 256);
   1797 
   1798 	read_only local_persist s8 default_vertex_shader = s8(""
   1799 		"#version 430 core\n"
   1800 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1801 		"void main() {gl_Position = vec4(0);}\n"
   1802 		"\n");
   1803 	read_only local_persist s8 default_fragment_shader = s8(""
   1804 		"#version 430 core\n"
   1805 		"layout(location = 0) out vec4 out_colour;"
   1806 		"layout(push_constant) uniform pc { uint data[256 / 4]; };\n"
   1807 		"void main() {out_colour = vec4(0.5f, 0.0f, 0.5f, 1.0f);}\n"
   1808 		"\n");
   1809 
   1810 	VulkanPipelineCreateInfo pipeline_create_infos[2] = {
   1811 		{
   1812 			.kind = VulkanShaderKind_Vertex,
   1813 			.text = default_vertex_shader,
   1814 			.name = s8("error_vertex_shader"),
   1815 		},
   1816 		{
   1817 			.kind = VulkanShaderKind_Fragment,
   1818 			.text = default_fragment_shader,
   1819 			.name = s8("error_fragment_shader"),
   1820 		},
   1821 	};
   1822 	vk->default_graphics_pipeline = vk_graphics_pipeline_from_infos(vk->arena, pipeline_create_infos, 2, 256);
   1823 
   1824 	// TODO: setup ui render pipeline
   1825 
   1826 	if (err->widx > 0) {
   1827 		os_console_log(err->data, err->widx);
   1828 		stream_reset(err, 0);
   1829 	}
   1830 }
   1831 
   1832 DEBUG_IMPORT GPUInfo *
   1833 vk_gpu_info(void)
   1834 {
   1835 	return &vulkan_context->gpu_info;
   1836 }
   1837 
   1838 function void
   1839 vk_vulkan_buffer_release(VulkanBuffer *vb)
   1840 {
   1841 	VulkanContext *vk = vulkan_context;
   1842 	VulkanEntity  *e  = (VulkanEntity *)((u8 *)vb - offsetof(VulkanEntity, as));
   1843 	// TODO(rnp): this happens implicitly, probably just delete this if block
   1844 	if (vb->host_pointer)
   1845 		vkUnmapMemory(vk->device, vb->memory);
   1846 
   1847 	if (vb->buffer)
   1848 		vkDestroyBuffer(vk->device, vb->buffer, 0);
   1849 
   1850 	vk_release_memory(vb->memory, vb->memory_kind != VulkanMemoryKind_Host ? vb->memory_size : 0);
   1851 	vk_entity_release(e);
   1852 }
   1853 
   1854 DEBUG_IMPORT void
   1855 vk_buffer_release(GPUBuffer *b)
   1856 {
   1857 	if ValidVulkanHandle(b->handle)
   1858 		vk_vulkan_buffer_release(vk_entity_data(b->handle, VulkanEntityKind_Buffer));
   1859 	zero_struct(b);
   1860 }
   1861 
   1862 DEBUG_IMPORT void
   1863 vk_buffer_allocate(GPUBuffer *b, GPUBufferAllocateInfo *info)
   1864 {
   1865 	VulkanContext *vk = vulkan_context;
   1866 
   1867 	vk_buffer_release(b);
   1868 
   1869 	assert(info->size > 0);
   1870 
   1871 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Buffer);
   1872 	VulkanBufferAllocateInfo vulkan_buffer_allocate_info = {
   1873 		.gpu_buffer = b,
   1874 		.size       = (u64)info->size,
   1875 		.flags      = info->flags,
   1876 		.index_type = VK_INDEX_TYPE_NONE_KHR,
   1877 		.label      = info->label,
   1878 	};
   1879 
   1880 	u32 queue_index_hit_count[VulkanQueueKind_Count] = {0};
   1881 	for (u32 it = 0; it < info->timeline_count; it++)
   1882 		queue_index_hit_count[vk->queue_indices[info->timelines_used[it]]]++;
   1883 
   1884 	for EachElement(queue_index_hit_count, it) {
   1885 		if (queue_index_hit_count[it] > 0) {
   1886 			u32 index = vulkan_buffer_allocate_info.queue_family_count++;
   1887 			vulkan_buffer_allocate_info.queue_family_indices[index] = vk->queues[vk->queue_indices[it]]->queue_family;
   1888 		}
   1889 	}
   1890 
   1891 	if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) {
   1892 		b->handle.value[0] = (u64)e;
   1893 	} else {
   1894 		vk_entity_release(e);
   1895 	}
   1896 }
   1897 
   1898 DEBUG_IMPORT b32
   1899 vk_buffer_needs_sync(GPUBuffer *b)
   1900 {
   1901 	b32 result = 0;
   1902 	if ValidVulkanHandle(b->handle) {
   1903 		VulkanBuffer *vb = vk_entity_data(b->handle, VulkanEntityKind_Buffer);
   1904 
   1905 		// TODO(rnp): not correct check. need to check if we used transfer queue
   1906 		result = vb->memory_kind != VulkanMemoryKind_BAR;
   1907 	}
   1908 
   1909 	return result;
   1910 }
   1911 
   1912 DEBUG_IMPORT u64
   1913 vk_round_up_to_sync_size(u64 size, u64 min)
   1914 {
   1915 	iz  round  = (iz)Max(min, vulkan_context->memory_info.non_coherent_atom_size);
   1916 	u64 result = (u64)round_up_to((iz)size, round);
   1917 	return result;
   1918 }
   1919 
   1920 function force_inline void
   1921 vk_buffer_buffer_copy(VulkanBuffer *destination, VulkanBuffer *source, u64 destination_offset, u64 source_offset, u64 size, b32 non_temporal)
   1922 {
   1923 	VulkanContext *vk = vulkan_context;
   1924 
   1925 	switch (source->memory_kind) {
   1926 	case VulkanMemoryKind_BAR:
   1927 	{
   1928 		switch (destination->memory_kind) {
   1929 		case VulkanMemoryKind_Host:{
   1930 			if (destination->memory) {
   1931 				// TODO(rnp): there is likely a more efficient way of doing this in this case
   1932 				InvalidCodePath;
   1933 			} else {
   1934 				assert(source->host_pointer);
   1935 				b32 coherent = vk->memory_info.memory_host_coherent[source->memory_kind];
   1936 				if (!coherent) {
   1937 					u64 nca_size = vk->memory_info.non_coherent_atom_size;
   1938 					VkMappedMemoryRange mrs[1] = {{
   1939 						.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   1940 						.memory = source->memory,
   1941 						.offset = source_offset - (source_offset % nca_size),
   1942 						.size   = vk_round_up_to_sync_size(size, nca_size),
   1943 					}};
   1944 					vkInvalidateMappedMemoryRanges(vk->device, countof(mrs), mrs);
   1945 				}
   1946 
   1947 				void *dest = (u8 *)destination->host_pointer + destination_offset;
   1948 				void *src  = (u8 *)source->host_pointer + source_offset;
   1949 
   1950 				// NOTE(rnp): don't trash the CPU cache for large data stores
   1951 				if (non_temporal) memory_copy_non_temporal(dest, src, size);
   1952 				else              mem_copy(dest, src, size);
   1953 			}
   1954 		}break;
   1955 		InvalidDefaultCase;
   1956 		}
   1957 	}break;
   1958 
   1959 	case VulkanMemoryKind_Host:{
   1960 		switch (destination->memory_kind) {
   1961 		case VulkanMemoryKind_BAR:{
   1962 			assert(destination->host_pointer);
   1963 
   1964 			void *dest = (u8 *)destination->host_pointer + destination_offset;
   1965 			void *src  = (u8 *)source->host_pointer + source_offset;
   1966 
   1967 			// NOTE(rnp): don't trash the CPU cache for large data stores
   1968 			if (non_temporal) memory_copy_non_temporal(dest, src, size);
   1969 			else              mem_copy(dest, src, size);
   1970 
   1971 			b32 coherent = vk->memory_info.memory_host_coherent[destination->memory_kind];
   1972 			if (!coherent) {
   1973 				u64 nca_size = vk->memory_info.non_coherent_atom_size;
   1974 				VkMappedMemoryRange mrs[1] = {{
   1975 					.sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
   1976 					.memory = destination->memory,
   1977 					.offset = destination_offset - (destination_offset % nca_size),
   1978 					.size   = vk_round_up_to_sync_size(size, nca_size),
   1979 				}};
   1980 				vkFlushMappedMemoryRanges(vk->device, countof(mrs), mrs);
   1981 			}
   1982 		}break;
   1983 		InvalidDefaultCase;
   1984 
   1985 		}
   1986 	}break;
   1987 
   1988 	// TODO(rnp): use transfer queue when not mapped
   1989 	InvalidDefaultCase;
   1990 	}
   1991 }
   1992 
   1993 DEBUG_IMPORT void
   1994 vk_buffer_range_upload(GPUBuffer *b, void *data, u64 offset, u64 size, b32 non_temporal)
   1995 {
   1996 	VulkanBuffer *db = vk_entity_data(b->handle, VulkanEntityKind_Buffer);
   1997 	VulkanBuffer  sb = {
   1998 		.host_pointer = data,
   1999 		.memory_kind  = VulkanMemoryKind_Host,
   2000 	};
   2001 	vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal);
   2002 }
   2003 
   2004 DEBUG_IMPORT void
   2005 vk_buffer_range_download(void *destination, GPUBuffer *source, u64 offset, u64 size, b32 non_temporal)
   2006 {
   2007 	VulkanBuffer *sb = vk_entity_data(source->handle, VulkanEntityKind_Buffer);
   2008 	VulkanBuffer  db = {
   2009 		.host_pointer = destination,
   2010 		.memory_kind  = VulkanMemoryKind_Host,
   2011 	};
   2012 	vk_buffer_buffer_copy(&db, sb, 0, offset, size, non_temporal);
   2013 }
   2014 
   2015 DEBUG_IMPORT void
   2016 vk_render_model_release(GPUBuffer *model)
   2017 {
   2018 	if ValidVulkanHandle(model->handle)
   2019 		vk_vulkan_buffer_release(vk_entity_data(model->handle, VulkanEntityKind_RenderModel));
   2020 	zero_struct(model);
   2021 }
   2022 
   2023 DEBUG_IMPORT void
   2024 vk_render_model_allocate(GPUBuffer *model, void *indices, u64 index_count, u64 model_size, s8 label)
   2025 {
   2026 	vk_render_model_release(model);
   2027 
   2028 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_RenderModel);
   2029 
   2030 	assert(index_count <= U32_MAX);
   2031 	VkIndexType index_type;
   2032 	if (index_count <= U16_MAX) index_type = VK_INDEX_TYPE_UINT16;
   2033 	else                        index_type = VK_INDEX_TYPE_UINT32;
   2034 
   2035 	i64 indices_size = round_up_to(vk_index_size(index_type) * index_count, 64);
   2036 
   2037 	i64 size = round_up_to(model_size + indices_size, 64);
   2038 	assert(size > 0);
   2039 
   2040 	VulkanBufferAllocateInfo vulkan_buffer_allocate_info = {
   2041 		.gpu_buffer              = model,
   2042 		.size                    = (u64)size,
   2043 		.flags                   = VulkanUsageFlag_HostReadWrite,
   2044 		.index_type              = index_type,
   2045 		.label                   = label,
   2046 		.queue_family_count      = 1,
   2047 		.queue_family_indices[0] = vulkan_context->queues[VulkanQueueKind_Graphics]->queue_family,
   2048 	};
   2049 	if (vk_buffer_allocate_common(&e->as.buffer, &vulkan_buffer_allocate_info)) {
   2050 		model->handle.value[0] = (u64)e;
   2051 		model->index_count  = index_count;
   2052 		model->gpu_pointer += indices_size;
   2053 
   2054 		VulkanBuffer  sb = {
   2055 			.host_pointer = indices,
   2056 			.memory_kind  = VulkanMemoryKind_Host,
   2057 		};
   2058 
   2059 		vk_buffer_buffer_copy(&e->as.buffer, &sb, 0, 0, vk_index_size(index_type) * index_count, 0);
   2060 	} else {
   2061 		vk_entity_release(e);
   2062 	}
   2063 }
   2064 
   2065 DEBUG_IMPORT void
   2066 vk_render_model_range_upload(GPUBuffer *model, void *data, u64 offset, u64 size, b32 non_temporal)
   2067 {
   2068 	VulkanBuffer *db = vk_entity_data(model->handle, VulkanEntityKind_RenderModel);
   2069 	VulkanBuffer  sb = {
   2070 		.host_pointer = data,
   2071 		.memory_kind  = VulkanMemoryKind_Host,
   2072 	};
   2073 
   2074 	offset += round_up_to(vk_index_size(db->index_type) * model->index_count, 64);
   2075 
   2076 	vk_buffer_buffer_copy(db, &sb, offset, 0, size, non_temporal);
   2077 }
   2078 
   2079 DEBUG_IMPORT void
   2080 vk_image_release(GPUImage *image)
   2081 {
   2082 	if ValidVulkanHandle(image->image) {
   2083 		VulkanContext *vk = vulkan_context;
   2084 		VulkanImage   *vi = vk_entity_data(image->image, VulkanEntityKind_Image);
   2085 
   2086 		vkDestroyImageView(vk->device, vi->view, 0);
   2087 		vkDestroyImage(vk->device, vi->image, 0);
   2088 		vk_release_memory(vi->memory, image->memory_size);
   2089 
   2090 		vk_entity_release((VulkanEntity *)image->image.value[0]);
   2091 	}
   2092 	zero_struct(image);
   2093 }
   2094 
   2095 DEBUG_IMPORT void
   2096 vk_image_allocate(GPUImage *image, u32 width, u32 height, u32 mips, u32 samples,
   2097                   VulkanImageUsage usage, VulkanUsageFlags flags, OSHandle *export, s8 label)
   2098 {
   2099 	assert(IsPowerOfTwo(samples));
   2100 
   2101 	vk_image_release(image);
   2102 
   2103 	VulkanContext *vk = vulkan_context;
   2104 	VulkanEntity  *e  = vk_entity_allocate(VulkanEntityKind_Image);
   2105 	VulkanImage   *vi = &e->as.image;
   2106 
   2107 	image->image.value[0] = (u64)e;
   2108 	image->width          = Min(width,   vk->gpu_info.max_image_dimension_2D);
   2109 	image->height         = Min(height,  vk->gpu_info.max_image_dimension_2D);
   2110 	image->mip_map_levels = Max(mips,    1);
   2111 	image->samples        = Min(samples, vk->gpu_info.max_msaa_samples);
   2112 
   2113 	VkFormat usage_format_map[VulkanImageUsage_Count + 1] = {
   2114 		[VulkanImageUsage_None]         = VK_FORMAT_UNDEFINED,
   2115 		//[VulkanImageUsage_Colour]       = VK_FORMAT_R8G8B8A8_SRGB,
   2116 		[VulkanImageUsage_Colour]       = VK_FORMAT_R8G8B8A8_UNORM,
   2117 		[VulkanImageUsage_DepthStencil] = vk->depth_stencil_format,
   2118 		[VulkanImageUsage_Count]        = VK_FORMAT_UNDEFINED,
   2119 	};
   2120 
   2121 	read_only local_persist VkImageUsageFlagBits usage_extra_bit_map[VulkanImageUsage_Count + 1] = {
   2122 		[VulkanImageUsage_None]         = 0,
   2123 		[VulkanImageUsage_Colour]       = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
   2124 		[VulkanImageUsage_DepthStencil] = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
   2125 		[VulkanImageUsage_Count]        = 0,
   2126 	};
   2127 
   2128 	read_only local_persist VkImageAspectFlags usage_image_aspect_map[VulkanImageUsage_Count + 1] = {
   2129 		[VulkanImageUsage_None]         = 0,
   2130 		[VulkanImageUsage_Colour]       = VK_IMAGE_ASPECT_COLOR_BIT,
   2131 		[VulkanImageUsage_DepthStencil] = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT,
   2132 		[VulkanImageUsage_Count]        = 0,
   2133 	};
   2134 
   2135 	usage = Clamp((u32)usage, 0, VulkanImageUsage_Count);
   2136 	VkImageUsageFlagBits usage_flags = usage_extra_bit_map[usage];
   2137 
   2138 	if (flags & VulkanUsageFlag_ImageSampling)       usage_flags |= VK_IMAGE_USAGE_SAMPLED_BIT;
   2139 	if (flags & VulkanUsageFlag_TransferSource)      usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
   2140 	if (flags & VulkanUsageFlag_TransferDestination) usage_flags |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
   2141 
   2142 	u32 queue_family = vk->queues[VulkanQueueKind_Graphics]->queue_family;
   2143 	VkImageCreateInfo image_create_info = {
   2144 		.sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
   2145 		.flags                 = export ? VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT : 0,
   2146 		.imageType             = VK_IMAGE_TYPE_2D,
   2147 		.format                = usage_format_map[usage],
   2148 		.extent                = {image->width, image->height, 1},
   2149 		.mipLevels             = image->mip_map_levels,
   2150 		.arrayLayers           = 1,
   2151 		.samples               = image->samples,
   2152 		.tiling                = VK_IMAGE_TILING_OPTIMAL,
   2153 		.usage                 = usage_flags,
   2154 		// NOTE(rnp): needed if multiple queue families are accessed
   2155 		.sharingMode           = VK_SHARING_MODE_EXCLUSIVE,
   2156 		.queueFamilyIndexCount = 1,
   2157 		.pQueueFamilyIndices   = &queue_family,
   2158 		.initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
   2159 	};
   2160 
   2161 	VkExternalMemoryImageCreateInfo external_memory_image_create_info = {
   2162 		.sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
   2163 		.handleTypes = OS_WINDOWS ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
   2164 		                          : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
   2165 	};
   2166 
   2167 	if (export) image_create_info.pNext = &external_memory_image_create_info;
   2168 
   2169 	vkCreateImage(vk->device, &image_create_info, 0, &vi->image);
   2170 
   2171 	VkMemoryRequirements memory_requirements;
   2172 	vkGetImageMemoryRequirements(vk->device, vi->image, &memory_requirements);
   2173 
   2174 	VkMemoryDedicatedAllocateInfo dedicated_allocate_info = {
   2175 		.sType  = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
   2176 		.image  = vi->image,
   2177 	};
   2178 
   2179 	if (vk_allocate_memory(&vi->memory, memory_requirements.size, VulkanMemoryKind_Device, 0, &dedicated_allocate_info, export)) {
   2180 		image->memory_size = memory_requirements.size;
   2181 		vkBindImageMemory(vk->device, vi->image, vi->memory, 0);
   2182 
   2183 		VkImageViewCreateInfo image_view_info = {
   2184 			.sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
   2185 			.image      = vi->image,
   2186 			.viewType   = VK_IMAGE_VIEW_TYPE_2D,
   2187 			.format     = usage_format_map[usage],
   2188 			.subresourceRange = {
   2189 				.aspectMask     = usage_image_aspect_map[usage],
   2190 				.baseMipLevel   = 0,
   2191 				.levelCount     = 1,
   2192 				.baseArrayLayer = 0,
   2193 				.layerCount     = 1,
   2194 			},
   2195 		};
   2196 		vkCreateImageView(vk->device, &image_view_info, 0, &vi->view);
   2197 
   2198 		vk_label_object(IMAGE,         vi->image,  label, s8("Image"));
   2199 		vk_label_object(IMAGE_VIEW,    vi->view,   label, s8("Image View"));
   2200 		vk_label_object(DEVICE_MEMORY, vi->memory, label, s8("Memory"));
   2201 	} else {
   2202 		vkDestroyImage(vk->device, vi->image, 0);
   2203 		vk_entity_release(e);
   2204 		zero_struct(image);
   2205 	}
   2206 }
   2207 
   2208 DEBUG_IMPORT VulkanHandle
   2209 vk_create_semaphore(OSHandle *export)
   2210 {
   2211 	VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Semaphore);
   2212 	e->as.semaphore = vk_make_semaphore(export);
   2213 	VulkanHandle result = {(u64)e};
   2214 	return result;
   2215 }
   2216 
   2217 DEBUG_IMPORT b32
   2218 vk_host_wait_timeline(VulkanTimeline timeline, u64 value, u64 timeout_ns)
   2219 {
   2220 	b32 result = 0;
   2221 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2222 		VulkanContext *vk = vulkan_context;
   2223 		VulkanQueue   *vq = vk->queues[timeline];
   2224 		VkSemaphoreWaitInfo semaphore_wait_info = {
   2225 			.sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
   2226 			.pSemaphores    = &vq->timeline_semaphore.semaphore,
   2227 			.semaphoreCount = 1,
   2228 			.pValues        = &value,
   2229 		};
   2230 		result = vkWaitSemaphores(vk->device, &semaphore_wait_info, timeout_ns) == VK_SUCCESS;
   2231 	}
   2232 	return result;
   2233 }
   2234 
   2235 DEBUG_IMPORT u64
   2236 vk_host_signal_timeline(VulkanTimeline timeline)
   2237 {
   2238 	u64 result = -1;
   2239 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2240 		VulkanContext   *vk = vulkan_context;
   2241 		VulkanQueue     *vq = vk->queues[timeline];
   2242 		VulkanSemaphore *vs = &vq->timeline_semaphore;
   2243 		result = ++vs->value;
   2244 		VkSemaphoreSignalInfo ssi = {
   2245 			.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
   2246 			.semaphore = vs->semaphore,
   2247 			.value     = result,
   2248 		};
   2249 		vkSignalSemaphore(vk->device, &ssi);
   2250 	}
   2251 	return result;
   2252 }
   2253 
   2254 DEBUG_IMPORT VulkanHandle
   2255 vk_pipeline(VulkanPipelineCreateInfo *infos, u32 count, u32 push_constants_size)
   2256 {
   2257 	assert(Between(count, 1, 2));
   2258 	assert(count == 2 || infos[0].kind == VulkanShaderKind_Compute);
   2259 
   2260 	VulkanHandle result = {0};
   2261 	DeferLoop(take_lock(&vulkan_context->arena_lock, -1), release_lock(&vulkan_context->arena_lock))
   2262 	{
   2263 		Arena arena = vulkan_context->arena;
   2264 
   2265 		VulkanEntity *e = vk_entity_allocate(VulkanEntityKind_Pipeline);
   2266 		result = (VulkanHandle){(u64)e};
   2267 
   2268 		if (count == 2) e->as.pipeline = vk_graphics_pipeline_from_infos(arena, infos, count, push_constants_size);
   2269 		else            e->as.pipeline = vk_compute_pipeline_from_shader_text(arena, infos[0].text, infos[0].name, push_constants_size);
   2270 	}
   2271 	return result;
   2272 }
   2273 
   2274 DEBUG_IMPORT b32
   2275 vk_pipeline_valid(VulkanHandle h)
   2276 {
   2277 	b32 result = 0;
   2278 	if ValidVulkanHandle(h) {
   2279 		VulkanPipeline *vp = vk_entity_data(h, VulkanEntityKind_Pipeline);
   2280 		if (vp->stage_flags == VK_SHADER_STAGE_COMPUTE_BIT)
   2281 			result = vp->pipeline != vulkan_context->default_compute_pipeline.pipeline;
   2282 		else
   2283 			result = vp->pipeline != vulkan_context->default_graphics_pipeline.pipeline;
   2284 	}
   2285 	return result;
   2286 }
   2287 
   2288 DEBUG_IMPORT void
   2289 vk_pipeline_release(VulkanHandle h)
   2290 {
   2291 	if (vk_pipeline_valid(h)) {
   2292 		VulkanEntity *e = (VulkanEntity *)h.value[0];
   2293 		VulkanTimeline timeline;
   2294 		if (e->as.pipeline.stage_flags == VK_SHADER_STAGE_COMPUTE_BIT) timeline = VulkanTimeline_Compute;
   2295 		else                                                           timeline = VulkanTimeline_Graphics;
   2296 
   2297 		// NOTE(rnp): block more command buffers from being recorded
   2298 		VulkanCommandPool *vcp = vulkan_context->command_pools[timeline];
   2299 		DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) {
   2300 			u32 index = (vcp->next_index - 1) % countof(vcp->buffers);
   2301 			vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2302 			vkDestroyPipeline(vulkan_context->device, e->as.pipeline.pipeline, 0);
   2303 			vkDestroyPipelineLayout(vulkan_context->device, e->as.pipeline.layout, 0);
   2304 
   2305 			if (&e->as.pipeline == vcp->bound_pipeline)
   2306 				vcp->bound_pipeline = 0;
   2307 		}
   2308 		vk_entity_release(e);
   2309 	}
   2310 }
   2311 
   2312 DEBUG_IMPORT void
   2313 vk_bind_shader_resources(BeamformerShaderResourceInfo *infos, u64 info_count)
   2314 {
   2315 	VulkanContext *vk = vulkan_context;
   2316 
   2317 	VkWriteDescriptorSet   write_sets[BeamformerShaderResourceKind_Count] = {0};
   2318 
   2319 	for EachIndex(info_count, it) {
   2320 		switch (infos[it].kind) {
   2321 		case BeamformerShaderResourceKind_Buffer:{
   2322 			VulkanBuffer *vb = vk_entity_data(infos[it].handle, VulkanEntityKind_Buffer);
   2323 			vk->descriptor_buffer_infos[infos[it].slot].buffer = vb->buffer;
   2324 			vk->descriptor_buffer_infos[infos[it].slot].offset = 0;
   2325 			vk->descriptor_buffer_infos[infos[it].slot].range  = vb->memory_size;
   2326 		}break;
   2327 
   2328 		InvalidDefaultCase;
   2329 		}
   2330 	}
   2331 
   2332 	write_sets[BeamformerShaderResourceKind_Buffer].sType            = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
   2333 	write_sets[BeamformerShaderResourceKind_Buffer].dstSet           = vk->descriptor_sets[BeamformerShaderResourceKind_Buffer];
   2334 	write_sets[BeamformerShaderResourceKind_Buffer].dstBinding       = 0;
   2335 	write_sets[BeamformerShaderResourceKind_Buffer].descriptorCount  = countof(vk->descriptor_buffer_infos);
   2336 	write_sets[BeamformerShaderResourceKind_Buffer].descriptorType   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
   2337 	write_sets[BeamformerShaderResourceKind_Buffer].pBufferInfo      = vk->descriptor_buffer_infos;
   2338 
   2339 	vkUpdateDescriptorSets(vk->device, countof(write_sets), write_sets, 0, 0);
   2340 }
   2341 
   2342 DEBUG_IMPORT VulkanHandle
   2343 vk_command_begin(VulkanTimeline timeline)
   2344 {
   2345 	VulkanHandle result = {0};
   2346 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2347 		VulkanContext     *vk  = vulkan_context;
   2348 		VulkanCommandPool *vcp = vk->command_pools[timeline];
   2349 
   2350 		take_lock(&vcp->lock, -1);
   2351 
   2352 		VulkanEntity        *e   = vk_entity_allocate(VulkanEntityKind_CommandBuffer);
   2353 		VulkanCommandBuffer *vcb = &e->as.command_buffer;
   2354 		vcb->timeline     = timeline;
   2355 		vcb->buffer_index = vcp->next_index++ % countof(vcp->buffers);
   2356 
   2357 		u32 index = vcb->buffer_index;
   2358 		// TODO(rnp): probably not the best to have this here but it will likely not be hit
   2359 		b32 wait_result = vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2360 		assert(wait_result);
   2361 
   2362 		vcp->queries_occupied[index] = 0;
   2363 
   2364 		VkCommandBufferBeginInfo buffer_begin_info = {
   2365 			.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   2366 			.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
   2367 		};
   2368 
   2369 		vkBeginCommandBuffer(vcp->buffers[index], &buffer_begin_info);
   2370 		vkCmdResetQueryPool(vcp->buffers[index], vcp->query_pool, index * MaxCommandBufferTimestamps,
   2371 		                    MaxCommandBufferTimestamps);
   2372 
   2373 		result = (VulkanHandle){(u64)e};
   2374 	}
   2375 	return result;
   2376 }
   2377 
   2378 DEBUG_IMPORT void
   2379 vk_command_bind_pipeline(VulkanHandle command, VulkanHandle pipeline)
   2380 {
   2381 	if ValidVulkanHandle(command) {
   2382 		VulkanContext       *vk  = vulkan_context;
   2383 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2384 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2385 
   2386 		VulkanPipeline *vp = 0;
   2387 		if ValidVulkanHandle(pipeline) {
   2388 			vp = vk_entity_data(pipeline, VulkanEntityKind_Pipeline);
   2389 		} else if (vcb->timeline == VulkanTimeline_Compute) {
   2390 			vp = &vk->default_compute_pipeline;
   2391 		} else if (vcb->timeline == VulkanTimeline_Graphics) {
   2392 			vp = &vk->default_graphics_pipeline;
   2393 		} else {
   2394 			InvalidCodePath;
   2395 		}
   2396 
   2397 		read_only local_persist VkPipelineBindPoint bind_point_lut[VulkanTimeline_Count] = {
   2398 			[VulkanTimeline_Graphics] = VK_PIPELINE_BIND_POINT_GRAPHICS,
   2399 			[VulkanTimeline_Compute]  = VK_PIPELINE_BIND_POINT_COMPUTE,
   2400 			[VulkanTimeline_Transfer] = -1,
   2401 		};
   2402 
   2403 		VkPipelineBindPoint bind_point = bind_point_lut[vcb->timeline];
   2404 		assert(bind_point != (VkPipelineBindPoint)-1);
   2405 
   2406 		vkCmdBindPipeline(vcp->buffers[vcb->buffer_index], bind_point, vp->pipeline);
   2407 		vkCmdBindDescriptorSets(vcp->buffers[vcb->buffer_index], bind_point, vp->layout,
   2408 		                        0, countof(vk->descriptor_sets), vk->descriptor_sets, 0, 0);
   2409 		vcp->bound_pipeline = vp;
   2410 	}
   2411 }
   2412 
   2413 DEBUG_IMPORT void
   2414 vk_command_buffer_memory_barriers(VulkanHandle command, GPUMemoryBarrierInfo *barriers, u64 count)
   2415 {
   2416 	if ValidVulkanHandle(command) {
   2417 		VulkanContext       *vk  = vulkan_context;
   2418 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2419 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2420 		VulkanQueue         *vq  = vk->queues[vcb->timeline];
   2421 
   2422 		DeferLoop(take_lock(&vk->arena_lock, -1), release_lock(&vk->arena_lock))
   2423 		{
   2424 			Arena arena = vk->arena;
   2425 			u32 valid_count = 0;
   2426 			VkBufferMemoryBarrier2 *memory_barriers = push_array(&arena, VkBufferMemoryBarrier2, count);
   2427 			for (u64 it = 0; it < count; it++) {
   2428 				if ValidVulkanHandle(barriers[it].gpu_buffer->handle) {
   2429 					u32           index = valid_count++;
   2430 					VulkanBuffer *vb    = vk_entity_data(barriers[it].gpu_buffer->handle, VulkanEntityKind_Buffer);
   2431 					memory_barriers[index].sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2;
   2432 					memory_barriers[index].srcStageMask        = vq->pipeline_stage_flags;
   2433 					memory_barriers[index].srcAccessMask       = VK_ACCESS_2_MEMORY_WRITE_BIT;
   2434 					memory_barriers[index].dstStageMask        = vq->pipeline_stage_flags;
   2435 					memory_barriers[index].dstAccessMask       = VK_ACCESS_2_MEMORY_READ_BIT;
   2436 					memory_barriers[index].srcQueueFamilyIndex = vq->queue_family;
   2437 					memory_barriers[index].dstQueueFamilyIndex = vq->queue_family;
   2438 					memory_barriers[index].buffer              = vb->buffer;
   2439 					memory_barriers[index].offset              = barriers[it].offset;
   2440 					memory_barriers[index].size                = barriers[it].size;
   2441 				}
   2442 			}
   2443 
   2444 			VkDependencyInfo dependancy_info = {
   2445 				.sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
   2446 				.bufferMemoryBarrierCount = valid_count,
   2447 				.pBufferMemoryBarriers    = memory_barriers,
   2448 			};
   2449 
   2450 			vkCmdPipelineBarrier2(vcp->buffers[vcb->buffer_index], &dependancy_info);
   2451 		}
   2452 	}
   2453 }
   2454 
   2455 DEBUG_IMPORT void
   2456 vk_command_dispatch_compute(VulkanHandle command, uv3 dispatch)
   2457 {
   2458 	assert(dispatch.x <= U16_MAX);
   2459 	assert(dispatch.y <= U16_MAX);
   2460 	assert(dispatch.z <= U16_MAX);
   2461 	if ValidVulkanHandle(command) {
   2462 		VkCommandBuffer cmd = vk_command_buffer(command);
   2463 		vkCmdDispatch(cmd, dispatch.x, dispatch.y, dispatch.z);
   2464 	}
   2465 }
   2466 
   2467 DEBUG_IMPORT void
   2468 vk_command_push_constants(VulkanHandle command, u32 offset, u32 size, void *values)
   2469 {
   2470 	if ValidVulkanHandle(command) {
   2471 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2472 		VulkanCommandPool   *vcp = vulkan_context->command_pools[vcb->timeline];
   2473 		VulkanPipeline      *vp  = vcp->bound_pipeline;
   2474 
   2475 		assert(vp);
   2476 
   2477 		vkCmdPushConstants(vcp->buffers[vcb->buffer_index], vp->layout, vp->stage_flags, offset, size, values);
   2478 	}
   2479 }
   2480 
   2481 DEBUG_IMPORT void
   2482 vk_command_timestamp(VulkanHandle command)
   2483 {
   2484 	if ValidVulkanHandle(command) {
   2485 		VulkanContext       *vk  = vulkan_context;
   2486 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2487 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2488 
   2489 		read_only local_persist VkPipelineStageFlags2 stage_lut[VulkanTimeline_Count] = {
   2490 			[VulkanTimeline_Graphics] = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT,
   2491 			[VulkanTimeline_Compute]  = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
   2492 			[VulkanTimeline_Transfer] = -1,
   2493 		};
   2494 
   2495 		VkPipelineStageFlags2 stage = stage_lut[vcb->timeline];
   2496 		assert(stage != (VkPipelineStageFlags2)-1);
   2497 
   2498 		if (vcp->queries_occupied[vcb->buffer_index] < MaxCommandBufferTimestamps) {
   2499 			u32 query_index = vcp->queries_occupied[vcb->buffer_index]++;
   2500 			vkCmdWriteTimestamp2(vcp->buffers[vcb->buffer_index], stage, vcp->query_pool,
   2501 			                     vcb->buffer_index * MaxCommandBufferTimestamps + query_index);
   2502 		}
   2503 	}
   2504 }
   2505 
   2506 DEBUG_IMPORT void
   2507 vk_command_wait_timeline(VulkanHandle command, VulkanTimeline timeline, u64 value)
   2508 {
   2509 	if (ValidVulkanHandle(command) && Between(timeline, 0, VulkanTimeline_Count - 1)) {
   2510 		VulkanContext       *vk  = vulkan_context;
   2511 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2512 
   2513 		u32 wait_index = vk->queue_indices[timeline];
   2514 		vcb->in_flight_wait_values[wait_index] = Max(value, vcb->in_flight_wait_values[wait_index]);
   2515 	}
   2516 }
   2517 
   2518 DEBUG_IMPORT u64
   2519 vk_command_end(VulkanHandle command, VulkanHandle wait_semaphore, VulkanHandle finished_semaphore)
   2520 {
   2521 	u64 result = -1;
   2522 	if ValidVulkanHandle(command) {
   2523 		VulkanContext       *vk  = vulkan_context;
   2524 		VulkanCommandBuffer *vcb = vk_entity_data(command, VulkanEntityKind_CommandBuffer);
   2525 		VulkanCommandPool   *vcp = vk->command_pools[vcb->timeline];
   2526 		VulkanQueue         *vq  = vk->queues[vcb->timeline];
   2527 		VulkanSemaphore     *vs  = &vq->timeline_semaphore;
   2528 
   2529 		vkEndCommandBuffer(vcp->buffers[vcb->buffer_index]);
   2530 
   2531 		DeferLoop(take_lock(&vq->lock, -1), release_lock(&vq->lock)) {
   2532 			VkCommandBufferSubmitInfo command_buffer_submit_info = {
   2533 				.sType         = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
   2534 				.commandBuffer = vcp->buffers[vcb->buffer_index],
   2535 			};
   2536 
   2537 			result = ++vs->value;
   2538 
   2539 			u32 signal_submit_info_count = 1;
   2540 			VkSemaphoreSubmitInfo signal_submit_infos[2] = {{
   2541 				.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2542 				.semaphore = vs->semaphore,
   2543 				.value     = result,
   2544 				.stageMask = vq->pipeline_stage_flags,
   2545 			}};
   2546 
   2547 			if ValidVulkanHandle(finished_semaphore) {
   2548 				VulkanSemaphore *fs = vk_entity_data(finished_semaphore, VulkanEntityKind_Semaphore);
   2549 				signal_submit_infos[signal_submit_info_count++] = (VkSemaphoreSubmitInfo){
   2550 					.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2551 					.semaphore = fs->semaphore,
   2552 					.stageMask = vq->pipeline_stage_flags,
   2553 				};
   2554 			}
   2555 
   2556 			u32 wait_submit_info_count = 0;
   2557 			VkSemaphoreSubmitInfo wait_submit_infos[VulkanQueueKind_Count + 1];
   2558 			for (u32 i = 0; i < vk->unique_queues; i++) {
   2559 				u32 queue_index = vk->queue_indices[i];
   2560 				if (vcb->in_flight_wait_values[queue_index] > 0) {
   2561 					VulkanQueue *q = vk->queues[queue_index];
   2562 					VkSemaphoreSubmitInfo wait_ssi = {
   2563 						.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2564 						.semaphore = q->timeline_semaphore.semaphore,
   2565 						.value     = vcb->in_flight_wait_values[queue_index],
   2566 						.stageMask = q->pipeline_stage_flags,
   2567 					};
   2568 					wait_submit_infos[wait_submit_info_count++] = wait_ssi;
   2569 				}
   2570 			}
   2571 
   2572 			if ValidVulkanHandle(wait_semaphore) {
   2573 				VulkanSemaphore *ws = vk_entity_data(wait_semaphore, VulkanEntityKind_Semaphore);
   2574 				wait_submit_infos[wait_submit_info_count++] = (VkSemaphoreSubmitInfo){
   2575 					.sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
   2576 					.semaphore = ws->semaphore,
   2577 					.stageMask = vq->pipeline_stage_flags,
   2578 				};
   2579 			}
   2580 
   2581 			VkSubmitInfo2 submit_info = {
   2582 				.sType                    = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
   2583 				.commandBufferInfoCount   = 1,
   2584 				.pCommandBufferInfos      = &command_buffer_submit_info,
   2585 				.waitSemaphoreInfoCount   = wait_submit_info_count,
   2586 				.pWaitSemaphoreInfos      = wait_submit_infos,
   2587 				.signalSemaphoreInfoCount = signal_submit_info_count,
   2588 				.pSignalSemaphoreInfos    = signal_submit_infos,
   2589 			};
   2590 
   2591 			vkQueueSubmit2(vq->queue, 1, &submit_info, 0);
   2592 
   2593 			vcp->bound_pipeline = 0;
   2594 
   2595 			atomic_store_u64(vcp->submission_values + vcb->buffer_index, result);
   2596 		}
   2597 
   2598 		release_lock(&vcp->lock);
   2599 
   2600 		vk_entity_release((VulkanEntity *)command.value[0]);
   2601 	}
   2602 	return result;
   2603 }
   2604 
   2605 DEBUG_IMPORT void
   2606 vk_command_begin_rendering(VulkanHandle command, GPUImage *colour, GPUImage *depth, GPUImage *resolve)
   2607 {
   2608 	if ValidVulkanHandle(command) {
   2609 		VkCommandBuffer cmd = vk_command_buffer(command);
   2610 
   2611 		assert((colour->width == depth->width) && (colour->height == depth->height));
   2612 
   2613 		VulkanImage *ci = vk_entity_data(colour->image, VulkanEntityKind_Image);
   2614 		VulkanImage *di = vk_entity_data(depth->image,  VulkanEntityKind_Image);
   2615 		VulkanImage *ri = 0;
   2616 		if (resolve) ri = vk_entity_data(resolve->image, VulkanEntityKind_Image);
   2617 
   2618 		// NOTE: Layout Transitions
   2619 		{
   2620 			u32 image_memory_barrier_count = 2;
   2621 			VkImageMemoryBarrier2 image_memory_barriers[3] = {
   2622 				{
   2623 					.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2624 					.srcStageMask     = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
   2625 					.srcAccessMask    = 0,
   2626 					.dstStageMask     = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
   2627 					.dstAccessMask    = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
   2628 					.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2629 					.newLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2630 					.image            = ci->image,
   2631 					.subresourceRange = {
   2632 						.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
   2633 						.baseMipLevel   = 0,
   2634 						.levelCount     = 1,
   2635 						.baseArrayLayer = 0,
   2636 						.layerCount     = 1,
   2637 					},
   2638 				},
   2639 				{
   2640 					.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2641 					.srcStageMask     = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
   2642 					.srcAccessMask    = 0,
   2643 					.dstStageMask     = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT|VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT,
   2644 					.dstAccessMask    = VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
   2645 					.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2646 					.newLayout        = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
   2647 					.image            = di->image,
   2648 					.subresourceRange = {
   2649 						.aspectMask     = VK_IMAGE_ASPECT_DEPTH_BIT|VK_IMAGE_ASPECT_STENCIL_BIT,
   2650 						.baseMipLevel   = 0,
   2651 						.levelCount     = 1,
   2652 						.baseArrayLayer = 0,
   2653 						.layerCount     = 1,
   2654 					},
   2655 				},
   2656 			};
   2657 
   2658 			if (resolve) image_memory_barriers[image_memory_barrier_count++] = (VkImageMemoryBarrier2){
   2659 				.sType            = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
   2660 				.srcStageMask     = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
   2661 				.srcAccessMask    = 0,
   2662 				.dstStageMask     = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT|VK_PIPELINE_STAGE_2_RESOLVE_BIT,
   2663 				.dstAccessMask    = VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT|VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
   2664 				.oldLayout        = VK_IMAGE_LAYOUT_UNDEFINED,
   2665 				.newLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2666 				.image            = ri->image,
   2667 				.subresourceRange = {
   2668 					.aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
   2669 					.baseMipLevel   = 0,
   2670 					.levelCount     = 1,
   2671 					.baseArrayLayer = 0,
   2672 					.layerCount     = 1,
   2673 				},
   2674 			};
   2675 
   2676 			VkDependencyInfo dependency_info = {
   2677 				.sType                   = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
   2678 				.imageMemoryBarrierCount = image_memory_barrier_count,
   2679 				.pImageMemoryBarriers    = image_memory_barriers,
   2680 			};
   2681 
   2682 			vkCmdPipelineBarrier2(cmd, &dependency_info);
   2683 		}
   2684 
   2685 		VkRenderingAttachmentInfo colour_attachment = {
   2686 			.sType              = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
   2687 			.imageView          = ci->view,
   2688 			.imageLayout        = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2689 			.resolveMode        = ri ? VK_RESOLVE_MODE_AVERAGE_BIT : 0,
   2690 			.resolveImageView   = ri ? ri->view : 0,
   2691 			.resolveImageLayout = ri ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : 0,
   2692 			.loadOp             = VK_ATTACHMENT_LOAD_OP_CLEAR,
   2693 			.storeOp            = VK_ATTACHMENT_STORE_OP_STORE,
   2694 			.clearValue         = {.color = {{0.0f, 0.0f, 0.0f, 0.0f}}},
   2695 		};
   2696 
   2697 		VkRenderingAttachmentInfo depth_stencil_attachment = {
   2698 			.sType       = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
   2699 			.imageView   = di->view,
   2700 			.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
   2701 			.loadOp      = VK_ATTACHMENT_LOAD_OP_CLEAR,
   2702 			.storeOp     = VK_ATTACHMENT_STORE_OP_STORE,
   2703 			.clearValue  = {.depthStencil = {1.0f, 0}},
   2704 		};
   2705 
   2706 		VkRenderingInfo rendering_info = {
   2707 			.sType                = VK_STRUCTURE_TYPE_RENDERING_INFO,
   2708 			.renderArea           = {.offset = {0}, .extent = {colour->width, colour->height}},
   2709 			.layerCount           = 1,
   2710 			.colorAttachmentCount = 1,
   2711 			.pColorAttachments    = &colour_attachment,
   2712 			.pDepthAttachment     = &depth_stencil_attachment,
   2713 			.pStencilAttachment   = &depth_stencil_attachment,
   2714 		};
   2715 
   2716 		vkCmdBeginRendering(cmd, &rendering_info);
   2717 	}
   2718 }
   2719 
   2720 DEBUG_IMPORT void
   2721 vk_command_draw(VulkanHandle command, GPUBuffer *model)
   2722 {
   2723 	if (ValidVulkanHandle(command) && ValidVulkanHandle(model->handle)) {
   2724 		VkCommandBuffer cmd = vk_command_buffer(command);
   2725 		VulkanBuffer   *vb  = vk_entity_data(model->handle, VulkanEntityKind_RenderModel);
   2726 		vkCmdBindIndexBuffer2(cmd, vb->buffer, 0, vk_index_size(vb->index_type) * model->index_count, vb->index_type);
   2727 		vkCmdDrawIndexed(cmd, model->index_count, 1, 0, 0, 0);
   2728 	}
   2729 }
   2730 
   2731 DEBUG_IMPORT void
   2732 vk_command_scissor(VulkanHandle command, u32 width, u32 height, u32 x_offset, u32 y_offset)
   2733 {
   2734 	if ValidVulkanHandle(command) {
   2735 		VkCommandBuffer cmd = vk_command_buffer(command);
   2736 		VkRect2D scissor = {.offset = {x_offset, y_offset}, .extent = {width, height}};
   2737 		vkCmdSetScissor(cmd, 0, 1, &scissor);
   2738 	}
   2739 }
   2740 
   2741 DEBUG_IMPORT void
   2742 vk_command_viewport(VulkanHandle command, f32 width, f32 height, f32 x_offset, f32 y_offset, f32 min_depth, f32 max_depth)
   2743 {
   2744 	if ValidVulkanHandle(command) {
   2745 		VkCommandBuffer cmd = vk_command_buffer(command);
   2746 		VkViewport viewport = {x_offset, y_offset, width, height, min_depth, max_depth};
   2747 		vkCmdSetViewport(cmd, 0, 1, &viewport);
   2748 	}
   2749 }
   2750 
   2751 DEBUG_IMPORT void
   2752 vk_command_end_rendering(VulkanHandle command)
   2753 {
   2754 	if ValidVulkanHandle(command) vkCmdEndRendering(vk_command_buffer(command));
   2755 }
   2756 
   2757 DEBUG_IMPORT void
   2758 vk_command_copy_buffer(VulkanHandle command, GPUBuffer *restrict destination,
   2759                        GPUBuffer *restrict source, u64 source_offset, i64 size)
   2760 {
   2761 	if (ValidVulkanHandle(command) && ValidVulkanHandle(destination->handle) && ValidVulkanHandle(source->handle)) {
   2762 		VkCommandBuffer cmd = vk_command_buffer(command);
   2763 		VulkanBuffer *db = vk_entity_data(destination->handle, VulkanEntityKind_Buffer);
   2764 		VulkanBuffer *sb = vk_entity_data(source->handle,      VulkanEntityKind_Buffer);
   2765 
   2766 		VkBufferCopy2 buffer_copy = {
   2767 			.sType     = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
   2768 			.srcOffset = source_offset,
   2769 			.dstOffset = 0,
   2770 			.size      = size,
   2771 		};
   2772 
   2773 		VkCopyBufferInfo2 copy_buffer_info = {
   2774 			.sType       = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
   2775 			.srcBuffer   = sb->buffer,
   2776 			.dstBuffer   = db->buffer,
   2777 			.regionCount = 1,
   2778 			.pRegions    = &buffer_copy,
   2779 		};
   2780 
   2781 		vkCmdCopyBuffer2(cmd, &copy_buffer_info);
   2782 	}
   2783 }
   2784 
   2785 DEBUG_IMPORT u64 *
   2786 vk_command_read_timestamps(VulkanTimeline timeline, Arena *arena)
   2787 {
   2788 	u64 *result = 0;
   2789 	if Between(timeline, 0, VulkanTimeline_Count - 1) {
   2790 		VulkanContext     *vk  = vulkan_context;
   2791 		VulkanCommandPool *vcp = vk->command_pools[timeline];
   2792 		DeferLoop(take_lock(&vcp->lock, -1), release_lock(&vcp->lock)) {
   2793 			u32 index = (vcp->next_index - 1) % countof(vcp->buffers);
   2794 			u32 count = vcp->queries_occupied[index];
   2795 			if (count > 0) {
   2796 				result = push_array(arena, u64, count + 1);
   2797 				result[0] = count;
   2798 
   2799 				vk_host_wait_timeline(timeline, vcp->submission_values[index], -1ULL);
   2800 
   2801 				vkGetQueryPoolResults(vk->device, vcp->query_pool, index * MaxCommandBufferTimestamps, count,
   2802 				                      count * sizeof(u64), result + 1, 8, VK_QUERY_RESULT_WAIT_BIT);
   2803 			}
   2804 		}
   2805 	} else {
   2806 		result = push_array(arena, u64, 1);
   2807 	}
   2808 	return result;
   2809 }