ogl_beamforming

Ultrasound Beamforming Implemented with OpenGL
git clone anongit@rnpnr.xyz:ogl_beamforming.git
Log | Files | Refs | Feed | Submodules | README | LICENSE

util.c (23076B)


      1 /* See LICENSE for license details. */
/* NOTE: this file overrides designated initializers on purpose (e.g. the defaults
 * supplied by the arena_alloc() macro), so the matching compiler warning is disabled */
#if   COMPILER_CLANG
  #pragma GCC diagnostic ignored "-Winitializer-overrides"
#elif COMPILER_GCC
  #pragma GCC diagnostic ignored "-Woverride-init"
#endif
      7 
      8 #define zero_struct(s) memory_clear((s), 0, sizeof(*(s)))
      9 function void *
     10 memory_clear(void *restrict p_, u8 c, u64 size)
     11 {
     12 	u8 *p = p_;
     13 	while (size > 0) p[--size] = c;
     14 	return p;
     15 }
     16 
     17 function b32
     18 memory_equal(void *restrict left, void *restrict right, u64 n)
     19 {
     20 	u8 *a = left, *b = right;
     21 	b32 result = 1;
     22 	for (; result && n; n--)
     23 		result &= *a++ == *b++;
     24 	return result;
     25 }
     26 
     27 #define mem_copy memory_copy
     28 function void
     29 memory_copy(void *restrict dest, void *restrict src, u64 n)
     30 {
     31 	u8 *s = src, *d = dest;
     32 	#ifdef __AVX512BW__
     33 	{
     34 		for (; n >= 64; n -= 64, s += 64, d += 64)
     35 			_mm512_storeu_epi8(d, _mm512_loadu_epi8(s));
     36 		__mmask64 k = _cvtu64_mask64(_bzhi_u64(-1ULL, n));
     37 		_mm512_mask_storeu_epi8(d, k, _mm512_maskz_loadu_epi8(k, s));
     38 	}
     39 	#else
     40 		for (; n; n--) *d++ = *s++;
     41 	#endif
     42 }
     43 
/* Copies `n` bytes from `src` to `dest` using non-temporal (streaming) load/store
 * instructions where the target provides them, otherwise falls back to memory_copy().
 *
 * IMPORTANT: this function may fault unless dest, src, and n are all multiples of 64 */
function void
memory_copy_non_temporal(void *restrict dest, void *restrict src, u64 n)
{
	/* NOTE(review): assume() is defined elsewhere; presumably an optimizer hint
	 * rather than a runtime check - confirm */
	assume(((u64)dest & 63) == 0);
	assume(((u64)src  & 63) == 0);
	assume(((u64)n    & 63) == 0);
	u8 *s = src, *d = dest;

	#if defined(__AVX512BW__)
	{
		/* NOTE: 64 bytes per iteration */
		for (; n >= 64; n -= 64, s += 64, d += 64)
			_mm512_stream_si512((__m512i *)d, _mm512_stream_load_si512((__m512i *)s));
	}
	#elif defined(__AVX2__)
	{
		/* NOTE: 32 bytes per iteration */
		for (; n >= 32; n -= 32, s += 32, d += 32)
			_mm256_stream_si256((__m256i *)d, _mm256_stream_load_si256((__m256i *)s));
	}
	#elif ARCH_ARM64 && !COMPILER_MSVC
	{
		/* NOTE: %0 = d, %1 = s, %2 = n. n == 0 skips the loop (cbz); otherwise
		 * 32 bytes are moved per iteration and the loop only terminates when the
		 * subtraction reaches exactly 0, hence the multiple-of-32 requirement on n */
		asm volatile (
			"cbz  %2, 2f\n"
			"1: ldnp q0, q1, [%1]\n"
			"subs %2, %2, #32\n"
			"add  %1, %1, #32\n"
			"stnp q0, q1, [%0]\n"
			"add  %0, %0, #32\n"
			"b.ne 1b\n"
			"2:"
			:  "+r"(d), "+r"(s), "+r"(n)
			:: "memory", "v0", "v1"
		);
	}
	#else
		memory_copy(d, s, n);
	#endif
}
     82 
     83 function void
     84 memory_move(u8 *dest, u8 *src, u64 n)
     85 {
     86 	if (dest < src) memory_copy(dest, src, n);
     87 	else            while (n) { n--; dest[n] = src[n]; }
     88 }
     89 
     90 function void *
     91 memory_scan_backwards(void *memory, u8 byte, iz n)
     92 {
     93 	void *result = 0;
     94 	u8   *s      = memory;
     95 	while (n > 0) if (s[--n] == byte) { result = s + n; break; }
     96 	return result;
     97 }
     98 
     99 function Arena
    100 arena_from_memory(void *memory, u64 size)
    101 {
    102 	Arena result;
    103 	result.beg = memory;
    104 	result.end = result.beg + size;
    105 	return result;
    106 }
    107 
    108 function void *
    109 align_pointer_up(void *p, uz alignment)
    110 {
    111 	uz padding = -(u64)p & (alignment - 1);
    112 	void *result = (u8 *)p + padding;
    113 	return result;
    114 }
    115 
    116 function void *
    117 arena_aligned_start(Arena a, uz alignment)
    118 {
    119 	return align_pointer_up(a.beg, alignment);
    120 }
    121 
    122 #define arena_capacity(a, t) arena_capacity_(a, sizeof(t), alignof(t))
    123 function iz
    124 arena_capacity_(Arena *a, iz size, uz alignment)
    125 {
    126 	iz available = a->end - (u8 *)arena_aligned_start(*a, alignment);
    127 	iz result    = available / size;
    128 	return result;
    129 }
    130 
    131 function u8 *
    132 arena_commit(Arena *a, iz size)
    133 {
    134 	assert(a->end - a->beg >= size);
    135 	u8 *result = a->beg;
    136 	a->beg += size;
    137 	return result;
    138 }
    139 
    140 function void
    141 arena_pop(Arena *a, iz length)
    142 {
    143 	a->beg -= length;
    144 }
    145 
/* Option bits for arena_alloc_() */
typedef enum {
	/* NOTE: skip clearing the returned memory to 0 */
	ArenaAllocateFlags_NoZero = 1 << 0,
} ArenaAllocateFlags;
    149 
/* Parameter block for arena_alloc_(); usually filled in by the arena_alloc() macro */
typedef struct {
	iz size;                  /* size of one element in bytes */
	uz align;                 /* alignment of the allocation's start address */
	iz count;                 /* number of elements */
	ArenaAllocateFlags flags; /* see ArenaAllocateFlags */
} ArenaAllocateInfo;
    156 
/* NOTE: arena_alloc() takes designated initializers for ArenaAllocateInfo; .align
 * defaults to 8 and .count to 1 and either may be overridden by the caller.
 * The push_* helpers allocate `n` (or one) objects of type `t` with t's size and
 * alignment; the _no_zero variants pass ArenaAllocateFlags_NoZero. */
#define arena_alloc(a, ...)         arena_alloc_(a, (ArenaAllocateInfo){.align = 8, .count = 1, ##__VA_ARGS__})
#define push_array(a, t, n)         (t *)arena_alloc(a, .size = sizeof(t), .align = alignof(t), .count = n)
#define push_array_no_zero(a, t, n) (t *)arena_alloc(a, .size = sizeof(t), .align = alignof(t), .count = n, .flags = ArenaAllocateFlags_NoZero)
#define push_struct(a, t)           push_array(a, t, 1)
#define push_struct_no_zero(a, t)   push_array_no_zero(a, t, 1)
    162 
    163 function void *
    164 arena_alloc_(Arena *a, ArenaAllocateInfo info)
    165 {
    166 	void *result = 0;
    167 	if (a->beg) {
    168 		u8 *start = arena_aligned_start(*a, info.align);
    169 		iz available = a->end - start;
    170 		assert((available >= 0 && info.count <= available / info.size));
    171 		asan_unpoison_region(start, info.count * info.size);
    172 		a->beg = start + info.count * info.size;
    173 		result = start;
    174 		if ((info.flags & ArenaAllocateFlags_NoZero) == 0)
    175 			result = memory_clear(start, 0, info.count * info.size);
    176 	}
    177 	return result;
    178 }
    179 
    180 function Arena
    181 sub_arena(Arena *a, iz size, uz align)
    182 {
    183 	Arena result = {.beg = arena_alloc(a, .size = size, .align = align, .flags = ArenaAllocateFlags_NoZero)};
    184 	result.end   = result.beg + size;
    185 	return result;
    186 }
    187 
    188 function Arena
    189 sub_arena_end(Arena *a, iz len, uz align)
    190 {
    191 	Arena result;
    192 	result.beg = (u8 *)((u64)(a->end - len) & ~(align - 1)),
    193 	result.end = a->end,
    194 
    195 	a->end = result.beg;
    196 	assert(a->end >= a->beg);
    197 
    198 	return result;
    199 }
    200 
    201 function TempArena
    202 begin_temp_arena(Arena *a)
    203 {
    204 	TempArena result = {.arena = a, .original_arena = *a};
    205 	return result;
    206 }
    207 
    208 function void
    209 end_temp_arena(TempArena ta)
    210 {
    211 	Arena *a = ta.arena;
    212 	if (a) {
    213 		assert(a->beg >= ta.original_arena.beg);
    214 		*a = ta.original_arena;
    215 	}
    216 }
    217 
    218 
    219 enum { DA_INITIAL_CAP = 16 };
    220 
    221 #define da_index(it, s) ((it) - (s)->data)
    222 #define da_reserve(a, s, n) \
    223   (s)->data = da_reserve_((a), (s)->data, &(s)->capacity, (s)->count + n, \
    224                           _Alignof(typeof(*(s)->data)), sizeof(*(s)->data))
    225 
    226 #define da_append_count(a, s, items, item_count) do { \
    227 	da_reserve((a), (s), (item_count));                                             \
    228 	mem_copy((s)->data + (s)->count, (items), sizeof(*(items)) * (uz)(item_count)); \
    229 	(s)->count += (item_count);                                                     \
    230 } while (0)
    231 
    232 #define da_push(a, s) \
    233   ((typeof((s)->data))memory_clear((s)->count == (s)->capacity  \
    234     ? da_reserve(a, s, 1),      \
    235       (s)->data + (s)->count++  \
    236     : (s)->data + (s)->count++, 0, sizeof(*(s)->data)))
    237 
/* Backing function of da_reserve(): grows the storage of a dynamic array so that it
 * can hold at least `needed` elements of `size` bytes.
 *
 * data:     current element storage (may be 0)
 * capacity: in/out; current capacity in elements, updated to the new capacity
 * Returns the (possibly relocated) element storage. */
function void *
da_reserve_(Arena *a, void *data, da_count *capacity, da_count needed, u64 align, i64 size)
{
	da_count cap = *capacity;

	/* NOTE(rnp): handle both 0 initialized DAs and DAs that need to be moved (they started
	 * on the stack or someone allocated something in the middle of the arena during usage) */
	if (!data || a->beg != (u8 *)data + cap * size) {
		/* NOTE: allocate room for the existing elements at the arena's current
		 * position and move them there */
		void *copy = arena_alloc(a, .size = size, .align = align, .count = cap);
		if (data) mem_copy(copy, data, (uz)(cap * size));
		data = copy;
	}

	/* NOTE: start at DA_INITIAL_CAP and double until `needed` elements fit */
	if (!cap) cap = DA_INITIAL_CAP;
	while (cap < needed) cap *= 2;
	/* NOTE: only the additional elements are allocated; the return value is ignored.
	 * NOTE(review): this relies on the allocation landing directly behind the
	 * existing elements - confirm that no alignment padding can be inserted here */
	arena_alloc(a, .size = size, .align = align, .count = cap - *capacity);
	*capacity = cap;
	return data;
}
    257 
    258 function u32
    259 utf8_encode(u8 *out, u32 cp)
    260 {
    261 	u32 result = 1;
    262 	if (cp <= 0x7F) {
    263 		out[0] = cp & 0x7F;
    264 	} else if (cp <= 0x7FF) {
    265 		result = 2;
    266 		out[0] = ((cp >>  6) & 0x1F) | 0xC0;
    267 		out[1] = ((cp >>  0) & 0x3F) | 0x80;
    268 	} else if (cp <= 0xFFFF) {
    269 		result = 3;
    270 		out[0] = ((cp >> 12) & 0x0F) | 0xE0;
    271 		out[1] = ((cp >>  6) & 0x3F) | 0x80;
    272 		out[2] = ((cp >>  0) & 0x3F) | 0x80;
    273 	} else if (cp <= 0x10FFFF) {
    274 		result = 4;
    275 		out[0] = ((cp >> 18) & 0x07) | 0xF0;
    276 		out[1] = ((cp >> 12) & 0x3F) | 0x80;
    277 		out[2] = ((cp >>  6) & 0x3F) | 0x80;
    278 		out[3] = ((cp >>  0) & 0x3F) | 0x80;
    279 	} else {
    280 		out[0] = '?';
    281 	}
    282 	return result;
    283 }
    284 
    285 function UnicodeDecode
    286 utf16_decode(u16 *data, iz length)
    287 {
    288 	UnicodeDecode result = {.cp = U32_MAX};
    289 	if (length) {
    290 		result.consumed = 1;
    291 		result.cp = data[0];
    292 		if (length > 1 && BETWEEN(data[0], 0xD800u, 0xDBFFu)
    293 		               && BETWEEN(data[1], 0xDC00u, 0xDFFFu))
    294 		{
    295 			result.consumed = 2;
    296 			result.cp = ((data[0] - 0xD800u) << 10u) | ((data[1] - 0xDC00u) + 0x10000u);
    297 		}
    298 	}
    299 	return result;
    300 }
    301 
    302 function u32
    303 utf16_encode(u16 *out, u32 cp)
    304 {
    305 	u32 result = 1;
    306 	if (cp == U32_MAX) {
    307 		out[0] = '?';
    308 	} else if (cp < 0x10000u) {
    309 		out[0] = (u16)cp;
    310 	} else {
    311 		u32 value = cp - 0x10000u;
    312 		out[0] = (u16)(0xD800u + (value >> 10u));
    313 		out[1] = (u16)(0xDC00u + (value & 0x3FFu));
    314 		result = 2;
    315 	}
    316 	return result;
    317 }
    318 
    319 function Stream
    320 stream_from_buffer(u8 *buffer, u32 capacity)
    321 {
    322 	Stream result = {.data = buffer, .cap = (i32)capacity};
    323 	return result;
    324 }
    325 
    326 function Stream
    327 stream_alloc(Arena *a, i32 cap)
    328 {
    329 	Stream result = stream_from_buffer(arena_commit(a, cap), (u32)cap);
    330 	return result;
    331 }
    332 
    333 function s8
    334 stream_to_s8(Stream *s)
    335 {
    336 	s8 result = s8("");
    337 	if (!s->errors) result = (s8){.len = s->widx, .data = s->data};
    338 	return result;
    339 }
    340 
    341 function void
    342 stream_reset(Stream *s, i32 index)
    343 {
    344 	s->errors = s->cap <= index;
    345 	if (!s->errors)
    346 		s->widx = index;
    347 }
    348 
    349 function void
    350 stream_commit(Stream *s, i32 count)
    351 {
    352 	s->errors |= !BETWEEN(s->widx + count, 0, s->cap);
    353 	if (!s->errors)
    354 		s->widx += count;
    355 }
    356 
    357 function void
    358 stream_append(Stream *s, void *data, iz count)
    359 {
    360 	s->errors |= (s->cap - s->widx) < count;
    361 	if (!s->errors) {
    362 		mem_copy(s->data + s->widx, data, (uz)count);
    363 		s->widx += (i32)count;
    364 	}
    365 }
    366 
    367 // TODO(rnp): replace with handwritten version
    368 #include <stdarg.h>
    369 #include <stdio.h>
    370 function void
    371 stream_appendfv(Stream *s, const char *format, va_list args)
    372 {
    373 	i32 written = vsnprintf((char *)s->data + s->widx, s->cap - s->widx, format, args);
    374 	s->errors |= written > (s->cap - s->widx);
    375 	if (!s->errors) s->widx += written;
    376 }
    377 
    378 function void
    379 stream_append_byte(Stream *s, u8 b)
    380 {
    381 	stream_append(s, &b, 1);
    382 }
    383 
    384 function void
    385 stream_pad(Stream *s, u8 b, i32 n)
    386 {
    387 	while (n > 0) stream_append_byte(s, b), n--;
    388 }
    389 
    390 function void
    391 stream_append_s8(Stream *s, s8 str)
    392 {
    393 	stream_append(s, str.data, str.len);
    394 }
    395 
    396 #define stream_append_s8s(s, ...) stream_append_s8s_(s, arg_list(s8, ##__VA_ARGS__))
    397 function void
    398 stream_append_s8s_(Stream *s, s8 *strs, iz count)
    399 {
    400 	for (iz i = 0; i < count; i++)
    401 		stream_append(s, strs[i].data, strs[i].len);
    402 }
    403 
    404 function void
    405 stream_append_u64_width(Stream *s, u64 n, u64 min_width)
    406 {
    407 	u8 tmp[64];
    408 	u8 *end = tmp + sizeof(tmp);
    409 	u8 *beg = end;
    410 	min_width = MIN(sizeof(tmp), min_width);
    411 
    412 	do { *--beg = (u8)('0' + (n % 10)); } while (n /= 10);
    413 	while (end - beg > 0 && (uz)(end - beg) < min_width)
    414 		*--beg = '0';
    415 
    416 	stream_append(s, beg, end - beg);
    417 }
    418 
    419 function void
    420 stream_append_u64(Stream *s, u64 n)
    421 {
    422 	stream_append_u64_width(s, n, 0);
    423 }
    424 
    425 function void
    426 stream_append_hex_u64_width(Stream *s, u64 n, iz width)
    427 {
    428 	assert(width <= 16);
    429 	if (!s->errors) {
    430 		u8  buf[16];
    431 		u8 *end = buf + sizeof(buf);
    432 		u8 *beg = end;
    433 		while (n) {
    434 			*--beg = (u8)"0123456789abcdef"[n & 0x0F];
    435 			n >>= 4;
    436 		}
    437 		while (end - beg < width)
    438 			*--beg = '0';
    439 		stream_append(s, beg, end - beg);
    440 	}
    441 }
    442 
    443 function void
    444 stream_append_hex_u64(Stream *s, u64 n)
    445 {
    446 	stream_append_hex_u64_width(s, n, 2);
    447 }
    448 
    449 function void
    450 stream_append_i64(Stream *s, i64 n)
    451 {
    452 	if (n < 0) {
    453 		stream_append_byte(s, '-');
    454 		n *= -1;
    455 	}
    456 	stream_append_u64(s, (u64)n);
    457 }
    458 
    459 function void
    460 stream_append_f64(Stream *s, f64 f, u64 prec)
    461 {
    462 	if (f < 0) {
    463 		stream_append_byte(s, '-');
    464 		f *= -1;
    465 	}
    466 
    467 	/* NOTE: round last digit */
    468 	f += 0.5f / (f64)prec;
    469 
    470 	if (f >= (f64)(-1UL >> 1)) {
    471 		stream_append_s8(s, s8("inf"));
    472 	} else {
    473 		u64 integral = (u64)f;
    474 		u64 fraction = (u64)((f - (f64)integral) * (f64)prec);
    475 		stream_append_u64(s, integral);
    476 		stream_append_byte(s, '.');
    477 		for (u64 i = prec / 10; i > 1; i /= 10) {
    478 			if (i > fraction)
    479 				stream_append_byte(s, '0');
    480 		}
    481 		stream_append_u64(s, fraction);
    482 	}
    483 }
    484 
    485 function void
    486 stream_append_f64_e(Stream *s, f64 f)
    487 {
    488 	/* TODO: there should be a better way of doing this */
    489 	#if 0
    490 	/* NOTE: we ignore subnormal numbers for now */
    491 	union { f64 f; u64 u; } u = {.f = f};
    492 	i32 exponent = ((u.u >> 52) & 0x7ff) - 1023;
    493 	f32 log_10_of_2 = 0.301f;
    494 	i32 scale       = (exponent * log_10_of_2);
    495 	/* NOTE: normalize f */
    496 	for (i32 i = ABS(scale); i > 0; i--)
    497 		f *= (scale > 0)? 0.1f : 10.0f;
    498 	#else
    499 	i32 scale = 0;
    500 	if (f != 0) {
    501 		while (f > 1) {
    502 			f *= 0.1f;
    503 			scale++;
    504 		}
    505 		while (f < 1) {
    506 			f *= 10.0f;
    507 			scale--;
    508 		}
    509 	}
    510 	#endif
    511 
    512 	u32 prec = 100;
    513 	stream_append_f64(s, f, prec);
    514 	stream_append_byte(s, 'e');
    515 	stream_append_byte(s, scale >= 0? '+' : '-');
    516 	for (u32 i = prec / 10; i > 1; i /= 10)
    517 		stream_append_byte(s, '0');
    518 	stream_append_u64(s, (u64)Abs(scale));
    519 }
    520 
    521 function void
    522 stream_append_v2(Stream *s, v2 v)
    523 {
    524 	stream_append_byte(s, '{');
    525 	stream_append_f64(s, v.x, 100);
    526 	stream_append_s8(s, s8(", "));
    527 	stream_append_f64(s, v.y, 100);
    528 	stream_append_byte(s, '}');
    529 }
    530 
    531 function Stream
    532 arena_stream(Arena a)
    533 {
    534 	Stream result = {0};
    535 	result.data   = a.beg;
    536 	result.cap    = (i32)(a.end - a.beg);
    537 
    538 	/* TODO(rnp): no idea what to do here if we want to maintain the ergonomics */
    539 	asan_unpoison_region(result.data, result.cap);
    540 
    541 	return result;
    542 }
    543 
    544 function s8
    545 arena_stream_commit(Arena *a, Stream *s)
    546 {
    547 	assert(s->data == a->beg);
    548 	s8 result = stream_to_s8(s);
    549 	arena_commit(a, result.len);
    550 	return result;
    551 }
    552 
    553 function s8
    554 arena_stream_commit_zero(Arena *a, Stream *s)
    555 {
    556 	b32 error = s->errors || s->widx == s->cap;
    557 	if (!error)
    558 		s->data[s->widx] = 0;
    559 	s8 result = stream_to_s8(s);
    560 	arena_commit(a, result.len + 1);
    561 	return result;
    562 }
    563 
    564 function s8
    565 arena_stream_commit_and_reset(Arena *arena, Stream *s)
    566 {
    567 	s8 result = arena_stream_commit_zero(arena, s);
    568 	*s = arena_stream(*arena);
    569 	return result;
    570 }
    571 
    572 #if !defined(XXH_IMPLEMENTATION)
    573 # define XXH_INLINE_ALL
    574 # define XXH_IMPLEMENTATION
    575 # define XXH_STATIC_LINKING_ONLY
    576 # include "external/xxhash.h"
    577 #endif
    578 
    579 function u128
    580 u128_hash_from_data(void *data, uz size)
    581 {
    582 	u128 result = {0};
    583 	XXH128_hash_t hash = XXH3_128bits_withSeed(data, size, 4969);
    584 	mem_copy(&result, &hash, sizeof(result));
    585 	return result;
    586 }
    587 
    588 function u64
    589 u64_hash_from_str8_seed(str8 string, u64 seed)
    590 {
    591 	u64 result = XXH3_64bits_withSeed(string.data, (uz)string.length, seed);
    592 	return result;
    593 }
    594 
    595 function u64
    596 u64_hash_from_s8(s8 v)
    597 {
    598 	u64 result = u64_hash_from_str8_seed(str8_from_s8(v), 4969);
    599 	return result;
    600 }
    601 
    602 function s8
    603 c_str_to_s8(char *cstr)
    604 {
    605 	s8 result = {.data = (u8 *)cstr};
    606 	if (cstr) { while (*cstr) { result.len++; cstr++; } }
    607 	return result;
    608 }
    609 
    610 function str8
    611 str8_range(u8 *start, u8 *one_past_last)
    612 {
    613 	str8 result;
    614 	result.data   = start;
    615 	result.length = one_past_last - start;
    616 	return result;
    617 }
    618 
    619 function str8
    620 str8_skip(str8 s, i64 count)
    621 {
    622 	str8 result = s;
    623 	if (count > 0) {
    624 		result.data   += count;
    625 		result.length -= count;
    626 	}
    627 	return result;
    628 }
    629 
    630 function b32
    631 str8_equal(str8 a, str8 b)
    632 {
    633 	b32 result = a.length == b.length;
    634 	for (i64 i = 0; result && i < a.length; i++)
    635 		result = a.data[i] == b.data[i];
    636 	return result;
    637 }
    638 
    639 function b32
    640 s8_equal(s8 a, s8 b)
    641 {
    642 	return str8_equal(str8_from_s8(a), str8_from_s8(b));
    643 }
    644 
    645 /* NOTE(rnp): returns < 0 if byte is not found */
    646 function iz
    647 s8_scan_backwards(s8 s, u8 byte)
    648 {
    649 	iz result = (u8 *)memory_scan_backwards(s.data, byte, s.len) - s.data;
    650 	return result;
    651 }
    652 
    653 function s8
    654 s8_cut_head(s8 s, iz cut)
    655 {
    656 	s8 result = s;
    657 	if (cut > 0) {
    658 		result.data += cut;
    659 		result.len  -= cut;
    660 	}
    661 	return result;
    662 }
    663 
    664 function b32
    665 str8_match(str8 a, str8 b, StringMatchFlags flags)
    666 {
    667 	b32 result = 0;
    668 	if (flags == 0) {
    669 		result = str8_equal(a, b);
    670 	} else if (a.length == b.length || (flags & StringMatchFlag_SloppySize)) {
    671 		result = 1;
    672 		i64 length = Min(a.length, b.length);
    673 		for (i64 it = 0; it < length && result; it++) {
    674 			u8 ab = a.data[it], bb = b.data[it];
    675 			if (flags & StringMatchFlag_CaseInsensitive) {
    676 				ab |= 0x20;
    677 				bb |= 0x20;
    678 			}
    679 			result &= ab == bb;
    680 		}
    681 	}
    682 	return result;
    683 }
    684 
/* Searches `string` for the first occurrence of `needle`.
 * Returns the byte offset of the match, or string.length when there is none.
 * An empty needle skips the search and yields offset 0.
 * StringMatchFlag_CaseInsensitive in `flags` makes the comparison ignore bit 0x20. */
function i64
str8_find_needle(str8 string, str8 needle, StringMatchFlags flags)
{
	u8 *s  = string.data;
	/* NOTE: one past the last position at which the needle can still start; equal to
	 * string.data (nothing to search) when the needle is longer than the string */
	u8 *se = string.data + Max(string.length + 1, needle.length) - needle.length;
	if (needle.length > 0) {
		/* NOTE: the tail is compared against everything remaining in the string, so
		 * only the needle's length may take part in the comparison */
		flags |= StringMatchFlag_SloppySize;

		u8 nb = needle.data[0];
		if (flags & StringMatchFlag_CaseInsensitive)
			nb |= 0x20;

		str8 needle_tail = str8_skip(needle, 1);
		u8 *s_opl = string.data + string.length;
		for (; s < se; s++) {
			u8 sb = *s;
			if (flags & StringMatchFlag_CaseInsensitive)
				sb |= 0x20;

			/* NOTE: cheap first byte test before comparing the rest of the needle */
			if (sb == nb && str8_match(str8_range(s + 1, s_opl), needle_tail, flags))
				break;
		}
	}

	/* NOTE: s only stops short of se when the loop found a match (or never ran
	 * because the needle is empty) */
	i64 result = string.length;
	if (s < se)
		result = s - string.data;
	return result;
}
    714 
    715 
    716 function s8
    717 s8_alloc(Arena *a, iz len)
    718 {
    719 	s8 result = {.data = push_array(a, u8, len), .len = len};
    720 	return result;
    721 }
    722 
    723 function s8
    724 s16_to_s8(Arena *a, s16 in)
    725 {
    726 	s8 result = s8("");
    727 	if (in.len) {
    728 		iz commit = in.len * 4;
    729 		iz length = 0;
    730 		u8 *data = arena_commit(a, commit + 1);
    731 		u16 *beg = in.data;
    732 		u16 *end = in.data + in.len;
    733 		while (beg < end) {
    734 			UnicodeDecode decode = utf16_decode(beg, end - beg);
    735 			length += utf8_encode(data + length, decode.cp);
    736 			beg    += decode.consumed;
    737 		}
    738 		data[length] = 0;
    739 		result = (s8){.len = length, .data = data};
    740 		arena_pop(a, commit - length);
    741 	}
    742 	return result;
    743 }
    744 
    745 function s16
    746 s8_to_s16(Arena *a, s8 in)
    747 {
    748 	s16 result = {0};
    749 	if (in.len) {
    750 		iz required = 2 * in.len + 1;
    751 		u16 *data   = push_array(a, u16, required);
    752 		iz length   = 0;
    753 		/* TODO(rnp): utf8_decode */
    754 		for (iz i = 0; i < in.len; i++) {
    755 			u32 cp  = in.data[i];
    756 			length += utf16_encode(data + length, cp);
    757 		}
    758 		result = (s16){.len = length, .data = data};
    759 		arena_pop(a, required - length);
    760 	}
    761 	return result;
    762 }
    763 
    764 #define push_str8_from_parts(a, j, ...) push_str8_from_parts_((a), (j), arg_list(str8, __VA_ARGS__))
    765 function str8
    766 push_str8_from_parts_(Arena *arena, str8 joiner, str8 *parts, i64 count)
    767 {
    768 	i64 length = joiner.length * (count - 1);
    769 	for (i64 i = 0; i < count; i++)
    770 		length += parts[i].length;
    771 
    772 	str8 result = {.length = length, .data = arena_commit(arena, length + 1)};
    773 
    774 	i64 offset = 0;
    775 	for (i64 i = 0; i < count; i++) {
    776 		if (i != 0) {
    777 			memory_copy(result.data + offset, joiner.data, (uz)joiner.length);
    778 			offset += joiner.length;
    779 		}
    780 		memory_copy(result.data + offset, parts[i].data, (uz)parts[i].length);
    781 		offset += parts[i].length;
    782 	}
    783 	result.data[result.length] = 0;
    784 
    785 	return result;
    786 }
    787 
    788 #define push_s8_from_parts(a, j, ...) push_s8_from_parts_((a), (j), arg_list(s8, __VA_ARGS__))
    789 function s8
    790 push_s8_from_parts_(Arena *arena, s8 joiner, s8 *parts, iz count)
    791 {
    792 	iz length = joiner.len * (count - 1);
    793 	for (iz i = 0; i < count; i++)
    794 		length += parts[i].len;
    795 
    796 	s8 result = {.len = length, .data = arena_commit(arena, length + 1)};
    797 
    798 	iz offset = 0;
    799 	for (iz i = 0; i < count; i++) {
    800 		if (i != 0) {
    801 			mem_copy(result.data + offset, joiner.data, (uz)joiner.len);
    802 			offset += joiner.len;
    803 		}
    804 		mem_copy(result.data + offset, parts[i].data, (uz)parts[i].len);
    805 		offset += parts[i].len;
    806 	}
    807 	result.data[result.len] = 0;
    808 
    809 	return result;
    810 }
    811 
    812 function s8
    813 push_s8(Arena *a, s8 str)
    814 {
    815 	s8 result   = s8_alloc(a, str.len + 1);
    816 	result.len -= 1;
    817 	mem_copy(result.data, str.data, (uz)result.len);
    818 	return result;
    819 }
    820 
    821 // TODO(rnp): replace with handwritten version
    822 function str8
    823 push_str8_fv(Arena *arena, const char *format, va_list args)
    824 {
    825 	Stream sb = arena_stream(*arena);
    826 	stream_appendfv(&sb, format, args);
    827 	s8 s = arena_stream_commit(arena, &sb);
    828 	str8 result = {.length = s.len, .data = s.data};
    829 	return result;
    830 }
    831 
    832 function str8
    833 push_f64_string(Arena *arena, f64 value, u64 precision)
    834 {
    835 	Stream sb = arena_stream(*arena);
    836 	stream_append_f64(&sb, value, precision);
    837 	s8 s = arena_stream_commit(arena, &sb);
    838 	str8 result = {.length = s.len, .data = s.data};
    839 	return result;
    840 }
    841 
/* NOTE(rnp): from Hacker's Delight */
/* Shifts the top bit of a u64 right by clz_u64(a).
 * NOTE(review): clz_u64() is defined elsewhere; presumably it counts leading zero
 * bits, which makes the result the largest power of two <= a. The result for
 * a == 0 depends on what clz_u64(0) returns - confirm */
function force_inline u64
round_down_power_of_two(u64 a)
{
	u64 result = 0x8000000000000000ULL >> clz_u64(a);
	return result;
}
    849 
/* Shifts the top bit of a u64 right by clz_u64(a - 1) - 1.
 * NOTE(review): clz_u64() is defined elsewhere; presumably it counts leading zero
 * bits, which makes the result the smallest power of two >= a. When clz_u64(a - 1)
 * is 0 (a - 1 has its top bit set, which includes a == 0) the shift count becomes
 * negative - confirm that callers never pass such values */
function force_inline u64
round_up_power_of_two(u64 a)
{
	u64 result = 0x8000000000000000ULL >> (clz_u64(a - 1) - 1);
	return result;
}
    856 
    857 function force_inline iz
    858 round_up_to(iz value, iz multiple)
    859 {
    860 	iz result = value;
    861 	if (value % multiple != 0)
    862 		result += multiple - value % multiple;
    863 	return result;
    864 }
    865 
    866 function void
    867 split_rect_horizontal(Rect rect, f32 fraction, Rect *left, Rect *right)
    868 {
    869 	if (left) {
    870 		left->pos    = rect.pos;
    871 		left->size.h = rect.size.h;
    872 		left->size.w = rect.size.w * fraction;
    873 	}
    874 	if (right) {
    875 		right->pos    = rect.pos;
    876 		right->pos.x += rect.size.w * fraction;
    877 		right->size.h = rect.size.h;
    878 		right->size.w = rect.size.w * (1.0f - fraction);
    879 	}
    880 }
    881 
    882 function void
    883 split_rect_vertical(Rect rect, f32 fraction, Rect *top, Rect *bot)
    884 {
    885 	if (top) {
    886 		top->pos    = rect.pos;
    887 		top->size.w = rect.size.w;
    888 		top->size.h = rect.size.h * fraction;
    889 	}
    890 	if (bot) {
    891 		bot->pos    = rect.pos;
    892 		bot->pos.y += rect.size.h * fraction;
    893 		bot->size.w = rect.size.w;
    894 		bot->size.h = rect.size.h * (1.0f - fraction);
    895 	}
    896 }
    897 
    898 function void
    899 cut_rect_horizontal(Rect rect, f32 at, Rect *left, Rect *right)
    900 {
    901 	at = MIN(at, rect.size.w);
    902 	if (left) {
    903 		*left = rect;
    904 		left->size.w = at;
    905 	}
    906 	if (right) {
    907 		*right = rect;
    908 		right->pos.x  += at;
    909 		right->size.w -= at;
    910 	}
    911 }
    912 
    913 function void
    914 cut_rect_vertical(Rect rect, f32 at, Rect *top, Rect *bot)
    915 {
    916 	at = MIN(at, rect.size.h);
    917 	if (top) {
    918 		*top = rect;
    919 		top->size.h = at;
    920 	}
    921 	if (bot) {
    922 		*bot = rect;
    923 		bot->pos.y  += at;
    924 		bot->size.h -= at;
    925 	}
    926 }
    927 
    928 function NumberConversion
    929 integer_from_s8(s8 raw)
    930 {
    931 	read_only local_persist alignas(64) i8 lut[64] = {
    932 		 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    933 		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    934 		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    935 		-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    936 	};
    937 
    938 	NumberConversion result = {.unparsed = raw};
    939 
    940 	iz  i     = 0;
    941 	i64 scale = 1;
    942 	if (raw.len > 0 && raw.data[0] == '-') {
    943 		scale = -1;
    944 		i     =  1;
    945 	}
    946 
    947 	b32 hex = 0;
    948 	if (raw.len - i > 2 && raw.data[i] == '0' && (raw.data[1] == 'x' || raw.data[1] == 'X')) {
    949 		hex = 1;
    950 		i += 2;
    951 	}
    952 
    953 	#define integer_conversion_body(radix, clamp) do {\
    954 		for (; i < raw.len; i++) {\
    955 			i64 value = lut[Min((u8)(raw.data[i] - (u8)'0'), clamp)];\
    956 			if (value >= 0) {\
    957 				if (result.U64 > (U64_MAX - (u64)value) / radix) {\
    958 					result.result = NumberConversionResult_OutOfRange;\
    959 					result.U64    = U64_MAX;\
    960 					return result;\
    961 				} else {\
    962 					result.U64 = radix * result.U64 + (u64)value;\
    963 				}\
    964 			} else {\
    965 				break;\
    966 			}\
    967 		}\
    968 	} while (0)
    969 
    970 	if (hex) integer_conversion_body(16u, 63u);
    971 	else     integer_conversion_body(10u, 15u);
    972 
    973 	#undef integer_conversion_body
    974 
    975 	result.unparsed = (s8){.len = raw.len - i, .data = raw.data + i};
    976 	result.result   = i > 0 ? NumberConversionResult_Success : NumberConversionResult_Invalid;
    977 	result.kind     = NumberConversionKind_Integer;
    978 	if (scale < 0) result.U64 = 0 - result.U64;
    979 
    980 	return result;
    981 }
    982 
    983 function NumberConversion
    984 number_from_s8(s8 s)
    985 {
    986 	NumberConversion result  = {.unparsed = s};
    987 	NumberConversion integer = integer_from_s8(s);
    988 	if (integer.result == NumberConversionResult_Success) {
    989 		if (integer.unparsed.len != 0 && integer.unparsed.data[0] == '.') {
    990 			s = integer.unparsed;
    991 			s.data++;
    992 			s.len--;
    993 
    994 			while (s.len > 0 && s.data[s.len - 1] == '0') s.len--;
    995 
    996 			NumberConversion fractional = integer_from_s8(s);
    997 			if (fractional.result == NumberConversionResult_Success || s.len == 0) {
    998 				result.F64 = (f64)fractional.U64;
    999 
   1000 				u64 divisor = (u64)(fractional.unparsed.data - s.data);
   1001 				while (divisor > 0) { result.F64 /= 10.0; divisor--; }
   1002 
   1003 				result.F64 += (f64)integer.S64;
   1004 
   1005 				result.result   = NumberConversionResult_Success;
   1006 				result.kind     = NumberConversionKind_Float;
   1007 				result.unparsed = fractional.unparsed;
   1008 			}
   1009 		} else {
   1010 			result = integer;
   1011 		}
   1012 	}
   1013 	return result;
   1014 }