math.c (24867B)
1 /* See LICENSE for license details. */ 2 #include "external/cephes.c" 3 4 function void 5 fill_kronecker_sub_matrix_f16(f16 *out, i32 out_stride, f16 scale, f16 *b, iv2 b_dim) 6 { 7 for (i32 i = 0; i < b_dim.y; i++) { 8 for (i32 j = 0; j < b_dim.x; j += 4, b += 4) { 9 out[j + 0] = scale * b[0]; 10 out[j + 1] = scale * b[1]; 11 out[j + 2] = scale * b[2]; 12 out[j + 3] = scale * b[3]; 13 } 14 out += out_stride; 15 } 16 } 17 18 /* NOTE: this won't check for valid space/etc and assumes row major order */ 19 function void 20 kronecker_product_f16(f16 *out, f16 *a, iv2 a_dim, f16 *b, iv2 b_dim) 21 { 22 iv2 out_dim = {{a_dim.x * b_dim.x, a_dim.y * b_dim.y}}; 23 assert(out_dim.y % 4 == 0); 24 for (i32 i = 0; i < a_dim.y; i++) { 25 f16 *vout = out; 26 for (i32 j = 0; j < a_dim.x; j++, a++) { 27 fill_kronecker_sub_matrix_f16(vout, out_dim.y, *a, b, b_dim); 28 vout += b_dim.y; 29 } 30 out += out_dim.y * b_dim.x; 31 } 32 } 33 34 /* NOTE/TODO: to support even more hadamard sizes use the Paley construction */ 35 function f16 * 36 make_hadamard_transpose(Arena *a, i32 dim, b32 row_major) 37 { 38 read_only local_persist f16 hadamard_12_12_transpose[] = { 39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, 41 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 42 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 1, 43 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 1, 44 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 1, 45 1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, -1, 46 1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, -1, 47 1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, -1, 48 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 1, 49 1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, -1, 50 1, -1, 1, -1, -1, -1, 1, 1, 1, -1, 1, -1, 51 }; 52 53 read_only local_persist f16 hadamard_20_20_transpose[] = { 54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 55 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, 56 1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, 57 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 58 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 59 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, 60 1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, 61 1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, 62 1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, 63 1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 64 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, 65 1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 66 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 67 1, 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 68 1, 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 69 1, 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 70 1, 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 71 1, -1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, 72 1, -1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, 73 1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, -1, 1, -1, 1, 1, 1, 1, -1, -1, 74 }; 75 76 77 f16 *result = 0; 78 79 i32 order = dim; 80 b32 power_of_2 = IsPowerOfTwo(dim); 81 b32 multiple_of_12 = dim % 12 == 0; 82 b32 multiple_of_20 = dim % 20 == 0; 83 iz elements = dim * dim; 84 85 i32 base_dim = 0; 86 if (power_of_2) { 87 base_dim = dim; 88 } else if (multiple_of_20 && IsPowerOfTwo(dim / 20)) { 89 base_dim = 20; 90 dim /= 20; 91 } else if (multiple_of_12 && IsPowerOfTwo(dim / 12)) { 92 base_dim = 12; 93 dim /= 12; 94 } 95 96 if (power_of_2 && base_dim && arena_capacity(a, f16) >= elements * (1 + (dim != base_dim))) { 97 result = push_array(a, f16, elements); 98 99 Arena tmp = *a; 100 f16 *m = dim == base_dim ? result : push_array(&tmp, f16, elements); 101 102 #define IND(i, j) ((i) * dim + (j)) 103 m[0] = 1; 104 for (i32 k = 1; k < dim; k *= 2) { 105 for (i32 i = 0; i < k; i++) { 106 for (i32 j = 0; j < k; j++) { 107 f16 val = m[IND(i, j)]; 108 m[IND(i + k, j)] = val; 109 m[IND(i, j + k)] = val; 110 m[IND(i + k, j + k)] = -val; 111 } 112 } 113 } 114 #undef IND 115 116 f16 *m2 = 0; 117 iv2 m2_dim; 118 switch (base_dim) { 119 case 12:{ m2 = hadamard_12_12_transpose; m2_dim = (iv2){{12, 12}}; }break; 120 case 20:{ m2 = hadamard_20_20_transpose; m2_dim = (iv2){{20, 20}}; }break; 121 } 122 if (m2) kronecker_product_f16(result, m, (iv2){{dim, dim}}, m2, m2_dim); 123 } 124 125 if (row_major) { 126 for (i32 r = 0; r < order; r++) 127 for (i32 c = 0; c < order; c++) 128 swap(result[r * order + c], result[c * order + r]); 129 } 130 131 return result; 132 } 133 134 function b32 135 u128_equal(u128 a, u128 b) 136 { 137 b32 result = a.U64[0] == b.U64[0] && a.U64[1] == b.U64[1]; 138 return result; 139 } 140 141 function RangeU64 142 subrange_n_from_n_m_count(u64 n, u64 n_count, u64 m) 143 { 144 assert(n < n_count); 145 146 u64 per_lane = m / n_count; 147 u64 leftover = m - per_lane * n_count; 148 u64 leftovers_before_n = MIN(leftover, n); 149 u64 base_index = n * per_lane + leftovers_before_n; 150 u64 one_past_last_index = base_index + per_lane + ((n < leftover) ? 1 : 0); 151 152 RangeU64 result = {base_index, one_past_last_index}; 153 return result; 154 } 155 156 function i32 157 iv3_dimension(iv3 points) 158 { 159 i32 result = (points.x > 1) + (points.y > 1) + (points.z > 1); 160 return result; 161 } 162 163 function bv3 164 iv3_equal(iv3 a, iv3 b) 165 { 166 bv3 result; 167 result.x = a.x == b.x; 168 result.y = a.y == b.y; 169 result.z = a.z == b.z; 170 return result; 171 } 172 173 function b32 174 bv3_all(bv3 a) 175 { 176 b32 result = a.x != 0 && a.y != 0 && a.z != 0; 177 return result; 178 } 179 180 function b32 181 bv3_any(bv3 a) 182 { 183 b32 result = a.x != 0 || a.y != 0 || a.z != 0; 184 return result; 185 } 186 187 function v2 188 clamp_v2_rect(v2 v, Rect r) 189 { 190 v2 result = v; 191 result.x = CLAMP(v.x, r.pos.x, r.pos.x + r.size.x); 192 result.y = CLAMP(v.y, r.pos.y, r.pos.y + r.size.y); 193 return result; 194 } 195 196 function v2 197 v2_from_iv2(iv2 v) 198 { 199 v2 result; 200 result.E[0] = (f32)v.E[0]; 201 result.E[1] = (f32)v.E[1]; 202 return result; 203 } 204 205 function v2 206 v2_abs(v2 a) 207 { 208 v2 result; 209 result.x = Abs(a.x); 210 result.y = Abs(a.y); 211 return result; 212 } 213 214 function v2 215 v2_scale(v2 a, f32 scale) 216 { 217 v2 result; 218 result.x = a.x * scale; 219 result.y = a.y * scale; 220 return result; 221 } 222 223 function v2 224 v2_add(v2 a, v2 b) 225 { 226 v2 result; 227 result.x = a.x + b.x; 228 result.y = a.y + b.y; 229 return result; 230 } 231 232 function v2 233 v2_sub(v2 a, v2 b) 234 { 235 v2 result = v2_add(a, v2_scale(b, -1.0f)); 236 return result; 237 } 238 239 function v2 240 v2_mul(v2 a, v2 b) 241 { 242 v2 result; 243 result.x = a.x * b.x; 244 result.y = a.y * b.y; 245 return result; 246 } 247 248 function v2 249 v2_div(v2 a, v2 b) 250 { 251 v2 result; 252 result.x = a.x / b.x; 253 result.y = a.y / b.y; 254 return result; 255 } 256 257 function v2 258 v2_floor(v2 a) 259 { 260 v2 result; 261 result.x = (f32)((i32)a.x); 262 result.y = (f32)((i32)a.y); 263 return result; 264 } 265 266 function f32 267 v2_magnitude_squared(v2 a) 268 { 269 f32 result = a.x * a.x + a.y * a.y; 270 return result; 271 } 272 273 function f32 274 v2_magnitude(v2 a) 275 { 276 f32 result = sqrt_f32(a.x * a.x + a.y * a.y); 277 return result; 278 } 279 280 function v3 281 cross(v3 a, v3 b) 282 { 283 v3 result; 284 result.x = a.y * b.z - a.z * b.y; 285 result.y = a.z * b.x - a.x * b.z; 286 result.z = a.x * b.y - a.y * b.x; 287 return result; 288 } 289 290 function v3 291 v3_from_iv3(iv3 v) 292 { 293 v3 result; 294 result.E[0] = (f32)v.E[0]; 295 result.E[1] = (f32)v.E[1]; 296 result.E[2] = (f32)v.E[2]; 297 return result; 298 } 299 300 function v3 301 v3_abs(v3 a) 302 { 303 v3 result; 304 result.x = Abs(a.x); 305 result.y = Abs(a.y); 306 result.z = Abs(a.z); 307 return result; 308 } 309 310 function v3 311 v3_scale(v3 a, f32 scale) 312 { 313 v3 result; 314 result.x = scale * a.x; 315 result.y = scale * a.y; 316 result.z = scale * a.z; 317 return result; 318 } 319 320 function v3 321 v3_add(v3 a, v3 b) 322 { 323 v3 result; 324 result.x = a.x + b.x; 325 result.y = a.y + b.y; 326 result.z = a.z + b.z; 327 return result; 328 } 329 330 function v3 331 v3_sub(v3 a, v3 b) 332 { 333 v3 result = v3_add(a, v3_scale(b, -1.0f)); 334 return result; 335 } 336 337 function v3 338 v3_div(v3 a, v3 b) 339 { 340 v3 result; 341 result.x = a.x / b.x; 342 result.y = a.y / b.y; 343 result.z = a.z / b.z; 344 return result; 345 } 346 347 function f32 348 v3_dot(v3 a, v3 b) 349 { 350 f32 result = a.x * b.x + a.y * b.y + a.z * b.z; 351 return result; 352 } 353 354 function f32 355 v3_magnitude_squared(v3 a) 356 { 357 f32 result = v3_dot(a, a); 358 return result; 359 } 360 361 function f32 362 v3_magnitude(v3 a) 363 { 364 f32 result = sqrt_f32(v3_dot(a, a)); 365 return result; 366 } 367 368 function v3 369 v3_normalize(v3 a) 370 { 371 v3 result = v3_scale(a, 1.0f / v3_magnitude(a)); 372 return result; 373 } 374 375 function v4 376 v4_scale(v4 a, f32 scale) 377 { 378 v4 result; 379 result.x = scale * a.x; 380 result.y = scale * a.y; 381 result.z = scale * a.z; 382 result.w = scale * a.w; 383 return result; 384 } 385 386 function v4 387 v4_add(v4 a, v4 b) 388 { 389 v4 result; 390 result.x = a.x + b.x; 391 result.y = a.y + b.y; 392 result.z = a.z + b.z; 393 result.w = a.w + b.w; 394 return result; 395 } 396 397 function v4 398 v4_sub(v4 a, v4 b) 399 { 400 v4 result = v4_add(a, v4_scale(b, -1)); 401 return result; 402 } 403 404 function f32 405 v4_dot(v4 a, v4 b) 406 { 407 f32 result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 408 return result; 409 } 410 411 function v4 412 v4_lerp(v4 a, v4 b, f32 t) 413 { 414 v4 result = v4_add(a, v4_scale(v4_sub(b, a), t)); 415 return result; 416 } 417 418 function b32 419 m4_equal(m4 a, m4 b) 420 { 421 b32 result = 1; 422 for EachElement(a.E, it) 423 result &= f32_equal(a.E[it], b.E[it]); 424 return result; 425 } 426 427 function m4 428 m4_identity(void) 429 { 430 m4 result; 431 result.c[0] = (v4){{1, 0, 0, 0}}; 432 result.c[1] = (v4){{0, 1, 0, 0}}; 433 result.c[2] = (v4){{0, 0, 1, 0}}; 434 result.c[3] = (v4){{0, 0, 0, 1}}; 435 return result; 436 } 437 438 function v4 439 m4_row(m4 a, u32 row) 440 { 441 v4 result; 442 result.E[0] = a.c[0].E[row]; 443 result.E[1] = a.c[1].E[row]; 444 result.E[2] = a.c[2].E[row]; 445 result.E[3] = a.c[3].E[row]; 446 return result; 447 } 448 449 function m4 450 m4_mul(m4 a, m4 b) 451 { 452 m4 result; 453 for (u32 i = 0; i < 4; i++) { 454 for (u32 j = 0; j < 4; j++) { 455 result.c[i].E[j] = v4_dot(m4_row(a, j), b.c[i]); 456 } 457 } 458 return result; 459 } 460 461 /* NOTE(rnp): based on: 462 * https://web.archive.org/web/20131215123403/ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf 463 * TODO(rnp): redo with SIMD as given in the link (but need to rewrite for column-major) 464 */ 465 function m4 466 m4_inverse(m4 m) 467 { 468 m4 result; 469 result.E[ 0] = m.E[5] * m.E[10] * m.E[15] - m.E[5] * m.E[11] * m.E[14] - m.E[9] * m.E[6] * m.E[15] + m.E[9] * m.E[7] * m.E[14] + m.E[13] * m.E[6] * m.E[11] - m.E[13] * m.E[7] * m.E[10]; 470 result.E[ 4] = -m.E[4] * m.E[10] * m.E[15] + m.E[4] * m.E[11] * m.E[14] + m.E[8] * m.E[6] * m.E[15] - m.E[8] * m.E[7] * m.E[14] - m.E[12] * m.E[6] * m.E[11] + m.E[12] * m.E[7] * m.E[10]; 471 result.E[ 8] = m.E[4] * m.E[ 9] * m.E[15] - m.E[4] * m.E[11] * m.E[13] - m.E[8] * m.E[5] * m.E[15] + m.E[8] * m.E[7] * m.E[13] + m.E[12] * m.E[5] * m.E[11] - m.E[12] * m.E[7] * m.E[ 9]; 472 result.E[12] = -m.E[4] * m.E[ 9] * m.E[14] + m.E[4] * m.E[10] * m.E[13] + m.E[8] * m.E[5] * m.E[14] - m.E[8] * m.E[6] * m.E[13] - m.E[12] * m.E[5] * m.E[10] + m.E[12] * m.E[6] * m.E[ 9]; 473 result.E[ 1] = -m.E[1] * m.E[10] * m.E[15] + m.E[1] * m.E[11] * m.E[14] + m.E[9] * m.E[2] * m.E[15] - m.E[9] * m.E[3] * m.E[14] - m.E[13] * m.E[2] * m.E[11] + m.E[13] * m.E[3] * m.E[10]; 474 result.E[ 5] = m.E[0] * m.E[10] * m.E[15] - m.E[0] * m.E[11] * m.E[14] - m.E[8] * m.E[2] * m.E[15] + m.E[8] * m.E[3] * m.E[14] + m.E[12] * m.E[2] * m.E[11] - m.E[12] * m.E[3] * m.E[10]; 475 result.E[ 9] = -m.E[0] * m.E[ 9] * m.E[15] + m.E[0] * m.E[11] * m.E[13] + m.E[8] * m.E[1] * m.E[15] - m.E[8] * m.E[3] * m.E[13] - m.E[12] * m.E[1] * m.E[11] + m.E[12] * m.E[3] * m.E[ 9]; 476 result.E[13] = m.E[0] * m.E[ 9] * m.E[14] - m.E[0] * m.E[10] * m.E[13] - m.E[8] * m.E[1] * m.E[14] + m.E[8] * m.E[2] * m.E[13] + m.E[12] * m.E[1] * m.E[10] - m.E[12] * m.E[2] * m.E[ 9]; 477 result.E[ 2] = m.E[1] * m.E[ 6] * m.E[15] - m.E[1] * m.E[ 7] * m.E[14] - m.E[5] * m.E[2] * m.E[15] + m.E[5] * m.E[3] * m.E[14] + m.E[13] * m.E[2] * m.E[ 7] - m.E[13] * m.E[3] * m.E[ 6]; 478 result.E[ 6] = -m.E[0] * m.E[ 6] * m.E[15] + m.E[0] * m.E[ 7] * m.E[14] + m.E[4] * m.E[2] * m.E[15] - m.E[4] * m.E[3] * m.E[14] - m.E[12] * m.E[2] * m.E[ 7] + m.E[12] * m.E[3] * m.E[ 6]; 479 result.E[10] = m.E[0] * m.E[ 5] * m.E[15] - m.E[0] * m.E[ 7] * m.E[13] - m.E[4] * m.E[1] * m.E[15] + m.E[4] * m.E[3] * m.E[13] + m.E[12] * m.E[1] * m.E[ 7] - m.E[12] * m.E[3] * m.E[ 5]; 480 result.E[14] = -m.E[0] * m.E[ 5] * m.E[14] + m.E[0] * m.E[ 6] * m.E[13] + m.E[4] * m.E[1] * m.E[14] - m.E[4] * m.E[2] * m.E[13] - m.E[12] * m.E[1] * m.E[ 6] + m.E[12] * m.E[2] * m.E[ 5]; 481 result.E[ 3] = -m.E[1] * m.E[ 6] * m.E[11] + m.E[1] * m.E[ 7] * m.E[10] + m.E[5] * m.E[2] * m.E[11] - m.E[5] * m.E[3] * m.E[10] - m.E[ 9] * m.E[2] * m.E[ 7] + m.E[ 9] * m.E[3] * m.E[ 6]; 482 result.E[ 7] = m.E[0] * m.E[ 6] * m.E[11] - m.E[0] * m.E[ 7] * m.E[10] - m.E[4] * m.E[2] * m.E[11] + m.E[4] * m.E[3] * m.E[10] + m.E[ 8] * m.E[2] * m.E[ 7] - m.E[ 8] * m.E[3] * m.E[ 6]; 483 result.E[11] = -m.E[0] * m.E[ 5] * m.E[11] + m.E[0] * m.E[ 7] * m.E[ 9] + m.E[4] * m.E[1] * m.E[11] - m.E[4] * m.E[3] * m.E[ 9] - m.E[ 8] * m.E[1] * m.E[ 7] + m.E[ 8] * m.E[3] * m.E[ 5]; 484 result.E[15] = m.E[0] * m.E[ 5] * m.E[10] - m.E[0] * m.E[ 6] * m.E[ 9] - m.E[4] * m.E[1] * m.E[10] + m.E[4] * m.E[2] * m.E[ 9] + m.E[ 8] * m.E[1] * m.E[ 6] - m.E[ 8] * m.E[2] * m.E[ 5]; 485 486 f32 determinant = m.E[0] * result.E[0] + m.E[1] * result.E[4] + m.E[2] * result.E[8] + m.E[3] * result.E[12]; 487 determinant = 1.0f / determinant; 488 for(i32 i = 0; i < 16; i++) 489 result.E[i] *= determinant; 490 return result; 491 } 492 493 function m4 494 m4_translation(v3 delta) 495 { 496 m4 result; 497 result.c[0] = (v4){{1, 0, 0, 0}}; 498 result.c[1] = (v4){{0, 1, 0, 0}}; 499 result.c[2] = (v4){{0, 0, 1, 0}}; 500 result.c[3] = (v4){{delta.x, delta.y, delta.z, 1}}; 501 return result; 502 } 503 504 function m4 505 m4_scale(v3 scale) 506 { 507 m4 result; 508 result.c[0] = (v4){{scale.x, 0, 0, 0}}; 509 result.c[1] = (v4){{0, scale.y, 0, 0}}; 510 result.c[2] = (v4){{0, 0, scale.z, 0}}; 511 result.c[3] = (v4){{0, 0, 0, 1}}; 512 return result; 513 } 514 515 function m4 516 m4_rotation_about_axis(v3 axis, f32 turns) 517 { 518 assert(f32_equal(v3_magnitude_squared(axis), 1.0f)); 519 f32 sa = sin_f32(turns * 2 * PI); 520 f32 ca = cos_f32(turns * 2 * PI); 521 f32 mca = 1.0f - ca; 522 523 f32 x = axis.x, x2 = x * x; 524 f32 y = axis.y, y2 = y * y; 525 f32 z = axis.z, z2 = z * z; 526 527 m4 result; 528 result.c[0] = (v4){{ca + mca * x2, mca * x * y - sa * z, mca * x * z + sa * y, 0}}; 529 result.c[1] = (v4){{mca * x * y + sa * z, ca + mca * y2, mca * y * z - sa * x, 0}}; 530 result.c[2] = (v4){{mca * x * z - sa * y, mca * y * z + sa * x, ca + mca * z2, 0}}; 531 result.c[3] = (v4){{0, 0, 0, 1}}; 532 return result; 533 } 534 535 function m4 536 m4_rotation_about_y(f32 turns) 537 { 538 m4 result = m4_rotation_about_axis((v3){.y = 1.0f}, turns); 539 return result; 540 } 541 542 function m4 543 y_aligned_volume_transform(v3 extent, v3 translation, f32 rotation_turns) 544 { 545 m4 T = m4_translation(translation); 546 m4 R = m4_rotation_about_axis((v3){.y = 1.0f}, rotation_turns); 547 m4 S = m4_scale(extent); 548 m4 result = m4_mul(T, m4_mul(R, S)); 549 return result; 550 } 551 552 function v4 553 m4_mul_v4(m4 a, v4 v) 554 { 555 v4 result; 556 result.x = v4_dot(m4_row(a, 0), v); 557 result.y = v4_dot(m4_row(a, 1), v); 558 result.z = v4_dot(m4_row(a, 2), v); 559 result.w = v4_dot(m4_row(a, 3), v); 560 return result; 561 } 562 563 function v3 564 m4_mul_v3(m4 a, v3 v) 565 { 566 v3 result = m4_mul_v4(a, (v4){{v.x, v.y, v.z, 1.0f}}).xyz; 567 return result; 568 } 569 570 function m4 571 orthographic_projection(f32 n, f32 f, f32 t, f32 r) 572 { 573 m4 result; 574 f32 a = -2 / (f - n); 575 f32 b = - (f + n) / (f - n); 576 result.c[0] = (v4){{1 / r, 0, 0, 0}}; 577 result.c[1] = (v4){{0, 1 / t, 0, 0}}; 578 result.c[2] = (v4){{0, 0, a, 0}}; 579 result.c[3] = (v4){{0, 0, b, 1}}; 580 return result; 581 } 582 583 function m4 584 perspective_projection(f32 n, f32 f, f32 fov, f32 aspect) 585 { 586 m4 result; 587 f32 t = n * tan_f32(fov / 2.0f); 588 f32 r = t * aspect; 589 f32 a = -(f + n) / (f - n); 590 f32 b = -2 * f * n / (f - n); 591 result.c[0] = (v4){{n / r, 0, 0, 0}}; 592 result.c[1] = (v4){{0, n / t, 0, 0}}; 593 result.c[2] = (v4){{0, 0, a, -1}}; 594 result.c[3] = (v4){{0, 0, b, 0}}; 595 return result; 596 } 597 598 function m4 599 camera_look_at(v3 camera, v3 point) 600 { 601 v3 orthogonal = {{0, 1.0f, 0}}; 602 v3 normal = v3_normalize(v3_sub(camera, point)); 603 v3 right = cross(orthogonal, normal); 604 v3 up = cross(normal, right); 605 606 v3 translate; 607 camera = v3_sub((v3){0}, camera); 608 translate.x = v3_dot(camera, right); 609 translate.y = v3_dot(camera, up); 610 translate.z = v3_dot(camera, normal); 611 612 m4 result; 613 result.c[0] = (v4){{right.x, up.x, normal.x, 0}}; 614 result.c[1] = (v4){{right.y, up.y, normal.y, 0}}; 615 result.c[2] = (v4){{right.z, up.z, normal.z, 0}}; 616 result.c[3] = (v4){{translate.x, translate.y, translate.z, 1}}; 617 return result; 618 } 619 620 /* NOTE(rnp): adapted from "Essential Mathematics for Games and Interactive Applications" (Verth, Bishop) */ 621 function f32 622 obb_raycast(m4 obb_orientation, v3 obb_size, v3 obb_center, ray r) 623 { 624 v3 p = v3_sub(obb_center, r.origin); 625 v3 X = obb_orientation.c[0].xyz; 626 v3 Y = obb_orientation.c[1].xyz; 627 v3 Z = obb_orientation.c[2].xyz; 628 629 /* NOTE(rnp): projects direction vector onto OBB axis */ 630 v3 f; 631 f.x = v3_dot(X, r.direction); 632 f.y = v3_dot(Y, r.direction); 633 f.z = v3_dot(Z, r.direction); 634 635 /* NOTE(rnp): projects relative vector onto OBB axis */ 636 v3 e; 637 e.x = v3_dot(X, p); 638 e.y = v3_dot(Y, p); 639 e.z = v3_dot(Z, p); 640 641 f32 result = 0; 642 f32 t[6] = {0}; 643 for (i32 i = 0; i < 3; i++) { 644 if (f32_equal(f.E[i], 0)) { 645 if (-e.E[i] - obb_size.E[i] > 0 || -e.E[i] + obb_size.E[i] < 0) 646 result = -1.0f; 647 f.E[i] = F32_EPSILON; 648 } 649 t[i * 2 + 0] = (e.E[i] + obb_size.E[i]) / f.E[i]; 650 t[i * 2 + 1] = (e.E[i] - obb_size.E[i]) / f.E[i]; 651 } 652 653 if (result != -1) { 654 f32 tmin = MAX(MAX(MIN(t[0], t[1]), MIN(t[2], t[3])), MIN(t[4], t[5])); 655 f32 tmax = MIN(MIN(MAX(t[0], t[1]), MAX(t[2], t[3])), MAX(t[4], t[5])); 656 if (tmax >= 0 && tmin <= tmax) { 657 result = tmin > 0 ? tmin : tmax; 658 } else { 659 result = -1; 660 } 661 } 662 663 return result; 664 } 665 666 function f32 667 complex_filter_first_moment(v2 *filter, i32 length, f32 sampling_frequency) 668 { 669 f32 n = 0, d = 0; 670 for (i32 i = 0; i < length; i++) { 671 f32 t = v2_magnitude_squared(filter[i]); 672 n += (f32)i * t; 673 d += t; 674 } 675 f32 result = n / d / sampling_frequency; 676 return result; 677 } 678 679 function f32 680 real_filter_first_moment(f32 *filter, i32 length, f32 sampling_frequency) 681 { 682 f32 n = 0, d = 0; 683 for (i32 i = 0; i < length; i++) { 684 f32 t = filter[i] * filter[i]; 685 n += (f32)i * t; 686 d += t; 687 } 688 f32 result = n / d / sampling_frequency; 689 return result; 690 } 691 692 function f32 693 tukey_window(f32 t, f32 tapering) 694 { 695 f32 r = tapering; 696 f32 result = 1; 697 if (t < r / 2) result = 0.5f * (1 + cos_f32(2 * PI * (t - r / 2) / r)); 698 if (t >= 1 - r / 2) result = 0.5f * (1 + cos_f32(2 * PI * (t - 1 + r / 2) / r)); 699 return result; 700 } 701 702 /* NOTE(rnp): adapted from "Discrete Time Signal Processing" (Oppenheim) */ 703 function f32 * 704 kaiser_low_pass_filter(Arena *arena, f32 cutoff_frequency, f32 sampling_frequency, f32 beta, i32 length) 705 { 706 f32 *result = push_array(arena, f32, length); 707 f32 wc = 2 * PI * cutoff_frequency / sampling_frequency; 708 f32 a = (f32)length / 2.0f; 709 f32 pi_i0_b = PI * (f32)cephes_i0(beta); 710 711 for (i32 n = 0; n < length; n++) { 712 f32 t = (f32)n - a; 713 f32 impulse = !f32_equal(t, 0) ? sin_f32(wc * t) / t : wc; 714 t = t / a; 715 f32 window = (f32)cephes_i0(beta * sqrt_f32(1 - t * t)) / pi_i0_b; 716 result[n] = impulse * window; 717 } 718 719 return result; 720 } 721 722 function f32 * 723 rf_chirp(Arena *arena, f32 min_frequency, f32 max_frequency, f32 sampling_frequency, 724 i32 length, b32 reverse) 725 { 726 f32 *result = push_array(arena, f32, length); 727 for (i32 i = 0; i < length; i++) { 728 i32 index = reverse? length - 1 - i : i; 729 f32 fc = min_frequency + (f32)i * (max_frequency - min_frequency) / (2 * (f32)length); 730 f32 arg = 2 * PI * fc * (f32)i / sampling_frequency; 731 result[index] = sin_f32(arg) * tukey_window((f32)i / (f32)length, 0.2f); 732 } 733 return result; 734 } 735 736 function v2 * 737 baseband_chirp(Arena *arena, f32 min_frequency, f32 max_frequency, f32 sampling_frequency, 738 i32 length, b32 reverse, f32 scale) 739 { 740 v2 *result = push_array(arena, v2, length); 741 f32 conjugate = reverse ? -1 : 1; 742 for (i32 i = 0; i < length; i++) { 743 i32 index = reverse? length - 1 - i : i; 744 f32 fc = min_frequency + (f32)i * (max_frequency - min_frequency) / (2 * (f32)length); 745 f32 arg = 2 * PI * fc * (f32)i / sampling_frequency; 746 v2 sample = {{scale * cos_f32(arg), conjugate * scale * sin_f32(arg)}}; 747 result[index] = v2_scale(sample, tukey_window((f32)i / (f32)length, 0.2f)); 748 } 749 return result; 750 } 751 752 function iv3 753 das_output_dimension(iv3 points) 754 { 755 iv3 result; 756 result.x = Max(points.x, 1); 757 result.y = Max(points.y, 1); 758 result.z = Max(points.z, 1); 759 760 switch (iv3_dimension(result)) { 761 case 1:{ 762 if (result.y > 1) result.x = result.y; 763 if (result.z > 1) result.x = result.z; 764 result.y = result.z = 1; 765 }break; 766 767 case 2:{ 768 if (result.x > 1) { 769 if (result.z > 1) result.y = result.z; 770 } else { 771 result.x = result.z; 772 } 773 result.z = 1; 774 }break; 775 776 case 3:{}break; 777 778 InvalidDefaultCase; 779 } 780 781 return result; 782 } 783 784 function m4 785 das_transform_1d(v3 p1, v3 p2) 786 { 787 v3 extent = v3_sub(p2, p1); 788 m4 result = { 789 .c[0] = (v4){{extent.x, extent.y, extent.z, 0.0f}}, 790 .c[1] = (v4){{0.0f, 0.0f, 0.0f, 0.0f}}, 791 .c[2] = (v4){{0.0f, 0.0f, 0.0f, 0.0f}}, 792 .c[3] = (v4){{p1.x, p1.y, p1.z, 1.0f}}, 793 }; 794 return result; 795 } 796 797 function m4 798 das_transform_2d_with_normal(v3 normal, v2 min_coordinate, v2 max_coordinate, f32 offset) 799 { 800 v3 U = {{0, 1.0f, 0}}; 801 if (f32_equal(v3_dot(U, normal), 1.0f)) 802 U = (v3){{1.0f, 0, 0}}; 803 804 v3 N = normal; 805 v3 V = cross(U, N); 806 807 v3 min = v3_add(v3_scale(U, min_coordinate.x), v3_scale(V, min_coordinate.y)); 808 v3 max = v3_add(v3_scale(U, max_coordinate.x), v3_scale(V, max_coordinate.y)); 809 810 v3 extent = v3_sub(max, min); 811 U = v3_scale(U, v3_dot(U, extent)); 812 V = v3_scale(V, v3_dot(V, extent)); 813 814 v3 t = v3_add(v3_scale(N, offset), min); 815 816 m4 result; 817 result.c[0] = (v4){{U.x, U.y, U.z, 0.0f}}; 818 result.c[1] = (v4){{V.x, V.y, V.z, 0.0f}}; 819 result.c[2] = (v4){{N.x, N.y, N.z, 0.0f}}; 820 result.c[3] = (v4){{t.x, t.y, t.z, 1.0f}}; 821 822 return result; 823 } 824 825 function m4 826 das_transform_2d_xz(v2 min_coordinate, v2 max_coordinate, f32 y_off) 827 { 828 m4 result = das_transform_2d_with_normal((v3){.y = 1.0f}, min_coordinate, max_coordinate, y_off); 829 return result; 830 } 831 832 function m4 833 das_transform_2d_yz(v2 min_coordinate, v2 max_coordinate, f32 x_off) 834 { 835 m4 result = das_transform_2d_with_normal((v3){.x = 1.0f}, min_coordinate, max_coordinate, x_off); 836 return result; 837 } 838 839 function m4 840 das_transform_2d_xy(v2 min_coordinate, v2 max_coordinate, f32 z_off) 841 { 842 m4 result = das_transform_2d_with_normal((v3){.z = 1.0f}, min_coordinate, max_coordinate, z_off); 843 return result; 844 } 845 846 function m4 847 das_transform_3d(v3 min_coordinate, v3 max_coordinate) 848 { 849 v3 extent = v3_sub(max_coordinate, min_coordinate); 850 m4 result; 851 result.c[0] = (v4){{extent.x, 0.0f, 0.0f, 0.0f}}; 852 result.c[1] = (v4){{0.0f, extent.y, 0.0f, 0.0f}}; 853 result.c[2] = (v4){{0.0f, 0.0f, extent.z, 0.0f}}; 854 result.c[3] = (v4){{min_coordinate.x, min_coordinate.y, min_coordinate.z, 1.0f}}; 855 return result; 856 } 857 858 function m4 859 das_transform(v3 min_coordinate, v3 max_coordinate, iv3 *points) 860 { 861 m4 result; 862 863 *points = das_output_dimension(*points); 864 865 switch (iv3_dimension(*points)) { 866 case 1:{result = das_transform_1d( min_coordinate, max_coordinate); }break; 867 case 2:{result = das_transform_2d_xz(XY(min_coordinate), XY(max_coordinate), 0);}break; 868 case 3:{result = das_transform_3d( min_coordinate, max_coordinate); }break; 869 } 870 871 return result; 872 } 873 874 function v2 875 plane_uv(v3 point, v3 U, v3 V) 876 { 877 v2 result; 878 result.x = v3_dot(U, point) / v3_dot(U, U); 879 result.y = v3_dot(V, point) / v3_dot(V, V); 880 return result; 881 } 882 883 function v4 884 hsv_to_rgb(v4 hsv) 885 { 886 /* f(k(n)) = V - V*S*max(0, min(k, min(4 - k, 1))) 887 * k(n) = fmod((n + H * 6), 6) 888 * (R, G, B) = (f(n = 5), f(n = 3), f(n = 1)) 889 */ 890 alignas(16) f32 nval[4] = {5.0f, 3.0f, 1.0f, 0.0f}; 891 f32x4 n = load_f32x4(nval); 892 f32x4 H = dup_f32x4(hsv.x); 893 f32x4 S = dup_f32x4(hsv.y); 894 f32x4 V = dup_f32x4(hsv.z); 895 f32x4 six = dup_f32x4(6); 896 897 f32x4 t = add_f32x4(n, mul_f32x4(six, H)); 898 f32x4 rem = floor_f32x4(div_f32x4(t, six)); 899 f32x4 k = sub_f32x4(t, mul_f32x4(rem, six)); 900 901 t = min_f32x4(sub_f32x4(dup_f32x4(4), k), dup_f32x4(1)); 902 t = max_f32x4(dup_f32x4(0), min_f32x4(k, t)); 903 t = mul_f32x4(t, mul_f32x4(S, V)); 904 905 v4 rgba; 906 store_f32x4(rgba.E, sub_f32x4(V, t)); 907 rgba.a = hsv.a; 908 return rgba; 909 } 910 911 function f32 912 ease_in_out_cubic(f32 t) 913 { 914 f32 result; 915 if (t < 0.5f) { 916 result = 4.0f * t * t * t; 917 } else { 918 t = -2.0f * t + 2.0f; 919 result = 1.0f - t * t * t / 2.0f; 920 } 921 return result; 922 } 923 924 function f32 925 ease_in_out_quartic(f32 t) 926 { 927 f32 result; 928 if (t < 0.5f) { 929 result = 8.0f * t * t * t * t; 930 } else { 931 t = -2.0f * t + 2.0f; 932 result = 1.0f - t * t * t * t / 2.0f; 933 } 934 return result; 935 }