das.glsl (13017B)
1 /* See LICENSE for license details. */ 2 #if InputDataKind == DataKind_Float32 3 #if CoherencyWeighting 4 #define RESULT_TYPE vec2 5 #define RESULT_COHERENT_CAST(a) (a).x 6 #define RESULT_INCOHERENT_CAST(a) (a).y 7 #endif 8 #define SAMPLE_TYPE f32 9 #elif InputDataKind == DataKind_Float32Complex 10 #if CoherencyWeighting 11 #define RESULT_TYPE vec3 12 #define RESULT_COHERENT_CAST(a) (a).xy 13 #define RESULT_INCOHERENT_CAST(a) (a).z 14 #endif 15 #define SAMPLE_TYPE f32vec2 16 #else 17 #error InputDataKind unsupported for DAS 18 #endif 19 20 #ifndef RESULT_TYPE 21 #define RESULT_TYPE SAMPLE_TYPE 22 #endif 23 24 #ifndef RESULT_COHERENT_CAST 25 #define RESULT_COHERENT_CAST(a) (a) 26 #endif 27 28 #if CoherencyWeighting 29 #define RESULT_STORE(a) RESULT_TYPE(RESULT_COHERENT_CAST(a), length(a)) 30 #else 31 #define RESULT_STORE(a) (a) 32 #endif 33 34 layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_PingPong) readonly buffer RF { 35 InputDataType rf[]; 36 }; 37 38 layout(std430, buffer_reference) restrict readonly buffer ArrayParameters { 39 DASArrayParameters data; 40 }; 41 42 layout(std430, buffer_reference) buffer Output { 43 OutputDataType x[]; 44 }; 45 46 layout(std430, buffer_reference) buffer IncoherentOutput { 47 f32 x[]; 48 }; 49 50 #define RX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 0, 4) 51 #define TX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 4, 4) 52 53 #define C_SPLINE 0.5 54 55 #if InputDataKind == DataKind_Float32Complex 56 vec2 rotate_iq(const vec2 iq, const float time) 57 { 58 float arg = radians(360) * DemodulationFrequency * time; 59 mat2 phasor = mat2( cos(arg), sin(arg), 60 -sin(arg), cos(arg)); 61 vec2 result = phasor * iq; 62 return result; 63 } 64 #else 65 #define rotate_iq(a, b) (a) 66 #endif 67 68 /* NOTE: See: https://cubic.org/docs/hermite.htm */ 69 SAMPLE_TYPE cubic(const int offset, const float t) 70 { 71 const mat4 h = mat4( 72 2, -3, 0, 1, 73 -2, 3, 0, 0, 74 1, -2, 1, 0, 75 1, -1, 0, 0 76 ); 77 78 SAMPLE_TYPE samples[4] = { 79 rf[offset + 0], 80 rf[offset + 1], 81 rf[offset + 2], 82 rf[offset + 3], 83 }; 84 85 vec4 S = vec4(t * t * t, t * t, t, 1); 86 SAMPLE_TYPE P1 = samples[1]; 87 SAMPLE_TYPE P2 = samples[2]; 88 SAMPLE_TYPE T1 = C_SPLINE * (P2 - samples[0]); 89 SAMPLE_TYPE T2 = C_SPLINE * (samples[3] - P1); 90 91 #if InputDataKind == DataKind_Float32 92 vec4 C = vec4(P1.x, P2.x, T1.x, T2.x); 93 SAMPLE_TYPE result = dot(S, h * C); 94 #elif InputDataKind == DataKind_Float32Complex 95 mat2x4 C = mat2x4(vec4(P1.x, P2.x, T1.x, T2.x), vec4(P1.y, P2.y, T1.y, T2.y)); 96 SAMPLE_TYPE result = S * h * C; 97 #endif 98 return result; 99 } 100 101 SAMPLE_TYPE sample_rf(const int rf_offset, const float index) 102 { 103 SAMPLE_TYPE result = SAMPLE_TYPE(0); 104 105 switch (InterpolationMode) { 106 case InterpolationMode_Nearest:{ 107 if (int(index) >= 0 && int(round(index)) < SampleCount) 108 result = rotate_iq(rf[rf_offset + int(round(index))], index / SamplingFrequency); 109 }break; 110 case InterpolationMode_Linear:{ 111 if (int(index) >= 0 && int(index) < SampleCount - 1) { 112 float tk, t = modf(index, tk); 113 int n = rf_offset + int(tk); 114 result = (1 - t) * rf[n] + t * rf[n + 1]; 115 result = rotate_iq(result, index / SamplingFrequency); 116 } 117 }break; 118 case InterpolationMode_Cubic:{ 119 if (int(index) > 0 && int(index) < SampleCount - 2) { 120 float tk, t = modf(index, tk); 121 result = rotate_iq(cubic(rf_offset + int(index), t), index / SamplingFrequency); 122 } 123 }break; 124 } 125 return result; 126 } 127 128 float sample_index(const float distance) 129 { 130 float time = distance / SpeedOfSound + TimeOffset; 131 return time * SamplingFrequency; 132 } 133 134 uint32_t output_index(uint32_t x, uint32_t y, uint32_t z) 135 { 136 uint32_t result = output_size_x * output_size_y * z + output_size_x * y + x; 137 return result; 138 } 139 140 float apodize(const float arg) 141 { 142 /* IMPORTANT: do not move calculation of arg into this function. It will generate a 143 * conditional move resulting in cos always being evaluated causing a slowdown */ 144 145 /* NOTE: constant F# dynamic receive apodization. This is implemented as: 146 * 147 * / |x_e - x_i|\ 148 * a(x, z) = cos(F# * π * ----------- ) ^ 2 149 * \ |z_e - z_i|/ 150 * 151 * where x,z_e are transducer element positions and x,z_i are image positions. */ 152 float a = cos(radians(180) * arg); 153 return a * a; 154 } 155 156 vec2 rca_plane_projection(const vec3 point, const bool rows) 157 { 158 vec2 result = vec2(point[int(rows)], point[2]); 159 return result; 160 } 161 162 float plane_wave_transmit_distance(const vec3 point, const float transmit_angle, const bool tx_rows) 163 { 164 return dot(rca_plane_projection(point, tx_rows), vec2(sin(transmit_angle), cos(transmit_angle))); 165 } 166 167 float cylindrical_wave_transmit_distance(const vec3 point, const float focal_depth, 168 const float transmit_angle, const bool tx_rows) 169 { 170 vec2 f = focal_depth * vec2(sin(transmit_angle), cos(transmit_angle)); 171 return distance(rca_plane_projection(point, tx_rows), f); 172 } 173 174 uint16_t tx_rx_orientation_for_acquisition(const int16_t acquisition) 175 { 176 uint16_t result = uint16_t(TransmitReceiveOrientation); 177 if (!bool(SingleOrientation)) 178 result = ArrayParameters(array_parameters).data.transmit_receive_orientations[acquisition]; 179 return result; 180 } 181 182 vec2 focal_vector_for_acquisition(const int16_t acquisition) 183 { 184 vec2 result = bool(SingleFocus) ? vec2(TransmitAngle, FocusDepth) 185 : ArrayParameters(array_parameters).data.focal_vectors[acquisition]; 186 return result; 187 } 188 189 float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const uint16_t transmit_receive_orientation) 190 { 191 float result = 0; 192 if (TX_ORIENTATION(transmit_receive_orientation) != RCAOrientation_None) { 193 bool tx_rows = TX_ORIENTATION(transmit_receive_orientation) == RCAOrientation_Rows; 194 float transmit_angle = radians(focal_vector.x); 195 float focal_depth = focal_vector.y; 196 197 if (isinf(focal_depth)) { 198 result = plane_wave_transmit_distance(world_point, transmit_angle, tx_rows); 199 } else { 200 result = cylindrical_wave_transmit_distance(world_point, focal_depth, transmit_angle, tx_rows); 201 } 202 } 203 return result; 204 } 205 206 RESULT_TYPE RCA(const vec3 world_point) 207 { 208 RESULT_TYPE result = RESULT_TYPE(0); 209 for (int16_t acquisition = int16_t(0); acquisition < int16_t(AcquisitionCount); acquisition++) { 210 const uint16_t tx_rx_orientation = tx_rx_orientation_for_acquisition(acquisition); 211 const bool rx_rows = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Rows; 212 const vec2 focal_vector = focal_vector_for_acquisition(acquisition); 213 vec2 xdc_world_point = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows); 214 float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation); 215 216 int rf_offset = int(rf_element_offset) + acquisition * SampleCount; 217 rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); 218 for (int chunk_channel = 0; chunk_channel < ChunkChannelCount; chunk_channel++) { 219 int rx_channel = channel_offset + chunk_channel; 220 vec3 rx_center = vec3(rx_channel * xdc_element_pitch, 0); 221 vec2 receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows); 222 float a_arg = abs(FNumber * receive_vector.x / abs(xdc_world_point.y)); 223 224 if (a_arg < 0.5f) { 225 float sidx = sample_index(transmit_distance + length(receive_vector)); 226 SAMPLE_TYPE value = apodize(a_arg) * sample_rf(rf_offset, sidx); 227 result += RESULT_STORE(value); 228 } 229 rf_offset += SampleCount * AcquisitionCount; 230 } 231 } 232 return result; 233 } 234 235 RESULT_TYPE HERCULES(const vec3 world_point) 236 { 237 const uint16_t tx_rx_orientation = tx_rx_orientation_for_acquisition(int16_t(0)); 238 const bool rx_cols = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Columns; 239 const vec2 focal_vector = focal_vector_for_acquisition(int16_t(0)); 240 const vec3 xdc_world_point = (xdc_transform * vec4(world_point, 1)).xyz; 241 242 const float transmit_index = sample_index(rca_transmit_distance(world_point, focal_vector, tx_rx_orientation)); 243 const float z_delta_squared = xdc_world_point.z * xdc_world_point.z; 244 const float f_number_over_z = abs(FNumber / xdc_world_point.z); 245 const vec2 xy_world_point = xdc_world_point.xy; 246 const float apodization_test = 0.25f / (f_number_over_z * f_number_over_z); 247 248 RESULT_TYPE result = RESULT_TYPE(0); 249 for (f32 chunk_channel = 0; chunk_channel < f32(ChunkChannelCount); chunk_channel += 1.0f) { 250 f32 rx_channel = f32(channel_offset) + chunk_channel; 251 int rf_offset = int(rf_element_offset) + int(chunk_channel) * SampleCount * AcquisitionCount + Sparse * SampleCount; 252 rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); 253 254 // NOTE(rnp): this wouldn't be so messy if we just forced an orientation like with FORCES 255 vec2 element_receive_delta_squared = xy_world_point; 256 if (rx_cols) element_receive_delta_squared.x -= rx_channel * xdc_element_pitch.x; 257 else element_receive_delta_squared.y -= rx_channel * xdc_element_pitch.y; 258 259 if (rx_cols) element_receive_delta_squared.x *= element_receive_delta_squared.x; 260 else element_receive_delta_squared.y *= element_receive_delta_squared.y; 261 262 for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) { 263 int tx_channel = bool(Sparse) ? ArrayParameters(array_parameters).data.sparse_elements[transmit - Sparse] 264 : transmit; 265 266 if (rx_cols) element_receive_delta_squared.y = xy_world_point.y - tx_channel * xdc_element_pitch.y; 267 else element_receive_delta_squared.x = xy_world_point.x - tx_channel * xdc_element_pitch.x; 268 269 if (rx_cols) element_receive_delta_squared.y *= element_receive_delta_squared.y; 270 else element_receive_delta_squared.x *= element_receive_delta_squared.x; 271 272 float element_delta_squared = element_receive_delta_squared.x + element_receive_delta_squared.y; 273 if (element_delta_squared < apodization_test) { 274 /* NOTE: tribal knowledge */ 275 float apodization = transmit == 0 ? inversesqrt(float(AcquisitionCount)) : 1.0f; 276 apodization *= apodize(f_number_over_z * sqrt(element_delta_squared)); 277 278 float index = transmit_index + sqrt(z_delta_squared + element_delta_squared) * SamplingFrequency / SpeedOfSound; 279 SAMPLE_TYPE value = apodization * sample_rf(rf_offset, index); 280 result += RESULT_STORE(value); 281 } 282 283 rf_offset += SampleCount; 284 } 285 } 286 return result; 287 } 288 289 RESULT_TYPE FORCES(const vec3 xdc_world_point) 290 { 291 RESULT_TYPE result = RESULT_TYPE(0); 292 293 float z_delta_squared = xdc_world_point.z * xdc_world_point.z; 294 float transmit_y_delta = xdc_world_point.y - xdc_element_pitch.y * ChannelCount / 2; 295 float transmit_yz_squared = transmit_y_delta * transmit_y_delta + z_delta_squared; 296 297 for (f32 chunk_channel = 0; chunk_channel < f32(ChunkChannelCount); chunk_channel += 1.0f) { 298 float rx_channel = float(channel_offset) + chunk_channel; 299 float receive_x_delta = xdc_world_point.x - rx_channel * xdc_element_pitch.x; 300 float a_arg = abs(FNumber * receive_x_delta / xdc_world_point.z); 301 302 if (a_arg < 0.5f) { 303 int rf_offset = int(rf_element_offset) + int(chunk_channel) * SampleCount * AcquisitionCount + Sparse * SampleCount; 304 rf_offset -= int(InterpolationMode == InterpolationMode_Cubic); 305 306 float receive_index = sample_index(sqrt(receive_x_delta * receive_x_delta + z_delta_squared)); 307 float apodization = apodize(a_arg); 308 for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) { 309 int tx_channel = bool(Sparse) ? ArrayParameters(array_parameters).data.sparse_elements[transmit - Sparse] 310 : transmit; 311 float transmit_x_delta = xdc_world_point.x - xdc_element_pitch.x * tx_channel; 312 float transmit_index = sqrt(transmit_yz_squared + transmit_x_delta * transmit_x_delta) * SamplingFrequency / SpeedOfSound; 313 314 SAMPLE_TYPE value = apodization * sample_rf(rf_offset, receive_index + transmit_index); 315 result += RESULT_STORE(value); 316 rf_offset += SampleCount; 317 } 318 } 319 } 320 return result; 321 } 322 323 void main() 324 { 325 uvec3 out_voxel = gl_GlobalInvocationID; 326 if (!all(lessThan(out_voxel, uvec3(output_size_x, output_size_y, output_size_z)))) 327 return; 328 329 vec3 image_points = vec3(output_size_x, output_size_y, output_size_z) - 1.0f; 330 vec3 point = vec3(out_voxel) / max(vec3(1.0f), image_points); 331 vec3 world_point = (voxel_transform * vec4(point, 1)).xyz; 332 333 uint32_t out_index = output_index(out_voxel.x, out_voxel.y, out_voxel.z); 334 335 RESULT_TYPE sum = RESULT_TYPE(0); 336 switch (AcquisitionKind) { 337 case AcquisitionKind_FORCES: 338 case AcquisitionKind_UFORCES: 339 { 340 sum = FORCES(world_point); 341 }break; 342 case AcquisitionKind_HERCULES: 343 case AcquisitionKind_UHERCULES: 344 case AcquisitionKind_HERO_PA: 345 { 346 sum = HERCULES(world_point); 347 }break; 348 case AcquisitionKind_Flash: 349 case AcquisitionKind_RCA_TPW: 350 case AcquisitionKind_RCA_VLS: 351 { 352 sum = RCA(world_point); 353 }break; 354 } 355 356 #if CoherencyWeighting 357 IncoherentOutput(incoherent_frame).x[out_index] += RESULT_INCOHERENT_CAST(sum); 358 #endif 359 360 Output(output_frame).x[out_index] += RESULT_COHERENT_CAST(sum); 361 }