das.glsl - ogl_beamforming - Ultrasound Beamforming Implemented with OpenGL

das.glsl (13017B)
      1 /* See LICENSE for license details. */
      2 #if   InputDataKind == DataKind_Float32
      3   #if CoherencyWeighting
      4     #define RESULT_TYPE               vec2
      5     #define RESULT_COHERENT_CAST(a)   (a).x
      6     #define RESULT_INCOHERENT_CAST(a) (a).y
      7   #endif
      8   #define SAMPLE_TYPE f32
      9 #elif InputDataKind == DataKind_Float32Complex
     10   #if CoherencyWeighting
     11     #define RESULT_TYPE               vec3
     12     #define RESULT_COHERENT_CAST(a)   (a).xy
     13     #define RESULT_INCOHERENT_CAST(a) (a).z
     14   #endif
     15   #define SAMPLE_TYPE f32vec2
     16 #else
     17   #error InputDataKind unsupported for DAS
     18 #endif
     19 
     20 #ifndef RESULT_TYPE
     21   #define RESULT_TYPE SAMPLE_TYPE
     22 #endif
     23 
     24 #ifndef RESULT_COHERENT_CAST
     25   #define RESULT_COHERENT_CAST(a) (a)
     26 #endif
     27 
     28 #if CoherencyWeighting
     29   #define RESULT_STORE(a) RESULT_TYPE(RESULT_COHERENT_CAST(a), length(a))
     30 #else
     31   #define RESULT_STORE(a) (a)
     32 #endif
     33 
     34 layout(set = ShaderResourceKind_Buffer, binding = ShaderBufferSlot_PingPong) readonly buffer RF {
     35 	InputDataType rf[];
     36 };
     37 
     38 layout(std430, buffer_reference) restrict readonly buffer ArrayParameters {
     39 	DASArrayParameters data;
     40 };
     41 
     42 layout(std430, buffer_reference) buffer Output {
     43 	OutputDataType x[];
     44 };
     45 
     46 layout(std430, buffer_reference) buffer IncoherentOutput {
     47 	f32 x[];
     48 };
     49 
     50 #define RX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 0, 4)
     51 #define TX_ORIENTATION(tx_rx) bitfieldExtract((tx_rx), 4, 4)
     52 
     53 #define C_SPLINE 0.5
     54 
     55 #if InputDataKind == DataKind_Float32Complex
     56 vec2 rotate_iq(const vec2 iq, const float time)
     57 {
     58 	float arg    = radians(360) * DemodulationFrequency * time;
     59 	mat2  phasor = mat2( cos(arg), sin(arg),
     60 	                    -sin(arg), cos(arg));
     61 	vec2 result = phasor * iq;
     62 	return result;
     63 }
     64 #else
     65   #define rotate_iq(a, b) (a)
     66 #endif
     67 
     68 /* NOTE: See: https://cubic.org/docs/hermite.htm */
     69 SAMPLE_TYPE cubic(const int offset, const float t)
     70 {
     71 	const mat4 h = mat4(
     72 		 2, -3,  0, 1,
     73 		-2,  3,  0, 0,
     74 		 1, -2,  1, 0,
     75 		 1, -1,  0, 0
     76 	);
     77 
     78 	SAMPLE_TYPE samples[4] = {
     79 		rf[offset + 0],
     80 		rf[offset + 1],
     81 		rf[offset + 2],
     82 		rf[offset + 3],
     83 	};
     84 
     85 	vec4        S  = vec4(t * t * t, t * t, t, 1);
     86 	SAMPLE_TYPE P1 = samples[1];
     87 	SAMPLE_TYPE P2 = samples[2];
     88 	SAMPLE_TYPE T1 = C_SPLINE * (P2 - samples[0]);
     89 	SAMPLE_TYPE T2 = C_SPLINE * (samples[3] - P1);
     90 
     91 	#if   InputDataKind == DataKind_Float32
     92 	vec4 C = vec4(P1.x, P2.x, T1.x, T2.x);
     93 	SAMPLE_TYPE result = dot(S, h * C);
     94 	#elif InputDataKind == DataKind_Float32Complex
     95 	mat2x4 C = mat2x4(vec4(P1.x, P2.x, T1.x, T2.x), vec4(P1.y, P2.y, T1.y, T2.y));
     96 	SAMPLE_TYPE result = S * h * C;
     97 	#endif
     98 	return result;
     99 }
    100 
    101 SAMPLE_TYPE sample_rf(const int rf_offset, const float index)
    102 {
    103 	SAMPLE_TYPE result = SAMPLE_TYPE(0);
    104 
    105 	switch (InterpolationMode) {
    106 	case InterpolationMode_Nearest:{
    107 		if (int(index) >= 0 && int(round(index)) < SampleCount)
    108 			result = rotate_iq(rf[rf_offset + int(round(index))], index / SamplingFrequency);
    109 	}break;
    110 	case InterpolationMode_Linear:{
    111 		if (int(index) >= 0 && int(index) < SampleCount - 1) {
    112 			float tk, t = modf(index, tk);
    113 			int n = rf_offset + int(tk);
    114 			result = (1 - t) * rf[n] + t * rf[n + 1];
    115 			result = rotate_iq(result, index / SamplingFrequency);
    116 		}
    117 	}break;
    118 	case InterpolationMode_Cubic:{
    119 		if (int(index) > 0 && int(index) < SampleCount - 2) {
    120 			float tk, t = modf(index, tk);
    121 			result = rotate_iq(cubic(rf_offset + int(index), t), index / SamplingFrequency);
    122 		}
    123 	}break;
    124 	}
    125 	return result;
    126 }
    127 
    128 float sample_index(const float distance)
    129 {
    130 	float  time = distance / SpeedOfSound + TimeOffset;
    131 	return time * SamplingFrequency;
    132 }
    133 
    134 uint32_t output_index(uint32_t x, uint32_t y, uint32_t z)
    135 {
    136 	uint32_t result = output_size_x * output_size_y * z + output_size_x * y + x;
    137 	return result;
    138 }
    139 
    140 float apodize(const float arg)
    141 {
    142 	/* IMPORTANT: do not move calculation of arg into this function. It will generate a
    143 	 * conditional move resulting in cos always being evaluated causing a slowdown */
    144 
    145 	/* NOTE: constant F# dynamic receive apodization. This is implemented as:
    146 	 *
    147 	 *                  /        |x_e - x_i|\
    148 	 *    a(x, z) = cos(F# * π * ----------- ) ^ 2
    149 	 *                  \        |z_e - z_i|/
    150 	 *
    151 	 * where x,z_e are transducer element positions and x,z_i are image positions. */
    152 	float a = cos(radians(180) * arg);
    153 	return a * a;
    154 }
    155 
    156 vec2 rca_plane_projection(const vec3 point, const bool rows)
    157 {
    158 	vec2 result = vec2(point[int(rows)], point[2]);
    159 	return result;
    160 }
    161 
    162 float plane_wave_transmit_distance(const vec3 point, const float transmit_angle, const bool tx_rows)
    163 {
    164 	return dot(rca_plane_projection(point, tx_rows), vec2(sin(transmit_angle), cos(transmit_angle)));
    165 }
    166 
    167 float cylindrical_wave_transmit_distance(const vec3 point, const float focal_depth,
    168                                          const float transmit_angle, const bool tx_rows)
    169 {
    170 	vec2 f = focal_depth * vec2(sin(transmit_angle), cos(transmit_angle));
    171 	return distance(rca_plane_projection(point, tx_rows), f);
    172 }
    173 
    174 uint16_t tx_rx_orientation_for_acquisition(const int16_t acquisition)
    175 {
    176 	uint16_t result = uint16_t(TransmitReceiveOrientation);
    177 	if (!bool(SingleOrientation))
    178 		result = ArrayParameters(array_parameters).data.transmit_receive_orientations[acquisition];
    179 	return result;
    180 }
    181 
    182 vec2 focal_vector_for_acquisition(const int16_t acquisition)
    183 {
    184 	vec2 result = bool(SingleFocus) ? vec2(TransmitAngle, FocusDepth)
    185 	                                : ArrayParameters(array_parameters).data.focal_vectors[acquisition];
    186 	return result;
    187 }
    188 
    189 float rca_transmit_distance(const vec3 world_point, const vec2 focal_vector, const uint16_t transmit_receive_orientation)
    190 {
    191 	float result = 0;
    192 	if (TX_ORIENTATION(transmit_receive_orientation) != RCAOrientation_None) {
    193 		bool  tx_rows        = TX_ORIENTATION(transmit_receive_orientation) == RCAOrientation_Rows;
    194 		float transmit_angle = radians(focal_vector.x);
    195 		float focal_depth    = focal_vector.y;
    196 
    197 		if (isinf(focal_depth)) {
    198 			result = plane_wave_transmit_distance(world_point, transmit_angle, tx_rows);
    199 		} else {
    200 			result = cylindrical_wave_transmit_distance(world_point, focal_depth, transmit_angle, tx_rows);
    201 		}
    202 	}
    203 	return result;
    204 }
    205 
    206 RESULT_TYPE RCA(const vec3 world_point)
    207 {
    208 	RESULT_TYPE result = RESULT_TYPE(0);
    209 	for (int16_t acquisition = int16_t(0); acquisition < int16_t(AcquisitionCount); acquisition++) {
    210 		const uint16_t tx_rx_orientation = tx_rx_orientation_for_acquisition(acquisition);
    211 		const bool     rx_rows           = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Rows;
    212 		const vec2     focal_vector      = focal_vector_for_acquisition(acquisition);
    213 		vec2  xdc_world_point   = rca_plane_projection((xdc_transform * vec4(world_point, 1)).xyz, rx_rows);
    214 		float transmit_distance = rca_transmit_distance(world_point, focal_vector, tx_rx_orientation);
    215 
    216 		int rf_offset  = int(rf_element_offset) + acquisition * SampleCount;
    217 		rf_offset     -= int(InterpolationMode == InterpolationMode_Cubic);
    218 		for (int chunk_channel = 0; chunk_channel < ChunkChannelCount; chunk_channel++) {
    219 			int   rx_channel     = channel_offset + chunk_channel;
    220 			vec3  rx_center      = vec3(rx_channel * xdc_element_pitch, 0);
    221 			vec2  receive_vector = xdc_world_point - rca_plane_projection(rx_center, rx_rows);
    222 			float a_arg          = abs(FNumber * receive_vector.x / abs(xdc_world_point.y));
    223 
    224 			if (a_arg < 0.5f) {
    225 				float       sidx  = sample_index(transmit_distance + length(receive_vector));
    226 				SAMPLE_TYPE value = apodize(a_arg) * sample_rf(rf_offset, sidx);
    227 				result += RESULT_STORE(value);
    228 			}
    229 			rf_offset += SampleCount * AcquisitionCount;
    230 		}
    231 	}
    232 	return result;
    233 }
    234 
    235 RESULT_TYPE HERCULES(const vec3 world_point)
    236 {
    237 	const uint16_t tx_rx_orientation = tx_rx_orientation_for_acquisition(int16_t(0));
    238 	const bool     rx_cols           = RX_ORIENTATION(tx_rx_orientation) == RCAOrientation_Columns;
    239 	const vec2     focal_vector      = focal_vector_for_acquisition(int16_t(0));
    240 	const vec3     xdc_world_point   = (xdc_transform * vec4(world_point, 1)).xyz;
    241 
    242 	const float transmit_index   = sample_index(rca_transmit_distance(world_point, focal_vector, tx_rx_orientation));
    243 	const float z_delta_squared  = xdc_world_point.z * xdc_world_point.z;
    244 	const float f_number_over_z  = abs(FNumber / xdc_world_point.z);
    245 	const vec2  xy_world_point   = xdc_world_point.xy;
    246 	const float apodization_test = 0.25f / (f_number_over_z * f_number_over_z);
    247 
    248 	RESULT_TYPE result = RESULT_TYPE(0);
    249 	for (f32 chunk_channel = 0; chunk_channel < f32(ChunkChannelCount); chunk_channel += 1.0f) {
    250 		f32 rx_channel  = f32(channel_offset) + chunk_channel;
    251 		int rf_offset   = int(rf_element_offset) + int(chunk_channel) * SampleCount * AcquisitionCount + Sparse * SampleCount;
    252 		rf_offset      -= int(InterpolationMode == InterpolationMode_Cubic);
    253 
    254 		// NOTE(rnp): this wouldn't be so messy if we just forced an orientation like with FORCES
    255 		vec2 element_receive_delta_squared = xy_world_point;
    256 		if (rx_cols) element_receive_delta_squared.x -= rx_channel * xdc_element_pitch.x;
    257 		else         element_receive_delta_squared.y -= rx_channel * xdc_element_pitch.y;
    258 
    259 		if (rx_cols) element_receive_delta_squared.x *= element_receive_delta_squared.x;
    260 		else         element_receive_delta_squared.y *= element_receive_delta_squared.y;
    261 
    262 		for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
    263 			int tx_channel = bool(Sparse) ? ArrayParameters(array_parameters).data.sparse_elements[transmit - Sparse]
    264 			                              : transmit;
    265 
    266 			if (rx_cols) element_receive_delta_squared.y  = xy_world_point.y - tx_channel * xdc_element_pitch.y;
    267 			else         element_receive_delta_squared.x  = xy_world_point.x - tx_channel * xdc_element_pitch.x;
    268 
    269 			if (rx_cols) element_receive_delta_squared.y *= element_receive_delta_squared.y;
    270 			else         element_receive_delta_squared.x *= element_receive_delta_squared.x;
    271 
    272 			float element_delta_squared = element_receive_delta_squared.x + element_receive_delta_squared.y;
    273 			if (element_delta_squared < apodization_test) {
    274 				/* NOTE: tribal knowledge */
    275 				float apodization = transmit == 0 ? inversesqrt(float(AcquisitionCount)) : 1.0f;
    276 				apodization *= apodize(f_number_over_z * sqrt(element_delta_squared));
    277 
    278 				float index = transmit_index + sqrt(z_delta_squared + element_delta_squared) * SamplingFrequency / SpeedOfSound;
    279 				SAMPLE_TYPE value = apodization * sample_rf(rf_offset, index);
    280 				result += RESULT_STORE(value);
    281 			}
    282 
    283 			rf_offset += SampleCount;
    284 		}
    285 	}
    286 	return result;
    287 }
    288 
    289 RESULT_TYPE FORCES(const vec3 xdc_world_point)
    290 {
    291 	RESULT_TYPE result = RESULT_TYPE(0);
    292 
    293 	float z_delta_squared     = xdc_world_point.z * xdc_world_point.z;
    294 	float transmit_y_delta    = xdc_world_point.y - xdc_element_pitch.y * ChannelCount / 2;
    295 	float transmit_yz_squared = transmit_y_delta * transmit_y_delta + z_delta_squared;
    296 
    297 	for (f32 chunk_channel = 0; chunk_channel < f32(ChunkChannelCount); chunk_channel += 1.0f) {
    298 		float rx_channel      = float(channel_offset) + chunk_channel;
    299 		float receive_x_delta = xdc_world_point.x - rx_channel * xdc_element_pitch.x;
    300 		float a_arg           = abs(FNumber * receive_x_delta / xdc_world_point.z);
    301 
    302 		if (a_arg < 0.5f) {
    303 			int rf_offset  = int(rf_element_offset) + int(chunk_channel) * SampleCount * AcquisitionCount + Sparse * SampleCount;
    304 			rf_offset     -= int(InterpolationMode == InterpolationMode_Cubic);
    305 
    306 			float receive_index = sample_index(sqrt(receive_x_delta * receive_x_delta + z_delta_squared));
    307 			float apodization   = apodize(a_arg);
    308 			for (int transmit = Sparse; transmit < AcquisitionCount; transmit++) {
    309 				int tx_channel = bool(Sparse) ? ArrayParameters(array_parameters).data.sparse_elements[transmit - Sparse]
    310 				                               : transmit;
    311 				float transmit_x_delta = xdc_world_point.x - xdc_element_pitch.x * tx_channel;
    312 				float transmit_index   = sqrt(transmit_yz_squared + transmit_x_delta * transmit_x_delta) * SamplingFrequency / SpeedOfSound;
    313 
    314 				SAMPLE_TYPE value = apodization * sample_rf(rf_offset, receive_index + transmit_index);
    315 				result    += RESULT_STORE(value);
    316 				rf_offset += SampleCount;
    317 			}
    318 		}
    319 	}
    320 	return result;
    321 }
    322 
    323 void main()
    324 {
    325 	uvec3 out_voxel = gl_GlobalInvocationID;
    326 	if (!all(lessThan(out_voxel, uvec3(output_size_x, output_size_y, output_size_z))))
    327 		return;
    328 
    329 	vec3 image_points = vec3(output_size_x, output_size_y, output_size_z) - 1.0f;
    330 	vec3 point        = vec3(out_voxel) / max(vec3(1.0f), image_points);
    331 	vec3 world_point  = (voxel_transform * vec4(point, 1)).xyz;
    332 
    333 	uint32_t out_index = output_index(out_voxel.x, out_voxel.y, out_voxel.z);
    334 
    335 	RESULT_TYPE sum = RESULT_TYPE(0);
    336 	switch (AcquisitionKind) {
    337 	case AcquisitionKind_FORCES:
    338 	case AcquisitionKind_UFORCES:
    339 	{
    340 		sum = FORCES(world_point);
    341 	}break;
    342 	case AcquisitionKind_HERCULES:
    343 	case AcquisitionKind_UHERCULES:
    344 	case AcquisitionKind_HERO_PA:
    345 	{
    346 		sum = HERCULES(world_point);
    347 	}break;
    348 	case AcquisitionKind_Flash:
    349 	case AcquisitionKind_RCA_TPW:
    350 	case AcquisitionKind_RCA_VLS:
    351 	{
    352 		sum = RCA(world_point);
    353 	}break;
    354 	}
    355 
    356 	#if CoherencyWeighting
    357 	IncoherentOutput(incoherent_frame).x[out_index] += RESULT_INCOHERENT_CAST(sum);
    358 	#endif
    359 
    360 	Output(output_frame).x[out_index] += RESULT_COHERENT_CAST(sum);
    361 }