Created
May 11, 2020 22:00
-
-
Save NiallHornFX/fb2f59a40f9707ca7d90d69a4cdc0760 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scalar Trilinear Interoplation Lambda using SSE. | |
auto trilerp_v = [&](int i0, int j0, int k0, float z, float y, float x) -> auto | |
{ | |
vec4 aa(this->getdata(i0, j0, k0), this->getdata(i0, j0 + 1, k0), this->getdata(i0 + 1, j0, k0), this->getdata(i0 + 1, j0 + 1, k0)); | |
vec4 bb(this->getdata(i0, j0, k0 + 1), this->getdata(i0, j0 + 1, k0 + 1), this->getdata(i0 + 1, j0, k0 + 1), this->getdata(i0 + 1, j0 + 1, k0 + 1)); | |
vec4 coeff_z(z); vec4 ccoeff_z(1.0f - z); | |
__m128 L_a = _mm_fmadd_ps(ccoeff_z.sa, aa.sa, _mm_mul_ps(coeff_z.sa, bb.sa)); | |
__m128 cc = _mm_shuffle_ps(L_a, L_a, _MM_SHUFFLE(1, 0, 1, 0)); // (z0,z1,|z0,z1) | |
__m128 dd = _mm_shuffle_ps(L_a, L_a, _MM_SHUFFLE(2, 1, 2, 1)); // (z1,z2,|z1,z2) | |
vec4 coeff_y(y); vec4 ccoeff_y(1.0f - y); | |
__m128 L_b = _mm_fmadd_ps(ccoeff_y.sa, cc, _mm_mul_ps(coeff_y.sa, dd)); | |
__m128 ee = _mm_shuffle_ps(L_b, L_b, _MM_SHUFFLE(0, 0, 0, 0)); // (y0,|y1,y0,y0) | |
__m128 ff = _mm_shuffle_ps(L_b, L_b, _MM_SHUFFLE(1, 1, 1, 1)); // (y1,|y1,y0,y1) | |
vec4 coeff_x(x); vec4 ccoeff_x(1.0f - x); | |
__m128 L_C = _mm_fmadd_ps(ccoeff_x.sa, ee, _mm_mul_ps(coeff_x.sa, ff)); | |
return vec4(L_C).x; | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment