Skip to content

Instantly share code, notes, and snippets.

@bitshifter
Last active June 4, 2019 13:20
Show Gist options
  • Save bitshifter/7741d701f9ea1fbc29b9e39c01fb4f1c to your computer and use it in GitHub Desktop.
Save bitshifter/7741d701f9ea1fbc29b9e39c01fb4f1c to your computer and use it in GitHub Desktop.
mathbench mat4 mul comparison

mathbench Mat4 mul comparison

mathbench lib

/// Returns the product `lhs * rhs` of two `glam::Mat4` values.
///
/// The multiplication is delegated to glam's `Mul` implementation; this
/// function only gives that operation a stable, nameable symbol.
pub fn glam_mat4_mul(lhs: glam::Mat4, rhs: glam::Mat4) -> glam::Mat4 {
    // Explicit trait call; this is what the `*` operator desugars to.
    std::ops::Mul::mul(lhs, rhs)
}

/// Returns the product `lhs * rhs` of two `nalgebra::Matrix4<f32>` values.
///
/// The multiplication is delegated to nalgebra's `Mul` implementation; this
/// function only gives that operation a stable, nameable symbol.
pub fn nalgebra_mat4_mul(
    lhs: nalgebra::Matrix4<f32>,
    rhs: nalgebra::Matrix4<f32>,
) -> nalgebra::Matrix4<f32> {
    // Explicit trait call; this is what the `*` operator desugars to.
    std::ops::Mul::mul(lhs, rhs)
}

/// Returns the product `lhs * rhs` of two `cgmath::Matrix4<f32>` values.
///
/// The multiplication is delegated to cgmath's `Mul` implementation; this
/// function only gives that operation a stable, nameable symbol.
pub fn cgmath_mat4_mul(
    lhs: cgmath::Matrix4<f32>,
    rhs: cgmath::Matrix4<f32>,
) -> cgmath::Matrix4<f32> {
    // Explicit trait call; this is what the `*` operator desugars to.
    std::ops::Mul::mul(lhs, rhs)
}

glam

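The glam matrix multiply is inlined into the wrapper function: the listing below contains no `call` instruction, whereas the nalgebra and cgmath wrappers further down each call a separate, non-inlined `mul` function.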

cargo asm mathbench::glam_mat4_mul:

mathbench::glam_mat4_mul (src/lib.rs:190):
 mov     rax, rdi
 movaps  xmm5, xmmword, ptr, [rsi]
 movaps  xmm1, xmmword, ptr, [rsi, +, 16]
 movaps  xmm2, xmmword, ptr, [rsi, +, 32]
 movaps  xmm3, xmmword, ptr, [rsi, +, 48]
 movaps  xmm7, xmmword, ptr, [rdx]
 movaps  xmm6, xmmword, ptr, [rdx, +, 16]
 movaps  xmm9, xmmword, ptr, [rdx, +, 32]
 movaps  xmm8, xmmword, ptr, [rdx, +, 48]
 movaps  xmm0, xmm5
 shufps  xmm0, xmm5, 0
 mulps   xmm0, xmm7
 movaps  xmm4, xmm5
 shufps  xmm4, xmm5, 85
 mulps   xmm4, xmm6
 addps   xmm4, xmm0
 movaps  xmm0, xmm5
 shufps  xmm0, xmm5, 170
 mulps   xmm0, xmm9
 addps   xmm0, xmm4
 shufps  xmm5, xmm5, 255
 mulps   xmm5, xmm8
 addps   xmm5, xmm0
 movaps  xmm0, xmm1
 shufps  xmm0, xmm1, 0
 mulps   xmm0, xmm7
 movaps  xmm4, xmm1
 shufps  xmm4, xmm1, 85
 mulps   xmm4, xmm6
 addps   xmm4, xmm0
 movaps  xmm0, xmm1
 shufps  xmm0, xmm1, 170
 mulps   xmm0, xmm9
 addps   xmm0, xmm4
 shufps  xmm1, xmm1, 255
 mulps   xmm1, xmm8
 addps   xmm1, xmm0
 movaps  xmm0, xmm2
 shufps  xmm0, xmm2, 0
 mulps   xmm0, xmm7
 movaps  xmm4, xmm2
 shufps  xmm4, xmm2, 85
 mulps   xmm4, xmm6
 addps   xmm4, xmm0
 movaps  xmm0, xmm2
 shufps  xmm0, xmm2, 170
 mulps   xmm0, xmm9
 addps   xmm0, xmm4
 shufps  xmm2, xmm2, 255
 mulps   xmm2, xmm8
 addps   xmm2, xmm0
 movaps  xmm0, xmm3
 shufps  xmm0, xmm3, 0
 mulps   xmm0, xmm7
 movaps  xmm4, xmm3
 shufps  xmm4, xmm3, 85
 mulps   xmm4, xmm6
 addps   xmm4, xmm0
 movaps  xmm0, xmm3
 shufps  xmm0, xmm3, 170
 mulps   xmm0, xmm9
 addps   xmm0, xmm4
 shufps  xmm3, xmm3, 255
 mulps   xmm3, xmm8
 addps   xmm3, xmm0
 movaps  xmmword, ptr, [rdi], xmm5
 movaps  xmmword, ptr, [rdi, +, 16], xmm1
 movaps  xmmword, ptr, [rdi, +, 32], xmm2
 movaps  xmmword, ptr, [rdi, +, 48], xmm3
 ret

nalgebra

cargo asm mathbench::nalgebra_mat4_mul

mathbench::nalgebra_mat4_mul (src/lib.rs:194):
 push    rbx
 sub     rsp, 128
 mov     rbx, rdi
 movups  xmm0, xmmword, ptr, [rsi]
 movups  xmm1, xmmword, ptr, [rsi, +, 16]
 movups  xmm2, xmmword, ptr, [rsi, +, 32]
 movups  xmm3, xmmword, ptr, [rsi, +, 48]
 movaps  xmmword, ptr, [rsp, +, 48], xmm3
 movaps  xmmword, ptr, [rsp, +, 32], xmm2
 movaps  xmmword, ptr, [rsp, +, 16], xmm1
 movaps  xmmword, ptr, [rsp], xmm0
 movups  xmm0, xmmword, ptr, [rdx]
 movups  xmm1, xmmword, ptr, [rdx, +, 16]
 movups  xmm2, xmmword, ptr, [rdx, +, 32]
 movups  xmm3, xmmword, ptr, [rdx, +, 48]
 movaps  xmmword, ptr, [rsp, +, 112], xmm3
 movaps  xmmword, ptr, [rsp, +, 96], xmm2
 movaps  xmmword, ptr, [rsp, +, 80], xmm1
 movaps  xmmword, ptr, [rsp, +, 64], xmm0
 mov     rsi, rsp
 lea     rdx, [rsp, +, 64]
 call    nalgebra::base::ops::<impl core::ops::arith::Mul<nalgebra::base::matrix::Matrix<N,R2,C2,SB>> for nalgebra::base::matrix::Matrix<N,R1,C1,SA>>::mul
 mov     rax, rbx
 add     rsp, 128
 pop     rbx
 ret

cargo asm "nalgebra::base::ops::<impl core::ops::arith::Mul<nalgebra::base::matrix::Matrix<N,R2,C2,SB>> for nalgebra::base::matrix::Matrix<N,R1,C1,SA>>::mul"

nalgebra::base::ops::<impl core::ops::arith::Mul<nalgebra::base::matrix::Matrix<N,R2,C2,SB>> for nalgebra::base::matrix::Matrix<N,R1,C1,SA>>::mul (/home/x/.cargo/registry/src/github.com-1ecc6299db9ec823/nalgebra-0.18.0/src/base/ops.rs:628):
 movups  xmm3, xmmword, ptr, [rsi]
 movups  xmm2, xmmword, ptr, [rsi, +, 16]
 movups  xmm1, xmmword, ptr, [rsi, +, 32]
 movups  xmm8, xmmword, ptr, [rsi, +, 48]
 movss   xmm4, dword, ptr, [rdx]
 movss   xmm5, dword, ptr, [rdx, +, 4]
 shufps  xmm4, xmm4, 0
 mulps   xmm4, xmm3
 shufps  xmm5, xmm5, 0
 mulps   xmm5, xmm2
 addps   xmm5, xmm4
 movss   xmm6, dword, ptr, [rdx, +, 8]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm1
 addps   xmm6, xmm5
 movss   xmm4, dword, ptr, [rdx, +, 12]
 shufps  xmm4, xmm4, 0
 mulps   xmm4, xmm8
 addps   xmm4, xmm6
 movss   xmm5, dword, ptr, [rdx, +, 16]
 shufps  xmm5, xmm5, 0
 mulps   xmm5, xmm3
 movss   xmm6, dword, ptr, [rdx, +, 20]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm2
 addps   xmm6, xmm5
 movss   xmm7, dword, ptr, [rdx, +, 24]
 shufps  xmm7, xmm7, 0
 mulps   xmm7, xmm1
 addps   xmm7, xmm6
 movss   xmm5, dword, ptr, [rdx, +, 28]
 shufps  xmm5, xmm5, 0
 mulps   xmm5, xmm8
 addps   xmm5, xmm7
 movss   xmm6, dword, ptr, [rdx, +, 32]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm3
 movss   xmm7, dword, ptr, [rdx, +, 36]
 shufps  xmm7, xmm7, 0
 mulps   xmm7, xmm2
 addps   xmm7, xmm6
 movss   xmm0, dword, ptr, [rdx, +, 40]
 shufps  xmm0, xmm0, 0
 mulps   xmm0, xmm1
 addps   xmm0, xmm7
 movss   xmm6, dword, ptr, [rdx, +, 44]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm8
 addps   xmm6, xmm0
 movss   xmm0, dword, ptr, [rdx, +, 48]
 shufps  xmm0, xmm0, 0
 mulps   xmm0, xmm3
 movss   xmm3, dword, ptr, [rdx, +, 52]
 shufps  xmm3, xmm3, 0
 mulps   xmm3, xmm2
 addps   xmm3, xmm0
 movss   xmm0, dword, ptr, [rdx, +, 56]
 shufps  xmm0, xmm0, 0
 mulps   xmm0, xmm1
 addps   xmm0, xmm3
 movss   xmm1, dword, ptr, [rdx, +, 60]
 shufps  xmm1, xmm1, 0
 mulps   xmm1, xmm8
 addps   xmm1, xmm0
 movups  xmmword, ptr, [rdi], xmm4
 movups  xmmword, ptr, [rdi, +, 16], xmm5
 movups  xmmword, ptr, [rdi, +, 32], xmm6
 movups  xmmword, ptr, [rdi, +, 48], xmm1
 ret

cgmath

cargo asm mathbench::cgmath_mat4_mul

mathbench::cgmath_mat4_mul (src/lib.rs:201):
 push    rbx
 sub     rsp, 128
 mov     rbx, rdi
 movups  xmm0, xmmword, ptr, [rsi]
 movups  xmm1, xmmword, ptr, [rsi, +, 16]
 movups  xmm2, xmmword, ptr, [rsi, +, 32]
 movups  xmm3, xmmword, ptr, [rsi, +, 48]
 movaps  xmmword, ptr, [rsp, +, 48], xmm3
 movaps  xmmword, ptr, [rsp, +, 32], xmm2
 movaps  xmmword, ptr, [rsp, +, 16], xmm1
 movaps  xmmword, ptr, [rsp], xmm0
 movups  xmm0, xmmword, ptr, [rdx]
 movups  xmm1, xmmword, ptr, [rdx, +, 16]
 movups  xmm2, xmmword, ptr, [rdx, +, 32]
 movups  xmm3, xmmword, ptr, [rdx, +, 48]
 movaps  xmmword, ptr, [rsp, +, 112], xmm3
 movaps  xmmword, ptr, [rsp, +, 96], xmm2
 movaps  xmmword, ptr, [rsp, +, 80], xmm1
 movaps  xmmword, ptr, [rsp, +, 64], xmm0
 mov     rsi, rsp
 lea     rdx, [rsp, +, 64]
 call    <cgmath::matrix::Matrix4<S> as core::ops::arith::Mul>::mul
 mov     rax, rbx
 add     rsp, 128
 pop     rbx
 ret

cargo asm "<cgmath::matrix::Matrix4<S> as core::ops::arith::Mul>::mul"

<cgmath::matrix::Matrix4<S> as core::ops::arith::Mul>::mul (/home/x/.cargo/registry/src/github.com-1ecc6299db9ec823/cgmath-0.17.0/src/macros.rs:69):
 movups  xmm4, xmmword, ptr, [rsi]
 movups  xmm3, xmmword, ptr, [rsi, +, 16]
 movups  xmm2, xmmword, ptr, [rsi, +, 32]
 movups  xmm8, xmmword, ptr, [rsi, +, 48]
 movss   xmm6, dword, ptr, [rdx]
 movss   xmm7, dword, ptr, [rdx, +, 4]
 movss   xmm5, dword, ptr, [rdx, +, 8]
 movss   xmm1, dword, ptr, [rdx, +, 12]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm4
 shufps  xmm7, xmm7, 0
 mulps   xmm7, xmm3
 addps   xmm7, xmm6
 movss   xmm6, dword, ptr, [rdx, +, 16]
 shufps  xmm5, xmm5, 0
 mulps   xmm5, xmm2
 addps   xmm5, xmm7
 movss   xmm7, dword, ptr, [rdx, +, 20]
 shufps  xmm1, xmm1, 0
 mulps   xmm1, xmm8
 addps   xmm1, xmm5
 movss   xmm0, dword, ptr, [rdx, +, 24]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm4
 shufps  xmm7, xmm7, 0
 mulps   xmm7, xmm3
 addps   xmm7, xmm6
 movss   xmm5, dword, ptr, [rdx, +, 28]
 shufps  xmm0, xmm0, 0
 mulps   xmm0, xmm2
 addps   xmm0, xmm7
 movss   xmm6, dword, ptr, [rdx, +, 32]
 shufps  xmm5, xmm5, 0
 mulps   xmm5, xmm8
 addps   xmm5, xmm0
 movss   xmm0, dword, ptr, [rdx, +, 36]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm4
 shufps  xmm0, xmm0, 0
 mulps   xmm0, xmm3
 addps   xmm0, xmm6
 movss   xmm6, dword, ptr, [rdx, +, 40]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm2
 addps   xmm6, xmm0
 movss   xmm0, dword, ptr, [rdx, +, 44]
 shufps  xmm0, xmm0, 0
 mulps   xmm0, xmm8
 addps   xmm0, xmm6
 movss   xmm6, dword, ptr, [rdx, +, 48]
 shufps  xmm6, xmm6, 0
 mulps   xmm6, xmm4
 movss   xmm4, dword, ptr, [rdx, +, 52]
 shufps  xmm4, xmm4, 0
 mulps   xmm4, xmm3
 addps   xmm4, xmm6
 movss   xmm3, dword, ptr, [rdx, +, 56]
 shufps  xmm3, xmm3, 0
 mulps   xmm3, xmm2
 addps   xmm3, xmm4
 movss   xmm2, dword, ptr, [rdx, +, 60]
 shufps  xmm2, xmm2, 0
 mulps   xmm2, xmm8
 addps   xmm2, xmm3
 movups  xmmword, ptr, [rdi], xmm1
 movups  xmmword, ptr, [rdi, +, 16], xmm5
 movups  xmmword, ptr, [rdi, +, 32], xmm0
 movups  xmmword, ptr, [rdi, +, 48], xmm2
 ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment