Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #![feature(test)]
- extern crate test;
- #[cfg(target_arch = "x86")]
- use std::arch::x86::*;
- #[cfg(target_arch = "x86_64")]
- use std::arch::x86_64::*;
- use std::time::Instant;
/// Four-component `f32` vector held as plain scalar fields (x, y, z, w).
///
/// This is the scalar baseline that the SIMD-backed `Vec4` is measured against.
struct Vector(f32, f32, f32, f32);

impl Vector {
    /// Builds a vector from its four components, in field order.
    #[inline(always)]
    fn new(x: f32, y: f32, z: f32, w: f32) -> Self {
        Self(x, y, z, w)
    }

    /// Returns the component-wise product of `a` and `b`.
    #[inline(always)]
    fn mul(a: &Vector, b: &Vector) -> Vector {
        let &Vector(ax, ay, az, aw) = a;
        let &Vector(bx, by, bz, bw) = b;
        Vector(ax * bx, ay * by, az * bz, aw * bw)
    }

    /// Returns the dot product of `a` and `b`.
    ///
    /// The four products are accumulated left to right, i.e.
    /// `((x + y) + z) + w`, which fixes the floating-point rounding order.
    #[inline(always)]
    fn dot(a: &Vector, b: &Vector) -> f32 {
        let Vector(x, y, z, w) = Vector::mul(a, b);
        x + y + z + w
    }
}
/// Four-component `f32` vector backed by a single 128-bit SSE register.
#[derive(Clone, Copy, Debug)]
struct Vec4(__m128);

impl Vec4 {
    /// Builds a vector from its four components.
    ///
    /// `_mm_set_ps` takes its arguments highest lane first, so `x` ends up in
    /// lane 3 and `w` in lane 0. `mul` and `dot` are lane-order agnostic, so
    /// this only matters if individual lanes are ever read back.
    #[inline(always)]
    fn new(x: f32, y: f32, z: f32, w: f32) -> Vec4 {
        // SAFETY: `_mm_set_ps` only needs SSE, which is part of the baseline
        // feature set of the x86_64 targets (and the common i686 targets).
        unsafe { Vec4(_mm_set_ps(x, y, z, w)) }
    }

    /// Returns the lane-wise product of `a` and `b`.
    #[inline(always)]
    fn mul(a: &Vec4, b: &Vec4) -> Vec4 {
        // SAFETY: `_mm_mul_ps` only needs SSE (see `new`).
        unsafe { Vec4(_mm_mul_ps(a.0, b.0)) }
    }

    /// Returns the dot product of `a` and `b`.
    ///
    /// The horizontal sum is built from SSE-only shuffles instead of
    /// `_mm_hadd_ps`: that intrinsic requires SSE3, which is not guaranteed by
    /// the baseline target features and is never enabled or detected here.
    /// The additions keep the same `(p0 + p1) + (p2 + p3)` association that
    /// two `_mm_hadd_ps` passes produce, so the result is bit-identical.
    #[inline(always)]
    fn dot(a: &Vec4, b: &Vec4) -> f32 {
        // SAFETY: every intrinsic below only needs SSE (see `new`).
        unsafe {
            let prod = _mm_mul_ps(a.0, b.0);
            // Swap neighbours within each pair: [p1, p0, p3, p2].
            let swapped = _mm_shuffle_ps::<0b10_11_00_01>(prod, prod);
            // Lane 0 = p0 + p1, lane 2 = p2 + p3.
            let pairs = _mm_add_ps(prod, swapped);
            // Bring the upper pair sum (lane 2) down into lane 0.
            let upper = _mm_movehl_ps(pairs, pairs);
            _mm_cvtss_f32(_mm_add_ss(pairs, upper))
        }
    }
}
- fn main() {
- let iterations = 128_000_000u64;
- // f32; 8
- {
- let a = Vector::new(1.0, 0.0, 0.0, 0.0);
- let b = Vector::new(0.0, 1.0, 0.0, 0.0);
- let now = Instant::now();
- for _ in 0..iterations {
- let x = Vector::mul(&a, &b);
- test::black_box(x);
- let d = Vector::dot(&a, &b);
- test::black_box(d);
- }
- println!("[f32: 4]: {}", now.elapsed().as_millis());
- }
- // simd
- {
- let a = Vec4::new(1.0, 0.0, 0.0, 0.0);
- let b = Vec4::new(0.0, 1.0, 0.0, 0.0);
- let now = Instant::now();
- for _ in 0..iterations {
- let x = Vec4::mul(&a, &b);
- test::black_box(x);
- let d = Vec4::dot(&a, &b);
- test::black_box(d);
- }
- println!("__m128 : {}", now.elapsed().as_millis());
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement