Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #![feature(box_syntax)]
- extern crate simd;
- use simd::x86::avx::f32x8;
- use simd::x86::avx::AvxF32x8;
- use std::time::Instant;
/// Dimension of each vector in `f32` scalars; a vector occupies `N / 8` `f32x8` elements.
const N: usize = 1024;
/// Number of vectors in the set that gets orthogonalized.
const M: usize = 1024;
- /// v = v - u (<u, v> / <u, u>)
- fn sub_proj(u: &[f32x8], v: &mut [f32x8]) {
- assert_eq!(u.len(), v.len());
- let mut dot_uv_uu = f32x8::splat(0.);
- for i in 0 .. (u.len() / 2) {
- let u0 = u[2*i];
- let u1 = u[2*i+1];
- let v0 = v[2*i];
- let v1 = v[2*i+1];
- let uv0 = u0 * v0;
- let uv1 = u1 * v1;
- let uu0 = u0 * u1;
- let uu1 = u1 * u1;
- let t0 = uv0.hadd(uu0);
- let t1 = uv1.hadd(uu1);
- let t = t0.hadd(t1);
- dot_uv_uu = dot_uv_uu + t;
- }
- let t = dot_uv_uu.hadd(dot_uv_uu);
- let uv = t.extract(0) + t.extract(4);
- let uu = t.extract(1) + t.extract(5);
- let norm = f32x8::splat(uv / uu);
- for i in 0 .. u.len() {
- v[i] = v[i] - norm * u[i];
- }
- }
- fn gram_schmidt(v: &[f32x8], u: &mut [f32x8], vecs: usize) {
- assert_eq!(v.len(), u.len());
- let n = v.len() / vecs;
- let mut u_ki = Vec::with_capacity(n);
- u[..n].copy_from_slice(&v[..n]);
- for k in 1 .. vecs {
- u_ki.clear();
- u_ki.extend_from_slice(&v[k*n..k*n+n]);
- for i in 1 .. k {
- sub_proj(&u[i*n..i*n+n], &mut u_ki);
- }
- u[k*n..k*n+n].copy_from_slice(&u_ki);
- }
- }
- fn main() {
- let v = box [f32x8::splat(0.3); N/8 * M];
- let mut u = box [f32x8::splat(0.2); N/8 * M];
- let t0 = Instant::now();
- gram_schmidt(&*v, &mut *u, M);
- let dt = t0.elapsed();
- println!("{}.{:09}", dt.as_secs(), dt.subsec_nanos());
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement