Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <complex>
- #include <pmmintrin.h>
- #include <chrono>
- template <typename T>
- struct simd_complex;
- template <>
- struct simd_complex<double> {
- simd_complex(): value(_mm_setzero_pd()) {}
- simd_complex(double r): value(_mm_set_sd(r)) {}
- simd_complex(double r, double i): value(_mm_setr_pd(r, i)) {}
- simd_complex(__m128d v): value(v) {}
- operator __m128d() { return value; }
- friend
- simd_complex operator +(simd_complex a_b, simd_complex c_d) {
- return _mm_add_pd(a_b, c_d);
- }
- simd_complex& operator +=(simd_complex rhs) {
- return *this = *this + rhs;
- }
- friend
- simd_complex operator -(simd_complex a_b, simd_complex c_d) {
- return _mm_sub_pd(a_b, c_d);
- }
- simd_complex& operator -=(simd_complex rhs) {
- return *this = *this - rhs;
- }
- friend
- simd_complex operator *(simd_complex a_b, simd_complex c_d) {
- __m128d c_c = _mm_movedup_pd(c_d);
- __m128d ac_bc = _mm_mul_pd(a_b, c_c);
- __m128d b_a = _mm_shuffle_pd(a_b, a_b, 1);
- __m128d d_d = _mm_unpackhi_pd(c_d, c_d);
- __m128d bd_ad = _mm_mul_pd(b_a, d_d);
- return _mm_addsub_pd(ac_bc, bd_ad);
- }
- simd_complex& operator *=(simd_complex rhs) {
- return *this = *this * rhs;
- }
- friend
- simd_complex operator /(simd_complex a_b, simd_complex c_d) {
- __m128d b_a = _mm_shuffle_pd(a_b, a_b, 1);
- __m128d c_c = _mm_movedup_pd(c_d);
- __m128d bc_ac = _mm_mul_pd(b_a, c_c);
- __m128d d_d = _mm_unpackhi_pd(c_d, c_d);
- __m128d ad_bd = _mm_mul_pd(a_b, d_d);
- __m128d t = _mm_addsub_pd(bc_ac, ad_bd); // (bc-ad) + (ac+bd)i
- __m128d numerator = _mm_shuffle_pd(t, t, 1); // (ac+bd) + (bc-ad)i
- __m128d cc_dd = _mm_mul_pd(c_d, c_d);
- __m128d denominator = _mm_hadd_pd(cc_dd, cc_dd); // (cc+dd) + (cc+dd)i
- return _mm_div_pd(numerator, denominator);
- }
- simd_complex operator /=(simd_complex rhs) {
- return *this = *this / rhs;
- }
- double real() { return reinterpret_cast<double (&)[2]>(value)[0]; }
- double imag() { return reinterpret_cast<double (&)[2]>(value)[1]; }
- friend std::ostream& operator <<(std::ostream& o, simd_complex rhs) {
- return o << '(' << rhs.real() << ',' << rhs.imag() << ')';
- }
- private:
- __m128d value; // [0]: real, [1]: imag
- };
- double abs(simd_complex<double> a_b) {
- __m128d aa_bb = _mm_mul_pd(a_b, a_b);
- __m128d aaAbb_aaAbb = _mm_hadd_pd(aa_bb, aa_bb);
- __m128d result = _mm_sqrt_pd(aaAbb_aaAbb);
- return reinterpret_cast<double (&)[2]>(result)[0];
- }
- int main(void) {
- using namespace std;
- double a[4];
- for (int i = 0; i < 4; ++i)
- cin >> a[i];
- auto st = chrono::steady_clock::now();
- complex<double> c1(a[0], a[1]), c2(a[2], a[3]), c3(1, 1), c4(1, 1), c5, c6;
- for (int i = 0; i < 100000000; ++i) {
- c3 *= c1;
- c4 /= c2;
- c5 += c1;
- c6 -= c2;
- }
- auto t = chrono::steady_clock::now() - st;
- cout << c1 * c2 << endl << c1 / c2 << endl;
- cout << c3 << c4 << c5 << c6 << endl << t.count() << endl;
- st = chrono::steady_clock::now();
- simd_complex<double> d1(a[0], a[1]), d2(a[2], a[3]), d3(1, 1), d4(1, 1), d5, d6;
- for (int i = 0; i < 100000000; ++i) {
- d3 *= d1;
- d4 /= d2;
- d5 += d1;
- d6 -= d2;
- }
- t = chrono::steady_clock::now() - st;
- cout << d1 * d2 << endl << d1 / d2 << endl;
- cout << d3 << d4 << d5 << d6 << endl << t.count() << endl;
- cout << abs(simd_complex<double>(3, 4)) << endl;
- }
Advertisement
Add Comment
Please, Sign In to add comment